Diffstat (limited to 'contrib/llvm-project/llvm/lib/Analysis')
79 files changed, 5706 insertions, 1907 deletions
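The AliasAnalysis.cpp hunks below give ExternalAAWrapperPass out-of-line constructors and thread it through the legacy-PM query paths (addUsedIfAvailable plus the WrapperPass->CB(P, F, AAR) call in createLegacyPMAAResults). As a rough sketch of how a client would use that hook — the pass-manager setup and the empty callback body here are illustrative assumptions, not part of the change:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"

// Hypothetical driver code: register an external AA callback so that
// AAResultsWrapperPass / createLegacyPMAAResults can consult it.
void addExternalAA(llvm::legacy::PassManager &PM) {
  PM.add(llvm::createExternalAAWrapperPass(
      [](llvm::Pass &P, llvm::Function &F, llvm::AAResults &AAR) {
        // A real client would build its own AAResult here and call
        // AAR.addAAResult(...); left empty in this sketch.
        (void)P; (void)F; (void)AAR;
      }));
}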
diff --git a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp index 32241e355eb8..1c7678a602d8 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysis.cpp @@ -44,6 +44,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" @@ -734,6 +735,15 @@ namespace { } // end anonymous namespace +ExternalAAWrapperPass::ExternalAAWrapperPass() : ImmutablePass(ID) { + initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +ExternalAAWrapperPass::ExternalAAWrapperPass(CallbackT CB) + : ImmutablePass(ID), CB(std::move(CB)) { + initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + char ExternalAAWrapperPass::ID = 0; INITIALIZE_PASS(ExternalAAWrapperPass, "external-aa", "External Alias Analysis", @@ -784,7 +794,7 @@ bool AAResultsWrapperPass::runOnFunction(Function &F) { // previous object first, in this case replacing it with an empty one, before // registering new results. AAR.reset( - new AAResults(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI())); + new AAResults(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F))); // BasicAA is always available for function analyses. Also, we add it first // so that it can trump TBAA results when it proves MustAlias. @@ -836,11 +846,12 @@ void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addUsedIfAvailable<SCEVAAWrapperPass>(); AU.addUsedIfAvailable<CFLAndersAAWrapperPass>(); AU.addUsedIfAvailable<CFLSteensAAWrapperPass>(); + AU.addUsedIfAvailable<ExternalAAWrapperPass>(); } AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F, BasicAAResult &BAR) { - AAResults AAR(P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI()); + AAResults AAR(P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F)); // Add in our explicitly constructed BasicAA results. 
if (!DisableBasicAA) @@ -861,6 +872,9 @@ AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F, AAR.addAAResult(WrapperPass->getResult()); if (auto *WrapperPass = P.getAnalysisIfAvailable<CFLSteensAAWrapperPass>()) AAR.addAAResult(WrapperPass->getResult()); + if (auto *WrapperPass = P.getAnalysisIfAvailable<ExternalAAWrapperPass>()) + if (WrapperPass->CB) + WrapperPass->CB(P, F, AAR); return AAR; } @@ -904,4 +918,5 @@ void llvm::getAAResultsAnalysisUsage(AnalysisUsage &AU) { AU.addUsedIfAvailable<GlobalsAAWrapperPass>(); AU.addUsedIfAvailable<CFLAndersAAWrapperPass>(); AU.addUsedIfAvailable<CFLSteensAAWrapperPass>(); + AU.addUsedIfAvailable<ExternalAAWrapperPass>(); } diff --git a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp index e83703867e09..2e44bbd3a8ca 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -16,6 +16,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp index a6e5b9fab558..5cc5ab597ef9 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" @@ -119,6 +120,12 @@ void AliasSetTracker::removeAliasSet(AliasSet *AS) { TotalMayAliasSetSize -= AS->size(); AliasSets.erase(AS); + // If we've removed the saturated alias set, set saturated marker back to + // nullptr and ensure this tracker is empty. 
+ if (AS == AliasAnyAS) { + AliasAnyAS = nullptr; + assert(AliasSets.empty() && "Tracker not empty"); + } } void AliasSet::removeFromTracker(AliasSetTracker &AST) { @@ -690,8 +697,10 @@ void AliasSet::print(raw_ostream &OS) const { } void AliasSetTracker::print(raw_ostream &OS) const { - OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for " - << PointerMap.size() << " pointer values.\n"; + OS << "Alias Set Tracker: " << AliasSets.size(); + if (AliasAnyAS) + OS << " (Saturated)"; + OS << " alias sets for " << PointerMap.size() << " pointer values.\n"; for (const AliasSet &AS : *this) AS.print(OS); OS << "\n"; diff --git a/contrib/llvm-project/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/Analysis.cpp index d46a8d8e306c..af718526684b 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/Analysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/Analysis.cpp @@ -65,6 +65,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeModuleDebugInfoPrinterPass(Registry); initializeModuleSummaryIndexWrapperPassPass(Registry); initializeMustExecutePrinterPass(Registry); + initializeMustBeExecutedContextPrinterPass(Registry); initializeObjCARCAAWrapperPassPass(Registry); initializeOptimizationRemarkEmitterWrapperPassPass(Registry); initializePhiValuesWrapperPassPass(Registry); diff --git a/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp b/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp index cf2f845dee0a..f4d4a5ac8f88 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/AssumptionCache.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -130,7 +131,10 @@ void AssumptionCache::unregisterAssumption(CallInst *CI) { if (AVI != AffectedValues.end()) AffectedValues.erase(AVI); } - remove_if(AssumeHandles, [CI](WeakTrackingVH &VH) { return CI == VH; }); + + AssumeHandles.erase( + remove_if(AssumeHandles, [CI](WeakTrackingVH &VH) { return CI == VH; }), + AssumeHandles.end()); } void AssumptionCache::AffectedValueCallbackVH::deleted() { @@ -140,7 +144,7 @@ void AssumptionCache::AffectedValueCallbackVH::deleted() { // 'this' now dangles! } -void AssumptionCache::copyAffectedValuesInCache(Value *OV, Value *NV) { +void AssumptionCache::transferAffectedValuesInCache(Value *OV, Value *NV) { auto &NAVV = getOrInsertAffectedValues(NV); auto AVI = AffectedValues.find(OV); if (AVI == AffectedValues.end()) @@ -149,6 +153,7 @@ void AssumptionCache::copyAffectedValuesInCache(Value *OV, Value *NV) { for (auto &A : AVI->second) if (std::find(NAVV.begin(), NAVV.end(), A) == NAVV.end()) NAVV.push_back(A); + AffectedValues.erase(OV); } void AssumptionCache::AffectedValueCallbackVH::allUsesReplacedWith(Value *NV) { @@ -157,7 +162,7 @@ void AssumptionCache::AffectedValueCallbackVH::allUsesReplacedWith(Value *NV) { // Any assumptions that affected this value now affect the new value. - AC->copyAffectedValuesInCache(getValPtr(), NV); + AC->transferAffectedValuesInCache(getValPtr(), NV); // 'this' now might dangle! If the AffectedValues map was resized to add an // entry for NV then this object might have been destroyed in favor of some // copy in the grown map. 
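The unregisterAssumption() change above is the classic erase-remove fix: llvm::remove_if, like std::remove_if, only shifts the kept elements to the front and returns the new logical end, so without the surrounding erase() the container keeps stale handles at its old size. A standalone illustration of the idiom (plain std::vector, not the actual AssumeHandles container):

#include <algorithm>
#include <vector>

// Drop all even values. remove_if alone would leave the vector's size
// unchanged, with unspecified values in the tail; erase() trims that tail.
void dropEven(std::vector<int> &V) {
  V.erase(std::remove_if(V.begin(), V.end(),
                         [](int X) { return X % 2 == 0; }),
          V.end());
}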
@@ -252,7 +257,7 @@ AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) { // Ok, build a new cache by scanning the function, insert it and the value // handle into our map, and return the newly populated cache. auto IP = AssumptionCaches.insert(std::make_pair( - FunctionCallbackVH(&F, this), llvm::make_unique<AssumptionCache>(F))); + FunctionCallbackVH(&F, this), std::make_unique<AssumptionCache>(F))); assert(IP.second && "Scanning function already in the map?"); return *IP.first->second; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 3721c99883b8..e852d663c6b4 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -25,9 +25,9 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/PhiValues.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/PhiValues.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constant.h" @@ -49,6 +49,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -233,6 +234,26 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size, return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size; } +/// Return the minimal extent from \p V to the end of the underlying object, +/// assuming the result is used in an aliasing query. E.g., we do use the query +/// location size and the fact that null pointers cannot alias here. +static uint64_t getMinimalExtentFrom(const Value &V, + const LocationSize &LocSize, + const DataLayout &DL, + bool NullIsValidLoc) { + // If we have dereferenceability information we know a lower bound for the + // extent as accesses for a lower offset would be valid. We need to exclude + // the "or null" part if null is a valid pointer. + bool CanBeNull; + uint64_t DerefBytes = V.getPointerDereferenceableBytes(DL, CanBeNull); + DerefBytes = (CanBeNull && NullIsValidLoc) ? 0 : DerefBytes; + // If queried with a precise location size, we assume that location size to be + // accessed, thus valid. + if (LocSize.isPrecise()) + DerefBytes = std::max(DerefBytes, LocSize.getValue()); + return DerefBytes; +} + /// Returns true if we can prove that the object specified by V has size Size. static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL, const TargetLibraryInfo &TLI, bool NullIsValidLoc) { @@ -481,7 +502,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V, // because it should be in sync with CaptureTracking. Not using it may // cause weird miscompilations where 2 aliasing pointers are assumed to // noalias. 
- if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) { + if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) { V = RP; continue; } @@ -1461,7 +1482,8 @@ AliasResult BasicAAResult::aliasGEP( // give up if we can't determine conditions that hold for every cycle: const Value *V = DecompGEP1.VarIndices[i].V; - KnownBits Known = computeKnownBits(V, DL, 0, &AC, nullptr, DT); + KnownBits Known = + computeKnownBits(V, DL, 0, &AC, dyn_cast<Instruction>(GEP1), DT); bool SignKnownZero = Known.isNonNegative(); bool SignKnownOne = Known.isNegative(); @@ -1792,10 +1814,12 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, // If the size of one access is larger than the entire object on the other // side, then we know such behavior is undefined and can assume no alias. bool NullIsValidLocation = NullPointerIsDefined(&F); - if ((V1Size.isPrecise() && isObjectSmallerThan(O2, V1Size.getValue(), DL, TLI, - NullIsValidLocation)) || - (V2Size.isPrecise() && isObjectSmallerThan(O1, V2Size.getValue(), DL, TLI, - NullIsValidLocation))) + if ((isObjectSmallerThan( + O2, getMinimalExtentFrom(*V1, V1Size, DL, NullIsValidLocation), DL, + TLI, NullIsValidLocation)) || + (isObjectSmallerThan( + O1, getMinimalExtentFrom(*V2, V2Size, DL, NullIsValidLocation), DL, + TLI, NullIsValidLocation))) return NoAlias; // Check the cache before climbing up use-def chains. This also terminates @@ -2027,7 +2051,7 @@ BasicAAResult BasicAA::run(Function &F, FunctionAnalysisManager &AM) { } BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) { - initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry()); + initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry()); } char BasicAAWrapperPass::ID = 0; @@ -2053,8 +2077,9 @@ bool BasicAAWrapperPass::runOnFunction(Function &F) { auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); auto *PVWP = getAnalysisIfAvailable<PhiValuesWrapperPass>(); - Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), F, TLIWP.getTLI(), - ACT.getAssumptionCache(F), &DTWP.getDomTree(), + Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), F, + TLIWP.getTLI(F), ACT.getAssumptionCache(F), + &DTWP.getDomTree(), LIWP ? &LIWP->getLoopInfo() : nullptr, PVWP ? 
&PVWP->getResult() : nullptr)); @@ -2071,8 +2096,7 @@ void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) { return BasicAAResult( - F.getParent()->getDataLayout(), - F, - P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), + F.getParent()->getDataLayout(), F, + P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F), P.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F)); } diff --git a/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp index de183bbde173..544bd7757ae4 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/BlockFrequencyInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/CFG.h" #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/GraphWriter.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 5eb95003f5d8..ffba65b5ed5e 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -31,9 +32,11 @@ #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <cassert> @@ -61,6 +64,12 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) +BranchProbabilityInfoWrapperPass::BranchProbabilityInfoWrapperPass() + : FunctionPass(ID) { + initializeBranchProbabilityInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); +} + char BranchProbabilityInfoWrapperPass::ID = 0; // Weights are for internal use only. They are used by heuristics to help to @@ -118,6 +127,13 @@ static const uint32_t ZH_NONTAKEN_WEIGHT = 12; static const uint32_t FPH_TAKEN_WEIGHT = 20; static const uint32_t FPH_NONTAKEN_WEIGHT = 12; +/// This is the probability for an ordered floating point comparison. +static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1; +/// This is the probability for an unordered floating point comparison, it means +/// one or two of the operands are NaN. Usually it is used to test for an +/// exceptional case, so the result is unlikely. +static const uint32_t FPH_UNO_WEIGHT = 1; + /// Invoke-terminating normal branch taken weight /// /// This is the weight for branching to the normal destination of an invoke @@ -131,69 +147,83 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; /// instruction. This is essentially never taken. static const uint32_t IH_NONTAKEN_WEIGHT = 1; -/// Add \p BB to PostDominatedByUnreachable set if applicable. 
-void -BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) { - const Instruction *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 0) { - if (isa<UnreachableInst>(TI) || - // If this block is terminated by a call to - // @llvm.experimental.deoptimize then treat it like an unreachable since - // the @llvm.experimental.deoptimize call is expected to practically - // never execute. - BB->getTerminatingDeoptimizeCall()) - PostDominatedByUnreachable.insert(BB); - return; - } +static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT, + SmallVectorImpl<const BasicBlock *> &WorkList, + SmallPtrSetImpl<const BasicBlock *> &TargetSet) { + SmallVector<BasicBlock *, 8> Descendants; + SmallPtrSet<const BasicBlock *, 16> NewItems; + + PDT->getDescendants(const_cast<BasicBlock *>(BB), Descendants); + for (auto *BB : Descendants) + if (TargetSet.insert(BB).second) + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + if (!TargetSet.count(*PI)) + NewItems.insert(*PI); + WorkList.insert(WorkList.end(), NewItems.begin(), NewItems.end()); +} - // If the terminator is an InvokeInst, check only the normal destination block - // as the unwind edge of InvokeInst is also very unlikely taken. - if (auto *II = dyn_cast<InvokeInst>(TI)) { - if (PostDominatedByUnreachable.count(II->getNormalDest())) - PostDominatedByUnreachable.insert(BB); - return; +/// Compute a set of basic blocks that are post-dominated by unreachables. +void BranchProbabilityInfo::computePostDominatedByUnreachable( + const Function &F, PostDominatorTree *PDT) { + SmallVector<const BasicBlock *, 8> WorkList; + for (auto &BB : F) { + const Instruction *TI = BB.getTerminator(); + if (TI->getNumSuccessors() == 0) { + if (isa<UnreachableInst>(TI) || + // If this block is terminated by a call to + // @llvm.experimental.deoptimize then treat it like an unreachable + // since the @llvm.experimental.deoptimize call is expected to + // practically never execute. + BB.getTerminatingDeoptimizeCall()) + UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByUnreachable); + } } - for (auto *I : successors(BB)) - // If any of successor is not post dominated then BB is also not. - if (!PostDominatedByUnreachable.count(I)) - return; - - PostDominatedByUnreachable.insert(BB); + while (!WorkList.empty()) { + const BasicBlock *BB = WorkList.pop_back_val(); + if (PostDominatedByUnreachable.count(BB)) + continue; + // If the terminator is an InvokeInst, check only the normal destination + // block as the unwind edge of InvokeInst is also very unlikely taken. + if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + if (PostDominatedByUnreachable.count(II->getNormalDest())) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); + } + // If all the successors are unreachable, BB is unreachable as well. + else if (!successors(BB).empty() && + llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { + return PostDominatedByUnreachable.count(Succ); + })) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); + } } -/// Add \p BB to PostDominatedByColdCall set if applicable. -void -BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) { - assert(!PostDominatedByColdCall.count(BB)); - const Instruction *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 0) - return; +/// compute a set of basic blocks that are post-dominated by ColdCalls. 
+void BranchProbabilityInfo::computePostDominatedByColdCall( + const Function &F, PostDominatorTree *PDT) { + SmallVector<const BasicBlock *, 8> WorkList; + for (auto &BB : F) + for (auto &I : BB) + if (const CallInst *CI = dyn_cast<CallInst>(&I)) + if (CI->hasFnAttr(Attribute::Cold)) + UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByColdCall); - // If all of successor are post dominated then BB is also done. - if (llvm::all_of(successors(BB), [&](const BasicBlock *SuccBB) { - return PostDominatedByColdCall.count(SuccBB); - })) { - PostDominatedByColdCall.insert(BB); - return; - } + while (!WorkList.empty()) { + const BasicBlock *BB = WorkList.pop_back_val(); - // If the terminator is an InvokeInst, check only the normal destination - // block as the unwind edge of InvokeInst is also very unlikely taken. - if (auto *II = dyn_cast<InvokeInst>(TI)) - if (PostDominatedByColdCall.count(II->getNormalDest())) { - PostDominatedByColdCall.insert(BB); - return; + // If the terminator is an InvokeInst, check only the normal destination + // block as the unwind edge of InvokeInst is also very unlikely taken. + if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + if (PostDominatedByColdCall.count(II->getNormalDest())) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); } - - // Otherwise, if the block itself contains a cold function, add it to the - // set of blocks post-dominated by a cold call. - for (auto &I : *BB) - if (const CallInst *CI = dyn_cast<CallInst>(&I)) - if (CI->hasFnAttr(Attribute::Cold)) { - PostDominatedByColdCall.insert(BB); - return; - } + // If all of successor are post dominated then BB is also done. + else if (!successors(BB).empty() && + llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { + return PostDominatedByColdCall.count(Succ); + })) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); + } } /// Calculate edge weights for successors lead to unreachable. 
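Both compute* routines above follow the same seed-and-expand pattern: seed the set from blocks that have the property directly (unreachable terminators, deoptimize calls, cold calls), expand each seed to everything it post-dominates via PostDominatorTree::getDescendants, then revisit predecessors of newly marked blocks on a worklist. Condensed to its core — the helper name below is illustrative, not the actual LLVM function:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
using namespace llvm;

// Mark every block post-dominated by Seed and queue predecessors of newly
// marked blocks so the caller can re-examine them, mirroring UpdatePDTWorklist.
static void expandSeed(const BasicBlock *Seed, PostDominatorTree &PDT,
                       SmallVectorImpl<const BasicBlock *> &WorkList,
                       SmallPtrSetImpl<const BasicBlock *> &Marked) {
  SmallVector<BasicBlock *, 8> Descendants;
  PDT.getDescendants(const_cast<BasicBlock *>(Seed), Descendants);
  for (BasicBlock *BB : Descendants)
    if (Marked.insert(BB).second)
      for (const BasicBlock *Pred : predecessors(BB))
        if (!Marked.count(Pred))
          WorkList.push_back(Pred);
}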
@@ -778,6 +808,8 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) { if (!FCmp) return false; + uint32_t TakenWeight = FPH_TAKEN_WEIGHT; + uint32_t NontakenWeight = FPH_NONTAKEN_WEIGHT; bool isProb; if (FCmp->isEquality()) { // f1 == f2 -> Unlikely @@ -786,9 +818,13 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) { } else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) { // !isnan -> Likely isProb = true; + TakenWeight = FPH_ORD_WEIGHT; + NontakenWeight = FPH_UNO_WEIGHT; } else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) { // isnan -> Unlikely isProb = false; + TakenWeight = FPH_ORD_WEIGHT; + NontakenWeight = FPH_UNO_WEIGHT; } else { return false; } @@ -798,8 +834,7 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) { if (!isProb) std::swap(TakenIdx, NonTakenIdx); - BranchProbability TakenProb(FPH_TAKEN_WEIGHT, - FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT); + BranchProbability TakenProb(TakenWeight, TakenWeight + NontakenWeight); setEdgeProbability(BB, TakenIdx, TakenProb); setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl()); return true; @@ -963,13 +998,16 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, LLVM_DEBUG(dbgs() << "\n"); } + std::unique_ptr<PostDominatorTree> PDT = + std::make_unique<PostDominatorTree>(const_cast<Function &>(F)); + computePostDominatedByUnreachable(F, PDT.get()); + computePostDominatedByColdCall(F, PDT.get()); + // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. for (auto BB : post_order(&F.getEntryBlock())) { LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); - updatePostDominatedByUnreachable(BB); - updatePostDominatedByColdCall(BB); // If there is no at least two successors, no sense to set probability. if (BB->getTerminator()->getNumSuccessors() < 2) continue; @@ -1014,7 +1052,8 @@ void BranchProbabilityInfoWrapperPass::getAnalysisUsage( bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + const TargetLibraryInfo &TLI = + getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); BPI.calculate(F, LI, &TLI); return false; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFG.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFG.cpp index 18b83d6838cc..8215b4ecbb03 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CFG.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CFG.cpp @@ -87,11 +87,18 @@ unsigned llvm::GetSuccessorNumber(const BasicBlock *BB, /// with multiple predecessors. 
bool llvm::isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges) { - assert(TI->isTerminator() && "Must be a terminator to have successors!"); assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!"); + return isCriticalEdge(TI, TI->getSuccessor(SuccNum), AllowIdenticalEdges); +} + +bool llvm::isCriticalEdge(const Instruction *TI, const BasicBlock *Dest, + bool AllowIdenticalEdges) { + assert(TI->isTerminator() && "Must be a terminator to have successors!"); if (TI->getNumSuccessors() == 1) return false; - const BasicBlock *Dest = TI->getSuccessor(SuccNum); + assert(find(predecessors(Dest), TI->getParent()) != pred_end(Dest) && + "No edge between TI's block and Dest."); + const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest); // If there is more than one predecessor, this is a critical edge... diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp index 619b675b58d8..88e7d3bdede1 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CFGPrinter.cpp @@ -18,7 +18,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CFGPrinter.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" using namespace llvm; @@ -99,7 +101,7 @@ static void writeCFGToDotFile(Function &F, bool CFGOnly = false) { errs() << "Writing '" << Filename << "'..."; std::error_code EC; - raw_fd_ostream File(Filename, EC, sys::fs::F_Text); + raw_fd_ostream File(Filename, EC, sys::fs::OF_Text); if (!EC) WriteGraph(File, (const Function*)&F, CFGOnly); diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp index 690e514d4f5c..eb5c96e6eeca 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp @@ -69,6 +69,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" @@ -88,9 +89,11 @@ using namespace llvm::cflaa; #define DEBUG_TYPE "cfl-anders-aa" -CFLAndersAAResult::CFLAndersAAResult(const TargetLibraryInfo &TLI) : TLI(TLI) {} +CFLAndersAAResult::CFLAndersAAResult( + std::function<const TargetLibraryInfo &(Function &F)> GetTLI) + : GetTLI(std::move(GetTLI)) {} CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS) - : AAResultBase(std::move(RHS)), TLI(RHS.TLI) {} + : AAResultBase(std::move(RHS)), GetTLI(std::move(RHS.GetTLI)) {} CFLAndersAAResult::~CFLAndersAAResult() = default; namespace { @@ -779,7 +782,7 @@ static AliasAttrMap buildAttrMap(const CFLGraph &Graph, CFLAndersAAResult::FunctionInfo CFLAndersAAResult::buildInfoFrom(const Function &Fn) { CFLGraphBuilder<CFLAndersAAResult> GraphBuilder( - *this, TLI, + *this, GetTLI(const_cast<Function &>(Fn)), // Cast away the constness here due to GraphBuilder's API requirement const_cast<Function &>(Fn)); auto &Graph = GraphBuilder.getCFLGraph(); @@ -898,7 +901,10 @@ AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA, AnalysisKey CFLAndersAA::Key; CFLAndersAAResult CFLAndersAA::run(Function &F, FunctionAnalysisManager &AM) { - return CFLAndersAAResult(AM.getResult<TargetLibraryAnalysis>(F)); + auto GetTLI = 
[&AM](Function &F) -> TargetLibraryInfo & { + return AM.getResult<TargetLibraryAnalysis>(F); + }; + return CFLAndersAAResult(GetTLI); } char CFLAndersAAWrapperPass::ID = 0; @@ -914,8 +920,10 @@ CFLAndersAAWrapperPass::CFLAndersAAWrapperPass() : ImmutablePass(ID) { } void CFLAndersAAWrapperPass::initializePass() { - auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>(); - Result.reset(new CFLAndersAAResult(TLIWP.getTLI())); + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + }; + Result.reset(new CFLAndersAAResult(GetTLI)); } void CFLAndersAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp index 44b1834f70bf..85a8c3d2a00b 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp @@ -46,6 +46,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -60,10 +61,11 @@ using namespace llvm::cflaa; #define DEBUG_TYPE "cfl-steens-aa" -CFLSteensAAResult::CFLSteensAAResult(const TargetLibraryInfo &TLI) - : AAResultBase(), TLI(TLI) {} +CFLSteensAAResult::CFLSteensAAResult( + std::function<const TargetLibraryInfo &(Function &F)> GetTLI) + : AAResultBase(), GetTLI(std::move(GetTLI)) {} CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg) - : AAResultBase(std::move(Arg)), TLI(Arg.TLI) {} + : AAResultBase(std::move(Arg)), GetTLI(std::move(Arg.GetTLI)) {} CFLSteensAAResult::~CFLSteensAAResult() = default; /// Information we have about a function and would like to keep around. @@ -181,7 +183,7 @@ CFLSteensAAResult::FunctionInfo::FunctionInfo( // Builds the graph + StratifiedSets for a function. 
CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) { - CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, TLI, *Fn); + CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, GetTLI(*Fn), *Fn); StratifiedSetsBuilder<InstantiatedValue> SetBuilder; // Add all CFLGraph nodes and all Dereference edges to StratifiedSets @@ -331,7 +333,10 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA, AnalysisKey CFLSteensAA::Key; CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) { - return CFLSteensAAResult(AM.getResult<TargetLibraryAnalysis>(F)); + auto GetTLI = [&AM](Function &F) -> const TargetLibraryInfo & { + return AM.getResult<TargetLibraryAnalysis>(F); + }; + return CFLSteensAAResult(GetTLI); } char CFLSteensAAWrapperPass::ID = 0; @@ -347,8 +352,10 @@ CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) { } void CFLSteensAAWrapperPass::initializePass() { - auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>(); - Result.reset(new CFLSteensAAResult(TLIWP.getTLI())); + auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & { + return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + }; + Result.reset(new CFLSteensAAResult(GetTLI)); } void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp b/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp index ec5e94d499be..8e8a50178518 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CallGraph.cpp @@ -10,10 +10,11 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Config/llvm-config.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" @@ -29,7 +30,7 @@ using namespace llvm; CallGraph::CallGraph(Module &M) : M(M), ExternalCallingNode(getOrInsertFunction(nullptr)), - CallsExternalNode(llvm::make_unique<CallGraphNode>(nullptr)) { + CallsExternalNode(std::make_unique<CallGraphNode>(nullptr)) { // Add every function to the call graph. 
for (Function &F : M) addToCallGraph(&F); @@ -150,7 +151,7 @@ CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) { return CGN.get(); assert((!F || F->getParent() == &M) && "Function not in current module!"); - CGN = llvm::make_unique<CallGraphNode>(const_cast<Function *>(F)); + CGN = std::make_unique<CallGraphNode>(const_cast<Function *>(F)); return CGN.get(); } diff --git a/contrib/llvm-project/llvm/lib/Analysis/CallPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/CallPrinter.cpp index d24cbd104bf6..7246b73bfd4b 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CallPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CallPrinter.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/CallPrinter.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/DOTGraphTraitsPass.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp index adaa83a6c443..20e2f06540a3 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp @@ -33,6 +33,22 @@ CaptureTracker::~CaptureTracker() {} bool CaptureTracker::shouldExplore(const Use *U) { return true; } +bool CaptureTracker::isDereferenceableOrNull(Value *O, const DataLayout &DL) { + // An inbounds GEP can either be a valid pointer (pointing into + // or to the end of an allocation), or be null in the default + // address space. So for an inbounds GEP there is no way to let + // the pointer escape using clever GEP hacking because doing so + // would make the pointer point outside of the allocated object + // and thus make the GEP result a poison value. Similarly, other + // dereferenceable pointers cannot be manipulated without producing + // poison. + if (auto *GEP = dyn_cast<GetElementPtrInst>(O)) + if (GEP->isInBounds()) + return true; + bool CanBeNull; + return O->getPointerDereferenceableBytes(DL, CanBeNull); +} + namespace { struct SimpleCaptureTracker : public CaptureTracker { explicit SimpleCaptureTracker(bool ReturnCaptures) @@ -251,7 +267,8 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, // marked with nocapture do not capture. This means that places like // GetUnderlyingObject in ValueTracking or DecomposeGEPExpression // in BasicAA also need to know about this property. - if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call)) { + if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call, + true)) { AddUses(Call); break; } @@ -330,7 +347,9 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, AddUses(I); break; case Instruction::ICmp: { - if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(1))) { + unsigned Idx = (I->getOperand(0) == V) ? 0 : 1; + unsigned OtherIdx = 1 - Idx; + if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(OtherIdx))) { // Don't count comparisons of a no-alias return value against null as // captures. This allows us to ignore comparisons of malloc results // with null, for example. @@ -338,29 +357,18 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, if (isNoAliasCall(V->stripPointerCasts())) break; if (!I->getFunction()->nullPointerIsDefined()) { - auto *O = I->getOperand(0)->stripPointerCastsSameRepresentation(); - // An inbounds GEP can either be a valid pointer (pointing into - // or to the end of an allocation), or be null in the default - // address space. 
So for an inbounds GEPs there is no way to let - // the pointer escape using clever GEP hacking because doing so - // would make the pointer point outside of the allocated object - // and thus make the GEP result a poison value. - if (auto *GEP = dyn_cast<GetElementPtrInst>(O)) - if (GEP->isInBounds()) - break; - // Comparing a dereferenceable_or_null argument against null - // cannot lead to pointer escapes, because if it is not null it - // must be a valid (in-bounds) pointer. - bool CanBeNull; - if (O->getPointerDereferenceableBytes(I->getModule()->getDataLayout(), CanBeNull)) + auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation(); + // Comparing a dereferenceable_or_null pointer against null cannot + // lead to pointer escapes, because if it is not null it must be a + // valid (in-bounds) pointer. + if (Tracker->isDereferenceableOrNull(O, I->getModule()->getDataLayout())) break; } } // Comparison against value stored in global variable. Given the pointer // does not escape, its value cannot be guessed and stored separately in a // global variable. - unsigned OtherIndex = (I->getOperand(0) == V) ? 1 : 0; - auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIndex)); + auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIdx)); if (LI && isa<GlobalVariable>(LI->getPointerOperand())) break; // Otherwise, be conservative. There are crazy ways to capture pointers diff --git a/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp index 20231ca78b45..b32924e6497a 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp @@ -37,6 +37,8 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" @@ -93,6 +95,9 @@ static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy, /// This always returns a non-null constant, but it may be a /// ConstantExpr if unfoldable. Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { + assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) && + "Invalid constantexpr bitcast!"); + // Catch the obvious splat cases. if (C->isNullValue() && !DestTy->isX86_MMXTy()) return Constant::getNullValue(DestTy); @@ -521,8 +526,23 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy, return nullptr; C = FoldBitCast(C, MapTy->getPointerTo(AS), DL); - if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, MapTy, DL)) - return FoldBitCast(Res, LoadTy, DL); + if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, MapTy, DL)) { + if (Res->isNullValue() && !LoadTy->isX86_MMXTy()) + // Materializing a zero can be done trivially without a bitcast + return Constant::getNullValue(LoadTy); + Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? 
DL.getIntPtrType(LoadTy) : LoadTy; + Res = FoldBitCast(Res, CastTy, DL); + if (LoadTy->isPtrOrPtrVectorTy()) { + // For vector of pointer, we needed to first convert to a vector of integer, then do vector inttoptr + if (Res->isNullValue() && !LoadTy->isX86_MMXTy()) + return Constant::getNullValue(LoadTy); + if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) + // Be careful not to replace a load of an addrspace value with an inttoptr here + return nullptr; + Res = ConstantExpr::getCast(Instruction::IntToPtr, Res, LoadTy); + } + return Res; + } return nullptr; } @@ -544,7 +564,7 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy, int64_t InitializerSize = DL.getTypeAllocSize(GV->getInitializer()->getType()); // If we're not accessing anything in this constant, the result is undefined. - if (Offset + BytesLoaded <= 0) + if (Offset <= -1 * static_cast<int64_t>(BytesLoaded)) return UndefValue::get(IntType); // If we're not accessing anything in this constant, the result is undefined. @@ -746,8 +766,8 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1, Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops, Type *ResultTy, Optional<unsigned> InRangeIndex, const DataLayout &DL, const TargetLibraryInfo *TLI) { - Type *IntPtrTy = DL.getIntPtrType(ResultTy); - Type *IntPtrScalarTy = IntPtrTy->getScalarType(); + Type *IntIdxTy = DL.getIndexType(ResultTy); + Type *IntIdxScalarTy = IntIdxTy->getScalarType(); bool Any = false; SmallVector<Constant*, 32> NewIdxs; @@ -755,11 +775,11 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops, if ((i == 1 || !isa<StructType>(GetElementPtrInst::getIndexedType( SrcElemTy, Ops.slice(1, i - 1)))) && - Ops[i]->getType()->getScalarType() != IntPtrScalarTy) { + Ops[i]->getType()->getScalarType() != IntIdxScalarTy) { Any = true; Type *NewType = Ops[i]->getType()->isVectorTy() - ? IntPtrTy - : IntPtrTy->getScalarType(); + ? IntIdxTy + : IntIdxScalarTy; NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i], true, NewType, @@ -781,10 +801,10 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops, } /// Strip the pointer casts, but preserve the address space information. 
-Constant* StripPtrCastKeepAS(Constant* Ptr, Type *&ElemTy) { +Constant *StripPtrCastKeepAS(Constant *Ptr, Type *&ElemTy) { assert(Ptr->getType()->isPointerTy() && "Not a pointer type"); auto *OldPtrTy = cast<PointerType>(Ptr->getType()); - Ptr = Ptr->stripPointerCasts(); + Ptr = cast<Constant>(Ptr->stripPointerCasts()); auto *NewPtrTy = cast<PointerType>(Ptr->getType()); ElemTy = NewPtrTy->getPointerElementType(); @@ -819,7 +839,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, if (!Ptr->getType()->isPointerTy()) return nullptr; - Type *IntPtrTy = DL.getIntPtrType(Ptr->getType()); + Type *IntIdxTy = DL.getIndexType(Ptr->getType()); // If this is a constant expr gep that is effectively computing an // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' @@ -830,7 +850,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, // "inttoptr (sub (ptrtoint Ptr), V)" if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) { auto *CE = dyn_cast<ConstantExpr>(Ops[1]); - assert((!CE || CE->getType() == IntPtrTy) && + assert((!CE || CE->getType() == IntIdxTy) && "CastGEPIndices didn't canonicalize index types!"); if (CE && CE->getOpcode() == Instruction::Sub && CE->getOperand(0)->isNullValue()) { @@ -845,7 +865,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, return nullptr; } - unsigned BitWidth = DL.getTypeSizeInBits(IntPtrTy); + unsigned BitWidth = DL.getTypeSizeInBits(IntIdxTy); APInt Offset = APInt(BitWidth, DL.getIndexedOffsetInType( @@ -925,7 +945,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, // The element size is 0. This may be [0 x Ty]*, so just use a zero // index for this level and proceed to the next level to see if it can // accommodate the offset. - NewIdxs.push_back(ConstantInt::get(IntPtrTy, 0)); + NewIdxs.push_back(ConstantInt::get(IntIdxTy, 0)); } else { // The element size is non-zero divide the offset by the element // size (rounding down), to compute the index at this level. @@ -934,7 +954,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, if (Overflow) break; Offset -= NewIdx * ElemSize; - NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx)); + NewIdxs.push_back(ConstantInt::get(IntIdxTy, NewIdx)); } } else { auto *STy = cast<StructType>(Ty); @@ -1038,7 +1058,7 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, return ConstantExpr::getExtractElement(Ops[0], Ops[1]); case Instruction::ExtractValue: return ConstantExpr::getExtractValue( - Ops[0], dyn_cast<ExtractValueInst>(InstOrCE)->getIndices()); + Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices()); case Instruction::InsertElement: return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]); case Instruction::ShuffleVector: @@ -1464,40 +1484,50 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { if (!F->hasName()) return false; - StringRef Name = F->getName(); // In these cases, the check of the length is required. We don't want to // return true for a name like "cos\0blah" which strcmp would return equal to // "cos", but has length 8. 
+ StringRef Name = F->getName(); switch (Name[0]) { default: return false; case 'a': - return Name == "acos" || Name == "asin" || Name == "atan" || - Name == "atan2" || Name == "acosf" || Name == "asinf" || - Name == "atanf" || Name == "atan2f"; + return Name == "acos" || Name == "acosf" || + Name == "asin" || Name == "asinf" || + Name == "atan" || Name == "atanf" || + Name == "atan2" || Name == "atan2f"; case 'c': - return Name == "ceil" || Name == "cos" || Name == "cosh" || - Name == "ceilf" || Name == "cosf" || Name == "coshf"; + return Name == "ceil" || Name == "ceilf" || + Name == "cos" || Name == "cosf" || + Name == "cosh" || Name == "coshf"; case 'e': - return Name == "exp" || Name == "exp2" || Name == "expf" || Name == "exp2f"; + return Name == "exp" || Name == "expf" || + Name == "exp2" || Name == "exp2f"; case 'f': - return Name == "fabs" || Name == "floor" || Name == "fmod" || - Name == "fabsf" || Name == "floorf" || Name == "fmodf"; + return Name == "fabs" || Name == "fabsf" || + Name == "floor" || Name == "floorf" || + Name == "fmod" || Name == "fmodf"; case 'l': - return Name == "log" || Name == "log10" || Name == "logf" || - Name == "log10f"; + return Name == "log" || Name == "logf" || + Name == "log2" || Name == "log2f" || + Name == "log10" || Name == "log10f"; + case 'n': + return Name == "nearbyint" || Name == "nearbyintf"; case 'p': return Name == "pow" || Name == "powf"; case 'r': - return Name == "round" || Name == "roundf"; + return Name == "rint" || Name == "rintf" || + Name == "round" || Name == "roundf"; case 's': - return Name == "sin" || Name == "sinh" || Name == "sqrt" || - Name == "sinf" || Name == "sinhf" || Name == "sqrtf"; + return Name == "sin" || Name == "sinf" || + Name == "sinh" || Name == "sinhf" || + Name == "sqrt" || Name == "sqrtf"; case 't': - return Name == "tan" || Name == "tanh" || Name == "tanf" || Name == "tanhf"; + return Name == "tan" || Name == "tanf" || + Name == "tanh" || Name == "tanhf" || + Name == "trunc" || Name == "truncf"; case '_': - // Check for various function names that get used for the math functions // when the header files are preprocessed with the macro // __FINITE_MATH_ONLY__ enabled. @@ -1713,40 +1743,37 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return nullptr; - if (IntrinsicID == Intrinsic::round) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmNearestTiesToAway); - return ConstantFP::get(Ty->getContext(), V); + // Use internal versions of these intrinsics. 
+ APFloat U = Op->getValueAPF(); + + if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) { + U.roundToIntegral(APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), U); } - if (IntrinsicID == Intrinsic::floor) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmTowardNegative); - return ConstantFP::get(Ty->getContext(), V); + if (IntrinsicID == Intrinsic::round) { + U.roundToIntegral(APFloat::rmNearestTiesToAway); + return ConstantFP::get(Ty->getContext(), U); } if (IntrinsicID == Intrinsic::ceil) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmTowardPositive); - return ConstantFP::get(Ty->getContext(), V); + U.roundToIntegral(APFloat::rmTowardPositive); + return ConstantFP::get(Ty->getContext(), U); } - if (IntrinsicID == Intrinsic::trunc) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmTowardZero); - return ConstantFP::get(Ty->getContext(), V); + if (IntrinsicID == Intrinsic::floor) { + U.roundToIntegral(APFloat::rmTowardNegative); + return ConstantFP::get(Ty->getContext(), U); } - if (IntrinsicID == Intrinsic::rint) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmNearestTiesToEven); - return ConstantFP::get(Ty->getContext(), V); + if (IntrinsicID == Intrinsic::trunc) { + U.roundToIntegral(APFloat::rmTowardZero); + return ConstantFP::get(Ty->getContext(), U); } - if (IntrinsicID == Intrinsic::nearbyint) { - APFloat V = Op->getValueAPF(); - V.roundToIntegral(APFloat::rmNearestTiesToEven); - return ConstantFP::get(Ty->getContext(), V); + if (IntrinsicID == Intrinsic::fabs) { + U.clearSign(); + return ConstantFP::get(Ty->getContext(), U); } /// We only fold functions with finite arguments. Folding NaN and inf is @@ -1763,18 +1790,19 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, switch (IntrinsicID) { default: break; - case Intrinsic::fabs: - return ConstantFoldFP(fabs, V, Ty); - case Intrinsic::log2: - return ConstantFoldFP(Log2, V, Ty); case Intrinsic::log: return ConstantFoldFP(log, V, Ty); + case Intrinsic::log2: + // TODO: What about hosts that lack a C99 library? + return ConstantFoldFP(Log2, V, Ty); case Intrinsic::log10: + // TODO: What about hosts that lack a C99 library? return ConstantFoldFP(log10, V, Ty); case Intrinsic::exp: return ConstantFoldFP(exp, V, Ty); case Intrinsic::exp2: - return ConstantFoldFP(exp2, V, Ty); + // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library. 
+ return ConstantFoldBinaryFP(pow, 2.0, V, Ty); case Intrinsic::sin: return ConstantFoldFP(sin, V, Ty); case Intrinsic::cos: @@ -1786,104 +1814,150 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, if (!TLI) return nullptr; - char NameKeyChar = Name[0]; - if (Name[0] == '_' && Name.size() > 2 && Name[1] == '_') - NameKeyChar = Name[2]; - - switch (NameKeyChar) { - case 'a': - if ((Name == "acos" && TLI->has(LibFunc_acos)) || - (Name == "acosf" && TLI->has(LibFunc_acosf)) || - (Name == "__acos_finite" && TLI->has(LibFunc_acos_finite)) || - (Name == "__acosf_finite" && TLI->has(LibFunc_acosf_finite))) + LibFunc Func = NotLibFunc; + TLI->getLibFunc(Name, Func); + switch (Func) { + default: + break; + case LibFunc_acos: + case LibFunc_acosf: + case LibFunc_acos_finite: + case LibFunc_acosf_finite: + if (TLI->has(Func)) return ConstantFoldFP(acos, V, Ty); - else if ((Name == "asin" && TLI->has(LibFunc_asin)) || - (Name == "asinf" && TLI->has(LibFunc_asinf)) || - (Name == "__asin_finite" && TLI->has(LibFunc_asin_finite)) || - (Name == "__asinf_finite" && TLI->has(LibFunc_asinf_finite))) + break; + case LibFunc_asin: + case LibFunc_asinf: + case LibFunc_asin_finite: + case LibFunc_asinf_finite: + if (TLI->has(Func)) return ConstantFoldFP(asin, V, Ty); - else if ((Name == "atan" && TLI->has(LibFunc_atan)) || - (Name == "atanf" && TLI->has(LibFunc_atanf))) + break; + case LibFunc_atan: + case LibFunc_atanf: + if (TLI->has(Func)) return ConstantFoldFP(atan, V, Ty); break; - case 'c': - if ((Name == "ceil" && TLI->has(LibFunc_ceil)) || - (Name == "ceilf" && TLI->has(LibFunc_ceilf))) - return ConstantFoldFP(ceil, V, Ty); - else if ((Name == "cos" && TLI->has(LibFunc_cos)) || - (Name == "cosf" && TLI->has(LibFunc_cosf))) + case LibFunc_ceil: + case LibFunc_ceilf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmTowardPositive); + return ConstantFP::get(Ty->getContext(), U); + } + break; + case LibFunc_cos: + case LibFunc_cosf: + if (TLI->has(Func)) return ConstantFoldFP(cos, V, Ty); - else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) || - (Name == "coshf" && TLI->has(LibFunc_coshf)) || - (Name == "__cosh_finite" && TLI->has(LibFunc_cosh_finite)) || - (Name == "__coshf_finite" && TLI->has(LibFunc_coshf_finite))) + break; + case LibFunc_cosh: + case LibFunc_coshf: + case LibFunc_cosh_finite: + case LibFunc_coshf_finite: + if (TLI->has(Func)) return ConstantFoldFP(cosh, V, Ty); break; - case 'e': - if ((Name == "exp" && TLI->has(LibFunc_exp)) || - (Name == "expf" && TLI->has(LibFunc_expf)) || - (Name == "__exp_finite" && TLI->has(LibFunc_exp_finite)) || - (Name == "__expf_finite" && TLI->has(LibFunc_expf_finite))) + case LibFunc_exp: + case LibFunc_expf: + case LibFunc_exp_finite: + case LibFunc_expf_finite: + if (TLI->has(Func)) return ConstantFoldFP(exp, V, Ty); - if ((Name == "exp2" && TLI->has(LibFunc_exp2)) || - (Name == "exp2f" && TLI->has(LibFunc_exp2f)) || - (Name == "__exp2_finite" && TLI->has(LibFunc_exp2_finite)) || - (Name == "__exp2f_finite" && TLI->has(LibFunc_exp2f_finite))) - // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a - // C99 library. + break; + case LibFunc_exp2: + case LibFunc_exp2f: + case LibFunc_exp2_finite: + case LibFunc_exp2f_finite: + if (TLI->has(Func)) + // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library. 
return ConstantFoldBinaryFP(pow, 2.0, V, Ty); break; - case 'f': - if ((Name == "fabs" && TLI->has(LibFunc_fabs)) || - (Name == "fabsf" && TLI->has(LibFunc_fabsf))) - return ConstantFoldFP(fabs, V, Ty); - else if ((Name == "floor" && TLI->has(LibFunc_floor)) || - (Name == "floorf" && TLI->has(LibFunc_floorf))) - return ConstantFoldFP(floor, V, Ty); + case LibFunc_fabs: + case LibFunc_fabsf: + if (TLI->has(Func)) { + U.clearSign(); + return ConstantFP::get(Ty->getContext(), U); + } break; - case 'l': - if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) || - (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)) || - (Name == "__log_finite" && V > 0 && - TLI->has(LibFunc_log_finite)) || - (Name == "__logf_finite" && V > 0 && - TLI->has(LibFunc_logf_finite))) + case LibFunc_floor: + case LibFunc_floorf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmTowardNegative); + return ConstantFP::get(Ty->getContext(), U); + } + break; + case LibFunc_log: + case LibFunc_logf: + case LibFunc_log_finite: + case LibFunc_logf_finite: + if (V > 0.0 && TLI->has(Func)) return ConstantFoldFP(log, V, Ty); - else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) || - (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)) || - (Name == "__log10_finite" && V > 0 && - TLI->has(LibFunc_log10_finite)) || - (Name == "__log10f_finite" && V > 0 && - TLI->has(LibFunc_log10f_finite))) + break; + case LibFunc_log2: + case LibFunc_log2f: + case LibFunc_log2_finite: + case LibFunc_log2f_finite: + if (V > 0.0 && TLI->has(Func)) + // TODO: What about hosts that lack a C99 library? + return ConstantFoldFP(Log2, V, Ty); + break; + case LibFunc_log10: + case LibFunc_log10f: + case LibFunc_log10_finite: + case LibFunc_log10f_finite: + if (V > 0.0 && TLI->has(Func)) + // TODO: What about hosts that lack a C99 library? 
return ConstantFoldFP(log10, V, Ty); break; - case 'r': - if ((Name == "round" && TLI->has(LibFunc_round)) || - (Name == "roundf" && TLI->has(LibFunc_roundf))) - return ConstantFoldFP(round, V, Ty); + case LibFunc_nearbyint: + case LibFunc_nearbyintf: + case LibFunc_rint: + case LibFunc_rintf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), U); + } break; - case 's': - if ((Name == "sin" && TLI->has(LibFunc_sin)) || - (Name == "sinf" && TLI->has(LibFunc_sinf))) + case LibFunc_round: + case LibFunc_roundf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmNearestTiesToAway); + return ConstantFP::get(Ty->getContext(), U); + } + break; + case LibFunc_sin: + case LibFunc_sinf: + if (TLI->has(Func)) return ConstantFoldFP(sin, V, Ty); - else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) || - (Name == "sinhf" && TLI->has(LibFunc_sinhf)) || - (Name == "__sinh_finite" && TLI->has(LibFunc_sinh_finite)) || - (Name == "__sinhf_finite" && TLI->has(LibFunc_sinhf_finite))) + break; + case LibFunc_sinh: + case LibFunc_sinhf: + case LibFunc_sinh_finite: + case LibFunc_sinhf_finite: + if (TLI->has(Func)) return ConstantFoldFP(sinh, V, Ty); - else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) || - (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf))) + break; + case LibFunc_sqrt: + case LibFunc_sqrtf: + if (V >= 0.0 && TLI->has(Func)) return ConstantFoldFP(sqrt, V, Ty); break; - case 't': - if ((Name == "tan" && TLI->has(LibFunc_tan)) || - (Name == "tanf" && TLI->has(LibFunc_tanf))) + case LibFunc_tan: + case LibFunc_tanf: + if (TLI->has(Func)) return ConstantFoldFP(tan, V, Ty); - else if ((Name == "tanh" && TLI->has(LibFunc_tanh)) || - (Name == "tanhf" && TLI->has(LibFunc_tanhf))) + break; + case LibFunc_tanh: + case LibFunc_tanhf: + if (TLI->has(Func)) return ConstantFoldFP(tanh, V, Ty); break; - default: + case LibFunc_trunc: + case LibFunc_truncf: + if (TLI->has(Func)) { + U.roundToIntegral(APFloat::rmTowardZero); + return ConstantFP::get(Ty->getContext(), U); + } break; } return nullptr; @@ -2002,19 +2076,35 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, if (!TLI) return nullptr; - if ((Name == "pow" && TLI->has(LibFunc_pow)) || - (Name == "powf" && TLI->has(LibFunc_powf)) || - (Name == "__pow_finite" && TLI->has(LibFunc_pow_finite)) || - (Name == "__powf_finite" && TLI->has(LibFunc_powf_finite))) - return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); - if ((Name == "fmod" && TLI->has(LibFunc_fmod)) || - (Name == "fmodf" && TLI->has(LibFunc_fmodf))) - return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); - if ((Name == "atan2" && TLI->has(LibFunc_atan2)) || - (Name == "atan2f" && TLI->has(LibFunc_atan2f)) || - (Name == "__atan2_finite" && TLI->has(LibFunc_atan2_finite)) || - (Name == "__atan2f_finite" && TLI->has(LibFunc_atan2f_finite))) - return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); + + LibFunc Func = NotLibFunc; + TLI->getLibFunc(Name, Func); + switch (Func) { + default: + break; + case LibFunc_pow: + case LibFunc_powf: + case LibFunc_pow_finite: + case LibFunc_powf_finite: + if (TLI->has(Func)) + return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); + break; + case LibFunc_fmod: + case LibFunc_fmodf: + if (TLI->has(Func)) { + APFloat V = Op1->getValueAPF(); + if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF())) + return ConstantFP::get(Ty->getContext(), V); + } + break; + case LibFunc_atan2: + case LibFunc_atan2f: + case LibFunc_atan2_finite: + case LibFunc_atan2f_finite: + if (TLI->has(Func)) + 
return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); + break; + } } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) { if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy()) return ConstantFP::get(Ty->getContext(), @@ -2041,20 +2131,27 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, switch (IntrinsicID) { default: break; + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + // X - undef -> { undef, false } + // undef - X -> { undef, false } + // X + undef -> { undef, false } + // undef + x -> { undef, false } + if (!C0 || !C1) { + return ConstantStruct::get( + cast<StructType>(Ty), + {UndefValue::get(Ty->getStructElementType(0)), + Constant::getNullValue(Ty->getStructElementType(1))}); + } + LLVM_FALLTHROUGH; case Intrinsic::smul_with_overflow: - case Intrinsic::umul_with_overflow: - // Even if both operands are undef, we cannot fold muls to undef - // in the general case. For example, on i2 there are no inputs - // that would produce { i2 -1, i1 true } as the result. + case Intrinsic::umul_with_overflow: { + // undef * X -> { 0, false } + // X * undef -> { 0, false } if (!C0 || !C1) return Constant::getNullValue(Ty); - LLVM_FALLTHROUGH; - case Intrinsic::sadd_with_overflow: - case Intrinsic::uadd_with_overflow: - case Intrinsic::ssub_with_overflow: - case Intrinsic::usub_with_overflow: { - if (!C0 || !C1) - return UndefValue::get(Ty); APInt Res; bool Overflow; @@ -2194,13 +2291,9 @@ static Constant *ConstantFoldScalarCall3(StringRef Name, case Intrinsic::fma: case Intrinsic::fmuladd: { APFloat V = Op1->getValueAPF(); - APFloat::opStatus s = V.fusedMultiplyAdd(Op2->getValueAPF(), - Op3->getValueAPF(), - APFloat::rmNearestTiesToEven); - if (s != APFloat::opInvalidOp) - return ConstantFP::get(Ty->getContext(), V); - - return nullptr; + V.fusedMultiplyAdd(Op2->getValueAPF(), Op3->getValueAPF(), + APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), V); } } } diff --git a/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp index bf0cdbfd0c8b..953da964c435 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp b/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp new file mode 100644 index 000000000000..90ce13e6f650 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp @@ -0,0 +1,283 @@ +//===- DDG.cpp - Data Dependence Graph -------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The implementation for the data dependence graph. 
+//===----------------------------------------------------------------------===// +#include "llvm/Analysis/DDG.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +static cl::opt<bool> + CreatePiBlocks("ddg-pi-blocks", cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc("Create pi-block nodes.")); + +#define DEBUG_TYPE "ddg" + +template class llvm::DGEdge<DDGNode, DDGEdge>; +template class llvm::DGNode<DDGNode, DDGEdge>; +template class llvm::DirectedGraph<DDGNode, DDGEdge>; + +//===--------------------------------------------------------------------===// +// DDGNode implementation +//===--------------------------------------------------------------------===// +DDGNode::~DDGNode() {} + +bool DDGNode::collectInstructions( + llvm::function_ref<bool(Instruction *)> const &Pred, + InstructionListType &IList) const { + assert(IList.empty() && "Expected the IList to be empty on entry."); + if (isa<SimpleDDGNode>(this)) { + for (Instruction *I : cast<const SimpleDDGNode>(this)->getInstructions()) + if (Pred(I)) + IList.push_back(I); + } else if (isa<PiBlockDDGNode>(this)) { + for (const DDGNode *PN : cast<const PiBlockDDGNode>(this)->getNodes()) { + assert(!isa<PiBlockDDGNode>(PN) && "Nested PiBlocks are not supported."); + SmallVector<Instruction *, 8> TmpIList; + PN->collectInstructions(Pred, TmpIList); + IList.insert(IList.end(), TmpIList.begin(), TmpIList.end()); + } + } else + llvm_unreachable("unimplemented type of node"); + return !IList.empty(); +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode::NodeKind K) { + const char *Out; + switch (K) { + case DDGNode::NodeKind::SingleInstruction: + Out = "single-instruction"; + break; + case DDGNode::NodeKind::MultiInstruction: + Out = "multi-instruction"; + break; + case DDGNode::NodeKind::PiBlock: + Out = "pi-block"; + break; + case DDGNode::NodeKind::Root: + Out = "root"; + break; + case DDGNode::NodeKind::Unknown: + Out = "?? (error)"; + break; + } + OS << Out; + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode &N) { + OS << "Node Address:" << &N << ":" << N.getKind() << "\n"; + if (isa<SimpleDDGNode>(N)) { + OS << " Instructions:\n"; + for (const Instruction *I : cast<const SimpleDDGNode>(N).getInstructions()) + OS.indent(2) << *I << "\n"; + } else if (isa<PiBlockDDGNode>(&N)) { + OS << "--- start of nodes in pi-block ---\n"; + auto &Nodes = cast<const PiBlockDDGNode>(&N)->getNodes(); + unsigned Count = 0; + for (const DDGNode *N : Nodes) + OS << *N << (++Count == Nodes.size() ? "" : "\n"); + OS << "--- end of nodes in pi-block ---\n"; + } else if (!isa<RootDDGNode>(N)) + llvm_unreachable("unimplemented type of node"); + + OS << (N.getEdges().empty() ? 
" Edges:none!\n" : " Edges:\n"); + for (auto &E : N.getEdges()) + OS.indent(2) << *E; + return OS; +} + +//===--------------------------------------------------------------------===// +// SimpleDDGNode implementation +//===--------------------------------------------------------------------===// + +SimpleDDGNode::SimpleDDGNode(Instruction &I) + : DDGNode(NodeKind::SingleInstruction), InstList() { + assert(InstList.empty() && "Expected empty list."); + InstList.push_back(&I); +} + +SimpleDDGNode::SimpleDDGNode(const SimpleDDGNode &N) + : DDGNode(N), InstList(N.InstList) { + assert(((getKind() == NodeKind::SingleInstruction && InstList.size() == 1) || + (getKind() == NodeKind::MultiInstruction && InstList.size() > 1)) && + "constructing from invalid simple node."); +} + +SimpleDDGNode::SimpleDDGNode(SimpleDDGNode &&N) + : DDGNode(std::move(N)), InstList(std::move(N.InstList)) { + assert(((getKind() == NodeKind::SingleInstruction && InstList.size() == 1) || + (getKind() == NodeKind::MultiInstruction && InstList.size() > 1)) && + "constructing from invalid simple node."); +} + +SimpleDDGNode::~SimpleDDGNode() { InstList.clear(); } + +//===--------------------------------------------------------------------===// +// PiBlockDDGNode implementation +//===--------------------------------------------------------------------===// + +PiBlockDDGNode::PiBlockDDGNode(const PiNodeList &List) + : DDGNode(NodeKind::PiBlock), NodeList(List) { + assert(!NodeList.empty() && "pi-block node constructed with an empty list."); +} + +PiBlockDDGNode::PiBlockDDGNode(const PiBlockDDGNode &N) + : DDGNode(N), NodeList(N.NodeList) { + assert(getKind() == NodeKind::PiBlock && !NodeList.empty() && + "constructing from invalid pi-block node."); +} + +PiBlockDDGNode::PiBlockDDGNode(PiBlockDDGNode &&N) + : DDGNode(std::move(N)), NodeList(std::move(N.NodeList)) { + assert(getKind() == NodeKind::PiBlock && !NodeList.empty() && + "constructing from invalid pi-block node."); +} + +PiBlockDDGNode::~PiBlockDDGNode() { NodeList.clear(); } + +//===--------------------------------------------------------------------===// +// DDGEdge implementation +//===--------------------------------------------------------------------===// + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGEdge::EdgeKind K) { + const char *Out; + switch (K) { + case DDGEdge::EdgeKind::RegisterDefUse: + Out = "def-use"; + break; + case DDGEdge::EdgeKind::MemoryDependence: + Out = "memory"; + break; + case DDGEdge::EdgeKind::Rooted: + Out = "rooted"; + break; + case DDGEdge::EdgeKind::Unknown: + Out = "?? (error)"; + break; + } + OS << Out; + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGEdge &E) { + OS << "[" << E.getKind() << "] to " << &E.getTargetNode() << "\n"; + return OS; +} + +//===--------------------------------------------------------------------===// +// DataDependenceGraph implementation +//===--------------------------------------------------------------------===// +using BasicBlockListType = SmallVector<BasicBlock *, 8>; + +DataDependenceGraph::DataDependenceGraph(Function &F, DependenceInfo &D) + : DependenceGraphInfo(F.getName().str(), D) { + // Put the basic blocks in program order for correct dependence + // directions. 
+ BasicBlockListType BBList; + for (auto &SCC : make_range(scc_begin(&F), scc_end(&F))) + for (BasicBlock * BB : SCC) + BBList.push_back(BB); + std::reverse(BBList.begin(), BBList.end()); + DDGBuilder(*this, D, BBList).populate(); +} + +DataDependenceGraph::DataDependenceGraph(Loop &L, LoopInfo &LI, + DependenceInfo &D) + : DependenceGraphInfo(Twine(L.getHeader()->getParent()->getName() + "." + + L.getHeader()->getName()) + .str(), + D) { + // Put the basic blocks in program order for correct dependence + // directions. + LoopBlocksDFS DFS(&L); + DFS.perform(&LI); + BasicBlockListType BBList; + for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) + BBList.push_back(BB); + DDGBuilder(*this, D, BBList).populate(); +} + +DataDependenceGraph::~DataDependenceGraph() { + for (auto *N : Nodes) { + for (auto *E : *N) + delete E; + delete N; + } +} + +bool DataDependenceGraph::addNode(DDGNode &N) { + if (!DDGBase::addNode(N)) + return false; + + // In general, if the root node is already created and linked, it is not safe + // to add new nodes since they may be unreachable by the root. However, + // pi-block nodes need to be added after the root node is linked, and they are + // always reachable by the root, because they represent components that are + // already reachable by root. + auto *Pi = dyn_cast<PiBlockDDGNode>(&N); + assert((!Root || Pi) && + "Root node is already added. No more nodes can be added."); + + if (isa<RootDDGNode>(N)) + Root = &N; + + if (Pi) + for (DDGNode *NI : Pi->getNodes()) + PiBlockMap.insert(std::make_pair(NI, Pi)); + + return true; +} + +const PiBlockDDGNode *DataDependenceGraph::getPiBlock(const NodeType &N) const { + if (PiBlockMap.find(&N) == PiBlockMap.end()) + return nullptr; + auto *Pi = PiBlockMap.find(&N)->second; + assert(PiBlockMap.find(Pi) == PiBlockMap.end() && + "Nested pi-blocks detected."); + return Pi; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DataDependenceGraph &G) { + for (DDGNode *Node : G) + // Avoid printing nodes that are part of a pi-block twice. They will get + // printed when the pi-block is printed. + if (!G.getPiBlock(*Node)) + OS << *Node << "\n"; + OS << "\n"; + return OS; +} + +bool DDGBuilder::shouldCreatePiBlocks() const { + return CreatePiBlocks; +} + +//===--------------------------------------------------------------------===// +// DDG Analysis Passes +//===--------------------------------------------------------------------===// + +/// DDG as a loop pass. 
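A minimal sketch of how a loop pass under the new pass manager could consume this result; DDGStatsPass is hypothetical, but the analysis result is obtained exactly as DDGAnalysisPrinterPass does below:

#include "llvm/Analysis/DDG.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
using namespace llvm;

namespace {
// Hypothetical loop pass: fetch the DDG for a loop and count its pi-block
// nodes.
struct DDGStatsPass : PassInfoMixin<DDGStatsPass> {
  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
                        LoopStandardAnalysisResults &AR, LPMUpdater &) {
    const DataDependenceGraph &G = *AM.getResult<DDGAnalysis>(L, AR);
    unsigned NumPiBlocks = 0;
    for (DDGNode *N : G)
      if (isa<PiBlockDDGNode>(N))
        ++NumPiBlocks;
    errs() << "pi-blocks in '" << L.getHeader()->getName()
           << "': " << NumPiBlocks << "\n";
    return PreservedAnalyses::all();
  }
};
} // end anonymous namespace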
+DDGAnalysis::Result DDGAnalysis::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR) { + Function *F = L.getHeader()->getParent(); + DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI); + return std::make_unique<DataDependenceGraph>(L, AR.LI, DI); +} +AnalysisKey DDGAnalysis::Key; + +PreservedAnalyses DDGAnalysisPrinterPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + OS << "'DDG' for loop '" << L.getHeader()->getName() << "':\n"; + OS << *AM.getResult<DDGAnalysis>(L, AR); + return PreservedAnalyses::all(); +} diff --git a/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp index c1043e446beb..60cd1b5317d6 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp index 01b8ff10d355..aaee8c21f289 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DemandedBits.cpp @@ -40,6 +40,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp index 75f269e84f9d..9b38053c196b 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -61,6 +61,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -141,6 +142,11 @@ INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", char DependenceAnalysisWrapperPass::ID = 0; +DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass() + : FunctionPass(ID) { + initializeDependenceAnalysisWrapperPassPass(*PassRegistry::getPassRegistry()); +} + FunctionPass *llvm::createDependenceAnalysisWrapperPass() { return new DependenceAnalysisWrapperPass(); } @@ -164,25 +170,25 @@ void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredTransitive<LoopInfoWrapperPass>(); } - // Used to test the dependence analyzer. -// Looks through the function, noting loads and stores. +// Looks through the function, noting instructions that may access memory. // Calls depends() on every possible pair and prints out the result. // Ignores all other instructions. 
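// (mayReadOrWriteMemory() also matches calls, fences, and atomic or volatile
// accesses, so dependence queries are printed for those pairs too; depends()
// itself still falls back to a conservative Dependence for anything that is
// not a simple load or store.)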
static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) { auto *F = DA->getFunction(); for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; ++SrcI) { - if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { + if (SrcI->mayReadOrWriteMemory()) { for (inst_iterator DstI = SrcI, DstE = inst_end(F); DstI != DstE; ++DstI) { - if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { - OS << "da analyze - "; + if (DstI->mayReadOrWriteMemory()) { + OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n"; + OS << " da analyze - "; if (auto D = DA->depends(&*SrcI, &*DstI, true)) { D->dump(OS); for (unsigned Level = 1; Level <= D->getLevels(); Level++) { if (D->isSplitable(Level)) { - OS << "da analyze - split level = " << Level; + OS << " da analyze - split level = " << Level; OS << ", iteration = " << *DA->getSplitIteration(*D, Level); OS << "!\n"; } @@ -254,7 +260,7 @@ FullDependence::FullDependence(Instruction *Source, Instruction *Destination, LoopIndependent(PossiblyLoopIndependent) { Consistent = true; if (CommonLevels) - DV = make_unique<DVEntry[]>(CommonLevels); + DV = std::make_unique<DVEntry[]>(CommonLevels); } // The rest are simple getters that hide the implementation. @@ -876,14 +882,13 @@ void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { } } - // Examine the scev and return true iff it's linear. // Collect any loops mentioned in the set of "Loops". -bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, - SmallBitVector &Loops) { - const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); +bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest, + SmallBitVector &Loops, bool IsSrc) { + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); if (!AddRec) - return isLoopInvariant(Src, LoopNest); + return isLoopInvariant(Expr, LoopNest); const SCEV *Start = AddRec->getStart(); const SCEV *Step = AddRec->getStepRecurrence(*SE); const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); @@ -896,33 +901,25 @@ bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, } if (!isLoopInvariant(Step, LoopNest)) return false; - Loops.set(mapSrcLoop(AddRec->getLoop())); - return checkSrcSubscript(Start, LoopNest, Loops); + if (IsSrc) + Loops.set(mapSrcLoop(AddRec->getLoop())); + else + Loops.set(mapDstLoop(AddRec->getLoop())); + return checkSubscript(Start, LoopNest, Loops, IsSrc); } - +// Examine the scev and return true iff it's linear. +// Collect any loops mentioned in the set of "Loops". +bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, + SmallBitVector &Loops) { + return checkSubscript(Src, LoopNest, Loops, true); +} // Examine the scev and return true iff it's linear. // Collect any loops mentioned in the set of "Loops". 
bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, SmallBitVector &Loops) { - const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); - if (!AddRec) - return isLoopInvariant(Dst, LoopNest); - const SCEV *Start = AddRec->getStart(); - const SCEV *Step = AddRec->getStepRecurrence(*SE); - const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); - if (!isa<SCEVCouldNotCompute>(UB)) { - if (SE->getTypeSizeInBits(Start->getType()) < - SE->getTypeSizeInBits(UB->getType())) { - if (!AddRec->getNoWrapFlags()) - return false; - } - } - if (!isLoopInvariant(Step, LoopNest)) - return false; - Loops.set(mapDstLoop(AddRec->getLoop())); - return checkDstSubscript(Start, LoopNest, Loops); + return checkSubscript(Dst, LoopNest, Loops, false); } @@ -3407,15 +3404,14 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, if (Src == Dst) PossiblyLoopIndependent = false; - if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || - (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) + if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory())) // if both instructions don't reference memory, there's no dependence return nullptr; if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { // can only analyze simple loads and stores, i.e., no calls, invokes, etc. LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n"); - return make_unique<Dependence>(Src, Dst); + return std::make_unique<Dependence>(Src, Dst); } assert(isLoadOrStore(Src) && "instruction is not load or store"); @@ -3430,7 +3426,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, case PartialAlias: // cannot analyse objects if we don't understand their aliasing. LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); - return make_unique<Dependence>(Src, Dst); + return std::make_unique<Dependence>(Src, Dst); case NoAlias: // If the objects noalias, they are distinct, accesses are independent. LLVM_DEBUG(dbgs() << "no alias\n"); @@ -3777,11 +3773,9 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, return nullptr; } - return make_unique<FullDependence>(std::move(Result)); + return std::make_unique<FullDependence>(std::move(Result)); } - - //===----------------------------------------------------------------------===// // getSplitIteration - // Rather than spend rarely-used space recording the splitting iteration diff --git a/contrib/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp b/contrib/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp new file mode 100644 index 000000000000..e8a1a2fff919 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp @@ -0,0 +1,407 @@ +//===- DependenceGraphBuilder.cpp ------------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file implements common steps of the build algorithm for construction +// of dependence graphs such as DDG and PDG. 
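+// The builder runs a sequence of phases, implemented below: assign
+// per-instruction ordinals, create fine-grained nodes, add def-use and memory
+// dependence edges, create and connect a root node, optionally fold each
+// non-trivial SCC into a pi-block, and finally sort the nodes topologically.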
+//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DependenceGraphBuilder.h" +#include "llvm/ADT/EnumeratedArray.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DDG.h" + +using namespace llvm; + +#define DEBUG_TYPE "dgb" + +STATISTIC(TotalGraphs, "Number of dependence graphs created."); +STATISTIC(TotalDefUseEdges, "Number of def-use edges created."); +STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created."); +STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created."); +STATISTIC(TotalPiBlockNodes, "Number of pi-block nodes created."); +STATISTIC(TotalConfusedEdges, + "Number of confused memory dependencies between two nodes."); +STATISTIC(TotalEdgeReversals, + "Number of times the source and sink of dependence was reversed to " + "expose cycles in the graph."); + +using InstructionListType = SmallVector<Instruction *, 2>; + +//===--------------------------------------------------------------------===// +// AbstractDependenceGraphBuilder implementation +//===--------------------------------------------------------------------===// + +template <class G> +void AbstractDependenceGraphBuilder<G>::computeInstructionOrdinals() { + // The BBList is expected to be in program order. + size_t NextOrdinal = 1; + for (auto *BB : BBList) + for (auto &I : *BB) + InstOrdinalMap.insert(std::make_pair(&I, NextOrdinal++)); +} + +template <class G> +void AbstractDependenceGraphBuilder<G>::createFineGrainedNodes() { + ++TotalGraphs; + assert(IMap.empty() && "Expected empty instruction map at start"); + for (BasicBlock *BB : BBList) + for (Instruction &I : *BB) { + auto &NewNode = createFineGrainedNode(I); + IMap.insert(std::make_pair(&I, &NewNode)); + NodeOrdinalMap.insert(std::make_pair(&NewNode, getOrdinal(I))); + ++TotalFineGrainedNodes; + } +} + +template <class G> +void AbstractDependenceGraphBuilder<G>::createAndConnectRootNode() { + // Create a root node that connects to every connected component of the graph. + // This is done to allow graph iterators to visit all the disjoint components + // of the graph, in a single walk. + // + // This algorithm works by going through each node of the graph and for each + // node N, do a DFS starting from N. A rooted edge is established between the + // root node and N (if N is not yet visited). All the nodes reachable from N + // are marked as visited and are skipped in the DFS of subsequent nodes. + // + // Note: This algorithm tries to limit the number of edges out of the root + // node to some extent, but there may be redundant edges created depending on + // the iteration order. For example for a graph {A -> B}, an edge from the + // root node is added to both nodes if B is visited before A. While it does + // not result in minimal number of edges, this approach saves compile-time + // while keeping the number of edges in check. + auto &RootNode = createRootNode(); + df_iterator_default_set<const NodeType *, 4> Visited; + for (auto *N : Graph) { + if (*N == RootNode) + continue; + for (auto I : depth_first_ext(N, Visited)) + if (I == N) + createRootedEdge(RootNode, *N); + } +} + +template <class G> void AbstractDependenceGraphBuilder<G>::createPiBlocks() { + if (!shouldCreatePiBlocks()) + return; + + LLVM_DEBUG(dbgs() << "==== Start of Creation of Pi-Blocks ===\n"); + + // The overall algorithm is as follows: + // 1. Identify SCCs and for each SCC create a pi-block node containing all + // the nodes in that SCC. + // 2. 
Identify incoming edges incident to the nodes inside of the SCC and + // reconnect them to the pi-block node. + // 3. Identify outgoing edges from the nodes inside of the SCC to nodes + // outside of it and reconnect them so that the edges are coming out of the + // SCC node instead. + + // Adding nodes as we iterate through the SCCs cause the SCC + // iterators to get invalidated. To prevent this invalidation, we first + // collect a list of nodes that are part of an SCC, and then iterate over + // those lists to create the pi-block nodes. Each element of the list is a + // list of nodes in an SCC. Note: trivial SCCs containing a single node are + // ignored. + SmallVector<NodeListType, 4> ListOfSCCs; + for (auto &SCC : make_range(scc_begin(&Graph), scc_end(&Graph))) { + if (SCC.size() > 1) + ListOfSCCs.emplace_back(SCC.begin(), SCC.end()); + } + + for (NodeListType &NL : ListOfSCCs) { + LLVM_DEBUG(dbgs() << "Creating pi-block node with " << NL.size() + << " nodes in it.\n"); + + // SCC iterator may put the nodes in an order that's different from the + // program order. To preserve original program order, we sort the list of + // nodes based on ordinal numbers computed earlier. + llvm::sort(NL, [&](NodeType *LHS, NodeType *RHS) { + return getOrdinal(*LHS) < getOrdinal(*RHS); + }); + + NodeType &PiNode = createPiBlock(NL); + ++TotalPiBlockNodes; + + // Build a set to speed up the lookup for edges whose targets + // are inside the SCC. + SmallPtrSet<NodeType *, 4> NodesInSCC(NL.begin(), NL.end()); + + // We have the set of nodes in the SCC. We go through the set of nodes + // that are outside of the SCC and look for edges that cross the two sets. + for (NodeType *N : Graph) { + + // Skip the SCC node and all the nodes inside of it. + if (*N == PiNode || NodesInSCC.count(N)) + continue; + + for (NodeType *SCCNode : NL) { + + enum Direction { + Incoming, // Incoming edges to the SCC + Outgoing, // Edges going ot of the SCC + DirectionCount // To make the enum usable as an array index. + }; + + // Use these flags to help us avoid creating redundant edges. If there + // are more than one edges from an outside node to inside nodes, we only + // keep one edge from that node to the pi-block node. Similarly, if + // there are more than one edges from inside nodes to an outside node, + // we only keep one edge from the pi-block node to the outside node. + // There is a flag defined for each direction (incoming vs outgoing) and + // for each type of edge supported, using a two-dimensional boolean + // array. + using EdgeKind = typename EdgeType::EdgeKind; + EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{ + false, false}; + + auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst, + const EdgeKind K) { + switch (K) { + case EdgeKind::RegisterDefUse: + createDefUseEdge(Src, Dst); + break; + case EdgeKind::MemoryDependence: + createMemoryEdge(Src, Dst); + break; + case EdgeKind::Rooted: + createRootedEdge(Src, Dst); + break; + default: + llvm_unreachable("Unsupported type of edge."); + } + }; + + auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New, + const Direction Dir) { + if (!Src->hasEdgeTo(*Dst)) + return; + LLVM_DEBUG(dbgs() + << "reconnecting(" + << (Dir == Direction::Incoming ? 
"incoming)" : "outgoing)") + << ":\nSrc:" << *Src << "\nDst:" << *Dst + << "\nNew:" << *New << "\n"); + assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) && + "Invalid direction."); + + SmallVector<EdgeType *, 10> EL; + Src->findEdgesTo(*Dst, EL); + for (EdgeType *OldEdge : EL) { + EdgeKind Kind = OldEdge->getKind(); + if (!EdgeAlreadyCreated[Dir][Kind]) { + if (Dir == Direction::Incoming) { + createEdgeOfKind(*Src, *New, Kind); + LLVM_DEBUG(dbgs() << "created edge from Src to New.\n"); + } else if (Dir == Direction::Outgoing) { + createEdgeOfKind(*New, *Dst, Kind); + LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n"); + } + EdgeAlreadyCreated[Dir][Kind] = true; + } + Src->removeEdge(*OldEdge); + destroyEdge(*OldEdge); + LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n"); + } + }; + + // Process incoming edges incident to the pi-block node. + reconnectEdges(N, SCCNode, &PiNode, Direction::Incoming); + + // Process edges that are coming out of the pi-block node. + reconnectEdges(SCCNode, N, &PiNode, Direction::Outgoing); + } + } + } + + // Ordinal maps are no longer needed. + InstOrdinalMap.clear(); + NodeOrdinalMap.clear(); + + LLVM_DEBUG(dbgs() << "==== End of Creation of Pi-Blocks ===\n"); +} + +template <class G> void AbstractDependenceGraphBuilder<G>::createDefUseEdges() { + for (NodeType *N : Graph) { + InstructionListType SrcIList; + N->collectInstructions([](const Instruction *I) { return true; }, SrcIList); + + // Use a set to mark the targets that we link to N, so we don't add + // duplicate def-use edges when more than one instruction in a target node + // use results of instructions that are contained in N. + SmallPtrSet<NodeType *, 4> VisitedTargets; + + for (Instruction *II : SrcIList) { + for (User *U : II->users()) { + Instruction *UI = dyn_cast<Instruction>(U); + if (!UI) + continue; + NodeType *DstNode = nullptr; + if (IMap.find(UI) != IMap.end()) + DstNode = IMap.find(UI)->second; + + // In the case of loops, the scope of the subgraph is all the + // basic blocks (and instructions within them) belonging to the loop. We + // simply ignore all the edges coming from (or going into) instructions + // or basic blocks outside of this range. + if (!DstNode) { + LLVM_DEBUG( + dbgs() + << "skipped def-use edge since the sink" << *UI + << " is outside the range of instructions being considered.\n"); + continue; + } + + // Self dependencies are ignored because they are redundant and + // uninteresting. 
+ if (DstNode == N) { + LLVM_DEBUG(dbgs() + << "skipped def-use edge since the sink and the source (" + << N << ") are the same.\n"); + continue; + } + + if (VisitedTargets.insert(DstNode).second) { + createDefUseEdge(*N, *DstNode); + ++TotalDefUseEdges; + } + } + } + } +} + +template <class G> +void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() { + using DGIterator = typename G::iterator; + auto isMemoryAccess = [](const Instruction *I) { + return I->mayReadOrWriteMemory(); + }; + for (DGIterator SrcIt = Graph.begin(), E = Graph.end(); SrcIt != E; ++SrcIt) { + InstructionListType SrcIList; + (*SrcIt)->collectInstructions(isMemoryAccess, SrcIList); + if (SrcIList.empty()) + continue; + + for (DGIterator DstIt = SrcIt; DstIt != E; ++DstIt) { + if (**SrcIt == **DstIt) + continue; + InstructionListType DstIList; + (*DstIt)->collectInstructions(isMemoryAccess, DstIList); + if (DstIList.empty()) + continue; + bool ForwardEdgeCreated = false; + bool BackwardEdgeCreated = false; + for (Instruction *ISrc : SrcIList) { + for (Instruction *IDst : DstIList) { + auto D = DI.depends(ISrc, IDst, true); + if (!D) + continue; + + // If we have a dependence with its left-most non-'=' direction + // being '>' we need to reverse the direction of the edge, because + // the source of the dependence cannot occur after the sink. For + // confused dependencies, we will create edges in both directions to + // represent the possibility of a cycle. + + auto createConfusedEdges = [&](NodeType &Src, NodeType &Dst) { + if (!ForwardEdgeCreated) { + createMemoryEdge(Src, Dst); + ++TotalMemoryEdges; + } + if (!BackwardEdgeCreated) { + createMemoryEdge(Dst, Src); + ++TotalMemoryEdges; + } + ForwardEdgeCreated = BackwardEdgeCreated = true; + ++TotalConfusedEdges; + }; + + auto createForwardEdge = [&](NodeType &Src, NodeType &Dst) { + if (!ForwardEdgeCreated) { + createMemoryEdge(Src, Dst); + ++TotalMemoryEdges; + } + ForwardEdgeCreated = true; + }; + + auto createBackwardEdge = [&](NodeType &Src, NodeType &Dst) { + if (!BackwardEdgeCreated) { + createMemoryEdge(Dst, Src); + ++TotalMemoryEdges; + } + BackwardEdgeCreated = true; + }; + + if (D->isConfused()) + createConfusedEdges(**SrcIt, **DstIt); + else if (D->isOrdered() && !D->isLoopIndependent()) { + bool ReversedEdge = false; + for (unsigned Level = 1; Level <= D->getLevels(); ++Level) { + if (D->getDirection(Level) == Dependence::DVEntry::EQ) + continue; + else if (D->getDirection(Level) == Dependence::DVEntry::GT) { + createBackwardEdge(**SrcIt, **DstIt); + ReversedEdge = true; + ++TotalEdgeReversals; + break; + } else if (D->getDirection(Level) == Dependence::DVEntry::LT) + break; + else { + createConfusedEdges(**SrcIt, **DstIt); + break; + } + } + if (!ReversedEdge) + createForwardEdge(**SrcIt, **DstIt); + } else + createForwardEdge(**SrcIt, **DstIt); + + // Avoid creating duplicate edges. + if (ForwardEdgeCreated && BackwardEdgeCreated) + break; + } + + // If we've created edges in both directions, there is no more + // unique edge that we can create between these two nodes, so we + // can exit early. + if (ForwardEdgeCreated && BackwardEdgeCreated) + break; + } + } + } +} + +template <class G> +void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() { + + // If we don't create pi-blocks, then we may not have a DAG. 
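+ // With pi-blocks enabled, every non-trivial SCC is hidden behind a single
+ // pi-block node, so the traversal below sees an acyclic graph and the
+ // reverse post-order built from it is a valid topological order.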
+ if (!shouldCreatePiBlocks()) + return; + + SmallVector<NodeType *, 64> NodesInPO; + using NodeKind = typename NodeType::NodeKind; + for (NodeType *N : post_order(&Graph)) { + if (N->getKind() == NodeKind::PiBlock) { + // Put members of the pi-block right after the pi-block itself, for + // convenience. + const NodeListType &PiBlockMembers = getNodesInPiBlock(*N); + NodesInPO.insert(NodesInPO.end(), PiBlockMembers.begin(), + PiBlockMembers.end()); + } + NodesInPO.push_back(N); + } + + size_t OldSize = Graph.Nodes.size(); + Graph.Nodes.clear(); + for (NodeType *N : reverse(NodesInPO)) + Graph.Nodes.push_back(N); + if (Graph.Nodes.size() != OldSize) + assert(false && + "Expected the number of nodes to stay the same after the sort"); +} + +template class llvm::AbstractDependenceGraphBuilder<DataDependenceGraph>; +template class llvm::DependenceGraphInfo<DDGNode>; diff --git a/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp index 0ccd59ef2bfd..3d1be1e1cce0 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -412,6 +412,12 @@ bool DivergenceAnalysis::isDivergent(const Value &V) const { return DivergentValues.find(&V) != DivergentValues.end(); } +bool DivergenceAnalysis::isDivergentUse(const Use &U) const { + Value &V = *U.get(); + Instruction &I = *cast<Instruction>(U.getUser()); + return isDivergent(V) || isTemporalDivergent(*I.getParent(), V); +} + void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const { if (DivergentValues.empty()) return; @@ -449,6 +455,10 @@ bool GPUDivergenceAnalysis::isDivergent(const Value &val) const { return DA.isDivergent(val); } +bool GPUDivergenceAnalysis::isDivergentUse(const Use &use) const { + return DA.isDivergentUse(use); +} + void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const { OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n"; DA.print(OS, mod); diff --git a/contrib/llvm-project/llvm/lib/Analysis/DomPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/DomPrinter.cpp index d9f43dd746ef..024a0fb49950 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DomPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DomPrinter.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/DomPrinter.h" #include "llvm/Analysis/DOTGraphTraitsPass.h" #include "llvm/Analysis/PostDominators.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp b/contrib/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp index 49215889cfd6..b374334ea371 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DomTreeUpdater.cpp @@ -233,7 +233,7 @@ void DomTreeUpdater::applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates) { return; if (Strategy == UpdateStrategy::Lazy) { - for (const auto U : Updates) + for (const auto &U : Updates) if (!isSelfDominance(U)) PendUpdates.push_back(U); @@ -253,7 +253,7 @@ void DomTreeUpdater::applyUpdatesPermissive( SmallSet<std::pair<BasicBlock *, BasicBlock *>, 8> Seen; SmallVector<DominatorTree::UpdateType, 8> DeduplicatedUpdates; - for (const auto U : Updates) { + for (const auto &U : Updates) { auto Edge = std::make_pair(U.getFrom(), U.getTo()); // Because it is illegal to submit updates that have already been applied // and updates to an edge need to be strictly ordered, diff --git 
a/contrib/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp index f9a554acb7ea..14e6965f1259 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DominanceFrontier.cpp @@ -12,6 +12,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp index 0d6c0ffb18a8..4361e0dc9bbd 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -286,7 +287,7 @@ GlobalsAAResult::getFunctionInfo(const Function *F) { void GlobalsAAResult::AnalyzeGlobals(Module &M) { SmallPtrSet<Function *, 32> TrackedFunctions; for (Function &F : M) - if (F.hasLocalLinkage()) + if (F.hasLocalLinkage()) { if (!AnalyzeUsesOfPointer(&F)) { // Remember that we are tracking this global. NonAddressTakenGlobals.insert(&F); @@ -294,7 +295,9 @@ void GlobalsAAResult::AnalyzeGlobals(Module &M) { Handles.emplace_front(*this, &F); Handles.front().I = Handles.begin(); ++NumNonAddrTakenFunctions; - } + } else + UnknownFunctionsWithLocalLinkage = true; + } SmallPtrSet<Function *, 16> Readers, Writers; for (GlobalVariable &GV : M.globals()) @@ -370,7 +373,8 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V, // passing into the function. if (Call->isDataOperand(&U)) { // Detect calls to free. - if (Call->isArgOperand(&U) && isFreeCall(I, &TLI)) { + if (Call->isArgOperand(&U) && + isFreeCall(I, &GetTLI(*Call->getFunction()))) { if (Writers) Writers->insert(Call->getParent()->getParent()); } else { @@ -432,7 +436,7 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) { Value *Ptr = GetUnderlyingObject(SI->getOperand(0), GV->getParent()->getDataLayout()); - if (!isAllocLikeFn(Ptr, &TLI)) + if (!isAllocLikeFn(Ptr, &GetTLI(*SI->getFunction()))) return false; // Too hard to analyze. // Analyze all uses of the allocation. If any of them are used in a @@ -525,9 +529,12 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { FI.setMayReadAnyGlobal(); } else { FI.addModRefInfo(ModRefInfo::ModRef); - // Can't say anything useful unless it's an intrinsic - they don't - // read or write global variables of the kind considered here. - KnowNothing = !F->isIntrinsic(); + if (!F->onlyAccessesArgMemory()) + FI.setMayReadAnyGlobal(); + if (!F->isIntrinsic()) { + KnowNothing = true; + break; + } } continue; } @@ -576,6 +583,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { // We handle calls specially because the graph-relevant aspects are // handled above. if (auto *Call = dyn_cast<CallBase>(&I)) { + auto &TLI = GetTLI(*Node->getFunction()); if (isAllocationFn(Call, &TLI) || isFreeCall(Call, &TLI)) { // FIXME: It is completely unclear why this is necessary and not // handled by the above graph code. @@ -925,7 +933,9 @@ ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call, // global we are tracking, return information if we have it. 
if (const GlobalValue *GV = dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL))) - if (GV->hasLocalLinkage()) + // If GV is internal to this IR and there is no function with local linkage + // that has had their address taken, keep looking for a tighter ModRefInfo. + if (GV->hasLocalLinkage() && !UnknownFunctionsWithLocalLinkage) if (const Function *F = Call->getCalledFunction()) if (NonAddressTakenGlobals.count(GV)) if (const FunctionInfo *FI = getFunctionInfo(F)) @@ -937,12 +947,13 @@ ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call, return intersectModRef(Known, AAResultBase::getModRefInfo(Call, Loc, AAQI)); } -GlobalsAAResult::GlobalsAAResult(const DataLayout &DL, - const TargetLibraryInfo &TLI) - : AAResultBase(), DL(DL), TLI(TLI) {} +GlobalsAAResult::GlobalsAAResult( + const DataLayout &DL, + std::function<const TargetLibraryInfo &(Function &F)> GetTLI) + : AAResultBase(), DL(DL), GetTLI(std::move(GetTLI)) {} GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg) - : AAResultBase(std::move(Arg)), DL(Arg.DL), TLI(Arg.TLI), + : AAResultBase(std::move(Arg)), DL(Arg.DL), GetTLI(std::move(Arg.GetTLI)), NonAddressTakenGlobals(std::move(Arg.NonAddressTakenGlobals)), IndirectGlobals(std::move(Arg.IndirectGlobals)), AllocsForIndirectGlobals(std::move(Arg.AllocsForIndirectGlobals)), @@ -957,10 +968,10 @@ GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg) GlobalsAAResult::~GlobalsAAResult() {} -/*static*/ GlobalsAAResult -GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI, - CallGraph &CG) { - GlobalsAAResult Result(M.getDataLayout(), TLI); +/*static*/ GlobalsAAResult GlobalsAAResult::analyzeModule( + Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI, + CallGraph &CG) { + GlobalsAAResult Result(M.getDataLayout(), GetTLI); // Discover which functions aren't recursive, to feed into AnalyzeGlobals. 
Result.CollectSCCMembership(CG); @@ -977,8 +988,12 @@ GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI, AnalysisKey GlobalsAA::Key; GlobalsAAResult GlobalsAA::run(Module &M, ModuleAnalysisManager &AM) { - return GlobalsAAResult::analyzeModule(M, - AM.getResult<TargetLibraryAnalysis>(M), + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult<TargetLibraryAnalysis>(F); + }; + return GlobalsAAResult::analyzeModule(M, GetTLI, AM.getResult<CallGraphAnalysis>(M)); } @@ -999,9 +1014,11 @@ GlobalsAAWrapperPass::GlobalsAAWrapperPass() : ModulePass(ID) { } bool GlobalsAAWrapperPass::runOnModule(Module &M) { + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + }; Result.reset(new GlobalsAAResult(GlobalsAAResult::analyzeModule( - M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), - getAnalysis<CallGraphWrapperPass>().getCallGraph()))); + M, GetTLI, getAnalysis<CallGraphWrapperPass>().getCallGraph()))); return false; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/GuardUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/GuardUtils.cpp index cad92f6e56bb..d48283279858 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/GuardUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/GuardUtils.cpp @@ -13,19 +13,25 @@ #include "llvm/IR/PatternMatch.h" using namespace llvm; +using namespace llvm::PatternMatch; bool llvm::isGuard(const User *U) { - using namespace llvm::PatternMatch; return match(U, m_Intrinsic<Intrinsic::experimental_guard>()); } +bool llvm::isWidenableBranch(const User *U) { + Value *Condition, *WidenableCondition; + BasicBlock *GuardedBB, *DeoptBB; + return parseWidenableBranch(U, Condition, WidenableCondition, GuardedBB, + DeoptBB); +} + bool llvm::isGuardAsWidenableBranch(const User *U) { Value *Condition, *WidenableCondition; BasicBlock *GuardedBB, *DeoptBB; if (!parseWidenableBranch(U, Condition, WidenableCondition, GuardedBB, DeoptBB)) return false; - using namespace llvm::PatternMatch; for (auto &Insn : *DeoptBB) { if (match(&Insn, m_Intrinsic<Intrinsic::experimental_deoptimize>())) return true; @@ -38,12 +44,63 @@ bool llvm::isGuardAsWidenableBranch(const User *U) { bool llvm::parseWidenableBranch(const User *U, Value *&Condition, Value *&WidenableCondition, BasicBlock *&IfTrueBB, BasicBlock *&IfFalseBB) { - using namespace llvm::PatternMatch; - if (!match(U, m_Br(m_And(m_Value(Condition), m_Value(WidenableCondition)), - IfTrueBB, IfFalseBB))) + + Use *C, *WC; + if (parseWidenableBranch(const_cast<User*>(U), C, WC, IfTrueBB, IfFalseBB)) { + if (C) + Condition = C->get(); + else + Condition = ConstantInt::getTrue(IfTrueBB->getContext()); + WidenableCondition = WC->get(); + return true; + } + return false; +} + +bool llvm::parseWidenableBranch(User *U, Use *&C,Use *&WC, + BasicBlock *&IfTrueBB, BasicBlock *&IfFalseBB) { + + auto *BI = dyn_cast<BranchInst>(U); + if (!BI || !BI->isConditional()) + return false; + auto *Cond = BI->getCondition(); + if (!Cond->hasOneUse()) + return false; + + IfTrueBB = BI->getSuccessor(0); + IfFalseBB = BI->getSuccessor(1); + + if (match(Cond, m_Intrinsic<Intrinsic::experimental_widenable_condition>())) { + WC = &BI->getOperandUse(0); + C = nullptr; + return true; + } + + // Check for two cases: + // 1) br (i1 (and A, WC())), label %IfTrue, label %IfFalse + // 2) br (i1 (and WC(), B)), label %IfTrue, label %IfFalse + 
// We do not check for more generalized and trees as we should canonicalize + // to the form above in instcombine. (TODO) + Value *A, *B; + if (!match(Cond, m_And(m_Value(A), m_Value(B)))) return false; - // TODO: At the moment, we only recognize the branch if the WC call in this - // specific position. We should generalize! - return match(WidenableCondition, - m_Intrinsic<Intrinsic::experimental_widenable_condition>()); + auto *And = dyn_cast<Instruction>(Cond); + if (!And) + // Could be a constexpr + return false; + + if (match(A, m_Intrinsic<Intrinsic::experimental_widenable_condition>()) && + A->hasOneUse()) { + WC = &And->getOperandUse(0); + C = &And->getOperandUse(1); + return true; + } + + if (match(B, m_Intrinsic<Intrinsic::experimental_widenable_condition>()) && + B->hasOneUse()) { + WC = &And->getOperandUse(1); + C = &And->getOperandUse(0); + return true; + } + return false; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp index ce285f82f720..ac81cba836f8 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -300,7 +299,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); if (!ReduxDesc.isRecurrence()) return false; - if (isa<FPMathOperator>(ReduxDesc.getPatternInst())) + // FIXME: FMF is allowed on phi, but propagation is not handled correctly. + if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi) FMF &= ReduxDesc.getPatternInst()->getFastMathFlags(); } @@ -698,25 +698,48 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence( // Ensure every user of the phi node is dominated by the previous value. // The dominance requirement ensures the loop vectorizer will not need to // vectorize the initial value prior to the first iteration of the loop. - // TODO: Consider extending this sinking to handle other kinds of instructions - // and expressions, beyond sinking a single cast past Previous. + // TODO: Consider extending this sinking to handle memory instructions and + // phis with multiple users. + + // Returns true, if all users of I are dominated by DominatedBy. + auto allUsesDominatedBy = [DT](Instruction *I, Instruction *DominatedBy) { + return all_of(I->uses(), [DT, DominatedBy](Use &U) { + return DT->dominates(DominatedBy, U); + }); + }; + if (Phi->hasOneUse()) { - auto *I = Phi->user_back(); - if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() && - DT->dominates(Previous, I->user_back())) { - if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking. - SinkAfter[I] = Previous; + Instruction *I = Phi->user_back(); + + // If the user of the PHI is also the incoming value, we potentially have a + // reduction and which cannot be handled by sinking. + if (Previous == I) + return false; + + // We cannot sink terminator instructions. + if (I->getParent()->getTerminator() == I) + return false; + + // Do not try to sink an instruction multiple times (if multiple operands + // are first order recurrences). 
+ // TODO: We can support this case, by sinking the instruction after the + // 'deepest' previous instruction. + if (SinkAfter.find(I) != SinkAfter.end()) + return false; + + if (DT->dominates(Previous, I)) // We already are good w/o sinking. return true; - } - } - for (User *U : Phi->users()) - if (auto *I = dyn_cast<Instruction>(U)) { - if (!DT->dominates(Previous, I)) - return false; + // We can sink any instruction without side effects, as long as all users + // are dominated by the instruction we are sinking after. + if (I->getParent() == Phi->getParent() && !I->mayHaveSideEffects() && + allUsesDominatedBy(I, Previous)) { + SinkAfter[I] = Previous; + return true; } + } - return true; + return allUsesDominatedBy(Phi, Previous); } /// This function returns the identity element (or neutral element) for diff --git a/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp index 681a0cf7e981..9432696b5a26 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> diff --git a/contrib/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp index 6ff840efcb64..dc4cbc371ef4 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -21,6 +21,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include <string> #include <utility> @@ -53,7 +54,7 @@ static cl::opt<unsigned> "call callsite")); ICallPromotionAnalysis::ICallPromotionAnalysis() { - ValueDataArray = llvm::make_unique<InstrProfValueData[]>(MaxNumPromotions); + ValueDataArray = std::make_unique<InstrProfValueData[]>(MaxNumPromotions); } bool ICallPromotionAnalysis::isPromotionProfitable(uint64_t Count, diff --git a/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp index 0dec146e0465..de83a48aad16 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp @@ -18,9 +18,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" @@ -36,6 +36,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -50,7 +51,7 @@ static cl::opt<int> InlineThreshold( cl::desc("Control the amount of inlining to perform (default = 225)")); static cl::opt<int> HintThreshold( - "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore, + "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore, cl::desc("Threshold for inlining functions with inline hint")); static 
cl::opt<int> @@ -62,7 +63,7 @@ static cl::opt<int> // PGO before we actually hook up inliner with analysis passes such as BPI and // BFI. static cl::opt<int> ColdThreshold( - "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore, + "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore, cl::desc("Threshold for inlining functions with cold attribute")); static cl::opt<int> @@ -92,11 +93,13 @@ static cl::opt<bool> OptComputeFullInlineCost( "exceeds the threshold.")); namespace { - +class InlineCostCallAnalyzer; class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { typedef InstVisitor<CallAnalyzer, bool> Base; friend class InstVisitor<CallAnalyzer, bool>; +protected: + virtual ~CallAnalyzer() {} /// The TargetTransformInfo available for this compilation. const TargetTransformInfo &TTI; @@ -123,20 +126,86 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// easily cacheable. Instead, use the cover function paramHasAttr. CallBase &CandidateCall; - /// Tunable parameters that control the analysis. - const InlineParams &Params; + /// Extension points for handling callsite features. + /// Called after a basic block was analyzed. + virtual void onBlockAnalyzed(const BasicBlock *BB) {} - /// Upper bound for the inlining cost. Bonuses are being applied to account - /// for speculative "expected profit" of the inlining decision. - int Threshold; + /// Called at the end of the analysis of the callsite. Return the outcome of + /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or + /// the reason it can't. + virtual InlineResult finalizeAnalysis() { return true; } - /// Inlining cost measured in abstract units, accounts for all the - /// instructions expected to be executed for a given function invocation. - /// Instructions that are statically proven to be dead based on call-site - /// arguments are not counted here. - int Cost = 0; + /// Called when we're about to start processing a basic block, and every time + /// we are done processing an instruction. Return true if there is no point in + /// continuing the analysis (e.g. we've determined already the call site is + /// too expensive to inline) + virtual bool shouldStop() { return false; } + + /// Called before the analysis of the callee body starts (with callsite + /// contexts propagated). It checks callsite-specific information. Return a + /// reason analysis can't continue if that's the case, or 'true' if it may + /// continue. + virtual InlineResult onAnalysisStart() { return true; } + + /// Called if the analysis engine decides SROA cannot be done for the given + /// alloca. + virtual void onDisableSROA(AllocaInst *Arg) {} + + /// Called the analysis engine determines load elimination won't happen. + virtual void onDisableLoadElimination() {} + + /// Called to account for a call. + virtual void onCallPenalty() {} + + /// Called to account for the expectation the inlining would result in a load + /// elimination. + virtual void onLoadEliminationOpportunity() {} - bool ComputeFullInlineCost; + /// Called to account for the cost of argument setup for the Call in the + /// callee's body (not the callsite currently under analysis). + virtual void onCallArgumentSetup(const CallBase &Call) {} + + /// Called to account for a load relative intrinsic. + virtual void onLoadRelativeIntrinsic() {} + + /// Called to account for a lowered call. + virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) { + } + + /// Account for a jump table of given size. 
Return false to stop further + /// processing the switch instruction + virtual bool onJumpTable(unsigned JumpTableSize) { return true; } + + /// Account for a case cluster of given size. Return false to stop further + /// processing of the instruction. + virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; } + + /// Called at the end of processing a switch instruction, with the given + /// number of case clusters. + virtual void onFinalizeSwitch(unsigned JumpTableSize, + unsigned NumCaseCluster) {} + + /// Called to account for any other instruction not specifically accounted + /// for. + virtual void onCommonInstructionSimplification() {} + + /// Start accounting potential benefits due to SROA for the given alloca. + virtual void onInitializeSROAArg(AllocaInst *Arg) {} + + /// Account SROA savings for the AllocaInst value. + virtual void onAggregateSROAUse(AllocaInst *V) {} + + bool handleSROA(Value *V, bool DoNotDisable) { + // Check for SROA candidates in comparisons. + if (auto *SROAArg = getSROAArgForValueOrNull(V)) { + if (DoNotDisable) { + onAggregateSROAUse(SROAArg); + return true; + } + disableSROAForArg(SROAArg); + } + return false; + } bool IsCallerRecursive = false; bool IsRecursiveCall = false; @@ -153,12 +222,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { unsigned NumInstructions = 0; unsigned NumVectorInstructions = 0; - /// Bonus to be applied when percentage of vector instructions in callee is - /// high (see more details in updateThreshold). - int VectorBonus = 0; - /// Bonus to be applied when the callee has only one reachable basic block. - int SingleBBBonus = 0; - /// While we walk the potentially-inlined instructions, we build up and /// maintain a mapping of simplified values specific to this callsite. The /// idea is to propagate any special information we have about arguments to @@ -170,12 +233,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// Keep track of the values which map back (through function arguments) to /// allocas on the caller stack which could be simplified through SROA. - DenseMap<Value *, Value *> SROAArgValues; + DenseMap<Value *, AllocaInst *> SROAArgValues; - /// The mapping of caller Alloca values to their accumulated cost savings. If - /// we have to disable SROA for one of the allocas, this tells us how much - /// cost must be added. - DenseMap<Value *, int> SROAArgCosts; + /// Keep track of Allocas for which we believe we may get SROA optimization. + /// We don't delete entries in SROAArgValue because we still want + /// isAllocaDerivedArg to function correctly. + DenseSet<AllocaInst *> EnabledSROAArgValues; /// Keep track of values which map to a pointer base and constant offset. DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs; @@ -192,17 +255,20 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// loads. bool EnableLoadElimination; SmallPtrSet<Value *, 16> LoadAddrSet; - int LoadEliminationCost = 0; + + AllocaInst *getSROAArgForValueOrNull(Value *V) const { + auto It = SROAArgValues.find(V); + if (It == SROAArgValues.end() || + EnabledSROAArgValues.count(It->second) == 0) + return nullptr; + return It->second; + } // Custom simplification helper routines. 
bool isAllocaDerivedArg(Value *V); - bool lookupSROAArgAndCost(Value *V, Value *&Arg, - DenseMap<Value *, int>::iterator &CostIt); - void disableSROA(DenseMap<Value *, int>::iterator CostIt); + void disableSROAForArg(AllocaInst *SROAArg); void disableSROA(Value *V); void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB); - void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, - int InstructionCost); void disableLoadElimination(); bool isGEPFree(GetElementPtrInst &GEP); bool canFoldInboundsGEP(GetElementPtrInst &I); @@ -223,32 +289,13 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// inlined through this particular callsite. bool isKnownNonNullInCallee(Value *V); - /// Update Threshold based on callsite properties such as callee - /// attributes and callee hotness for PGO builds. The Callee is explicitly - /// passed to support analyzing indirect calls whose target is inferred by - /// analysis. - void updateThreshold(CallBase &Call, Function &Callee); - /// Return true if size growth is allowed when inlining the callee at \p Call. bool allowSizeGrowth(CallBase &Call); - /// Return true if \p Call is a cold callsite. - bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI); - - /// Return a higher threshold if \p Call is a hot callsite. - Optional<int> getHotCallSiteThreshold(CallBase &Call, - BlockFrequencyInfo *CallerBFI); - // Custom analysis routines. InlineResult analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues); - /// Handle a capped 'int' increment for Cost. - void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) { - assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound"); - Cost = (int)std::min(UpperBound, Cost + Inc); - } - // Disable several entry points to the visitor so we don't accidentally use // them by declaring but not defining them here. void visit(Module *); @@ -294,18 +341,12 @@ public: std::function<AssumptionCache &(Function &)> &GetAssumptionCache, Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, - Function &Callee, CallBase &Call, const InlineParams &Params) + Function &Callee, CallBase &Call) : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE), - CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold), - ComputeFullInlineCost(OptComputeFullInlineCost || - Params.ComputeFullInlineCost || ORE), - EnableLoadElimination(true) {} - - InlineResult analyzeCall(CallBase &Call); + CandidateCall(Call), EnableLoadElimination(true) {} - int getThreshold() { return Threshold; } - int getCost() { return Cost; } + InlineResult analyze(); // Keep a bunch of stats about the cost savings found so we can print them // out when debugging. @@ -315,12 +356,291 @@ public: unsigned NumConstantPtrCmps = 0; unsigned NumConstantPtrDiffs = 0; unsigned NumInstructionsSimplified = 0; + + void dump(); +}; + +/// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note +/// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer +class InlineCostCallAnalyzer final : public CallAnalyzer { + const int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1; + const bool ComputeFullInlineCost; + int LoadEliminationCost = 0; + /// Bonus to be applied when percentage of vector instructions in callee is + /// high (see more details in updateThreshold). 
+ int VectorBonus = 0; + /// Bonus to be applied when the callee has only one reachable basic block. + int SingleBBBonus = 0; + + /// Tunable parameters that control the analysis. + const InlineParams &Params; + + /// Upper bound for the inlining cost. Bonuses are being applied to account + /// for speculative "expected profit" of the inlining decision. + int Threshold = 0; + + /// Attempt to evaluate indirect calls to boost its inline cost. + const bool BoostIndirectCalls; + + /// Inlining cost measured in abstract units, accounts for all the + /// instructions expected to be executed for a given function invocation. + /// Instructions that are statically proven to be dead based on call-site + /// arguments are not counted here. + int Cost = 0; + + bool SingleBB = true; + unsigned SROACostSavings = 0; unsigned SROACostSavingsLost = 0; + /// The mapping of caller Alloca values to their accumulated cost savings. If + /// we have to disable SROA for one of the allocas, this tells us how much + /// cost must be added. + DenseMap<AllocaInst *, int> SROAArgCosts; + + /// Return true if \p Call is a cold callsite. + bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI); + + /// Update Threshold based on callsite properties such as callee + /// attributes and callee hotness for PGO builds. The Callee is explicitly + /// passed to support analyzing indirect calls whose target is inferred by + /// analysis. + void updateThreshold(CallBase &Call, Function &Callee); + /// Return a higher threshold if \p Call is a hot callsite. + Optional<int> getHotCallSiteThreshold(CallBase &Call, + BlockFrequencyInfo *CallerBFI); + + /// Handle a capped 'int' increment for Cost. + void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) { + assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound"); + Cost = (int)std::min(UpperBound, Cost + Inc); + } + + void onDisableSROA(AllocaInst *Arg) override { + auto CostIt = SROAArgCosts.find(Arg); + if (CostIt == SROAArgCosts.end()) + return; + addCost(CostIt->second); + SROACostSavings -= CostIt->second; + SROACostSavingsLost += CostIt->second; + SROAArgCosts.erase(CostIt); + } + + void onDisableLoadElimination() override { + addCost(LoadEliminationCost); + LoadEliminationCost = 0; + } + void onCallPenalty() override { addCost(InlineConstants::CallPenalty); } + void onCallArgumentSetup(const CallBase &Call) override { + // Pay the price of the argument setup. We account for the average 1 + // instruction per call argument setup here. + addCost(Call.arg_size() * InlineConstants::InstrCost); + } + void onLoadRelativeIntrinsic() override { + // This is normally lowered to 4 LLVM instructions. + addCost(3 * InlineConstants::InstrCost); + } + void onLoweredCall(Function *F, CallBase &Call, + bool IsIndirectCall) override { + // We account for the average 1 instruction per call argument setup here. + addCost(Call.arg_size() * InlineConstants::InstrCost); + + // If we have a constant that we are calling as a function, we can peer + // through it and see the function target. This happens not infrequently + // during devirtualization and so we want to give it a hefty bonus for + // inlining, but cap that bonus in the event that inlining wouldn't pan out. + // Pretend to inline the function, with a custom threshold. 
+ if (IsIndirectCall && BoostIndirectCalls) { + auto IndirectCallParams = Params; + IndirectCallParams.DefaultThreshold = + InlineConstants::IndirectCallThreshold; + /// FIXME: if InlineCostCallAnalyzer is derived from, this may need + /// to instantiate the derived class. + InlineCostCallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, + Call, IndirectCallParams, false); + if (CA.analyze()) { + // We were able to inline the indirect call! Subtract the cost from the + // threshold to get the bonus we want to apply, but don't go below zero. + Cost -= std::max(0, CA.getThreshold() - CA.getCost()); + } + } else + // Otherwise simply add the cost for merely making the call. + addCost(InlineConstants::CallPenalty); + } + + void onFinalizeSwitch(unsigned JumpTableSize, + unsigned NumCaseCluster) override { + // If suitable for a jump table, consider the cost for the table size and + // branch to destination. + // Maximum valid cost increased in this function. + if (JumpTableSize) { + int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost + + 4 * InlineConstants::InstrCost; + + addCost(JTCost, (int64_t)CostUpperBound); + return; + } + // Considering forming a binary search, we should find the number of nodes + // which is same as the number of comparisons when lowered. For a given + // number of clusters, n, we can define a recursive function, f(n), to find + // the number of nodes in the tree. The recursion is : + // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3, + // and f(n) = n, when n <= 3. + // This will lead a binary tree where the leaf should be either f(2) or f(3) + // when n > 3. So, the number of comparisons from leaves should be n, while + // the number of non-leaf should be : + // 2^(log2(n) - 1) - 1 + // = 2^log2(n) * 2^-1 - 1 + // = n / 2 - 1. + // Considering comparisons from leaf and non-leaf nodes, we can estimate the + // number of comparisons in a simple closed form : + // n + n / 2 - 1 = n * 3 / 2 - 1 + if (NumCaseCluster <= 3) { + // Suppose a comparison includes one compare and one conditional branch. + addCost(NumCaseCluster * 2 * InlineConstants::InstrCost); + return; + } + + int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1; + int64_t SwitchCost = + ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; + + addCost(SwitchCost, (int64_t)CostUpperBound); + } + void onCommonInstructionSimplification() override { + addCost(InlineConstants::InstrCost); + } + + void onInitializeSROAArg(AllocaInst *Arg) override { + assert(Arg != nullptr && + "Should not initialize SROA costs for null value."); + SROAArgCosts[Arg] = 0; + EnabledSROAArgValues.insert(Arg); + } + + void onAggregateSROAUse(AllocaInst *SROAArg) override { + auto CostIt = SROAArgCosts.find(SROAArg); + assert(CostIt != SROAArgCosts.end() && + "expected this argument to have a cost"); + CostIt->second += InlineConstants::InstrCost; + SROACostSavings += InlineConstants::InstrCost; + } + + void onBlockAnalyzed(const BasicBlock *BB) override { + auto *TI = BB->getTerminator(); + // If we had any successors at this point, than post-inlining is likely to + // have them as well. Note that we assume any basic blocks which existed + // due to branches or switches which folded above will also fold after + // inlining. + if (SingleBB && TI->getNumSuccessors() > 1) { + // Take off the bonus we applied to the threshold. 
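The onFinalizeSwitch body above models a lowered switch either as a jump table (table size plus a few setup instructions) or, beyond three clusters, as a balanced binary search whose comparison count it approximates with the closed form 3n/2 - 1 derived in the comment. A quick standalone check of that closed form against the recurrence it comes from (illustrative only, not LLVM code):

    #include <cstdio>

    // f(n) = number of compares in the lowered binary search, per the
    // recurrence in the comment: f(n) = n for n <= 3, otherwise
    // f(n) = 1 + f(n/2) + f(n - n/2).
    static long f(long n) {
      if (n <= 3)
        return n;
      return 1 + f(n / 2) + f(n - n / 2);
    }

    int main() {
      for (long n = 4; n <= 64; ++n) {
        long Closed = 3 * n / 2 - 1; // the approximation used by the cost model
        std::printf("n=%2ld recurrence=%3ld closed-form=%3ld\n", n, f(n), Closed);
      }
      return 0;
    }

The closed form tracks the recurrence closely and matches it exactly for powers of two, which is all the heuristic needs before multiplying by two instructions (one compare, one conditional branch) per comparison.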
+ Threshold -= SingleBBBonus; + SingleBB = false; + } + } + InlineResult finalizeAnalysis() override { + // Loops generally act a lot like calls in that they act like barriers to + // movement, require a certain amount of setup, etc. So when optimising for + // size, we penalise any call sites that perform loops. We do this after all + // other costs here, so will likely only be dealing with relatively small + // functions (and hence DT and LI will hopefully be cheap). + auto *Caller = CandidateCall.getFunction(); + if (Caller->hasMinSize()) { + DominatorTree DT(F); + LoopInfo LI(DT); + int NumLoops = 0; + for (Loop *L : LI) { + // Ignore loops that will not be executed + if (DeadBlocks.count(L->getHeader())) + continue; + NumLoops++; + } + addCost(NumLoops * InlineConstants::CallPenalty); + } + + // We applied the maximum possible vector bonus at the beginning. Now, + // subtract the excess bonus, if any, from the Threshold before + // comparing against Cost. + if (NumVectorInstructions <= NumInstructions / 10) + Threshold -= VectorBonus; + else if (NumVectorInstructions <= NumInstructions / 2) + Threshold -= VectorBonus / 2; + + return Cost < std::max(1, Threshold); + } + bool shouldStop() override { + // Bail out the moment we cross the threshold. This means we'll under-count + // the cost, but only when undercounting doesn't matter. + return Cost >= Threshold && !ComputeFullInlineCost; + } + + void onLoadEliminationOpportunity() override { + LoadEliminationCost += InlineConstants::InstrCost; + } + + InlineResult onAnalysisStart() override { + // Perform some tweaks to the cost and threshold based on the direct + // callsite information. + + // We want to more aggressively inline vector-dense kernels, so up the + // threshold, and we'll lower it if the % of vector instructions gets too + // low. Note that these bonuses are some what arbitrary and evolved over + // time by accident as much as because they are principled bonuses. + // + // FIXME: It would be nice to remove all such bonuses. At least it would be + // nice to base the bonus values on something more scientific. + assert(NumInstructions == 0); + assert(NumVectorInstructions == 0); + + // Update the threshold based on callsite properties + updateThreshold(CandidateCall, F); + + // While Threshold depends on commandline options that can take negative + // values, we want to enforce the invariant that the computed threshold and + // bonuses are non-negative. + assert(Threshold >= 0); + assert(SingleBBBonus >= 0); + assert(VectorBonus >= 0); + + // Speculatively apply all possible bonuses to Threshold. If cost exceeds + // this Threshold any time, and cost cannot decrease, we can stop processing + // the rest of the function body. + Threshold += (SingleBBBonus + VectorBonus); + + // Give out bonuses for the callsite, as the instructions setting them up + // will be gone after inlining. + addCost(-getCallsiteCost(this->CandidateCall, DL)); + + // If this function uses the coldcc calling convention, prefer not to inline + // it. + if (F.getCallingConv() == CallingConv::Cold) + Cost += InlineConstants::ColdccPenalty; + + // Check if we're done. This can happen due to bonuses and penalties. 
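The onAnalysisStart, onBlockAnalyzed and finalizeAnalysis hooks above implement an "apply bonuses up front, refund later" scheme: every bonus is speculatively added to Threshold before the walk so the Cost >= Threshold early exit can never fire too soon, and any bonus that turns out not to apply is subtracted again before the final Cost < max(1, Threshold) decision. A toy illustration of that bookkeeping order, with made-up numbers (the real values come from updateThreshold and the InlineParams):

    #include <algorithm>
    #include <cstdio>

    int main() {
      int Threshold = 225;                     // e.g. a default threshold
      const int SingleBBBonus = Threshold / 2; // hypothetical bonus values
      const int VectorBonus = Threshold;
      int Cost = 0;

      // onAnalysisStart: assume every bonus applies.
      Threshold += SingleBBBonus + VectorBonus;

      // onBlockAnalyzed: a block with two successors was seen, so the callee
      // is not a single basic block; take that bonus back.
      Threshold -= SingleBBBonus;

      // finalizeAnalysis: almost no vector code, refund that bonus too, then
      // make the final decision.
      Threshold -= VectorBonus;
      Cost += 200; // whatever the walk accumulated
      bool ShouldInline = Cost < std::max(1, Threshold);
      std::printf("cost=%d threshold=%d inline=%d\n", Cost, Threshold, ShouldInline);
      return 0;
    }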
+ if (Cost >= Threshold && !ComputeFullInlineCost) + return "high cost"; + + return true; + } + +public: + InlineCostCallAnalyzer( + const TargetTransformInfo &TTI, + std::function<AssumptionCache &(Function &)> &GetAssumptionCache, + Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI, + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee, + CallBase &Call, const InlineParams &Params, bool BoostIndirect = true) + : CallAnalyzer(TTI, GetAssumptionCache, GetBFI, PSI, ORE, Callee, Call), + ComputeFullInlineCost(OptComputeFullInlineCost || + Params.ComputeFullInlineCost || ORE), + Params(Params), Threshold(Params.DefaultThreshold), + BoostIndirectCalls(BoostIndirect) {} void dump(); -}; + virtual ~InlineCostCallAnalyzer() {} + int getThreshold() { return Threshold; } + int getCost() { return Cost; } +}; } // namespace /// Test whether the given value is an Alloca-derived function argument. @@ -328,55 +648,21 @@ bool CallAnalyzer::isAllocaDerivedArg(Value *V) { return SROAArgValues.count(V); } -/// Lookup the SROA-candidate argument and cost iterator which V maps to. -/// Returns false if V does not map to a SROA-candidate. -bool CallAnalyzer::lookupSROAArgAndCost( - Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) { - if (SROAArgValues.empty() || SROAArgCosts.empty()) - return false; - - DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V); - if (ArgIt == SROAArgValues.end()) - return false; - - Arg = ArgIt->second; - CostIt = SROAArgCosts.find(Arg); - return CostIt != SROAArgCosts.end(); -} - -/// Disable SROA for the candidate marked by this cost iterator. -/// -/// This marks the candidate as no longer viable for SROA, and adds the cost -/// savings associated with it back into the inline cost measurement. -void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) { - // If we're no longer able to perform SROA we need to undo its cost savings - // and prevent subsequent analysis. - addCost(CostIt->second); - SROACostSavings -= CostIt->second; - SROACostSavingsLost += CostIt->second; - SROAArgCosts.erase(CostIt); +void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) { + onDisableSROA(SROAArg); + EnabledSROAArgValues.erase(SROAArg); disableLoadElimination(); } - /// If 'V' maps to a SROA candidate, disable SROA for it. void CallAnalyzer::disableSROA(Value *V) { - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(V, SROAArg, CostIt)) - disableSROA(CostIt); -} - -/// Accumulate the given cost for a particular SROA candidate. 
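The hunks around here replace the Value-keyed lookupSROAArgAndCost/accumulateSROACost machinery with AllocaInst-keyed maps plus the onInitializeSROAArg, onAggregateSROAUse and onDisableSROA hooks: savings are credited per SROA-able use, and the moment an alloca is known to escape SROA the accumulated credit is added back into Cost. A small standalone sketch of that ledger, using hypothetical types rather than the LLVM classes:

    #include <cstdio>
    #include <map>

    struct Alloca {};

    // Toy version of the SROA savings bookkeeping: credit each eliminable use,
    // refund everything for an alloca once SROA is known to be impossible.
    class SroaLedger {
      std::map<const Alloca *, int> Savings; // per-alloca accumulated credit
      int Cost = 0;

    public:
      void initialize(const Alloca *A) { Savings[A] = 0; }

      void accountUse(const Alloca *A, int InstrCost) {
        auto It = Savings.find(A);
        if (It != Savings.end())
          It->second += InstrCost; // this use would disappear if SROA fires
        else
          Cost += InstrCost;       // not an SROA candidate: pay for it now
      }

      void disable(const Alloca *A) {
        auto It = Savings.find(A);
        if (It == Savings.end())
          return;
        Cost += It->second;        // the speculative savings never materialize
        Savings.erase(It);
      }

      int cost() const { return Cost; }
    };

    int main() {
      Alloca A;
      SroaLedger L;
      L.initialize(&A);
      L.accountUse(&A, 5); // e.g. a simple load
      L.accountUse(&A, 5); // e.g. a simple store
      L.disable(&A);       // e.g. the address escapes into an opaque call
      std::printf("cost after disabling SROA: %d\n", L.cost());
      return 0;
    }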
-void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, - int InstructionCost) { - CostIt->second += InstructionCost; - SROACostSavings += InstructionCost; + if (auto *SROAArg = getSROAArgForValueOrNull(V)) { + disableSROAForArg(SROAArg); + } } void CallAnalyzer::disableLoadElimination() { if (EnableLoadElimination) { - addCost(LoadEliminationCost); - LoadEliminationCost = 0; + onDisableLoadElimination(); EnableLoadElimination = false; } } @@ -422,9 +708,9 @@ bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) { Operands.push_back(GEP.getOperand(0)); for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) if (Constant *SimpleOp = SimplifiedValues.lookup(*I)) - Operands.push_back(SimpleOp); - else - Operands.push_back(*I); + Operands.push_back(SimpleOp); + else + Operands.push_back(*I); return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands); } @@ -436,7 +722,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) { Type *Ty = I.getAllocatedType(); AllocatedSize = SaturatingMultiplyAdd( - AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize); + AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty).getFixedSize(), + AllocatedSize); return Base::visitAlloca(I); } } @@ -444,7 +731,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { // Accumulate the allocated size. if (I.isStaticAlloca()) { Type *Ty = I.getAllocatedType(); - AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize); + AllocatedSize = + SaturatingAdd(DL.getTypeAllocSize(Ty).getFixedSize(), AllocatedSize); } // We will happily inline static alloca instructions. @@ -546,9 +834,7 @@ bool CallAnalyzer::visitPHI(PHINode &I) { if (FirstBaseAndOffset.first) { ConstantOffsetPtrs[&I] = FirstBaseAndOffset; - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt)) + if (auto *SROAArg = getSROAArgForValueOrNull(FirstV)) SROAArgValues[&I] = SROAArg; } @@ -578,10 +864,7 @@ bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) { } bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - bool SROACandidate = - lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt); + auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand()); // Lambda to check whether a GEP's indices are all constant. auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) { @@ -592,7 +875,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { }; if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) { - if (SROACandidate) + if (SROAArg) SROAArgValues[&I] = SROAArg; // Constant GEPs are modeled as free. @@ -600,8 +883,8 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { } // Variable GEPs will require math and will disable SROA. - if (SROACandidate) - disableSROA(CostIt); + if (SROAArg) + disableSROAForArg(SROAArg); return isGEPFree(I); } @@ -641,9 +924,7 @@ bool CallAnalyzer::visitBitCast(BitCastInst &I) { ConstantOffsetPtrs[&I] = BaseAndOffset; // Also look for SROA candidates here. - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) + if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0))) SROAArgValues[&I] = SROAArg; // Bitcasts are always zero cost. @@ -675,9 +956,7 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { // and so we can just add the integer in here. 
The only places where SROA is // preserved either cannot fire on an integer, or won't in-and-of themselves // disable SROA (ext) w/o some later use that we would see and disable. - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) + if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0))) SROAArgValues[&I] = SROAArg; return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); @@ -701,9 +980,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { } // "Propagate" SROA here in the same manner as we do for ptrtoint above. - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(Op, SROAArg, CostIt)) + if (auto *SROAArg = getSROAArgForValueOrNull(Op)) SROAArgValues[&I] = SROAArg; return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); @@ -730,7 +1007,7 @@ bool CallAnalyzer::visitCastInst(CastInst &I) { case Instruction::FPToUI: case Instruction::FPToSI: if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive) - addCost(InlineConstants::CallPenalty); + onCallPenalty(); break; default: break; @@ -803,8 +1080,8 @@ bool CallAnalyzer::allowSizeGrowth(CallBase &Call) { return true; } -bool CallAnalyzer::isColdCallSite(CallBase &Call, - BlockFrequencyInfo *CallerBFI) { +bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call, + BlockFrequencyInfo *CallerBFI) { // If global profile summary is available, then callsite's coldness is // determined based on that. if (PSI && PSI->hasProfileSummary()) @@ -827,8 +1104,8 @@ bool CallAnalyzer::isColdCallSite(CallBase &Call, } Optional<int> -CallAnalyzer::getHotCallSiteThreshold(CallBase &Call, - BlockFrequencyInfo *CallerBFI) { +InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call, + BlockFrequencyInfo *CallerBFI) { // If global profile summary is available, then callsite's hotness is // determined based on that. @@ -855,7 +1132,7 @@ CallAnalyzer::getHotCallSiteThreshold(CallBase &Call, return None; } -void CallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) { +void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) { // If no size growth is allowed for this inlining, set Threshold to 0. if (!allowSizeGrowth(Call)) { Threshold = 0; @@ -1017,19 +1294,7 @@ bool CallAnalyzer::visitCmpInst(CmpInst &I) { : ConstantInt::getFalse(I.getType()); return true; } - // Finally check for SROA candidates in comparisons. - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { - if (isa<ConstantPointerNull>(I.getOperand(1))) { - accumulateSROACost(CostIt, InlineConstants::InstrCost); - return true; - } - - disableSROA(CostIt); - } - - return false; + return handleSROA(I.getOperand(0), isa<ConstantPointerNull>(I.getOperand(1))); } bool CallAnalyzer::visitSub(BinaryOperator &I) { @@ -1070,8 +1335,8 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { Value *SimpleV = nullptr; if (auto FI = dyn_cast<FPMathOperator>(&I)) - SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS, - CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL); + SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, + FI->getFastMathFlags(), DL); else SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? 
CRHS : RHS, DL); @@ -1093,7 +1358,7 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { if (I.getType()->isFloatingPointTy() && TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive && !match(&I, m_FNeg(m_Value()))) - addCost(InlineConstants::CallPenalty); + onCallPenalty(); return false; } @@ -1104,9 +1369,8 @@ bool CallAnalyzer::visitFNeg(UnaryOperator &I) { if (!COp) COp = SimplifiedValues.lookup(Op); - Value *SimpleV = SimplifyFNegInst(COp ? COp : Op, - cast<FPMathOperator>(I).getFastMathFlags(), - DL); + Value *SimpleV = SimplifyFNegInst( + COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL); if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) SimplifiedValues[&I] = C; @@ -1121,23 +1385,15 @@ bool CallAnalyzer::visitFNeg(UnaryOperator &I) { } bool CallAnalyzer::visitLoad(LoadInst &I) { - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) { - if (I.isSimple()) { - accumulateSROACost(CostIt, InlineConstants::InstrCost); - return true; - } - - disableSROA(CostIt); - } + if (handleSROA(I.getPointerOperand(), I.isSimple())) + return true; // If the data is already loaded from this address and hasn't been clobbered // by any stores or calls, this load is likely to be redundant and can be // eliminated. if (EnableLoadElimination && !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) { - LoadEliminationCost += InlineConstants::InstrCost; + onLoadEliminationOpportunity(); return true; } @@ -1145,16 +1401,8 @@ bool CallAnalyzer::visitLoad(LoadInst &I) { } bool CallAnalyzer::visitStore(StoreInst &I) { - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) { - if (I.isSimple()) { - accumulateSROACost(CostIt, InlineConstants::InstrCost); - return true; - } - - disableSROA(CostIt); - } + if (handleSROA(I.getPointerOperand(), I.isSimple())) + return true; // The store can potentially clobber loads and prevent repeated loads from // being eliminated. @@ -1236,97 +1484,69 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) { if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate()) ContainsNoDuplicateCall = true; - if (Function *F = Call.getCalledFunction()) { - // When we have a concrete function, first try to simplify it directly. - if (simplifyCallSite(F, Call)) - return true; - - // Next check if it is an intrinsic we know about. - // FIXME: Lift this into part of the InstVisitor. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) { - switch (II->getIntrinsicID()) { - default: - if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II)) - disableLoadElimination(); - return Base::visitCallBase(Call); - - case Intrinsic::load_relative: - // This is normally lowered to 4 LLVM instructions. - addCost(3 * InlineConstants::InstrCost); - return false; - - case Intrinsic::memset: - case Intrinsic::memcpy: - case Intrinsic::memmove: + Value *Callee = Call.getCalledOperand(); + Function *F = dyn_cast_or_null<Function>(Callee); + bool IsIndirectCall = !F; + if (IsIndirectCall) { + // Check if this happens to be an indirect function call to a known function + // in this inline context. If not, we've done all we can. + F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee)); + if (!F) { + onCallArgumentSetup(Call); + + if (!Call.onlyReadsMemory()) disableLoadElimination(); - // SROA can usually chew through these intrinsics, but they aren't free. 
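The visitLoad/visitStore changes above route the load-elimination heuristic through onLoadEliminationOpportunity and onDisableLoadElimination: a repeated, unordered load from an address already in LoadAddrSet is assumed redundant and credited, while any potential clobber refunds the accumulated credit and turns the optimization off. A toy model of that bookkeeping (hypothetical class, not the LLVM code):

    #include <cstdio>
    #include <set>

    // First load of an address is paid for, repeated clobber-free loads are
    // credited, and a clobber refunds all accumulated credit.
    class LoadEliminationModel {
      std::set<const void *> SeenAddrs;
      bool Enabled = true;
      int Credit = 0;
      int Cost = 0;

    public:
      void visitLoad(const void *Addr, int InstrCost) {
        if (Enabled && !SeenAddrs.insert(Addr).second) {
          Credit += InstrCost; // likely redundant: assume it gets eliminated
          return;
        }
        Cost += InstrCost;
      }

      void visitClobber() { // a store or a call that may write memory
        if (!Enabled)
          return;
        Cost += Credit;     // the assumed savings are no longer safe
        Credit = 0;
        Enabled = false;
      }

      int cost() const { return Cost; }
    };

    int main() {
      int X = 0;
      LoadEliminationModel M;
      M.visitLoad(&X, 5); // first load: paid
      M.visitLoad(&X, 5); // repeated load: credited
      M.visitClobber();   // e.g. an opaque call: credit refunded
      M.visitLoad(&X, 5); // elimination disabled: paid
      std::printf("modelled cost: %d\n", M.cost());
      return 0;
    }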
- return false; - case Intrinsic::icall_branch_funnel: - case Intrinsic::localescape: - HasUninlineableIntrinsic = true; - return false; - case Intrinsic::vastart: - InitsVargArgs = true; - return false; - } + return Base::visitCallBase(Call); } + } - if (F == Call.getFunction()) { - // This flag will fully abort the analysis, so don't bother with anything - // else. - IsRecursiveCall = true; - return false; - } + assert(F && "Expected a call to a known function"); + + // When we have a concrete function, first try to simplify it directly. + if (simplifyCallSite(F, Call)) + return true; - if (TTI.isLoweredToCall(F)) { - // We account for the average 1 instruction per call argument setup - // here. - addCost(Call.arg_size() * InlineConstants::InstrCost); + // Next check if it is an intrinsic we know about. + // FIXME: Lift this into part of the InstVisitor. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) { + switch (II->getIntrinsicID()) { + default: + if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II)) + disableLoadElimination(); + return Base::visitCallBase(Call); - // Everything other than inline ASM will also have a significant cost - // merely from making the call. - if (!isa<InlineAsm>(Call.getCalledValue())) - addCost(InlineConstants::CallPenalty); - } + case Intrinsic::load_relative: + onLoadRelativeIntrinsic(); + return false; - if (!Call.onlyReadsMemory()) + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: disableLoadElimination(); - return Base::visitCallBase(Call); + // SROA can usually chew through these intrinsics, but they aren't free. + return false; + case Intrinsic::icall_branch_funnel: + case Intrinsic::localescape: + HasUninlineableIntrinsic = true; + return false; + case Intrinsic::vastart: + InitsVargArgs = true; + return false; + } } - // Otherwise we're in a very special case -- an indirect function call. See - // if we can be particularly clever about this. - Value *Callee = Call.getCalledValue(); - - // First, pay the price of the argument setup. We account for the average - // 1 instruction per call argument setup here. - addCost(Call.arg_size() * InlineConstants::InstrCost); - - // Next, check if this happens to be an indirect function call to a known - // function in this inline context. If not, we've done all we can. - Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee)); - if (!F) { - if (!Call.onlyReadsMemory()) - disableLoadElimination(); - return Base::visitCallBase(Call); + if (F == Call.getFunction()) { + // This flag will fully abort the analysis, so don't bother with anything + // else. + IsRecursiveCall = true; + return false; } - // If we have a constant that we are calling as a function, we can peer - // through it and see the function target. This happens not infrequently - // during devirtualization and so we want to give it a hefty bonus for - // inlining, but cap that bonus in the event that inlining wouldn't pan - // out. Pretend to inline the function, with a custom threshold. - auto IndirectCallParams = Params; - IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold; - CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call, - IndirectCallParams); - if (CA.analyzeCall(Call)) { - // We were able to inline the indirect call! Subtract the cost from the - // threshold to get the bonus we want to apply, but don't go below zero. 
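The indirect-call handling being moved into onLoweredCall works by pretend-inlining the discovered target under a dedicated threshold and converting any headroom into a bonus for the call site under analysis, never letting the bonus go negative. A toy illustration of that arithmetic, with illustrative numbers only:

    #include <algorithm>
    #include <cstdio>

    int main() {
      int Cost = 120;                        // cost accumulated so far in the caller
      const int IndirectCallThreshold = 100; // hypothetical nested threshold

      // Result of the nested analysis of the discovered callee.
      int NestedThreshold = IndirectCallThreshold;
      int NestedCost = 35;
      bool NestedWouldInline = NestedCost < std::max(1, NestedThreshold);

      if (NestedWouldInline)
        Cost -= std::max(0, NestedThreshold - NestedCost); // bonus of 65
      else
        Cost += 25;                          // otherwise just pay a call penalty

      std::printf("caller cost after indirect-call handling: %d\n", Cost);
      return 0;
    }

The FIXME in the new code notes the remaining wart: the nested analyzer is hard-coded to InlineCostCallAnalyzer, so a future subclass would need to instantiate itself here instead.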
- Cost -= std::max(0, CA.getThreshold() - CA.getCost()); + if (TTI.isLoweredToCall(F)) { + onLoweredCall(F, Call, IsIndirectCall); } - if (!F->onlyReadsMemory()) + if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory()))) disableLoadElimination(); return Base::visitCallBase(Call); } @@ -1379,9 +1599,7 @@ bool CallAnalyzer::visitSelectInst(SelectInst &SI) { if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) { ConstantOffsetPtrs[&SI] = TrueBaseAndOffset; - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt)) + if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal)) SROAArgValues[&SI] = SROAArg; return true; } @@ -1420,9 +1638,7 @@ bool CallAnalyzer::visitSelectInst(SelectInst &SI) { if (BaseAndOffset.first) { ConstantOffsetPtrs[&SI] = BaseAndOffset; - Value *SROAArg; - DenseMap<Value *, int>::iterator CostIt; - if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt)) + if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV)) SROAArgValues[&SI] = SROAArg; } @@ -1450,62 +1666,12 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { // inlining those. It will prevent inlining in cases where the optimization // does not (yet) fire. - // Maximum valid cost increased in this function. - int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1; - - // Exit early for a large switch, assuming one case needs at least one - // instruction. - // FIXME: This is not true for a bit test, but ignore such case for now to - // save compile-time. - int64_t CostLowerBound = - std::min((int64_t)CostUpperBound, - (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); - - if (CostLowerBound > Threshold && !ComputeFullInlineCost) { - addCost((int64_t)SI.getNumCases() * InlineConstants::InstrCost); - return false; - } - unsigned JumpTableSize = 0; + BlockFrequencyInfo *BFI = GetBFI ? &((*GetBFI)(F)) : nullptr; unsigned NumCaseCluster = - TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize); + TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI); - // If suitable for a jump table, consider the cost for the table size and - // branch to destination. - if (JumpTableSize) { - int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost + - 4 * InlineConstants::InstrCost; - - addCost(JTCost, (int64_t)CostUpperBound); - return false; - } - - // Considering forming a binary search, we should find the number of nodes - // which is same as the number of comparisons when lowered. For a given - // number of clusters, n, we can define a recursive function, f(n), to find - // the number of nodes in the tree. The recursion is : - // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3, - // and f(n) = n, when n <= 3. - // This will lead a binary tree where the leaf should be either f(2) or f(3) - // when n > 3. So, the number of comparisons from leaves should be n, while - // the number of non-leaf should be : - // 2^(log2(n) - 1) - 1 - // = 2^log2(n) * 2^-1 - 1 - // = n / 2 - 1. - // Considering comparisons from leaf and non-leaf nodes, we can estimate the - // number of comparisons in a simple closed form : - // n + n / 2 - 1 = n * 3 / 2 - 1 - if (NumCaseCluster <= 3) { - // Suppose a comparison includes one compare and one conditional branch. 
- addCost(NumCaseCluster * 2 * InlineConstants::InstrCost); - return false; - } - - int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1; - int64_t SwitchCost = - ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; - - addCost(SwitchCost, (int64_t)CostUpperBound); + onFinalizeSwitch(JumpTableSize, NumCaseCluster); return false; } @@ -1598,7 +1764,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB, if (Base::visit(&*I)) ++NumInstructionsSimplified; else - addCost(InlineConstants::InstrCost); + onCommonInstructionSimplification(); using namespace ore; // If the visit this instruction detected an uninlinable pattern, abort. @@ -1643,9 +1809,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB, return IR; } - // Check if we've passed the maximum possible threshold so we don't spin in - // huge basic blocks that will never inline. - if (Cost >= Threshold && !ComputeFullInlineCost) + if (shouldStop()) return false; } @@ -1687,8 +1851,8 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } while (Visited.insert(V).second); - Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS); - return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset)); + Type *IdxPtrTy = DL.getIndexType(V->getType()); + return cast<ConstantInt>(ConstantInt::get(IdxPtrTy, Offset)); } /// Find dead blocks due to deleted CFG edges during inlining. @@ -1736,54 +1900,17 @@ void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) { /// factors and heuristics. If this method returns false but the computed cost /// is below the computed threshold, then inlining was forcibly disabled by /// some artifact of the routine. -InlineResult CallAnalyzer::analyzeCall(CallBase &Call) { +InlineResult CallAnalyzer::analyze() { ++NumCallsAnalyzed; - // Perform some tweaks to the cost and threshold based on the direct - // callsite information. - - // We want to more aggressively inline vector-dense kernels, so up the - // threshold, and we'll lower it if the % of vector instructions gets too - // low. Note that these bonuses are some what arbitrary and evolved over time - // by accident as much as because they are principled bonuses. - // - // FIXME: It would be nice to remove all such bonuses. At least it would be - // nice to base the bonus values on something more scientific. - assert(NumInstructions == 0); - assert(NumVectorInstructions == 0); - - // Update the threshold based on callsite properties - updateThreshold(Call, F); - - // While Threshold depends on commandline options that can take negative - // values, we want to enforce the invariant that the computed threshold and - // bonuses are non-negative. - assert(Threshold >= 0); - assert(SingleBBBonus >= 0); - assert(VectorBonus >= 0); - - // Speculatively apply all possible bonuses to Threshold. If cost exceeds - // this Threshold any time, and cost cannot decrease, we can stop processing - // the rest of the function body. - Threshold += (SingleBBBonus + VectorBonus); - - // Give out bonuses for the callsite, as the instructions setting them up - // will be gone after inlining. - addCost(-getCallsiteCost(Call, DL)); - - // If this function uses the coldcc calling convention, prefer not to inline - // it. - if (F.getCallingConv() == CallingConv::Cold) - Cost += InlineConstants::ColdccPenalty; - - // Check if we're done. This can happen due to bonuses and penalties. 
- if (Cost >= Threshold && !ComputeFullInlineCost) - return "high cost"; + auto Result = onAnalysisStart(); + if (!Result) + return Result; if (F.empty()) return true; - Function *Caller = Call.getFunction(); + Function *Caller = CandidateCall.getFunction(); // Check if the caller function is recursive itself. for (User *U : Caller->users()) { CallBase *Call = dyn_cast<CallBase>(U); @@ -1795,10 +1922,10 @@ InlineResult CallAnalyzer::analyzeCall(CallBase &Call) { // Populate our simplified values by mapping from function arguments to call // arguments with known important simplifications. - auto CAI = Call.arg_begin(); + auto CAI = CandidateCall.arg_begin(); for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end(); FAI != FAE; ++FAI, ++CAI) { - assert(CAI != Call.arg_end()); + assert(CAI != CandidateCall.arg_end()); if (Constant *C = dyn_cast<Constant>(CAI)) SimplifiedValues[&*FAI] = C; @@ -1807,9 +1934,9 @@ InlineResult CallAnalyzer::analyzeCall(CallBase &Call) { ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue()); // We can SROA any pointer arguments derived from alloca instructions. - if (isa<AllocaInst>(PtrArg)) { - SROAArgValues[&*FAI] = PtrArg; - SROAArgCosts[PtrArg] = 0; + if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) { + SROAArgValues[&*FAI] = SROAArg; + onInitializeSROAArg(SROAArg); } } } @@ -1835,12 +1962,10 @@ InlineResult CallAnalyzer::analyzeCall(CallBase &Call) { BBSetVector; BBSetVector BBWorklist; BBWorklist.insert(&F.getEntryBlock()); - bool SingleBB = true; + // Note that we *must not* cache the size, this loop grows the worklist. for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { - // Bail out the moment we cross the threshold. This means we'll under-count - // the cost, but only when undercounting doesn't matter. - if (Cost >= Threshold && !ComputeFullInlineCost) + if (shouldStop()) break; BasicBlock *BB = BBWorklist[Idx]; @@ -1900,57 +2025,23 @@ InlineResult CallAnalyzer::analyzeCall(CallBase &Call) { ++TIdx) BBWorklist.insert(TI->getSuccessor(TIdx)); - // If we had any successors at this point, than post-inlining is likely to - // have them as well. Note that we assume any basic blocks which existed - // due to branches or switches which folded above will also fold after - // inlining. - if (SingleBB && TI->getNumSuccessors() > 1) { - // Take off the bonus we applied to the threshold. - Threshold -= SingleBBBonus; - SingleBB = false; - } + onBlockAnalyzed(BB); } - bool OnlyOneCallAndLocalLinkage = - F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction(); + bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() && + &F == CandidateCall.getCalledFunction(); // If this is a noduplicate call, we can still inline as long as // inlining this would cause the removal of the caller (so the instruction // is not actually duplicated, just moved). if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) return "noduplicate"; - // Loops generally act a lot like calls in that they act like barriers to - // movement, require a certain amount of setup, etc. So when optimising for - // size, we penalise any call sites that perform loops. We do this after all - // other costs here, so will likely only be dealing with relatively small - // functions (and hence DT and LI will hopefully be cheap). 
- if (Caller->hasMinSize()) { - DominatorTree DT(F); - LoopInfo LI(DT); - int NumLoops = 0; - for (Loop *L : LI) { - // Ignore loops that will not be executed - if (DeadBlocks.count(L->getHeader())) - continue; - NumLoops++; - } - addCost(NumLoops * InlineConstants::CallPenalty); - } - - // We applied the maximum possible vector bonus at the beginning. Now, - // subtract the excess bonus, if any, from the Threshold before - // comparing against Cost. - if (NumVectorInstructions <= NumInstructions / 10) - Threshold -= VectorBonus; - else if (NumVectorInstructions <= NumInstructions / 2) - Threshold -= VectorBonus/2; - - return Cost < std::max(1, Threshold); + return finalizeAnalysis(); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Dump stats about this call's analysis. -LLVM_DUMP_METHOD void CallAnalyzer::dump() { +LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n" DEBUG_PRINT_STAT(NumConstantArgs); DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); @@ -2084,9 +2175,9 @@ InlineCost llvm::getInlineCost( LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "... (caller:" << Caller->getName() << ")\n"); - CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, - Call, Params); - InlineResult ShouldInline = CA.analyzeCall(Call); + InlineCostCallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, + *Callee, Call, Params); + InlineResult ShouldInline = CA.analyze(); LLVM_DEBUG(CA.dump()); @@ -2132,16 +2223,17 @@ InlineResult llvm::isInlineViable(Function &F) { switch (Call->getCalledFunction()->getIntrinsicID()) { default: break; - // Disallow inlining of @llvm.icall.branch.funnel because current - // backend can't separate call targets from call arguments. case llvm::Intrinsic::icall_branch_funnel: + // Disallow inlining of @llvm.icall.branch.funnel because current + // backend can't separate call targets from call arguments. return "disallowed inlining of @llvm.icall.branch.funnel"; - // Disallow inlining functions that call @llvm.localescape. Doing this - // correctly would require major changes to the inliner. case llvm::Intrinsic::localescape: + // Disallow inlining functions that call @llvm.localescape. Doing this + // correctly would require major changes to the inliner. return "disallowed inlining of @llvm.localescape"; - // Disallow inlining of functions that initialize VarArgs with va_start. case llvm::Intrinsic::vastart: + // Disallow inlining of functions that initialize VarArgs with + // va_start. return "contains VarArgs initialized with va_start"; } } @@ -2184,7 +2276,8 @@ InlineParams llvm::getInlineParams(int Threshold) { if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0) Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; - // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold. + // Set the ColdCallSiteThreshold knob from the + // -inline-cold-callsite-threshold. 
Params.ColdCallSiteThreshold = ColdCallSiteThreshold; // Set the OptMinSizeThreshold and OptSizeThreshold params only if the diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp index 943a99a5f46d..bb9c7b7eb11f 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/InstCount.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstVisitor.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp index 35190ce3e11a..415797d6a378 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/InstructionPrecedenceTracking.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp index 941a68c5e6fd..d7510c899101 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp @@ -56,8 +56,8 @@ static Value *simplifyFPUnOp(unsigned, Value *, const FastMathFlags &, const SimplifyQuery &, unsigned); static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); -static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &, - const SimplifyQuery &, unsigned); +static Value *SimplifyBinOp(unsigned, Value *, Value *, const FastMathFlags &, + const SimplifyQuery &, unsigned); static Value *SimplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, @@ -137,6 +137,71 @@ static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS, CRHS == LHS; } +/// Simplify comparison with true or false branch of select: +/// %sel = select i1 %cond, i32 %tv, i32 %fv +/// %cmp = icmp sle i32 %sel, %rhs +/// Compose new comparison by substituting %sel with either %tv or %fv +/// and see if it simplifies. +static Value *simplifyCmpSelCase(CmpInst::Predicate Pred, Value *LHS, + Value *RHS, Value *Cond, + const SimplifyQuery &Q, unsigned MaxRecurse, + Constant *TrueOrFalse) { + Value *SimplifiedCmp = SimplifyCmpInst(Pred, LHS, RHS, Q, MaxRecurse); + if (SimplifiedCmp == Cond) { + // %cmp simplified to the select condition (%cond). + return TrueOrFalse; + } else if (!SimplifiedCmp && isSameCompare(Cond, Pred, LHS, RHS)) { + // It didn't simplify. However, if composed comparison is equivalent + // to the select condition (%cond) then we can replace it. 
+ return TrueOrFalse; + } + return SimplifiedCmp; +} + +/// Simplify comparison with true branch of select +static Value *simplifyCmpSelTrueCase(CmpInst::Predicate Pred, Value *LHS, + Value *RHS, Value *Cond, + const SimplifyQuery &Q, + unsigned MaxRecurse) { + return simplifyCmpSelCase(Pred, LHS, RHS, Cond, Q, MaxRecurse, + getTrue(Cond->getType())); +} + +/// Simplify comparison with false branch of select +static Value *simplifyCmpSelFalseCase(CmpInst::Predicate Pred, Value *LHS, + Value *RHS, Value *Cond, + const SimplifyQuery &Q, + unsigned MaxRecurse) { + return simplifyCmpSelCase(Pred, LHS, RHS, Cond, Q, MaxRecurse, + getFalse(Cond->getType())); +} + +/// We know comparison with both branches of select can be simplified, but they +/// are not equal. This routine handles some logical simplifications. +static Value *handleOtherCmpSelSimplifications(Value *TCmp, Value *FCmp, + Value *Cond, + const SimplifyQuery &Q, + unsigned MaxRecurse) { + // If the false value simplified to false, then the result of the compare + // is equal to "Cond && TCmp". This also catches the case when the false + // value simplified to false and the true value to true, returning "Cond". + if (match(FCmp, m_Zero())) + if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse)) + return V; + // If the true value simplified to true, then the result of the compare + // is equal to "Cond || FCmp". + if (match(TCmp, m_One())) + if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse)) + return V; + // Finally, if the false value simplified to true and the true value to + // false, then the result of the compare is equal to "!Cond". + if (match(FCmp, m_One()) && match(TCmp, m_Zero())) + if (Value *V = SimplifyXorInst( + Cond, Constant::getAllOnesValue(Cond->getType()), Q, MaxRecurse)) + return V; + return nullptr; +} + /// Does the given value dominate the specified phi node? static bool valueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { Instruction *I = dyn_cast<Instruction>(V); @@ -398,6 +463,12 @@ static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, /// In the case of a comparison with a select instruction, try to simplify the /// comparison by seeing whether both branches of the select result in the same /// value. Returns the common value if so, otherwise returns null. +/// For example, if we have: +/// %tmp = select i1 %cmp, i32 1, i32 2 +/// %cmp1 = icmp sle i32 %tmp, 3 +/// We can simplify %cmp1 to true, because both branches of select are +/// less than 3. We compose new comparison by substituting %tmp with both +/// branches of select and see if it can be simplified. static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { @@ -418,32 +489,14 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it. // Does "cmp TV, RHS" simplify? - Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, Q, MaxRecurse); - if (TCmp == Cond) { - // It not only simplified, it simplified to the select condition. Replace - // it with 'true'. - TCmp = getTrue(Cond->getType()); - } else if (!TCmp) { - // It didn't simplify. However if "cmp TV, RHS" is equal to the select - // condition then we can replace it with 'true'. Otherwise give up. 
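The helpers factored out above (simplifyCmpSelTrueCase, simplifyCmpSelFalseCase, handleOtherCmpSelSimplifications) thread a comparison over a select: with Result = Cond ? TCmp : FCmp, a compare that simplifies on one arm can often be expressed purely in terms of the select condition. A quick exhaustive check of the boolean identities the last helper relies on (standalone, illustrative only):

    #include <cassert>
    #include <cstdio>

    // With Result = Cond ? TCmp : FCmp:
    //   FCmp == false               =>  Result == (Cond && TCmp)
    //   TCmp == true                =>  Result == (Cond || FCmp)
    //   FCmp == true, TCmp == false =>  Result == !Cond
    int main() {
      for (int Cond = 0; Cond <= 1; ++Cond)
        for (int TCmp = 0; TCmp <= 1; ++TCmp)
          for (int FCmp = 0; FCmp <= 1; ++FCmp) {
            bool Result = Cond ? TCmp : FCmp;
            if (!FCmp)
              assert(Result == (Cond && TCmp));
            if (TCmp)
              assert(Result == (Cond || FCmp));
            if (FCmp && !TCmp)
              assert(Result == !Cond);
          }
      std::printf("all select/compare folding identities hold\n");
      return 0;
    }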
- if (!isSameCompare(Cond, Pred, TV, RHS)) - return nullptr; - TCmp = getTrue(Cond->getType()); - } + Value *TCmp = simplifyCmpSelTrueCase(Pred, TV, RHS, Cond, Q, MaxRecurse); + if (!TCmp) + return nullptr; // Does "cmp FV, RHS" simplify? - Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, Q, MaxRecurse); - if (FCmp == Cond) { - // It not only simplified, it simplified to the select condition. Replace - // it with 'false'. - FCmp = getFalse(Cond->getType()); - } else if (!FCmp) { - // It didn't simplify. However if "cmp FV, RHS" is equal to the select - // condition then we can replace it with 'false'. Otherwise give up. - if (!isSameCompare(Cond, Pred, FV, RHS)) - return nullptr; - FCmp = getFalse(Cond->getType()); - } + Value *FCmp = simplifyCmpSelFalseCase(Pred, FV, RHS, Cond, Q, MaxRecurse); + if (!FCmp) + return nullptr; // If both sides simplified to the same value, then use it as the result of // the original comparison. @@ -452,26 +505,8 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, // The remaining cases only make sense if the select condition has the same // type as the result of the comparison, so bail out if this is not so. - if (Cond->getType()->isVectorTy() != RHS->getType()->isVectorTy()) - return nullptr; - // If the false value simplified to false, then the result of the compare - // is equal to "Cond && TCmp". This also catches the case when the false - // value simplified to false and the true value to true, returning "Cond". - if (match(FCmp, m_Zero())) - if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse)) - return V; - // If the true value simplified to true, then the result of the compare - // is equal to "Cond || FCmp". - if (match(TCmp, m_One())) - if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse)) - return V; - // Finally, if the false value simplified to true and the true value to - // false, then the result of the compare is equal to "!Cond". - if (match(FCmp, m_One()) && match(TCmp, m_Zero())) - if (Value *V = - SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()), - Q, MaxRecurse)) - return V; + if (Cond->getType()->isVectorTy() == RHS->getType()->isVectorTy()) + return handleOtherCmpSelSimplifications(TCmp, FCmp, Cond, Q, MaxRecurse); return nullptr; } @@ -543,10 +578,16 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, // Evaluate the BinOp on the incoming phi values. Value *CommonValue = nullptr; - for (Value *Incoming : PI->incoming_values()) { + for (unsigned u = 0, e = PI->getNumIncomingValues(); u < e; ++u) { + Value *Incoming = PI->getIncomingValue(u); + Instruction *InTI = PI->getIncomingBlock(u)->getTerminator(); // If the incoming value is the phi node itself, it can safely be skipped. if (Incoming == PI) continue; - Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse); + // Change the context instruction to the "edge" that flows into the phi. + // This is important because that is where incoming is actually "evaluated" + // even though it is used later somewhere else. + Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q.getWithInstruction(InTI), + MaxRecurse); // If the operation failed to simplify, or simplified to a different value // to previously, then give up. 
if (!V || (CommonValue && V != CommonValue)) @@ -656,16 +697,16 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V, bool AllowNonInbounds = false) { assert(V->getType()->isPtrOrPtrVectorTy()); - Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType(); - APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth()); + Type *IntIdxTy = DL.getIndexType(V->getType())->getScalarType(); + APInt Offset = APInt::getNullValue(IntIdxTy->getIntegerBitWidth()); V = V->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds); // As that strip may trace through `addrspacecast`, need to sext or trunc // the offset calculated. - IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType(); - Offset = Offset.sextOrTrunc(IntPtrTy->getIntegerBitWidth()); + IntIdxTy = DL.getIndexType(V->getType())->getScalarType(); + Offset = Offset.sextOrTrunc(IntIdxTy->getIntegerBitWidth()); - Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset); + Constant *OffsetIntPtr = ConstantInt::get(IntIdxTy, Offset); if (V->getType()->isVectorTy()) return ConstantVector::getSplat(V->getType()->getVectorNumElements(), OffsetIntPtr); @@ -1371,7 +1412,8 @@ Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, /// Commuted variants are assumed to be handled by calling this function again /// with the parameters swapped. static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, - ICmpInst *UnsignedICmp, bool IsAnd) { + ICmpInst *UnsignedICmp, bool IsAnd, + const SimplifyQuery &Q) { Value *X, *Y; ICmpInst::Predicate EqPred; @@ -1380,6 +1422,59 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, return nullptr; ICmpInst::Predicate UnsignedPred; + + Value *A, *B; + // Y = (A - B); + if (match(Y, m_Sub(m_Value(A), m_Value(B)))) { + if (match(UnsignedICmp, + m_c_ICmp(UnsignedPred, m_Specific(A), m_Specific(B))) && + ICmpInst::isUnsigned(UnsignedPred)) { + if (UnsignedICmp->getOperand(0) != A) + UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred); + + // A >=/<= B || (A - B) != 0 <--> true + if ((UnsignedPred == ICmpInst::ICMP_UGE || + UnsignedPred == ICmpInst::ICMP_ULE) && + EqPred == ICmpInst::ICMP_NE && !IsAnd) + return ConstantInt::getTrue(UnsignedICmp->getType()); + // A </> B && (A - B) == 0 <--> false + if ((UnsignedPred == ICmpInst::ICMP_ULT || + UnsignedPred == ICmpInst::ICMP_UGT) && + EqPred == ICmpInst::ICMP_EQ && IsAnd) + return ConstantInt::getFalse(UnsignedICmp->getType()); + + // A </> B && (A - B) != 0 <--> A </> B + // A </> B || (A - B) != 0 <--> (A - B) != 0 + if (EqPred == ICmpInst::ICMP_NE && (UnsignedPred == ICmpInst::ICMP_ULT || + UnsignedPred == ICmpInst::ICMP_UGT)) + return IsAnd ? UnsignedICmp : ZeroICmp; + + // A <=/>= B && (A - B) == 0 <--> (A - B) == 0 + // A <=/>= B || (A - B) == 0 <--> A <=/>= B + if (EqPred == ICmpInst::ICMP_EQ && (UnsignedPred == ICmpInst::ICMP_ULE || + UnsignedPred == ICmpInst::ICMP_UGE)) + return IsAnd ? 
ZeroICmp : UnsignedICmp; + } + + // Given Y = (A - B) + // Y >= A && Y != 0 --> Y >= A iff B != 0 + // Y < A || Y == 0 --> Y < A iff B != 0 + if (match(UnsignedICmp, + m_c_ICmp(UnsignedPred, m_Specific(Y), m_Specific(A)))) { + if (UnsignedICmp->getOperand(0) != Y) + UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred); + + if (UnsignedPred == ICmpInst::ICMP_UGE && IsAnd && + EqPred == ICmpInst::ICMP_NE && + isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return UnsignedICmp; + if (UnsignedPred == ICmpInst::ICMP_ULT && !IsAnd && + EqPred == ICmpInst::ICMP_EQ && + isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return UnsignedICmp; + } + } + if (match(UnsignedICmp, m_ICmp(UnsignedPred, m_Value(X), m_Specific(Y))) && ICmpInst::isUnsigned(UnsignedPred)) ; @@ -1395,19 +1490,33 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE) return IsAnd ? UnsignedICmp : ZeroICmp; - // X >= Y || Y != 0 --> true + // X <= Y && Y != 0 --> X <= Y iff X != 0 + // X <= Y || Y != 0 --> Y != 0 iff X != 0 + if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE && + isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return IsAnd ? UnsignedICmp : ZeroICmp; + + // X >= Y && Y == 0 --> Y == 0 // X >= Y || Y == 0 --> X >= Y - if (UnsignedPred == ICmpInst::ICMP_UGE && !IsAnd) { - if (EqPred == ICmpInst::ICMP_NE) - return getTrue(UnsignedICmp->getType()); - return UnsignedICmp; - } + if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ) + return IsAnd ? ZeroICmp : UnsignedICmp; + + // X > Y && Y == 0 --> Y == 0 iff X != 0 + // X > Y || Y == 0 --> X > Y iff X != 0 + if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ && + isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT)) + return IsAnd ? 
ZeroICmp : UnsignedICmp; // X < Y && Y == 0 --> false if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_EQ && IsAnd) return getFalse(UnsignedICmp->getType()); + // X >= Y || Y != 0 --> true + if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_NE && + !IsAnd) + return getTrue(UnsignedICmp->getType()); + return nullptr; } @@ -1587,10 +1696,10 @@ static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1, } static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1, - const InstrInfoQuery &IIQ) { - if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true)) + const SimplifyQuery &Q) { + if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true, Q)) return X; - if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true)) + if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true, Q)) return X; if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1)) @@ -1604,9 +1713,9 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1, if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true)) return X; - if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, IIQ)) + if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, Q.IIQ)) return X; - if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, IIQ)) + if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, Q.IIQ)) return X; return nullptr; @@ -1660,10 +1769,10 @@ static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1, } static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1, - const InstrInfoQuery &IIQ) { - if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false)) + const SimplifyQuery &Q) { + if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false, Q)) return X; - if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false)) + if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false, Q)) return X; if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1)) @@ -1677,9 +1786,9 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1, if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false)) return X; - if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, IIQ)) + if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, Q.IIQ)) return X; - if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, IIQ)) + if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, Q.IIQ)) return X; return nullptr; @@ -1738,8 +1847,8 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q, auto *ICmp0 = dyn_cast<ICmpInst>(Op0); auto *ICmp1 = dyn_cast<ICmpInst>(Op1); if (ICmp0 && ICmp1) - V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q.IIQ) - : simplifyOrOfICmps(ICmp0, ICmp1, Q.IIQ); + V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q) + : simplifyOrOfICmps(ICmp0, ICmp1, Q); auto *FCmp0 = dyn_cast<FCmpInst>(Op0); auto *FCmp1 = dyn_cast<FCmpInst>(Op1); @@ -1759,6 +1868,77 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q, return nullptr; } +/// Check that the Op1 is in expected form, i.e.: +/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???) +/// %Op1 = extractvalue { i4, i1 } %Agg, 1 +static bool omitCheckForZeroBeforeMulWithOverflowInternal(Value *Op1, + Value *X) { + auto *Extract = dyn_cast<ExtractValueInst>(Op1); + // We should only be extracting the overflow bit. + if (!Extract || !Extract->getIndices().equals(1)) + return false; + Value *Agg = Extract->getAggregateOperand(); + // This should be a multiplication-with-overflow intrinsic. 
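As a rough illustration of the extended unsigned range-check folds above (hand-written sketch, not taken from the patch or its tests; function names are made up), InstSimplify should now reduce patterns like these:

define i1 @uge_or_sub_nonzero(i8 %a, i8 %b) {
  %cmp = icmp uge i8 %a, %b
  %sub = sub i8 %a, %b
  %nz = icmp ne i8 %sub, 0
  %r = or i1 %cmp, %nz
  ret i1 %r                 ; A >= B || (A - B) != 0 --> true
}

define i1 @ugt_and_eq_zero(i8 %x, i8 %y) {
  %xnz = or i8 %x, 1        ; known non-zero
  %cmp = icmp ugt i8 %xnz, %y
  %eq = icmp eq i8 %y, 0
  %r = and i1 %cmp, %eq
  ret i1 %r                 ; X > Y && Y == 0 --> Y == 0, given X is known non-zero
}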
+ if (!match(Agg, m_CombineOr(m_Intrinsic<Intrinsic::umul_with_overflow>(), + m_Intrinsic<Intrinsic::smul_with_overflow>()))) + return false; + // One of its multipliers should be the value we checked for zero before. + if (!match(Agg, m_CombineOr(m_Argument<0>(m_Specific(X)), + m_Argument<1>(m_Specific(X))))) + return false; + return true; +} + +/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some +/// other form of check, e.g. one that was using division; it may have been +/// guarded against division-by-zero. We can drop that check now. +/// Look for: +/// %Op0 = icmp ne i4 %X, 0 +/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???) +/// %Op1 = extractvalue { i4, i1 } %Agg, 1 +/// %??? = and i1 %Op0, %Op1 +/// We can just return %Op1 +static Value *omitCheckForZeroBeforeMulWithOverflow(Value *Op0, Value *Op1) { + ICmpInst::Predicate Pred; + Value *X; + if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) || + Pred != ICmpInst::Predicate::ICMP_NE) + return nullptr; + // Is Op1 in expected form? + if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X)) + return nullptr; + // Can omit 'and', and just return the overflow bit. + return Op1; +} + +/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some +/// other form of check, e.g. one that was using division; it may have been +/// guarded against division-by-zero. We can drop that check now. +/// Look for: +/// %Op0 = icmp eq i4 %X, 0 +/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???) +/// %Op1 = extractvalue { i4, i1 } %Agg, 1 +/// %NotOp1 = xor i1 %Op1, true +/// %or = or i1 %Op0, %NotOp1 +/// We can just return %NotOp1 +static Value *omitCheckForZeroBeforeInvertedMulWithOverflow(Value *Op0, + Value *NotOp1) { + ICmpInst::Predicate Pred; + Value *X; + if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) || + Pred != ICmpInst::Predicate::ICMP_EQ) + return nullptr; + // We expect the other hand of an 'or' to be a 'not'. + Value *Op1; + if (!match(NotOp1, m_Not(m_Value(Op1)))) + return nullptr; + // Is Op1 in expected form? + if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X)) + return nullptr; + // Can omit 'and', and just return the inverted overflow bit. + return NotOp1; +} + /// Given operands for an And, see if we can fold the result. /// If not, this returns null. static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, @@ -1813,6 +1993,14 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return Op0; } + // If we have a multiplication overflow check that is being 'and'ed with a + // check that one of the multipliers is not zero, we can omit the 'and', and + // only keep the overflow check. + if (Value *V = omitCheckForZeroBeforeMulWithOverflow(Op0, Op1)) + return V; + if (Value *V = omitCheckForZeroBeforeMulWithOverflow(Op1, Op0)) + return V; + // A & (-A) = A if A is a power of two or zero. if (match(Op0, m_Neg(m_Specific(Op1))) || match(Op1, m_Neg(m_Specific(Op0)))) { @@ -1987,6 +2175,14 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false)) return V; + // If we have a multiplication overflow check that is being 'and'ed with a + // check that one of the multipliers is not zero, we can omit the 'and', and + // only keep the overflow check. 
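A sketch of the pattern the new omitCheckForZeroBeforeMulWithOverflow fold targets (illustrative IR, names invented; this mirrors the shape already shown in the comments above): the division-by-zero guard that survives after a division-based overflow check is rewritten to a mul.with.overflow intrinsic becomes redundant.

declare { i8, i1 } @llvm.umul.with.overflow.i8(i8, i8)

define i1 @mul_ov_and_nonzero_guard(i8 %x, i8 %y) {
  %nz = icmp ne i8 %x, 0
  %agg = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %x, i8 %y)
  %ov = extractvalue { i8, i1 } %agg, 1
  %r = and i1 %nz, %ov
  ret i1 %r                 ; simplifies to: ret i1 %ov
}

If %x is zero the overflow bit is zero as well, so the 'and' always equals the overflow bit and the guard can be dropped.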
+ if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op0, Op1)) + return V; + if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op1, Op0)) + return V; + // Try some generic simplifications for associative operations. if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q, MaxRecurse)) @@ -3529,6 +3725,9 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, // %sel = select i1 %cmp, i32 -2147483648, i32 %add // // We can't replace %sel with %add unless we strip away the flags. + // TODO: This is an unusual limitation because better analysis results in + // worse simplification. InstCombine can do this fold more generally + // by dropping the flags. Remove this fold to save compile-time? if (isa<OverflowingBinaryOperator>(B)) if (Q.IIQ.hasNoSignedWrap(B) || Q.IIQ.hasNoUnsignedWrap(B)) return nullptr; @@ -3745,18 +3944,21 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, /// Try to simplify a select instruction when its condition operand is a /// floating-point comparison. -static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F) { +static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F, + const SimplifyQuery &Q) { FCmpInst::Predicate Pred; if (!match(Cond, m_FCmp(Pred, m_Specific(T), m_Specific(F))) && !match(Cond, m_FCmp(Pred, m_Specific(F), m_Specific(T)))) return nullptr; - // TODO: The transform may not be valid with -0.0. An incomplete way of - // testing for that possibility is to check if at least one operand is a - // non-zero constant. + // This transform is safe if we do not have (do not care about) -0.0 or if + // at least one operand is known to not be -0.0. Otherwise, the select can + // change the sign of a zero operand. + bool HasNoSignedZeros = Q.CxtI && isa<FPMathOperator>(Q.CxtI) && + Q.CxtI->hasNoSignedZeros(); const APFloat *C; - if ((match(T, m_APFloat(C)) && C->isNonZero()) || - (match(F, m_APFloat(C)) && C->isNonZero())) { + if (HasNoSignedZeros || (match(T, m_APFloat(C)) && C->isNonZero()) || + (match(F, m_APFloat(C)) && C->isNonZero())) { // (T == F) ? T : F --> F // (F == T) ? T : F --> F if (Pred == FCmpInst::FCMP_OEQ) @@ -3794,6 +3996,15 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, return FalseVal; } + // select i1 Cond, i1 true, i1 false --> i1 Cond + assert(Cond->getType()->isIntOrIntVectorTy(1) && + "Select must have bool or bool vector condition"); + assert(TrueVal->getType() == FalseVal->getType() && + "Select must have same types for true/false ops"); + if (Cond->getType() == TrueVal->getType() && + match(TrueVal, m_One()) && match(FalseVal, m_ZeroInt())) + return Cond; + // select ?, X, X -> X if (TrueVal == FalseVal) return TrueVal; @@ -3807,7 +4018,7 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, simplifySelectWithICmpCond(Cond, TrueVal, FalseVal, Q, MaxRecurse)) return V; - if (Value *V = simplifySelectWithFCmp(Cond, TrueVal, FalseVal)) + if (Value *V = simplifySelectWithFCmp(Cond, TrueVal, FalseVal, Q)) return V; if (Value *V = foldSelectWithBinaryOp(Cond, TrueVal, FalseVal)) @@ -3865,7 +4076,7 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, // The following transforms are only safe if the ptrtoint cast // doesn't truncate the pointers. 
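The simplifySelectWithFCmp change above replaces the incomplete constant-operand heuristic with a check of the select's own nsz flag. A minimal sketch, assuming fast-math flags are present on the select (names hypothetical):

define float @fcmp_select_nsz(float %x, float %y) {
  %cmp = fcmp oeq float %x, %y
  %sel = select nsz i1 %cmp, float %x, float %y
  ret float %sel            ; (X == Y) ? X : Y --> Y, safe because nsz says we may ignore -0.0
}

Without nsz (or a known non-zero constant operand) the fold is still blocked, because selecting between +0.0 and -0.0 could change the sign of the result.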
if (Ops[1]->getType()->getScalarSizeInBits() == - Q.DL.getIndexSizeInBits(AS)) { + Q.DL.getPointerSizeInBits(AS)) { auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * { if (match(P, m_Zero())) return Constant::getNullValue(GEPTy); @@ -4250,6 +4461,30 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, ShuffleVectorInst::commuteShuffleMask(Indices, InVecNumElts); } + // A splat of an inserted scalar constant becomes a vector constant: + // shuf (inselt ?, C, IndexC), undef, <IndexC, IndexC...> --> <C, C...> + // NOTE: We may have commuted above, so analyze the updated Indices, not the + // original mask constant. + Constant *C; + ConstantInt *IndexC; + if (match(Op0, m_InsertElement(m_Value(), m_Constant(C), + m_ConstantInt(IndexC)))) { + // Match a splat shuffle mask of the insert index allowing undef elements. + int InsertIndex = IndexC->getZExtValue(); + if (all_of(Indices, [InsertIndex](int MaskElt) { + return MaskElt == InsertIndex || MaskElt == -1; + })) { + assert(isa<UndefValue>(Op1) && "Expected undef operand 1 for splat"); + + // Shuffle mask undefs become undefined constant result elements. + SmallVector<Constant *, 16> VecC(MaskNumElts, C); + for (unsigned i = 0; i != MaskNumElts; ++i) + if (Indices[i] == -1) + VecC[i] = UndefValue::get(C->getType()); + return ConstantVector::get(VecC); + } + } + // A shuffle of a splat is always the splat itself. Legal if the shuffle's // value type is same as the input vectors' type. if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op0)) @@ -4324,14 +4559,16 @@ static Constant *propagateNaN(Constant *In) { return In; } -static Constant *simplifyFPBinop(Value *Op0, Value *Op1) { - if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1)) - return ConstantFP::getNaN(Op0->getType()); +/// Perform folds that are common to any floating-point operation. This implies +/// transforms based on undef/NaN because the operation itself makes no +/// difference to the result. 
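To make the new splat-of-inserted-constant shuffle fold above concrete, here is an illustrative example (not from the patch's tests; values are arbitrary). The shuffle broadcasts the inserted constant, so the whole sequence becomes a vector constant, with undef mask elements turning into undef result elements:

define <4 x i32> @splat_inserted_constant(<4 x i32> %v) {
  %ins = insertelement <4 x i32> undef, i32 42, i32 3
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 undef, i32 3>
  ret <4 x i32> %splat      ; --> <i32 42, i32 42, i32 undef, i32 42>
}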
+static Constant *simplifyFPOp(ArrayRef<Value *> Ops) { + if (any_of(Ops, [](Value *V) { return isa<UndefValue>(V); })) + return ConstantFP::getNaN(Ops[0]->getType()); - if (match(Op0, m_NaN())) - return propagateNaN(cast<Constant>(Op0)); - if (match(Op1, m_NaN())) - return propagateNaN(cast<Constant>(Op1)); + for (Value *V : Ops) + if (match(V, m_NaN())) + return propagateNaN(cast<Constant>(V)); return nullptr; } @@ -4343,7 +4580,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPBinop(Op0, Op1)) + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // fadd X, -0 ==> X @@ -4390,7 +4627,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPBinop(Op0, Op1)) + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // fsub X, +0 ==> X @@ -4430,23 +4667,27 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, return nullptr; } -/// Given the operands for an FMul, see if we can fold the result -static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const SimplifyQuery &Q, unsigned MaxRecurse) { - if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q)) - return C; - - if (Constant *C = simplifyFPBinop(Op0, Op1)) +static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q, unsigned MaxRecurse) { + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // fmul X, 1.0 ==> X if (match(Op1, m_FPOne())) return Op0; + // fmul 1.0, X ==> X + if (match(Op0, m_FPOne())) + return Op1; + // fmul nnan nsz X, 0 ==> 0 if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZeroFP())) return ConstantFP::getNullValue(Op0->getType()); + // fmul nnan nsz 0, X ==> 0 + if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZeroFP())) + return ConstantFP::getNullValue(Op1->getType()); + // sqrt(X) * sqrt(X) --> X, if we can: // 1. Remove the intermediate rounding (reassociate). // 2. Ignore non-zero negative numbers because sqrt would produce NAN. @@ -4459,6 +4700,16 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, return nullptr; } +/// Given the operands for an FMul, see if we can fold the result +static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q, unsigned MaxRecurse) { + if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q)) + return C; + + // Now apply simplifications that do not require rounding. 
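The generic simplifyFPOp helper and the commuted FMul cases above cover patterns like the following (hand-written sketch, function names invented):

define float @fadd_undef(float %x) {
  %r = fadd float %x, undef
  ret float %r              ; an undef FP operand folds to NaN, now handled generically for any operand count
}

define float @fmul_one_commuted(float %x) {
  %r = fmul float 1.0, %x
  ret float %r              ; fmul 1.0, X --> X (the commuted form is new)
}

define float @fmul_zero_commuted(float %x) {
  %r = fmul nnan nsz float 0.0, %x
  ret float %r              ; fmul nnan nsz 0.0, X --> 0.0 (commuted form is new)
}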
+ return SimplifyFMAFMul(Op0, Op1, FMF, Q, MaxRecurse); +} + Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q) { return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit); @@ -4475,12 +4726,17 @@ Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit); } +Value *llvm::SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF, + const SimplifyQuery &Q) { + return ::SimplifyFMAFMul(Op0, Op1, FMF, Q, RecursionLimit); +} + static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, const SimplifyQuery &Q, unsigned) { if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPBinop(Op0, Op1)) + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // X / 1.0 -> X @@ -4525,7 +4781,7 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q)) return C; - if (Constant *C = simplifyFPBinop(Op0, Op1)) + if (Constant *C = simplifyFPOp({Op0, Op1})) return C; // Unlike fdiv, the result of frem always matches the sign of the dividend. @@ -4564,8 +4820,7 @@ static Value *simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q, /// Given the operand for a UnaryOperator, see if we can fold the result. /// If not, this returns null. -/// In contrast to SimplifyUnOp, try to use FastMathFlag when folding the -/// result. In case we don't need FastMathFlags, simply fall to SimplifyUnOp. +/// Try to use FastMathFlags when folding the result. static Value *simplifyFPUnOp(unsigned Opcode, Value *Op, const FastMathFlags &FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { @@ -4581,8 +4836,8 @@ Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) { return ::simplifyUnOp(Opcode, Op, Q, RecursionLimit); } -Value *llvm::SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, - const SimplifyQuery &Q) { +Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, + const SimplifyQuery &Q) { return ::simplifyFPUnOp(Opcode, Op, FMF, Q, RecursionLimit); } @@ -4634,11 +4889,10 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, /// Given operands for a BinaryOperator, see if we can fold the result. /// If not, this returns null. -/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the -/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. -static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const FastMathFlags &FMF, const SimplifyQuery &Q, - unsigned MaxRecurse) { +/// Try to use FastMathFlags when folding the result. 
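The copysign and freeze folds added in the hunks that follow can be sketched like this (illustrative IR only; the freeze case relies on isGuaranteedNotToBeUndefOrPoison, which trivially holds for a plain constant):

declare float @llvm.copysign.f32(float, float)

define float @copysign_self(float %x) {
  %r = call float @llvm.copysign.f32(float %x, float %x)
  ret float %r              ; copysign X, X --> X
}

define float @copysign_fneg(float %x) {
  %n = fneg float %x
  %r = call float @llvm.copysign.f32(float %n, float %x)
  ret float %r              ; copysign -X, X --> X
}

define i32 @freeze_constant() {
  %f = freeze i32 42
  ret i32 %f                ; freeze of a value known not to be undef/poison folds away
}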
+static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const FastMathFlags &FMF, const SimplifyQuery &Q, + unsigned MaxRecurse) { switch (Opcode) { case Instruction::FAdd: return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse); @@ -4658,9 +4912,9 @@ Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit); } -Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - FastMathFlags FMF, const SimplifyQuery &Q) { - return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit); +Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q) { + return ::SimplifyBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit); } /// Given operands for a CmpInst, see if we can fold the result. @@ -4906,6 +5160,16 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, return Op0; } break; + case Intrinsic::copysign: + // copysign X, X --> X + if (Op0 == Op1) + return Op0; + // copysign -X, X --> X + // copysign X, -X --> -X + if (match(Op0, m_FNeg(m_Specific(Op1))) || + match(Op1, m_FNeg(m_Specific(Op0)))) + return Op1; + break; case Intrinsic::maxnum: case Intrinsic::minnum: case Intrinsic::maximum: @@ -5009,6 +5273,15 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { } return nullptr; } + case Intrinsic::fma: + case Intrinsic::fmuladd: { + Value *Op0 = Call->getArgOperand(0); + Value *Op1 = Call->getArgOperand(1); + Value *Op2 = Call->getArgOperand(2); + if (Value *V = simplifyFPOp({ Op0, Op1, Op2 })) + return V; + return nullptr; + } default: return nullptr; } @@ -5046,6 +5319,19 @@ Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) { return ConstantFoldCall(Call, F, ConstantArgs, Q.TLI); } +/// Given operands for a Freeze, see if we can fold the result. +static Value *SimplifyFreezeInst(Value *Op0) { + // Use a utility function defined in ValueTracking. + if (llvm::isGuaranteedNotToBeUndefOrPoison(Op0)) + return Op0; + // We have room for improvement. + return nullptr; +} + +Value *llvm::SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) { + return ::SimplifyFreezeInst(Op0); +} + /// See if we can compute a simplified version of this instruction. /// If not, this returns null. @@ -5188,6 +5474,9 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, Result = SimplifyCall(cast<CallInst>(I), Q); break; } + case Instruction::Freeze: + Result = SimplifyFreezeInst(I->getOperand(0), Q); + break; #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc: #include "llvm/IR/Instruction.def" #undef HANDLE_CAST_INST @@ -5308,7 +5597,7 @@ const SimplifyQuery getBestSimplifyQuery(Pass &P, Function &F) { auto *DTWP = P.getAnalysisIfAvailable<DominatorTreeWrapperPass>(); auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *TLIWP = P.getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); - auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; + auto *TLI = TLIWP ? &TLIWP->getTLI(F) : nullptr; auto *ACWP = P.getAnalysisIfAvailable<AssumptionCacheTracker>(); auto *AC = ACWP ? 
&ACWP->getAssumptionCache(F) : nullptr; return {F.getParent()->getDataLayout(), TLI, DT, AC}; diff --git a/contrib/llvm-project/llvm/lib/Analysis/IntervalPartition.cpp b/contrib/llvm-project/llvm/lib/Analysis/IntervalPartition.cpp index d12db010db6a..23ff4fd6f85e 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/IntervalPartition.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/IntervalPartition.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/IntervalPartition.h" #include "llvm/Analysis/Interval.h" #include "llvm/Analysis/IntervalIterator.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include <cassert> #include <utility> @@ -22,6 +23,10 @@ using namespace llvm; char IntervalPartition::ID = 0; +IntervalPartition::IntervalPartition() : FunctionPass(ID) { + initializeIntervalPartitionPass(*PassRegistry::getPassRegistry()); +} + INITIALIZE_PASS(IntervalPartition, "intervals", "Interval Partition Construction", true, true) diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp index 439758560284..6107cacb9533 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/LazyBranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Dominators.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp index f2592c26b373..83698598e156 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Dominators.h" +#include "llvm/InitializePasses.h" using namespace llvm; @@ -55,8 +56,9 @@ void LazyBranchProbabilityInfoPass::releaseMemory() { LBPI.reset(); } bool LazyBranchProbabilityInfoPass::runOnFunction(Function &F) { LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - LBPI = llvm::make_unique<LazyBranchProbabilityInfo>(&F, &LI, &TLI); + TargetLibraryInfo &TLI = + getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + LBPI = std::make_unique<LazyBranchProbabilityInfo>(&F, &LI, &TLI); return false; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp index 797fcf516429..ef31c1e0ba8c 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LazyCallGraph.cpp @@ -150,7 +150,8 @@ static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) { return TLI.getLibFunc(F, LF) || TLI.isFunctionVectorizable(F.getName()); } -LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) { +LazyCallGraph::LazyCallGraph( + Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) { LLVM_DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier() << "\n"); for (Function &F : M) { @@ -159,7 +160,7 @@ LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) { // If this function is a known lib function to LLVM then we want to // synthesize reference edges to it to model the fact that LLVM can turn // arbitrary code into a library function 
call. - if (isKnownLibFunction(F, TLI)) + if (isKnownLibFunction(F, GetTLI(F))) LibFunctions.insert(&F); if (F.hasLocalLinkage()) @@ -631,7 +632,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall( // If the merge range is empty, then adding the edge didn't actually form any // new cycles. We're done. - if (empty(MergeRange)) { + if (MergeRange.empty()) { // Now that the SCC structure is finalized, flip the kind to call. SourceN->setEdgeKind(TargetN, Edge::Call); return false; // No new cycle. @@ -1751,16 +1752,14 @@ static void printNode(raw_ostream &OS, LazyCallGraph::Node &N) { } static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &C) { - ptrdiff_t Size = size(C); - OS << " SCC with " << Size << " functions:\n"; + OS << " SCC with " << C.size() << " functions:\n"; for (LazyCallGraph::Node &N : C) OS << " " << N.getFunction().getName() << "\n"; } static void printRefSCC(raw_ostream &OS, LazyCallGraph::RefSCC &C) { - ptrdiff_t Size = size(C); - OS << " RefSCC with " << Size << " call SCCs:\n"; + OS << " RefSCC with " << C.size() << " call SCCs:\n"; for (LazyCallGraph::SCC &InnerC : C) printSCC(OS, InnerC); diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp index 542ff709d475..bad2de9e5f5e 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp @@ -19,8 +19,8 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/ValueLattice.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/CFG.h" #include "llvm/IR/ConstantRange.h" @@ -33,6 +33,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" @@ -47,6 +48,9 @@ using namespace PatternMatch; static const unsigned MaxProcessedPerValue = 500; char LazyValueInfoWrapperPass::ID = 0; +LazyValueInfoWrapperPass::LazyValueInfoWrapperPass() : FunctionPass(ID) { + initializeLazyValueInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} INITIALIZE_PASS_BEGIN(LazyValueInfoWrapperPass, "lazy-value-info", "Lazy Value Information Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) @@ -188,7 +192,7 @@ namespace { else { auto It = ValueCache.find_as(Val); if (It == ValueCache.end()) { - ValueCache[Val] = make_unique<ValueCacheEntryTy>(Val, this); + ValueCache[Val] = std::make_unique<ValueCacheEntryTy>(Val, this); It = ValueCache.find_as(Val); assert(It != ValueCache.end() && "Val was just added to the map!"); } @@ -432,8 +436,12 @@ namespace { BasicBlock *BB); bool solveBlockValueOverflowIntrinsic( ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB); + bool solveBlockValueSaturatingIntrinsic(ValueLatticeElement &BBLV, + SaturatingInst *SI, BasicBlock *BB); bool solveBlockValueIntrinsic(ValueLatticeElement &BBLV, IntrinsicInst *II, BasicBlock *BB); + bool solveBlockValueExtractValue(ValueLatticeElement &BBLV, + ExtractValueInst *EVI, BasicBlock *BB); void intersectAssumeOrGuardBlockValueConstantRange(Value *Val, ValueLatticeElement &BBLV, Instruction *BBI); @@ -648,9 +656,7 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res, return 
solveBlockValueBinaryOp(Res, BO, BB); if (auto *EVI = dyn_cast<ExtractValueInst>(BBI)) - if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand())) - if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0) - return solveBlockValueOverflowIntrinsic(Res, WO, BB); + return solveBlockValueExtractValue(Res, EVI, BB); if (auto *II = dyn_cast<IntrinsicInst>(BBI)) return solveBlockValueIntrinsic(Res, II, BB); @@ -1090,8 +1096,22 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV, return true; } - return solveBlockValueBinaryOpImpl(BBLV, BO, BB, - [BO](const ConstantRange &CR1, const ConstantRange &CR2) { + if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(BO)) { + unsigned NoWrapKind = 0; + if (OBO->hasNoUnsignedWrap()) + NoWrapKind |= OverflowingBinaryOperator::NoUnsignedWrap; + if (OBO->hasNoSignedWrap()) + NoWrapKind |= OverflowingBinaryOperator::NoSignedWrap; + + return solveBlockValueBinaryOpImpl( + BBLV, BO, BB, + [BO, NoWrapKind](const ConstantRange &CR1, const ConstantRange &CR2) { + return CR1.overflowingBinaryOp(BO->getOpcode(), CR2, NoWrapKind); + }); + } + + return solveBlockValueBinaryOpImpl( + BBLV, BO, BB, [BO](const ConstantRange &CR1, const ConstantRange &CR2) { return CR1.binaryOp(BO->getOpcode(), CR2); }); } @@ -1104,35 +1124,71 @@ bool LazyValueInfoImpl::solveBlockValueOverflowIntrinsic( }); } -bool LazyValueInfoImpl::solveBlockValueIntrinsic( - ValueLatticeElement &BBLV, IntrinsicInst *II, BasicBlock *BB) { - switch (II->getIntrinsicID()) { +bool LazyValueInfoImpl::solveBlockValueSaturatingIntrinsic( + ValueLatticeElement &BBLV, SaturatingInst *SI, BasicBlock *BB) { + switch (SI->getIntrinsicID()) { case Intrinsic::uadd_sat: - return solveBlockValueBinaryOpImpl(BBLV, II, BB, - [](const ConstantRange &CR1, const ConstantRange &CR2) { + return solveBlockValueBinaryOpImpl( + BBLV, SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) { return CR1.uadd_sat(CR2); }); case Intrinsic::usub_sat: - return solveBlockValueBinaryOpImpl(BBLV, II, BB, - [](const ConstantRange &CR1, const ConstantRange &CR2) { + return solveBlockValueBinaryOpImpl( + BBLV, SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) { return CR1.usub_sat(CR2); }); case Intrinsic::sadd_sat: - return solveBlockValueBinaryOpImpl(BBLV, II, BB, - [](const ConstantRange &CR1, const ConstantRange &CR2) { + return solveBlockValueBinaryOpImpl( + BBLV, SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) { return CR1.sadd_sat(CR2); }); case Intrinsic::ssub_sat: - return solveBlockValueBinaryOpImpl(BBLV, II, BB, - [](const ConstantRange &CR1, const ConstantRange &CR2) { + return solveBlockValueBinaryOpImpl( + BBLV, SI, BB, [](const ConstantRange &CR1, const ConstantRange &CR2) { return CR1.ssub_sat(CR2); }); default: - LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() - << "' - overdefined (unknown intrinsic).\n"); - BBLV = ValueLatticeElement::getOverdefined(); + llvm_unreachable("All llvm.sat intrinsic are handled."); + } +} + +bool LazyValueInfoImpl::solveBlockValueIntrinsic(ValueLatticeElement &BBLV, + IntrinsicInst *II, + BasicBlock *BB) { + if (auto *SI = dyn_cast<SaturatingInst>(II)) + return solveBlockValueSaturatingIntrinsic(BBLV, SI, BB); + + LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined (unknown intrinsic).\n"); + BBLV = ValueLatticeElement::getOverdefined(); + return true; +} + +bool LazyValueInfoImpl::solveBlockValueExtractValue( + ValueLatticeElement &BBLV, ExtractValueInst *EVI, BasicBlock *BB) { + if (auto *WO 
= dyn_cast<WithOverflowInst>(EVI->getAggregateOperand())) + if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0) + return solveBlockValueOverflowIntrinsic(BBLV, WO, BB); + + // Handle extractvalue of insertvalue to allow further simplification + // based on replaced with.overflow intrinsics. + if (Value *V = SimplifyExtractValueInst( + EVI->getAggregateOperand(), EVI->getIndices(), + EVI->getModule()->getDataLayout())) { + if (!hasBlockValue(V, BB)) { + if (pushBlockValue({ BB, V })) + return false; + BBLV = ValueLatticeElement::getOverdefined(); + return true; + } + BBLV = getBlockValue(V, BB); return true; } + + LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined (unknown extractvalue).\n"); + BBLV = ValueLatticeElement::getOverdefined(); + return true; } static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, @@ -1575,7 +1631,7 @@ bool LazyValueInfoWrapperPass::runOnFunction(Function &F) { DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); Info.DT = DTWP ? &DTWP->getDomTree() : nullptr; - Info.TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + Info.TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); if (Info.PImpl) getImpl(Info.PImpl, Info.AC, &DL, Info.DT).clear(); diff --git a/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp index 52212e1c42aa..0f274429f11f 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp @@ -64,10 +64,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/DivergenceAnalysis.h" -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -75,6 +75,8 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <vector> @@ -93,8 +95,9 @@ namespace { class DivergencePropagator { public: DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, - PostDominatorTree &PDT, DenseSet<const Value *> &DV) - : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {} + PostDominatorTree &PDT, DenseSet<const Value *> &DV, + DenseSet<const Use *> &DU) + : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV), DU(DU) {} void populateWithSourcesOfDivergence(); void propagate(); @@ -118,11 +121,14 @@ private: PostDominatorTree &PDT; std::vector<Value *> Worklist; // Stack for DFS. DenseSet<const Value *> &DV; // Stores all divergent values. + DenseSet<const Use *> &DU; // Stores divergent uses of possibly uniform + // values. }; void DivergencePropagator::populateWithSourcesOfDivergence() { Worklist.clear(); DV.clear(); + DU.clear(); for (auto &I : instructions(F)) { if (TTI.isSourceOfDivergence(&I)) { Worklist.push_back(&I); @@ -197,8 +203,10 @@ void DivergencePropagator::exploreSyncDependency(Instruction *TI) { // dominators of TI until it is outside the influence region. 
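The LazyValueInfo changes above (nowrap-aware binary-operator ranges and the dedicated saturating-intrinsic handler) only tighten the computed ranges; the folds sketched below are what a consumer such as -correlated-propagation should then be able to do with them. Hand-written examples, names invented:

declare i8 @llvm.uadd.sat.i8(i8, i8)

define i1 @nsw_range(i8 %x) {
entry:
  %pos = icmp sge i8 %x, 0
  br i1 %pos, label %then, label %else
then:
  %a = add nsw i8 %x, 1     ; with nsw, LVI can bound %a to [1, 127] instead of a wrapping range
  %c = icmp sgt i8 %a, 0
  ret i1 %c                 ; provably true
else:
  ret i1 false
}

define i1 @sat_range(i8 %x) {
  %s = call i8 @llvm.uadd.sat.i8(i8 %x, i8 10)
  %c = icmp uge i8 %s, 10   ; LVI now knows %s is in [10, 255], so this is provably true
  ret i1 %c
}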
BasicBlock *InfluencedBB = ThisBB; while (InfluenceRegion.count(InfluencedBB)) { - for (auto &I : *InfluencedBB) - findUsersOutsideInfluenceRegion(I, InfluenceRegion); + for (auto &I : *InfluencedBB) { + if (!DV.count(&I)) + findUsersOutsideInfluenceRegion(I, InfluenceRegion); + } DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom(); if (IDomNode == nullptr) break; @@ -208,9 +216,10 @@ void DivergencePropagator::exploreSyncDependency(Instruction *TI) { void DivergencePropagator::findUsersOutsideInfluenceRegion( Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) { - for (User *U : I.users()) { - Instruction *UserInst = cast<Instruction>(U); + for (Use &Use : I.uses()) { + Instruction *UserInst = cast<Instruction>(Use.getUser()); if (!InfluenceRegion.count(UserInst->getParent())) { + DU.insert(&Use); if (DV.insert(UserInst).second) Worklist.push_back(UserInst); } @@ -250,9 +259,8 @@ void DivergencePropagator::computeInfluenceRegion( void DivergencePropagator::exploreDataDependency(Value *V) { // Follow def-use chains of V. for (User *U : V->users()) { - Instruction *UserInst = cast<Instruction>(U); - if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second) - Worklist.push_back(UserInst); + if (!TTI.isAlwaysUniform(U) && DV.insert(U).second) + Worklist.push_back(U); } } @@ -275,6 +283,9 @@ void DivergencePropagator::propagate() { // Register this pass. char LegacyDivergenceAnalysis::ID = 0; +LegacyDivergenceAnalysis::LegacyDivergenceAnalysis() : FunctionPass(ID) { + initializeLegacyDivergenceAnalysisPass(*PassRegistry::getPassRegistry()); +} INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence", "Legacy Divergence Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) @@ -320,6 +331,7 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) { return false; DivergentValues.clear(); + DivergentUses.clear(); gpuDA = nullptr; auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); @@ -328,11 +340,11 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) { if (shouldUseGPUDivergenceAnalysis(F)) { // run the new GPU divergence analysis auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - gpuDA = llvm::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI); + gpuDA = std::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI); } else { // run LLVM's existing DivergenceAnalysis - DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues); + DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses); DP.populateWithSourcesOfDivergence(); DP.propagate(); } @@ -351,6 +363,13 @@ bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const { return DivergentValues.count(V); } +bool LegacyDivergenceAnalysis::isDivergentUse(const Use *U) const { + if (gpuDA) { + return gpuDA->isDivergentUse(*U); + } + return DivergentValues.count(U->get()) || DivergentUses.count(U); +} + void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const { if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty()) return; diff --git a/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp b/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp index d28b8a189d4b..ba945eb4318f 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp @@ -66,6 +66,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" @@ -205,7 
+206,7 @@ bool Lint::runOnFunction(Function &F) { AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); visit(F); dbgs() << MessagesStr.str(); Messages.clear(); diff --git a/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp b/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp index 31da4e9ec783..a7d07c0b6183 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp @@ -12,6 +12,9 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalAlias.h" @@ -24,34 +27,30 @@ using namespace llvm; -static bool isAligned(const Value *Base, const APInt &Offset, unsigned Align, - const DataLayout &DL) { - APInt BaseAlign(Offset.getBitWidth(), Base->getPointerAlignment(DL)); - - if (!BaseAlign) { - Type *Ty = Base->getType()->getPointerElementType(); - if (!Ty->isSized()) - return false; - BaseAlign = DL.getABITypeAlignment(Ty); - } - - APInt Alignment(Offset.getBitWidth(), Align); - - assert(Alignment.isPowerOf2() && "must be a power of 2!"); - return BaseAlign.uge(Alignment) && !(Offset & (Alignment-1)); +static MaybeAlign getBaseAlign(const Value *Base, const DataLayout &DL) { + if (const MaybeAlign PA = Base->getPointerAlignment(DL)) + return *PA; + Type *const Ty = Base->getType()->getPointerElementType(); + if (!Ty->isSized()) + return None; + return Align(DL.getABITypeAlignment(Ty)); } -static bool isAligned(const Value *Base, unsigned Align, const DataLayout &DL) { - Type *Ty = Base->getType(); - assert(Ty->isSized() && "must be sized"); - APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0); - return isAligned(Base, Offset, Align, DL); +static bool isAligned(const Value *Base, const APInt &Offset, Align Alignment, + const DataLayout &DL) { + if (MaybeAlign BA = getBaseAlign(Base, DL)) { + const APInt APBaseAlign(Offset.getBitWidth(), BA->value()); + const APInt APAlign(Offset.getBitWidth(), Alignment.value()); + assert(APAlign.isPowerOf2() && "must be a power of 2!"); + return APBaseAlign.uge(APAlign) && !(Offset & (APAlign - 1)); + } + return false; } /// Test if V is always a pointer to allocated and suitably aligned memory for /// a simple load or store. static bool isDereferenceableAndAlignedPointer( - const Value *V, unsigned Align, const APInt &Size, const DataLayout &DL, + const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT, SmallPtrSetImpl<const Value *> &Visited) { // Already visited? Bail out, we've likely hit unreachable code. @@ -63,17 +62,22 @@ static bool isDereferenceableAndAlignedPointer( // bitcast instructions are no-ops as far as dereferenceability is concerned. 
if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) - return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, Size, - DL, CtxI, DT, Visited); + return isDereferenceableAndAlignedPointer(BC->getOperand(0), Alignment, + Size, DL, CtxI, DT, Visited); bool CheckForNonNull = false; APInt KnownDerefBytes(Size.getBitWidth(), V->getPointerDereferenceableBytes(DL, CheckForNonNull)); - if (KnownDerefBytes.getBoolValue()) { - if (KnownDerefBytes.uge(Size)) - if (!CheckForNonNull || isKnownNonZero(V, DL, 0, nullptr, CtxI, DT)) - return isAligned(V, Align, DL); - } + if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size)) + if (!CheckForNonNull || isKnownNonZero(V, DL, 0, nullptr, CtxI, DT)) { + // As we recursed through GEPs to get here, we've incrementally checked + // that each step advanced by a multiple of the alignment. If our base is + // properly aligned, then the original offset accessed must also be. + Type *Ty = V->getType(); + assert(Ty->isSized() && "must be sized"); + APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0); + return isAligned(V, Offset, Alignment, DL); + } // For GEPs, determine if the indexing lands within the allocated object. if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { @@ -81,7 +85,8 @@ static bool isDereferenceableAndAlignedPointer( APInt Offset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.isNegative() || - !Offset.urem(APInt(Offset.getBitWidth(), Align)).isMinValue()) + !Offset.urem(APInt(Offset.getBitWidth(), Alignment.value())) + .isMinValue()) return false; // If the base pointer is dereferenceable for Offset+Size bytes, then the @@ -93,67 +98,69 @@ static bool isDereferenceableAndAlignedPointer( // Offset and Size may have different bit widths if we have visited an // addrspacecast, so we can't do arithmetic directly on the APInt values. return isDereferenceableAndAlignedPointer( - Base, Align, Offset + Size.sextOrTrunc(Offset.getBitWidth()), - DL, CtxI, DT, Visited); + Base, Alignment, Offset + Size.sextOrTrunc(Offset.getBitWidth()), DL, + CtxI, DT, Visited); } // For gc.relocate, look through relocations if (const GCRelocateInst *RelocateInst = dyn_cast<GCRelocateInst>(V)) return isDereferenceableAndAlignedPointer( - RelocateInst->getDerivedPtr(), Align, Size, DL, CtxI, DT, Visited); + RelocateInst->getDerivedPtr(), Alignment, Size, DL, CtxI, DT, Visited); if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V)) - return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, Size, - DL, CtxI, DT, Visited); + return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Alignment, + Size, DL, CtxI, DT, Visited); if (const auto *Call = dyn_cast<CallBase>(V)) - if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) - return isDereferenceableAndAlignedPointer(RP, Align, Size, DL, CtxI, DT, - Visited); + if (auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) + return isDereferenceableAndAlignedPointer(RP, Alignment, Size, DL, CtxI, + DT, Visited); // If we don't know, assume the worst. return false; } -bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, +bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { + // Note: At the moment, Size can be zero. 
This ends up being interpreted as + // a query of whether [Base, V] is dereferenceable and V is aligned (since + // that's what the implementation happened to do). It's unclear if this is + // the desired semantic, but at least SelectionDAG does exercise this case. + SmallPtrSet<const Value *, 32> Visited; - return ::isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT, + return ::isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT, Visited); } bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, - unsigned Align, + MaybeAlign MA, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { + if (!Ty->isSized()) + return false; + // When dereferenceability information is provided by a dereferenceable // attribute, we know exactly how many bytes are dereferenceable. If we can // determine the exact offset to the attributed variable, we can use that // information here. // Require ABI alignment for loads without alignment specification - if (Align == 0) - Align = DL.getABITypeAlignment(Ty); - - if (!Ty->isSized()) - return false; - - SmallPtrSet<const Value *, 32> Visited; - return ::isDereferenceableAndAlignedPointer( - V, Align, - APInt(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)), - DL, CtxI, DT, Visited); + const Align Alignment = DL.getValueOrABITypeAlignment(MA, Ty); + APInt AccessSize(DL.getPointerTypeSizeInBits(V->getType()), + DL.getTypeStoreSize(Ty)); + return isDereferenceableAndAlignedPointer(V, Alignment, AccessSize, DL, CtxI, + DT); } bool llvm::isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI, const DominatorTree *DT) { - return isDereferenceableAndAlignedPointer(V, Ty, 1, DL, CtxI, DT); + return isDereferenceableAndAlignedPointer(V, Ty, Align::None(), DL, CtxI, DT); } /// Test if A and B will obviously have the same value. @@ -187,6 +194,60 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { return false; } +bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, + ScalarEvolution &SE, + DominatorTree &DT) { + auto &DL = LI->getModule()->getDataLayout(); + Value *Ptr = LI->getPointerOperand(); + + APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()), + DL.getTypeStoreSize(LI->getType())); + const Align Alignment = DL.getValueOrABITypeAlignment( + MaybeAlign(LI->getAlignment()), LI->getType()); + + Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI(); + + // If given a uniform (i.e. non-varying) address, see if we can prove the + // access is safe within the loop w/o needing predication. + if (L->isLoopInvariant(Ptr)) + return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL, + HeaderFirstNonPHI, &DT); + + // Otherwise, check to see if we have a repeating access pattern where we can + // prove that all accesses are well aligned and dereferenceable. + auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr)); + if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine()) + return false; + auto* Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE)); + if (!Step) + return false; + // TODO: generalize to access patterns which have gaps + if (Step->getAPInt() != EltSize) + return false; + + // TODO: If the symbolic trip count has a small bound (max count), we might + // be able to prove safety. 
+ auto TC = SE.getSmallConstantTripCount(L); + if (!TC) + return false; + + const APInt AccessSize = TC * EltSize; + + auto *StartS = dyn_cast<SCEVUnknown>(AddRec->getStart()); + if (!StartS) + return false; + assert(SE.isLoopInvariant(StartS, L) && "implied by addrec definition"); + Value *Base = StartS->getValue(); + + // For the moment, restrict ourselves to the case where the access size is a + // multiple of the requested alignment and the base is aligned. + // TODO: generalize if a case found which warrants + if (EltSize.urem(Alignment.value()) != 0) + return false; + return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL, + HeaderFirstNonPHI, &DT); +} + /// Check if executing a load of this pointer value cannot trap. /// /// If DT and ScanFrom are specified this method performs context-sensitive @@ -198,64 +259,25 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { /// /// This uses the pointee type to determine how many bytes need to be safe to /// load from the pointer. -bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, +bool llvm::isSafeToLoadUnconditionally(Value *V, MaybeAlign MA, APInt &Size, const DataLayout &DL, Instruction *ScanFrom, const DominatorTree *DT) { // Zero alignment means that the load has the ABI alignment for the target - if (Align == 0) - Align = DL.getABITypeAlignment(V->getType()->getPointerElementType()); - assert(isPowerOf2_32(Align)); + const Align Alignment = + DL.getValueOrABITypeAlignment(MA, V->getType()->getPointerElementType()); // If DT is not specified we can't make context-sensitive query const Instruction* CtxI = DT ? ScanFrom : nullptr; - if (isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT)) + if (isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT)) return true; - int64_t ByteOffset = 0; - Value *Base = V; - Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL); - - if (ByteOffset < 0) // out of bounds + if (!ScanFrom) return false; - Type *BaseType = nullptr; - unsigned BaseAlign = 0; - if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { - // An alloca is safe to load from as load as it is suitably aligned. - BaseType = AI->getAllocatedType(); - BaseAlign = AI->getAlignment(); - } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { - // Global variables are not necessarily safe to load from if they are - // interposed arbitrarily. Their size may change or they may be weak and - // require a test to determine if they were in fact provided. - if (!GV->isInterposable()) { - BaseType = GV->getType()->getElementType(); - BaseAlign = GV->getAlignment(); - } - } - - PointerType *AddrTy = cast<PointerType>(V->getType()); - uint64_t LoadSize = DL.getTypeStoreSize(AddrTy->getElementType()); - - // If we found a base allocated type from either an alloca or global variable, - // try to see if we are definitively within the allocated region. We need to - // know the size of the base type and the loaded type to do anything in this - // case. - if (BaseType && BaseType->isSized()) { - if (BaseAlign == 0) - BaseAlign = DL.getPrefTypeAlignment(BaseType); - - if (Align <= BaseAlign) { - // Check if the load is within the bounds of the underlying object. 
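The new isDereferenceableAndAlignedInLoop above is aimed at loops of the following shape (a hand-written sketch, not from the patch): a constant trip count, a unit-stride affine access, and a base pointer whose dereferenceable/align attributes cover the entire footprint, so every iteration's load can be speculated without predication.

define i32 @sum100(i32* align 4 dereferenceable(400) %a) {
entry:
  br label %loop
loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
  %gep = getelementptr inbounds i32, i32* %a, i64 %i
  %v = load i32, i32* %gep, align 4        ; footprint: 100 iterations * 4 bytes = 400 bytes from %a
  %acc.next = add i32 %acc, %v
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, 100
  br i1 %done, label %exit, label %loop
exit:
  ret i32 %acc
}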
- if (ByteOffset + LoadSize <= DL.getTypeAllocSize(BaseType) && - ((ByteOffset % Align) == 0)) - return true; - } - } - - if (!ScanFrom) + if (Size.getBitWidth() > 64) return false; + const uint64_t LoadSize = Size.getZExtValue(); // Otherwise, be a little bit aggressive by scanning the local block where we // want to check to see if the pointer is already being loaded or stored @@ -279,7 +301,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, return false; Value *AccessedPtr; - unsigned AccessedAlign; + MaybeAlign MaybeAccessedAlign; if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { // Ignore volatile loads. The execution of a volatile load cannot // be used to prove an address is backed by regular memory; it can, @@ -287,24 +309,26 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, if (LI->isVolatile()) continue; AccessedPtr = LI->getPointerOperand(); - AccessedAlign = LI->getAlignment(); + MaybeAccessedAlign = MaybeAlign(LI->getAlignment()); } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { // Ignore volatile stores (see comment for loads). if (SI->isVolatile()) continue; AccessedPtr = SI->getPointerOperand(); - AccessedAlign = SI->getAlignment(); + MaybeAccessedAlign = MaybeAlign(SI->getAlignment()); } else continue; Type *AccessedTy = AccessedPtr->getType()->getPointerElementType(); - if (AccessedAlign == 0) - AccessedAlign = DL.getABITypeAlignment(AccessedTy); - if (AccessedAlign < Align) + + const Align AccessedAlign = + DL.getValueOrABITypeAlignment(MaybeAccessedAlign, AccessedTy); + if (AccessedAlign < Alignment) continue; // Handle trivial cases. - if (AccessedPtr == V) + if (AccessedPtr == V && + LoadSize <= DL.getTypeStoreSize(AccessedTy)) return true; if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) && @@ -314,12 +338,12 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size, return false; } -bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align, +bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, MaybeAlign Alignment, const DataLayout &DL, Instruction *ScanFrom, const DominatorTree *DT) { APInt Size(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)); - return isSafeToLoadUnconditionally(V, Align, Size, DL, ScanFrom, DT); + return isSafeToLoadUnconditionally(V, Alignment, Size, DL, ScanFrom, DT); } /// DefMaxInstsToScan - the default number of maximum instructions @@ -359,10 +383,6 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, MaxInstsToScan = ~0U; const DataLayout &DL = ScanBB->getModule()->getDataLayout(); - - // Try to get the store size for the type. - auto AccessSize = LocationSize::precise(DL.getTypeStoreSize(AccessTy)); - Value *StrippedPtr = Ptr->stripPointerCasts(); while (ScanFrom != ScanBB->begin()) { @@ -401,6 +421,9 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, return LI; } + // Try to get the store size for the type. + auto AccessSize = LocationSize::precise(DL.getTypeStoreSize(AccessTy)); + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { Value *StorePtr = SI->getPointerOperand()->stripPointerCasts(); // If this is a store through Ptr, the value is available! 
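The rewritten isSafeToLoadUnconditionally keeps a block-local fallback: scan backwards from ScanFrom for an earlier non-volatile load or store of the same (or equivalent) pointer with at least the requested alignment and size. A sketch of the kind of pattern this catches (illustrative only):

define i32 @speculate(i32* %p, i1 %c) {
entry:
  %v0 = load i32, i32* %p, align 4
  br i1 %c, label %then, label %merge
then:
  %v1 = load i32, i32* %p, align 4
  br label %merge
merge:
  %r = phi i32 [ %v1, %then ], [ %v0, %entry ]
  ret i32 %r
}

A client that wants to speculate %v1 into %entry (SimplifyCFG, for example) can pass the entry terminator as ScanFrom; the earlier load of %p with the same alignment and size makes the speculative load safe even though %p carries no dereferenceable attribute.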
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 36bd9a8b7ea7..26fa5112c29a 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -52,6 +52,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -1189,18 +1190,31 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, unsigned IdxWidth = DL.getIndexSizeInBits(ASA); Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); - APInt Size(IdxWidth, DL.getTypeStoreSize(Ty)); APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0); PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); + // Retrieve the address space again as pointer stripping now tracks through + // `addrspacecast`. + ASA = cast<PointerType>(PtrA->getType())->getAddressSpace(); + ASB = cast<PointerType>(PtrB->getType())->getAddressSpace(); + // Check that the address spaces match and that the pointers are valid. + if (ASA != ASB) + return false; + + IdxWidth = DL.getIndexSizeInBits(ASA); + OffsetA = OffsetA.sextOrTrunc(IdxWidth); + OffsetB = OffsetB.sextOrTrunc(IdxWidth); + + APInt Size(IdxWidth, DL.getTypeStoreSize(Ty)); + // OffsetDelta = OffsetB - OffsetA; const SCEV *OffsetSCEVA = SE.getConstant(OffsetA); const SCEV *OffsetSCEVB = SE.getConstant(OffsetB); const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA); - const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV); - const APInt &OffsetDelta = OffsetDeltaC->getAPInt(); + const APInt &OffsetDelta = cast<SCEVConstant>(OffsetDeltaSCEV)->getAPInt(); + // Check if they are based on the same pointer. That makes the offsets // sufficient. if (PtrA == PtrB) @@ -1641,13 +1655,21 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, // Check every access pair. while (AI != AE) { Visited.insert(*AI); - EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI); + bool AIIsWrite = AI->getInt(); + // Check loads only against next equivalent class, but stores also against + // other stores in the same equivalence class - to the same address. + EquivalenceClasses<MemAccessInfo>::member_iterator OI = + (AIIsWrite ? AI : std::next(AI)); while (OI != AE) { // Check every accessing instruction pair in program order. for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(), I1E = Accesses[*AI].end(); I1 != I1E; ++I1) - for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(), - I2E = Accesses[*OI].end(); I2 != I2E; ++I2) { + // Scan all accesses of another equivalence class, but only the next + // accesses of the same equivalent class. + for (std::vector<unsigned>::iterator + I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()), + I2E = (OI == AI ? 
I1E : Accesses[*OI].end()); + I2 != I2E; ++I2) { auto A = std::make_pair(&*AI, *I1); auto B = std::make_pair(&*OI, *I2); @@ -2078,7 +2100,7 @@ OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName, DL = I->getDebugLoc(); } - Report = make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL, + Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL, CodeRegion); return *Report; } @@ -2323,9 +2345,9 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI) - : PSE(llvm::make_unique<PredicatedScalarEvolution>(*SE, *L)), - PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)), - DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L), + : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)), + PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)), + DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false), HasConvergentOp(false), HasDependenceInvolvingLoopInvariantAddress(false) { @@ -2376,11 +2398,15 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { PSE->print(OS, Depth); } +LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis() : FunctionPass(ID) { + initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry()); +} + const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) { auto &LAI = LoopAccessInfoMap[L]; if (!LAI) - LAI = llvm::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI); + LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI); return *LAI.get(); } @@ -2399,7 +2425,7 @@ void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const { bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) { SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); - TLI = TLIP ? &TLIP->getTLI() : nullptr; + TLI = TLIP ? &TLIP->getTLI(F) : nullptr; AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAnalysisManager.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAnalysisManager.cpp index a10a87ce113b..02d40fb8d72a 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopAnalysisManager.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAnalysisManager.cpp @@ -46,7 +46,7 @@ bool LoopAnalysisManagerFunctionProxy::Result::invalidate( // invalidation logic below to act on that. 
auto PAC = PA.getChecker<LoopAnalysisManagerFunctionProxy>(); bool invalidateMemorySSAAnalysis = false; - if (EnableMSSALoopDependency) + if (MSSAUsed) invalidateMemorySSAAnalysis = Inv.invalidate<MemorySSAAnalysis>(F, PA); if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) || Inv.invalidate<AAManager>(F, PA) || diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp new file mode 100644 index 000000000000..25325ec1be02 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -0,0 +1,629 @@ +//===- LoopCacheAnalysis.cpp - Loop Cache Analysis -------------------------==// +// +// The LLVM Compiler Infrastructure +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the implementation for the loop cache analysis. +/// The implementation is largely based on the following paper: +/// +/// Compiler Optimizations for Improving Data Locality +/// By: Steve Carr, Katherine S. McKinley, Chau-Wen Tseng +/// http://www.cs.utexas.edu/users/mckinley/papers/asplos-1994.pdf +/// +/// The general approach taken to estimate the number of cache lines used by the +/// memory references in an inner loop is: +/// 1. Partition memory references that exhibit temporal or spacial reuse +/// into reference groups. +/// 2. For each loop L in the a loop nest LN: +/// a. Compute the cost of the reference group +/// b. Compute the loop cost by summing up the reference groups costs +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopCacheAnalysis.h" +#include "llvm/ADT/BreadthFirstIterator.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "loop-cache-cost" + +static cl::opt<unsigned> DefaultTripCount( + "default-trip-count", cl::init(100), cl::Hidden, + cl::desc("Use this to specify the default trip count of a loop")); + +// In this analysis two array references are considered to exhibit temporal +// reuse if they access either the same memory location, or a memory location +// with distance smaller than a configurable threshold. +static cl::opt<unsigned> TemporalReuseThreshold( + "temporal-reuse-threshold", cl::init(2), cl::Hidden, + cl::desc("Use this to specify the max. distance between array elements " + "accessed in a loop so that the elements are classified to have " + "temporal reuse")); + +/// Retrieve the innermost loop in the given loop nest \p Loops. It returns a +/// nullptr if any loops in the loop vector supplied has more than one sibling. +/// The loop vector is expected to contain loops collected in breadth-first +/// order. +static Loop *getInnerMostLoop(const LoopVectorTy &Loops) { + assert(!Loops.empty() && "Expecting a non-empy loop vector"); + + Loop *LastLoop = Loops.back(); + Loop *ParentLoop = LastLoop->getParentLoop(); + + if (ParentLoop == nullptr) { + assert(Loops.size() == 1 && "Expecting a single loop"); + return LastLoop; + } + + return (std::is_sorted(Loops.begin(), Loops.end(), + [](const Loop *L1, const Loop *L2) { + return L1->getLoopDepth() < L2->getLoopDepth(); + })) + ? 
LastLoop + : nullptr; +} + +static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize, + const Loop &L, ScalarEvolution &SE) { + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&AccessFn); + if (!AR || !AR->isAffine()) + return false; + + assert(AR->getLoop() && "AR should have a loop"); + + // Check that start and increment are not add recurrences. + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(SE); + if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step)) + return false; + + // Check that start and increment are both invariant in the loop. + if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L)) + return false; + + return AR->getStepRecurrence(SE) == &ElemSize; +} + +/// Compute the trip count for the given loop \p L. Return the SCEV expression +/// for the trip count or nullptr if it cannot be computed. +static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) { + const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L); + if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) || + !isa<SCEVConstant>(BackedgeTakenCount)) + return nullptr; + + return SE.getAddExpr(BackedgeTakenCount, + SE.getOne(BackedgeTakenCount->getType())); +} + +//===----------------------------------------------------------------------===// +// IndexedReference implementation +// +raw_ostream &llvm::operator<<(raw_ostream &OS, const IndexedReference &R) { + if (!R.IsValid) { + OS << R.StoreOrLoadInst; + OS << ", IsValid=false."; + return OS; + } + + OS << *R.BasePointer; + for (const SCEV *Subscript : R.Subscripts) + OS << "[" << *Subscript << "]"; + + OS << ", Sizes: "; + for (const SCEV *Size : R.Sizes) + OS << "[" << *Size << "]"; + + return OS; +} + +IndexedReference::IndexedReference(Instruction &StoreOrLoadInst, + const LoopInfo &LI, ScalarEvolution &SE) + : StoreOrLoadInst(StoreOrLoadInst), SE(SE) { + assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) && + "Expecting a load or store instruction"); + + IsValid = delinearize(LI); + if (IsValid) + LLVM_DEBUG(dbgs().indent(2) << "Succesfully delinearized: " << *this + << "\n"); +} + +Optional<bool> IndexedReference::hasSpacialReuse(const IndexedReference &Other, + unsigned CLS, + AliasAnalysis &AA) const { + assert(IsValid && "Expecting a valid reference"); + + if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) { + LLVM_DEBUG(dbgs().indent(2) + << "No spacial reuse: different base pointers\n"); + return false; + } + + unsigned NumSubscripts = getNumSubscripts(); + if (NumSubscripts != Other.getNumSubscripts()) { + LLVM_DEBUG(dbgs().indent(2) + << "No spacial reuse: different number of subscripts\n"); + return false; + } + + // all subscripts must be equal, except the leftmost one (the last one). + for (auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) { + if (getSubscript(SubNum) != Other.getSubscript(SubNum)) { + LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: " + << "\n\t" << *getSubscript(SubNum) << "\n\t" + << *Other.getSubscript(SubNum) << "\n"); + return false; + } + } + + // the difference between the last subscripts must be less than the cache line + // size. 
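As an illustration of the rule above (example code, not from the patch): in the nest below, B[i][j] and B[i][j+1] agree on every subscript except the innermost one, and their last subscripts differ by the constant 1, which is smaller than the cache line size, so the two references land in the same reference group.

    // Hypothetical input loop nest: the two B references exhibit spatial reuse.
    enum { N = 128, M = 128 };
    double A[N][M], B[N][M];
    for (int i = 0; i < N; ++i)
      for (int j = 0; j + 1 < M; ++j)
        A[i][j] = B[i][j] + B[i][j + 1];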
+ const SCEV *LastSubscript = getLastSubscript(); + const SCEV *OtherLastSubscript = Other.getLastSubscript(); + const SCEVConstant *Diff = dyn_cast<SCEVConstant>( + SE.getMinusSCEV(LastSubscript, OtherLastSubscript)); + + if (Diff == nullptr) { + LLVM_DEBUG(dbgs().indent(2) + << "No spacial reuse, difference between subscript:\n\t" + << *LastSubscript << "\n\t" << OtherLastSubscript + << "\nis not constant.\n"); + return None; + } + + bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS); + + LLVM_DEBUG({ + if (InSameCacheLine) + dbgs().indent(2) << "Found spacial reuse.\n"; + else + dbgs().indent(2) << "No spacial reuse.\n"; + }); + + return InSameCacheLine; +} + +Optional<bool> IndexedReference::hasTemporalReuse(const IndexedReference &Other, + unsigned MaxDistance, + const Loop &L, + DependenceInfo &DI, + AliasAnalysis &AA) const { + assert(IsValid && "Expecting a valid reference"); + + if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) { + LLVM_DEBUG(dbgs().indent(2) + << "No temporal reuse: different base pointer\n"); + return false; + } + + std::unique_ptr<Dependence> D = + DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true); + + if (D == nullptr) { + LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n"); + return false; + } + + if (D->isLoopIndependent()) { + LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n"); + return true; + } + + // Check the dependence distance at every loop level. There is temporal reuse + // if the distance at the given loop's depth is small (|d| <= MaxDistance) and + // it is zero at every other loop level. + int LoopDepth = L.getLoopDepth(); + int Levels = D->getLevels(); + for (int Level = 1; Level <= Levels; ++Level) { + const SCEV *Distance = D->getDistance(Level); + const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance); + + if (SCEVConst == nullptr) { + LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n"); + return None; + } + + const ConstantInt &CI = *SCEVConst->getValue(); + if (Level != LoopDepth && !CI.isZero()) { + LLVM_DEBUG(dbgs().indent(2) + << "No temporal reuse: distance is not zero at depth=" << Level + << "\n"); + return false; + } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) { + LLVM_DEBUG( + dbgs().indent(2) + << "No temporal reuse: distance is greater than MaxDistance at depth=" + << Level << "\n"); + return false; + } + } + + LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n"); + return true; +} + +CacheCostTy IndexedReference::computeRefCost(const Loop &L, + unsigned CLS) const { + assert(IsValid && "Expecting a valid reference"); + LLVM_DEBUG({ + dbgs().indent(2) << "Computing cache cost for:\n"; + dbgs().indent(4) << *this << "\n"; + }); + + // If the indexed reference is loop invariant the cost is one. + if (isLoopInvariant(L)) { + LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n"); + return 1; + } + + const SCEV *TripCount = computeTripCount(L, SE); + if (!TripCount) { + LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName() + << " could not be computed, using DefaultTripCount\n"); + const SCEV *ElemSize = Sizes.back(); + TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount); + } + LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n"); + + // If the indexed reference is 'consecutive' the cost is + // (TripCount*Stride)/CLS, otherwise the cost is TripCount. 
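As a worked example with made-up numbers: for a consecutive reference with unit coefficient over 8-byte elements (stride = 8 bytes), TripCount = 100 and a 64-byte cache line, the formula above gives RefCost = (100 * 8) / 64 = 12 cache lines; the same reference accessed non-consecutively would cost the full trip count, 100.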
+ const SCEV *RefCost = TripCount; + + if (isConsecutive(L, CLS)) { + const SCEV *Coeff = getLastCoefficient(); + const SCEV *ElemSize = Sizes.back(); + const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); + const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); + Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType()); + Stride = SE.getNoopOrSignExtend(Stride, WiderType); + TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType); + const SCEV *Numerator = SE.getMulExpr(Stride, TripCount); + RefCost = SE.getUDivExpr(Numerator, CacheLineSize); + LLVM_DEBUG(dbgs().indent(4) + << "Access is consecutive: RefCost=(TripCount*Stride)/CLS=" + << *RefCost << "\n"); + } else + LLVM_DEBUG(dbgs().indent(4) + << "Access is not consecutive: RefCost=TripCount=" << *RefCost + << "\n"); + + // Attempt to fold RefCost into a constant. + if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost)) + return ConstantCost->getValue()->getSExtValue(); + + LLVM_DEBUG(dbgs().indent(4) + << "RefCost is not a constant! Setting to RefCost=InvalidCost " + "(invalid value).\n"); + + return CacheCost::InvalidCost; +} + +bool IndexedReference::delinearize(const LoopInfo &LI) { + assert(Subscripts.empty() && "Subscripts should be empty"); + assert(Sizes.empty() && "Sizes should be empty"); + assert(!IsValid && "Should be called once from the constructor"); + LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n"); + + const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst); + const BasicBlock *BB = StoreOrLoadInst.getParent(); + + if (Loop *L = LI.getLoopFor(BB)) { + const SCEV *AccessFn = + SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L); + + BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn)); + if (BasePointer == nullptr) { + LLVM_DEBUG( + dbgs().indent(2) + << "ERROR: failed to delinearize, can't identify base pointer\n"); + return false; + } + + AccessFn = SE.getMinusSCEV(AccessFn, BasePointer); + + LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName() + << "', AccessFn: " << *AccessFn << "\n"); + + SE.delinearize(AccessFn, Subscripts, Sizes, + SE.getElementSize(&StoreOrLoadInst)); + + if (Subscripts.empty() || Sizes.empty() || + Subscripts.size() != Sizes.size()) { + // Attempt to determine whether we have a single dimensional array access. + // before giving up. + if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) { + LLVM_DEBUG(dbgs().indent(2) + << "ERROR: failed to delinearize reference\n"); + Subscripts.clear(); + Sizes.clear(); + return false; + } + + const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize); + Subscripts.push_back(Div); + Sizes.push_back(ElemSize); + } + + return all_of(Subscripts, [&](const SCEV *Subscript) { + return isSimpleAddRecurrence(*Subscript, *L); + }); + } + + return false; +} + +bool IndexedReference::isLoopInvariant(const Loop &L) const { + Value *Addr = getPointerOperand(&StoreOrLoadInst); + assert(Addr != nullptr && "Expecting either a load or a store instruction"); + assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable"); + + if (SE.isLoopInvariant(SE.getSCEV(Addr), &L)) + return true; + + // The indexed reference is loop invariant if none of the coefficients use + // the loop induction variable. 
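For example (illustrative code, not from the patch), with respect to the inner j-loop below the reference x[i] is loop invariant, since none of its subscripts step with j, and therefore costs a single cache line for that loop:

    enum { N = 128, M = 128 };
    double A[N][M], x[N];
    for (int i = 0; i < N; ++i)
      for (int j = 0; j < M; ++j)
        A[i][j] += x[i];   // x[i] is invariant in the j-loop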
+ bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) { + return isCoeffForLoopZeroOrInvariant(*Subscript, L); + }); + + return allCoeffForLoopAreZero; +} + +bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const { + // The indexed reference is 'consecutive' if the only coefficient that uses + // the loop induction variable is the last one... + const SCEV *LastSubscript = Subscripts.back(); + for (const SCEV *Subscript : Subscripts) { + if (Subscript == LastSubscript) + continue; + if (!isCoeffForLoopZeroOrInvariant(*Subscript, L)) + return false; + } + + // ...and the access stride is less than the cache line size. + const SCEV *Coeff = getLastCoefficient(); + const SCEV *ElemSize = Sizes.back(); + const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); + const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); + + return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize); +} + +const SCEV *IndexedReference::getLastCoefficient() const { + const SCEV *LastSubscript = getLastSubscript(); + assert(isa<SCEVAddRecExpr>(LastSubscript) && + "Expecting a SCEV add recurrence expression"); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LastSubscript); + return AR->getStepRecurrence(SE); +} + +bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript, + const Loop &L) const { + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&Subscript); + return (AR != nullptr) ? AR->getLoop() != &L + : SE.isLoopInvariant(&Subscript, &L); +} + +bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript, + const Loop &L) const { + if (!isa<SCEVAddRecExpr>(Subscript)) + return false; + + const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(&Subscript); + assert(AR->getLoop() && "AR should have a loop"); + + if (!AR->isAffine()) + return false; + + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(SE); + + if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L)) + return false; + + return true; +} + +bool IndexedReference::isAliased(const IndexedReference &Other, + AliasAnalysis &AA) const { + const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst); + const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst); + return AA.isMustAlias(Loc1, Loc2); +} + +//===----------------------------------------------------------------------===// +// CacheCost implementation +// +raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) { + for (const auto &LC : CC.LoopCosts) { + const Loop *L = LC.first; + OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n"; + } + return OS; +} + +CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, + ScalarEvolution &SE, TargetTransformInfo &TTI, + AliasAnalysis &AA, DependenceInfo &DI, + Optional<unsigned> TRT) + : Loops(Loops), TripCounts(), LoopCosts(), + TRT((TRT == None) ? Optional<unsigned>(TemporalReuseThreshold) : TRT), + LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) { + assert(!Loops.empty() && "Expecting a non-empty loop vector."); + + for (const Loop *L : Loops) { + unsigned TripCount = SE.getSmallConstantTripCount(L); + TripCount = (TripCount == 0) ? 
DefaultTripCount : TripCount; + TripCounts.push_back({L, TripCount}); + } + + calculateCacheFootprint(); +} + +std::unique_ptr<CacheCost> +CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, + DependenceInfo &DI, Optional<unsigned> TRT) { + if (Root.getParentLoop()) { + LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n"); + return nullptr; + } + + LoopVectorTy Loops; + for (Loop *L : breadth_first(&Root)) + Loops.push_back(L); + + if (!getInnerMostLoop(Loops)) { + LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more " + "than one innermost loop\n"); + return nullptr; + } + + return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI, TRT); +} + +void CacheCost::calculateCacheFootprint() { + LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n"); + ReferenceGroupsTy RefGroups; + if (!populateReferenceGroups(RefGroups)) + return; + + LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n"); + for (const Loop *L : Loops) { + assert((std::find_if(LoopCosts.begin(), LoopCosts.end(), + [L](const LoopCacheCostTy &LCC) { + return LCC.first == L; + }) == LoopCosts.end()) && + "Should not add duplicate element"); + CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups); + LoopCosts.push_back(std::make_pair(L, LoopCost)); + } + + sortLoopCosts(); + RefGroups.clear(); +} + +bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const { + assert(RefGroups.empty() && "Reference groups should be empty"); + + unsigned CLS = TTI.getCacheLineSize(); + Loop *InnerMostLoop = getInnerMostLoop(Loops); + assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop"); + + for (BasicBlock *BB : InnerMostLoop->getBlocks()) { + for (Instruction &I : *BB) { + if (!isa<StoreInst>(I) && !isa<LoadInst>(I)) + continue; + + std::unique_ptr<IndexedReference> R(new IndexedReference(I, LI, SE)); + if (!R->isValid()) + continue; + + bool Added = false; + for (ReferenceGroupTy &RefGroup : RefGroups) { + const IndexedReference &Representative = *RefGroup.front().get(); + LLVM_DEBUG({ + dbgs() << "References:\n"; + dbgs().indent(2) << *R << "\n"; + dbgs().indent(2) << Representative << "\n"; + }); + + Optional<bool> HasTemporalReuse = + R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA); + Optional<bool> HasSpacialReuse = + R->hasSpacialReuse(Representative, CLS, AA); + + if ((HasTemporalReuse.hasValue() && *HasTemporalReuse) || + (HasSpacialReuse.hasValue() && *HasSpacialReuse)) { + RefGroup.push_back(std::move(R)); + Added = true; + break; + } + } + + if (!Added) { + ReferenceGroupTy RG; + RG.push_back(std::move(R)); + RefGroups.push_back(std::move(RG)); + } + } + } + + if (RefGroups.empty()) + return false; + + LLVM_DEBUG({ + dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n"; + int n = 1; + for (const ReferenceGroupTy &RG : RefGroups) { + dbgs().indent(2) << "RefGroup " << n << ":\n"; + for (const auto &IR : RG) + dbgs().indent(4) << *IR << "\n"; + n++; + } + dbgs() << "\n"; + }); + + return true; +} + +CacheCostTy +CacheCost::computeLoopCacheCost(const Loop &L, + const ReferenceGroupsTy &RefGroups) const { + if (!L.isLoopSimplifyForm()) + return InvalidCost; + + LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName() + << "' as innermost loop.\n"); + + // Compute the product of the trip counts of each other loop in the nest. 
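As a worked example with made-up numbers: in a two-deep nest with trip counts 100 (outer i-loop) and 50 (inner j-loop), a reference group's contribution when the j-loop is treated as innermost is RefGroupCost(j) * 100, and when the i-loop is treated as innermost it is RefGroupCost(i) * 50; the loop with the smaller total cost is the better candidate for the innermost position.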
+ CacheCostTy TripCountsProduct = 1; + for (const auto &TC : TripCounts) { + if (TC.first == &L) + continue; + TripCountsProduct *= TC.second; + } + + CacheCostTy LoopCost = 0; + for (const ReferenceGroupTy &RG : RefGroups) { + CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L); + LoopCost += RefGroupCost * TripCountsProduct; + } + + LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName() + << "' has cost=" << LoopCost << "\n"); + + return LoopCost; +} + +CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG, + const Loop &L) const { + assert(!RG.empty() && "Reference group should have at least one member."); + + const IndexedReference *Representative = RG.front().get(); + return Representative->computeRefCost(L, TTI.getCacheLineSize()); +} + +//===----------------------------------------------------------------------===// +// LoopCachePrinterPass implementation +// +PreservedAnalyses LoopCachePrinterPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + Function *F = L.getHeader()->getParent(); + DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI); + + if (auto CC = CacheCost::getCacheCost(L, AR, DI)) + OS << *CC; + + return PreservedAnalyses::all(); +} diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp index aa5da0859805..3dc29b40834c 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp @@ -34,6 +34,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -105,7 +106,8 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, I->moveBefore(InsertPt); if (MSSAU) if (auto *MUD = MSSAU->getMemorySSA()->getMemoryAccess(I)) - MSSAU->moveToPlace(MUD, InsertPt->getParent(), MemorySSA::End); + MSSAU->moveToPlace(MUD, InsertPt->getParent(), + MemorySSA::BeforeTerminator); // There is possibility of hoisting this instruction above some arbitrary // condition. Any metadata defined on it can be control dependent on this @@ -359,6 +361,44 @@ bool Loop::isAuxiliaryInductionVariable(PHINode &AuxIndVar, return SE.isLoopInvariant(IndDesc.getStep(), this); } +BranchInst *Loop::getLoopGuardBranch() const { + if (!isLoopSimplifyForm()) + return nullptr; + + BasicBlock *Preheader = getLoopPreheader(); + assert(Preheader && getLoopLatch() && + "Expecting a loop with valid preheader and latch"); + + // Loop should be in rotate form. + if (!isRotatedForm()) + return nullptr; + + // Disallow loops with more than one unique exit block, as we do not verify + // that GuardOtherSucc post dominates all exit blocks. + BasicBlock *ExitFromLatch = getUniqueExitBlock(); + if (!ExitFromLatch) + return nullptr; + + BasicBlock *ExitFromLatchSucc = ExitFromLatch->getUniqueSuccessor(); + if (!ExitFromLatchSucc) + return nullptr; + + BasicBlock *GuardBB = Preheader->getUniquePredecessor(); + if (!GuardBB) + return nullptr; + + assert(GuardBB->getTerminator() && "Expecting valid guard terminator"); + + BranchInst *GuardBI = dyn_cast<BranchInst>(GuardBB->getTerminator()); + if (!GuardBI || GuardBI->isUnconditional()) + return nullptr; + + BasicBlock *GuardOtherSucc = (GuardBI->getSuccessor(0) == Preheader) + ? GuardBI->getSuccessor(1) + : GuardBI->getSuccessor(0); + return (GuardOtherSucc == ExitFromLatchSucc) ? 
GuardBI : nullptr; +} + bool Loop::isCanonical(ScalarEvolution &SE) const { InductionDescriptor IndDesc; if (!getInductionDescriptor(SE, IndDesc)) @@ -1012,6 +1052,10 @@ MDNode *llvm::makePostTransformationMetadata(LLVMContext &Context, // LoopInfo implementation // +LoopInfoWrapperPass::LoopInfoWrapperPass() : FunctionPass(ID) { + initializeLoopInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + char LoopInfoWrapperPass::ID = 0; INITIALIZE_PASS_BEGIN(LoopInfoWrapperPass, "loops", "Natural Loop Information", true, true) diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp index 4ab3798039d8..507f5f442865 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopPass.cpp @@ -20,9 +20,10 @@ #include "llvm/IR/OptBisect.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PassTimingInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Timer.h" #include "llvm/Support/TimeProfiler.h" +#include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -409,6 +410,10 @@ bool LoopPass::skipLoop(const Loop *L) const { return false; } +LCSSAVerificationPass::LCSSAVerificationPass() : FunctionPass(ID) { + initializeLCSSAVerificationPassPass(*PassRegistry::getPassRegistry()); +} + char LCSSAVerificationPass::ID = 0; INITIALIZE_PASS(LCSSAVerificationPass, "lcssa-verification", "LCSSA Verifier", false, false) diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp index 1728b5e9f6d2..762623de41e9 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp @@ -78,7 +78,7 @@ bool UnrolledInstAnalyzer::visitBinaryOperator(BinaryOperator &I) { const DataLayout &DL = I.getModule()->getDataLayout(); if (auto FI = dyn_cast<FPMathOperator>(&I)) SimpleV = - SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL); + SimplifyBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL); else SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL); diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp index 6e1bb50e8893..2c57e63251c6 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemDepPrinter.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp index 77ebf89d9a08..5d824067df53 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemDerefPrinter.cpp @@ -13,6 +13,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -55,8 +56,8 @@ bool MemDerefPrinter::runOnFunction(Function &F) { Value *PO = LI->getPointerOperand(); if (isDereferenceablePointer(PO, LI->getType(), DL)) Deref.push_back(PO); - if 
(isDereferenceableAndAlignedPointer(PO, LI->getType(), - LI->getAlignment(), DL)) + if (isDereferenceableAndAlignedPointer( + PO, LI->getType(), MaybeAlign(LI->getAlignment()), DL)) DerefAndAligned.insert(PO); } } diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp index 729dad463657..427e6fd3ace2 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -180,6 +180,19 @@ static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy, return None; } +static Optional<AllocFnsTy> +getAllocationData(const Value *V, AllocType AllocTy, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI, + bool LookThroughBitCast = false) { + bool IsNoBuiltinCall; + if (const Function *Callee = + getCalledFunction(V, LookThroughBitCast, IsNoBuiltinCall)) + if (!IsNoBuiltinCall) + return getAllocationDataForFunction( + Callee, AllocTy, &GetTLI(const_cast<Function &>(*Callee))); + return None; +} + static Optional<AllocFnsTy> getAllocationSize(const Value *V, const TargetLibraryInfo *TLI) { bool IsNoBuiltinCall; @@ -223,6 +236,11 @@ bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast) { return getAllocationData(V, AnyAlloc, TLI, LookThroughBitCast).hasValue(); } +bool llvm::isAllocationFn( + const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI, + bool LookThroughBitCast) { + return getAllocationData(V, AnyAlloc, GetTLI, LookThroughBitCast).hasValue(); +} /// Tests if a value is a call or invoke to a function that returns a /// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions). @@ -240,6 +258,12 @@ bool llvm::isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast) { return getAllocationData(V, MallocLike, TLI, LookThroughBitCast).hasValue(); } +bool llvm::isMallocLikeFn( + const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI, + bool LookThroughBitCast) { + return getAllocationData(V, MallocLike, GetTLI, LookThroughBitCast) + .hasValue(); +} /// Tests if a value is a call or invoke to a library function that /// allocates zero-filled memory (such as calloc). @@ -276,12 +300,27 @@ bool llvm::isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI) { return getAllocationDataForFunction(F, ReallocLike, TLI).hasValue(); } +/// Tests if a value is a call or invoke to a library function that +/// allocates memory and throws if an allocation failed (e.g., new). +bool llvm::isOpNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast).hasValue(); +} + +/// Tests if a value is a call or invoke to a library function that +/// allocates memory (strdup, strndup). +bool llvm::isStrdupLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, StrDupLike, TLI, LookThroughBitCast).hasValue(); +} + /// extractMallocCall - Returns the corresponding CallInst if the instruction /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. -const CallInst *llvm::extractMallocCall(const Value *I, - const TargetLibraryInfo *TLI) { - return isMallocLikeFn(I, TLI) ? 
dyn_cast<CallInst>(I) : nullptr; +const CallInst *llvm::extractMallocCall( + const Value *I, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { + return isMallocLikeFn(I, GetTLI) ? dyn_cast<CallInst>(I) : nullptr; } static Value *computeArraySize(const CallInst *CI, const DataLayout &DL, @@ -505,6 +544,7 @@ Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize, Builder.CreateSub(SizeOffsetPair.first, SizeOffsetPair.second); Value *UseZero = Builder.CreateICmpULT(SizeOffsetPair.first, SizeOffsetPair.second); + ResultSize = Builder.CreateZExtOrTrunc(ResultSize, ResultType); return Builder.CreateSelect(UseZero, ConstantInt::get(ResultType, 0), ResultSize); } @@ -521,9 +561,9 @@ STATISTIC(ObjectVisitorArgument, STATISTIC(ObjectVisitorLoad, "Number of load instructions with unsolved size and offset"); -APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) { - if (Options.RoundToAlign && Align) - return APInt(IntTyBits, alignTo(Size.getZExtValue(), Align)); +APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Alignment) { + if (Options.RoundToAlign && Alignment) + return APInt(IntTyBits, alignTo(Size.getZExtValue(), Align(Alignment))); return Size; } @@ -537,7 +577,7 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL, } SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { - IntTyBits = DL.getPointerTypeSizeInBits(V->getType()); + IntTyBits = DL.getIndexTypeSizeInBits(V->getType()); Zero = APInt::getNullValue(IntTyBits); V = V->stripPointerCasts(); @@ -707,7 +747,7 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) { SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) { SizeOffsetType PtrData = compute(GEP.getPointerOperand()); - APInt Offset(IntTyBits, 0); + APInt Offset(DL.getIndexTypeSizeInBits(GEP.getPointerOperand()->getType()), 0); if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(DL, Offset)) return unknown(); @@ -795,7 +835,7 @@ ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator( SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) { // XXX - Are vectors of pointers possible here? 
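The GetTLI-based overloads added above let callers supply per-function TargetLibraryInfo lazily through a callback. A minimal sketch, assuming it runs inside a legacy pass that can reach TargetLibraryInfoWrapperPass (V is a hypothetical Value being inspected):

    auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
      return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    };
    if (isMallocLikeFn(V, GetTLI)) {
      // V is a call to a malloc-like allocation function.
    }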
- IntTy = cast<IntegerType>(DL.getIntPtrType(V->getType())); + IntTy = cast<IntegerType>(DL.getIndexType(V->getType())); Zero = ConstantInt::get(IntTy, 0); SizeOffsetEvalType Result = compute_(V); @@ -899,12 +939,12 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallSite(CallSite CS) { } Value *FirstArg = CS.getArgument(FnData->FstParam); - FirstArg = Builder.CreateZExt(FirstArg, IntTy); + FirstArg = Builder.CreateZExtOrTrunc(FirstArg, IntTy); if (FnData->SndParam < 0) return std::make_pair(FirstArg, Zero); Value *SecondArg = CS.getArgument(FnData->SndParam); - SecondArg = Builder.CreateZExt(SecondArg, IntTy); + SecondArg = Builder.CreateZExtOrTrunc(SecondArg, IntTy); Value *Size = Builder.CreateMul(FirstArg, SecondArg); return std::make_pair(Size, Zero); diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index b25b655165d7..a97a56e25805 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -47,6 +47,7 @@ #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" @@ -183,7 +184,7 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, MemDepResult MemoryDependenceResults::getCallDependencyFrom( CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt, BasicBlock *BB) { - unsigned Limit = BlockScanLimit; + unsigned Limit = getDefaultBlockScanLimit(); // Walk backwards through the block, looking for dependencies. while (ScanIt != BB->begin()) { @@ -356,7 +357,7 @@ MemDepResult MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, BasicBlock *BB) { - if (!LI->getMetadata(LLVMContext::MD_invariant_group)) + if (!LI->hasMetadata(LLVMContext::MD_invariant_group)) return MemDepResult::getUnknown(); // Take the ptr operand after all casts and geps 0. This way we can search @@ -417,7 +418,7 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, // same pointer operand) we can assume that value pointed by pointer // operand didn't change. if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && - U->getMetadata(LLVMContext::MD_invariant_group) != nullptr) + U->hasMetadata(LLVMContext::MD_invariant_group)) ClosestDependency = GetClosestDependency(ClosestDependency, U); } } @@ -443,7 +444,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( OrderedBasicBlock *OBB) { bool isInvariantLoad = false; - unsigned DefaultLimit = BlockScanLimit; + unsigned DefaultLimit = getDefaultBlockScanLimit(); if (!Limit) Limit = &DefaultLimit; @@ -481,7 +482,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // Arguably, this logic should be pushed inside AliasAnalysis itself. 
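Several hunks here replace the getMetadata(...) != nullptr pattern with the boolean hasMetadata(...) query; the two are equivalent, for example:

    bool IsInvariant = LI->hasMetadata(LLVMContext::MD_invariant_load);
    // ...previously spelled as:
    // LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr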
if (isLoad && QueryInst) { LoadInst *LI = dyn_cast<LoadInst>(QueryInst); - if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr) + if (LI && LI->hasMetadata(LLVMContext::MD_invariant_load)) isInvariantLoad = true; } @@ -1493,7 +1494,7 @@ void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies( if (auto *I = dyn_cast<Instruction>(P.getPointer())) { auto toRemoveIt = ReverseNonLocalDefsCache.find(I); if (toRemoveIt != ReverseNonLocalDefsCache.end()) { - for (const auto &entry : toRemoveIt->second) + for (const auto *entry : toRemoveIt->second) NonLocalDefsCache.erase(entry); ReverseNonLocalDefsCache.erase(toRemoveIt); } @@ -1746,6 +1747,9 @@ void MemoryDependenceResults::verifyRemoved(Instruction *D) const { AnalysisKey MemoryDependenceAnalysis::Key; +MemoryDependenceAnalysis::MemoryDependenceAnalysis() + : DefaultBlockScanLimit(BlockScanLimit) {} + MemoryDependenceResults MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &AA = AM.getResult<AAManager>(F); @@ -1753,7 +1757,7 @@ MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &PV = AM.getResult<PhiValuesAnalysis>(F); - return MemoryDependenceResults(AA, AC, TLI, DT, PV); + return MemoryDependenceResults(AA, AC, TLI, DT, PV, DefaultBlockScanLimit); } char MemoryDependenceWrapperPass::ID = 0; @@ -1807,15 +1811,15 @@ bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &P } unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const { - return BlockScanLimit; + return DefaultBlockScanLimit; } bool MemoryDependenceWrapperPass::runOnFunction(Function &F) { auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult(); - MemDep.emplace(AA, AC, TLI, DT, PV); + MemDep.emplace(AA, AC, TLI, DT, PV, BlockScanLimit); return false; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp index 163830eee797..103cdea148e5 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp @@ -12,6 +12,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp index 17f5d9b9f0ad..bf8dc94bfbf9 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemorySSA.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Use.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" @@ -49,6 +50,7 @@ #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> +#include <cstdlib> #include <iterator> #include <memory> #include <utility> @@ -83,7 +85,7 @@ bool llvm::VerifyMemorySSA = false; #endif 
/// Enables memory ssa as a dependency for loop passes in legacy pass manager. cl::opt<bool> llvm::EnableMSSALoopDependency( - "enable-mssa-loop-dependency", cl::Hidden, cl::init(false), + "enable-mssa-loop-dependency", cl::Hidden, cl::init(true), cl::desc("Enable MemorySSA dependency for loop pass manager")); static cl::opt<bool, true> @@ -284,6 +286,11 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc, case Intrinsic::invariant_end: case Intrinsic::assume: return {false, NoAlias}; + case Intrinsic::dbg_addr: + case Intrinsic::dbg_declare: + case Intrinsic::dbg_label: + case Intrinsic::dbg_value: + llvm_unreachable("debuginfo shouldn't have associated defs!"); default: break; } @@ -369,7 +376,7 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA, const Instruction *I) { // If the memory can't be changed, then loads of the memory can't be // clobbered. - return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) || + return isa<LoadInst>(I) && (I->hasMetadata(LLVMContext::MD_invariant_load) || AA.pointsToConstantMemory(MemoryLocation( cast<LoadInst>(I)->getPointerOperand()))); } @@ -867,6 +874,7 @@ template <class AliasAnalysisType> class ClobberWalker { if (!DefChainEnd) for (auto *MA : def_chain(const_cast<MemoryAccess *>(Target))) DefChainEnd = MA; + assert(DefChainEnd && "Failed to find dominating phi/liveOnEntry"); // If any of the terminated paths don't dominate the phi we'll try to // optimize, we need to figure out what they are and quit. @@ -1087,9 +1095,14 @@ void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal, AccessList *Accesses = It->second.get(); auto *Phi = cast<MemoryPhi>(&Accesses->front()); if (RenameAllUses) { - int PhiIndex = Phi->getBasicBlockIndex(BB); - assert(PhiIndex != -1 && "Incomplete phi during partial rename"); - Phi->setIncomingValue(PhiIndex, IncomingVal); + bool ReplacementDone = false; + for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) + if (Phi->getIncomingBlock(I) == BB) { + Phi->setIncomingValue(I, IncomingVal); + ReplacementDone = true; + } + (void) ReplacementDone; + assert(ReplacementDone && "Incomplete phi during partial rename"); } else Phi->addIncoming(IncomingVal, BB); } @@ -1217,6 +1230,7 @@ MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT) // safe because there are no CFG changes while building MemorySSA and can // significantly reduce the time spent by the compiler in AA, because we will // make queries about all the instructions in the Function. 
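BatchAAResults wraps an existing AAResults and reuses one query state across many alias queries, which is what makes handing it to the whole-function MemorySSA build below worthwhile. A hedged sketch with illustrative names:

    BatchAAResults BatchAA(AAR);   // AAR: the AAResults for this function
    AliasResult R = BatchAA.alias(MemoryLocation::get(LoadA),
                                  MemoryLocation::get(LoadB));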
+ assert(AA && "No alias analysis?"); BatchAAResults BatchAA(*AA); buildMemorySSA(BatchAA); // Intentionally leave AA to nullptr while building so we don't accidently @@ -1237,7 +1251,7 @@ MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) { auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr)); if (Res.second) - Res.first->second = llvm::make_unique<AccessList>(); + Res.first->second = std::make_unique<AccessList>(); return Res.first->second.get(); } @@ -1245,7 +1259,7 @@ MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) { auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr)); if (Res.second) - Res.first->second = llvm::make_unique<DefsList>(); + Res.first->second = std::make_unique<DefsList>(); return Res.first->second.get(); } @@ -1554,10 +1568,10 @@ MemorySSA::CachingWalker<AliasAnalysis> *MemorySSA::getWalkerImpl() { if (!WalkerBase) WalkerBase = - llvm::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT); + std::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT); Walker = - llvm::make_unique<CachingWalker<AliasAnalysis>>(this, WalkerBase.get()); + std::make_unique<CachingWalker<AliasAnalysis>>(this, WalkerBase.get()); return Walker.get(); } @@ -1567,10 +1581,10 @@ MemorySSAWalker *MemorySSA::getSkipSelfWalker() { if (!WalkerBase) WalkerBase = - llvm::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT); + std::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT); SkipWalker = - llvm::make_unique<SkipSelfWalker<AliasAnalysis>>(this, WalkerBase.get()); + std::make_unique<SkipSelfWalker<AliasAnalysis>>(this, WalkerBase.get()); return SkipWalker.get(); } @@ -1687,13 +1701,15 @@ MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) { MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I, MemoryAccess *Definition, - const MemoryUseOrDef *Template) { + const MemoryUseOrDef *Template, + bool CreationMustSucceed) { assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI"); MemoryUseOrDef *NewAccess = createNewAccess(I, AA, Template); - assert( - NewAccess != nullptr && - "Tried to create a memory access for a non-memory touching instruction"); - NewAccess->setDefiningAccess(Definition); + if (CreationMustSucceed) + assert(NewAccess != nullptr && "Tried to create a memory access for a " + "non-memory touching instruction"); + if (NewAccess) + NewAccess->setDefiningAccess(Definition); return NewAccess; } @@ -1717,13 +1733,21 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I, AliasAnalysisType *AAP, const MemoryUseOrDef *Template) { // The assume intrinsic has a control dependency which we model by claiming - // that it writes arbitrarily. Ignore that fake memory dependency here. + // that it writes arbitrarily. Debuginfo intrinsics may be considered + // clobbers when we have a nonstandard AA pipeline. Ignore these fake memory + // dependencies here. // FIXME: Replace this special casing with a more accurate modelling of // assume's control dependency. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) if (II->getIntrinsicID() == Intrinsic::assume) return nullptr; + // Using a nonstandard AA pipelines might leave us with unexpected modref + // results for I, so add a check to not model instructions that may not read + // from or write to memory. This is necessary for correctness. 
+ if (!I->mayReadFromMemory() && !I->mayWriteToMemory()) + return nullptr; + bool Def, Use; if (Template) { Def = dyn_cast_or_null<MemoryDef>(Template) != nullptr; @@ -1846,10 +1870,9 @@ LLVM_DUMP_METHOD void MemorySSA::dump() const { print(dbgs()); } #endif void MemorySSA::verifyMemorySSA() const { - verifyDefUses(F); - verifyDomination(F); - verifyOrdering(F); + verifyOrderingDominationAndDefUses(F); verifyDominationNumbers(F); + verifyPrevDefInPhis(F); // Previously, the verification used to also verify that the clobberingAccess // cached by MemorySSA is the same as the clobberingAccess found at a later // query to AA. This does not hold true in general due to the current fragility @@ -1862,6 +1885,40 @@ void MemorySSA::verifyMemorySSA() const { // example, see test4 added in D51960. } +void MemorySSA::verifyPrevDefInPhis(Function &F) const { +#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS) + for (const BasicBlock &BB : F) { + if (MemoryPhi *Phi = getMemoryAccess(&BB)) { + for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) { + auto *Pred = Phi->getIncomingBlock(I); + auto *IncAcc = Phi->getIncomingValue(I); + // If Pred has no unreachable predecessors, get last def looking at + // IDoms. If, while walkings IDoms, any of these has an unreachable + // predecessor, then the incoming def can be any access. + if (auto *DTNode = DT->getNode(Pred)) { + while (DTNode) { + if (auto *DefList = getBlockDefs(DTNode->getBlock())) { + auto *LastAcc = &*(--DefList->end()); + assert(LastAcc == IncAcc && + "Incorrect incoming access into phi."); + break; + } + DTNode = DTNode->getIDom(); + } + } else { + // If Pred has unreachable predecessors, but has at least a Def, the + // incoming access can be the last Def in Pred, or it could have been + // optimized to LoE. After an update, though, the LoE may have been + // replaced by another access, so IncAcc may be any access. + // If Pred has unreachable predecessors and no Defs, incoming access + // should be LoE; However, after an update, it may be any access. + } + } + } + } +#endif +} + /// Verify that all of the blocks we believe to have valid domination numbers /// actually have valid domination numbers. void MemorySSA::verifyDominationNumbers(const Function &F) const { @@ -1900,10 +1957,14 @@ void MemorySSA::verifyDominationNumbers(const Function &F) const { #endif } -/// Verify that the order and existence of MemoryAccesses matches the +/// Verify ordering: the order and existence of MemoryAccesses matches the /// order and existence of memory affecting instructions. -void MemorySSA::verifyOrdering(Function &F) const { -#ifndef NDEBUG +/// Verify domination: each definition dominates all of its uses. +/// Verify def-uses: the immediate use information - walk all the memory +/// accesses and verifying that, for each use, it appears in the appropriate +/// def's use list +void MemorySSA::verifyOrderingDominationAndDefUses(Function &F) const { +#if !defined(NDEBUG) // Walk all the blocks, comparing what the lookups think and what the access // lists think, as well as the order in the blocks vs the order in the access // lists. @@ -1912,29 +1973,56 @@ void MemorySSA::verifyOrdering(Function &F) const { for (BasicBlock &B : F) { const AccessList *AL = getBlockAccesses(&B); const auto *DL = getBlockDefs(&B); - MemoryAccess *Phi = getMemoryAccess(&B); + MemoryPhi *Phi = getMemoryAccess(&B); if (Phi) { + // Verify ordering. 
ActualAccesses.push_back(Phi); ActualDefs.push_back(Phi); + // Verify domination + for (const Use &U : Phi->uses()) + assert(dominates(Phi, U) && "Memory PHI does not dominate it's uses"); +#if defined(EXPENSIVE_CHECKS) + // Verify def-uses. + assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance( + pred_begin(&B), pred_end(&B))) && + "Incomplete MemoryPhi Node"); + for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) { + verifyUseInDefs(Phi->getIncomingValue(I), Phi); + assert(find(predecessors(&B), Phi->getIncomingBlock(I)) != + pred_end(&B) && + "Incoming phi block not a block predecessor"); + } +#endif } for (Instruction &I : B) { - MemoryAccess *MA = getMemoryAccess(&I); + MemoryUseOrDef *MA = getMemoryAccess(&I); assert((!MA || (AL && (isa<MemoryUse>(MA) || DL))) && "We have memory affecting instructions " "in this block but they are not in the " "access list or defs list"); if (MA) { + // Verify ordering. ActualAccesses.push_back(MA); - if (isa<MemoryDef>(MA)) + if (MemoryAccess *MD = dyn_cast<MemoryDef>(MA)) { + // Verify ordering. ActualDefs.push_back(MA); + // Verify domination. + for (const Use &U : MD->uses()) + assert(dominates(MD, U) && + "Memory Def does not dominate it's uses"); + } +#if defined(EXPENSIVE_CHECKS) + // Verify def-uses. + verifyUseInDefs(MA->getDefiningAccess(), MA); +#endif } } // Either we hit the assert, really have no accesses, or we have both - // accesses and an access list. - // Same with defs. + // accesses and an access list. Same with defs. if (!AL && !DL) continue; + // Verify ordering. assert(AL->size() == ActualAccesses.size() && "We don't have the same number of accesses in the block as on the " "access list"); @@ -1965,28 +2053,6 @@ void MemorySSA::verifyOrdering(Function &F) const { #endif } -/// Verify the domination properties of MemorySSA by checking that each -/// definition dominates all of its uses. -void MemorySSA::verifyDomination(Function &F) const { -#ifndef NDEBUG - for (BasicBlock &B : F) { - // Phi nodes are attached to basic blocks - if (MemoryPhi *MP = getMemoryAccess(&B)) - for (const Use &U : MP->uses()) - assert(dominates(MP, U) && "Memory PHI does not dominate it's uses"); - - for (Instruction &I : B) { - MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I)); - if (!MD) - continue; - - for (const Use &U : MD->uses()) - assert(dominates(MD, U) && "Memory Def does not dominate it's uses"); - } - } -#endif -} - /// Verify the def-use lists in MemorySSA, by verifying that \p Use /// appears in the use list of \p Def. 
void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const { @@ -2001,34 +2067,6 @@ void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const { #endif } -/// Verify the immediate use information, by walking all the memory -/// accesses and verifying that, for each use, it appears in the -/// appropriate def's use list -void MemorySSA::verifyDefUses(Function &F) const { -#ifndef NDEBUG - for (BasicBlock &B : F) { - // Phi nodes are attached to basic blocks - if (MemoryPhi *Phi = getMemoryAccess(&B)) { - assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance( - pred_begin(&B), pred_end(&B))) && - "Incomplete MemoryPhi Node"); - for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) { - verifyUseInDefs(Phi->getIncomingValue(I), Phi); - assert(find(predecessors(&B), Phi->getIncomingBlock(I)) != - pred_end(&B) && - "Incoming phi block not a block predecessor"); - } - } - - for (Instruction &I : B) { - if (MemoryUseOrDef *MA = getMemoryAccess(&I)) { - verifyUseInDefs(MA->getDefiningAccess(), MA); - } - } - } -#endif -} - /// Perform a local numbering on blocks so that instruction ordering can be /// determined in constant time. /// TODO: We currently just number in order. If we numbered by N, we could @@ -2212,7 +2250,7 @@ MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &AA = AM.getResult<AAManager>(F); - return MemorySSAAnalysis::Result(llvm::make_unique<MemorySSA>(F, &AA, &DT)); + return MemorySSAAnalysis::Result(std::make_unique<MemorySSA>(F, &AA, &DT)); } bool MemorySSAAnalysis::Result::invalidate( diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp index 4c1feee7fd9a..473268982f2d 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -44,11 +44,15 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( // First, do a cache lookup. Without this cache, certain CFG structures // (like a series of if statements) take exponential time to visit. auto Cached = CachedPreviousDef.find(BB); - if (Cached != CachedPreviousDef.end()) { + if (Cached != CachedPreviousDef.end()) return Cached->second; - } - if (BasicBlock *Pred = BB->getSinglePredecessor()) { + // If this method is called from an unreachable block, return LoE. + if (!MSSA->DT->isReachableFromEntry(BB)) + return MSSA->getLiveOnEntryDef(); + + if (BasicBlock *Pred = BB->getUniquePredecessor()) { + VisitedBlocks.insert(BB); // Single predecessor case, just recurse, we can only have one definition. MemoryAccess *Result = getPreviousDefFromEnd(Pred, CachedPreviousDef); CachedPreviousDef.insert({BB, Result}); @@ -71,11 +75,19 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( // Recurse to get the values in our predecessors for placement of a // potential phi node. This will insert phi nodes if we cycle in order to // break the cycle and have an operand. 
- for (auto *Pred : predecessors(BB)) - if (MSSA->DT->isReachableFromEntry(Pred)) - PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef)); - else + bool UniqueIncomingAccess = true; + MemoryAccess *SingleAccess = nullptr; + for (auto *Pred : predecessors(BB)) { + if (MSSA->DT->isReachableFromEntry(Pred)) { + auto *IncomingAccess = getPreviousDefFromEnd(Pred, CachedPreviousDef); + if (!SingleAccess) + SingleAccess = IncomingAccess; + else if (IncomingAccess != SingleAccess) + UniqueIncomingAccess = false; + PhiOps.push_back(IncomingAccess); + } else PhiOps.push_back(MSSA->getLiveOnEntryDef()); + } // Now try to simplify the ops to avoid placing a phi. // This may return null if we never created a phi yet, that's okay @@ -84,7 +96,15 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( // See if we can avoid the phi by simplifying it. auto *Result = tryRemoveTrivialPhi(Phi, PhiOps); // If we couldn't simplify, we may have to create a phi - if (Result == Phi) { + if (Result == Phi && UniqueIncomingAccess && SingleAccess) { + // A concrete Phi only exists if we created an empty one to break a cycle. + if (Phi) { + assert(Phi->operands().empty() && "Expected empty Phi"); + Phi->replaceAllUsesWith(SingleAccess); + removeMemoryAccess(Phi); + } + Result = SingleAccess; + } else if (Result == Phi && !(UniqueIncomingAccess && SingleAccess)) { if (!Phi) Phi = MSSA->createMemoryPhi(BB); @@ -173,12 +193,9 @@ MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) { TrackingVH<MemoryAccess> Res(Phi); SmallVector<TrackingVH<Value>, 8> Uses; std::copy(Phi->user_begin(), Phi->user_end(), std::back_inserter(Uses)); - for (auto &U : Uses) { - if (MemoryPhi *UsePhi = dyn_cast<MemoryPhi>(&*U)) { - auto OperRange = UsePhi->operands(); - tryRemoveTrivialPhi(UsePhi, OperRange); - } - } + for (auto &U : Uses) + if (MemoryPhi *UsePhi = dyn_cast<MemoryPhi>(&*U)) + tryRemoveTrivialPhi(UsePhi); return Res; } @@ -187,6 +204,11 @@ MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) { // argument. // IE phi(a, a) or b = phi(a, b) or c = phi(a, a, c) // We recursively try to remove them. +MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi) { + assert(Phi && "Can only remove concrete Phi."); + auto OperRange = Phi->operands(); + return tryRemoveTrivialPhi(Phi, OperRange); +} template <class RangeType> MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands) { @@ -218,17 +240,49 @@ MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi, return recursePhi(Same); } -void MemorySSAUpdater::insertUse(MemoryUse *MU) { +void MemorySSAUpdater::insertUse(MemoryUse *MU, bool RenameUses) { InsertedPHIs.clear(); MU->setDefiningAccess(getPreviousDef(MU)); - // Unlike for defs, there is no extra work to do. Because uses do not create - // new may-defs, there are only two cases: - // + + // In cases without unreachable blocks, because uses do not create new + // may-defs, there are only two cases: // 1. There was a def already below us, and therefore, we should not have // created a phi node because it was already needed for the def. // // 2. There is no def below us, and therefore, there is no extra renaming work // to do. + + // In cases with unreachable blocks, where the unnecessary Phis were + // optimized out, adding the Use may re-insert those Phis. Hence, when + // inserting Uses outside of the MSSA creation process, and new Phis were + // added, rename all uses if we are asked. 
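A hedged usage sketch, mirroring the pattern of in-tree callers such as LICM (NewLI and MSSA are illustrative names): after cloning a load into a block, create its memory access and let insertUse() re-run renaming in case new Phis were introduced:

    MemorySSAUpdater MSSAU(&MSSA);
    MemoryUseOrDef *NewAcc = MSSAU.createMemoryAccessInBB(
        NewLI, /*Definition=*/nullptr, NewLI->getParent(), MemorySSA::Beginning);
    if (auto *MU = dyn_cast_or_null<MemoryUse>(NewAcc))
      MSSAU.insertUse(MU, /*RenameUses=*/true);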
+ + if (!RenameUses && !InsertedPHIs.empty()) { + auto *Defs = MSSA->getBlockDefs(MU->getBlock()); + (void)Defs; + assert((!Defs || (++Defs->begin() == Defs->end())) && + "Block may have only a Phi or no defs"); + } + + if (RenameUses && InsertedPHIs.size()) { + SmallPtrSet<BasicBlock *, 16> Visited; + BasicBlock *StartBlock = MU->getBlock(); + + if (auto *Defs = MSSA->getWritableBlockDefs(StartBlock)) { + MemoryAccess *FirstDef = &*Defs->begin(); + // Convert to incoming value if it's a memorydef. A phi *is* already an + // incoming value. + if (auto *MD = dyn_cast<MemoryDef>(FirstDef)) + FirstDef = MD->getDefiningAccess(); + + MSSA->renamePass(MU->getBlock(), FirstDef, Visited); + } + // We just inserted a phi into this block, so the incoming value will + // become the phi anyway, so it does not matter what we pass. + for (auto &MP : InsertedPHIs) + if (MemoryPhi *Phi = cast_or_null<MemoryPhi>(MP)) + MSSA->renamePass(Phi->getBlock(), nullptr, Visited); + } } // Set every incoming edge {BB, MP->getBlock()} of MemoryPhi MP to NewDef. @@ -260,33 +314,35 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { // See if we had a local def, and if not, go hunting. MemoryAccess *DefBefore = getPreviousDef(MD); - bool DefBeforeSameBlock = DefBefore->getBlock() == MD->getBlock(); + bool DefBeforeSameBlock = false; + if (DefBefore->getBlock() == MD->getBlock() && + !(isa<MemoryPhi>(DefBefore) && + std::find(InsertedPHIs.begin(), InsertedPHIs.end(), DefBefore) != + InsertedPHIs.end())) + DefBeforeSameBlock = true; // There is a def before us, which means we can replace any store/phi uses // of that thing with us, since we are in the way of whatever was there // before. // We now define that def's memorydefs and memoryphis if (DefBeforeSameBlock) { - for (auto UI = DefBefore->use_begin(), UE = DefBefore->use_end(); - UI != UE;) { - Use &U = *UI++; + DefBefore->replaceUsesWithIf(MD, [MD](Use &U) { // Leave the MemoryUses alone. // Also make sure we skip ourselves to avoid self references. - if (isa<MemoryUse>(U.getUser()) || U.getUser() == MD) - continue; + User *Usr = U.getUser(); + return !isa<MemoryUse>(Usr) && Usr != MD; // Defs are automatically unoptimized when the user is set to MD below, // because the isOptimized() call will fail to find the same ID. - U.set(MD); - } + }); } // and that def is now our defining access. MD->setDefiningAccess(DefBefore); - // Remember the index where we may insert new phis below. - unsigned NewPhiIndex = InsertedPHIs.size(); - SmallVector<WeakVH, 8> FixupList(InsertedPHIs.begin(), InsertedPHIs.end()); + + // Remember the index where we may insert new phis. + unsigned NewPhiIndex = InsertedPHIs.size(); if (!DefBeforeSameBlock) { // If there was a local def before us, we must have the same effect it // did. Because every may-def is the same, any phis/etc we would create, it @@ -302,46 +358,54 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { // If this is the first def in the block and this insert is in an arbitrary // place, compute IDF and place phis. + SmallPtrSet<BasicBlock *, 2> DefiningBlocks; + + // If this is the last Def in the block, also compute IDF based on MD, since + // this may a new Def added, and we may need additional Phis. 
auto Iter = MD->getDefsIterator(); ++Iter; auto IterEnd = MSSA->getBlockDefs(MD->getBlock())->end(); - if (Iter == IterEnd) { - ForwardIDFCalculator IDFs(*MSSA->DT); - SmallVector<BasicBlock *, 32> IDFBlocks; - SmallPtrSet<BasicBlock *, 2> DefiningBlocks; + if (Iter == IterEnd) DefiningBlocks.insert(MD->getBlock()); - IDFs.setDefiningBlocks(DefiningBlocks); - IDFs.calculate(IDFBlocks); - SmallVector<AssertingVH<MemoryPhi>, 4> NewInsertedPHIs; - for (auto *BBIDF : IDFBlocks) - if (!MSSA->getMemoryAccess(BBIDF)) { - auto *MPhi = MSSA->createMemoryPhi(BBIDF); - NewInsertedPHIs.push_back(MPhi); - // Add the phis created into the IDF blocks to NonOptPhis, so they are - // not optimized out as trivial by the call to getPreviousDefFromEnd - // below. Once they are complete, all these Phis are added to the - // FixupList, and removed from NonOptPhis inside fixupDefs(). - NonOptPhis.insert(MPhi); - } - for (auto &MPhi : NewInsertedPHIs) { - auto *BBIDF = MPhi->getBlock(); - for (auto *Pred : predecessors(BBIDF)) { - DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef; - MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef), - Pred); - } + for (const auto &VH : InsertedPHIs) + if (const auto *RealPHI = cast_or_null<MemoryPhi>(VH)) + DefiningBlocks.insert(RealPHI->getBlock()); + ForwardIDFCalculator IDFs(*MSSA->DT); + SmallVector<BasicBlock *, 32> IDFBlocks; + IDFs.setDefiningBlocks(DefiningBlocks); + IDFs.calculate(IDFBlocks); + SmallVector<AssertingVH<MemoryPhi>, 4> NewInsertedPHIs; + for (auto *BBIDF : IDFBlocks) { + auto *MPhi = MSSA->getMemoryAccess(BBIDF); + if (!MPhi) { + MPhi = MSSA->createMemoryPhi(BBIDF); + NewInsertedPHIs.push_back(MPhi); } - - // Re-take the index where we're adding the new phis, because the above - // call to getPreviousDefFromEnd, may have inserted into InsertedPHIs. - NewPhiIndex = InsertedPHIs.size(); - for (auto &MPhi : NewInsertedPHIs) { - InsertedPHIs.push_back(&*MPhi); - FixupList.push_back(&*MPhi); + // Add the phis created into the IDF blocks to NonOptPhis, so they are not + // optimized out as trivial by the call to getPreviousDefFromEnd below. + // Once they are complete, all these Phis are added to the FixupList, and + // removed from NonOptPhis inside fixupDefs(). Existing Phis in IDF may + // need fixing as well, and potentially be trivial before this insertion, + // hence add all IDF Phis. See PR43044. + NonOptPhis.insert(MPhi); + } + for (auto &MPhi : NewInsertedPHIs) { + auto *BBIDF = MPhi->getBlock(); + for (auto *Pred : predecessors(BBIDF)) { + DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef; + MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef), Pred); } } + // Re-take the index where we're adding the new phis, because the above call + // to getPreviousDefFromEnd, may have inserted into InsertedPHIs. 
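For reference, the phi-placement step in this hunk boils down to one iterated-dominance-frontier query: the block of the new MemoryDef plus the blocks of any Phis inserted so far form the defining blocks, and new MemoryPhis go at frontier blocks that lack one. A minimal sketch of that query; the setDefiningBlocks/calculate calls are taken from the hunk itself, while the helper name and include paths are assumptions:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"

// Return the blocks that may need a new MemoryPhi: the iterated dominance
// frontier of the blocks that (now) contain a definition.
static void computePhiBlocks(
    llvm::DominatorTree &DT,
    const llvm::SmallPtrSetImpl<llvm::BasicBlock *> &DefiningBlocks,
    llvm::SmallVectorImpl<llvm::BasicBlock *> &PhiBlocks) {
  llvm::ForwardIDFCalculator IDFs(DT);
  IDFs.setDefiningBlocks(DefiningBlocks);
  IDFs.calculate(PhiBlocks);
}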
+ NewPhiIndex = InsertedPHIs.size(); + for (auto &MPhi : NewInsertedPHIs) { + InsertedPHIs.push_back(&*MPhi); + FixupList.push_back(&*MPhi); + } + FixupList.push_back(MD); } @@ -458,8 +522,7 @@ void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<WeakVH> &Vars) { void MemorySSAUpdater::removeEdge(BasicBlock *From, BasicBlock *To) { if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) { MPhi->unorderedDeleteIncomingBlock(From); - if (MPhi->getNumIncomingValues() == 1) - removeMemoryAccess(MPhi); + tryRemoveTrivialPhi(MPhi); } } @@ -475,34 +538,51 @@ void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(const BasicBlock *From, Found = true; return false; }); - if (MPhi->getNumIncomingValues() == 1) - removeMemoryAccess(MPhi); + tryRemoveTrivialPhi(MPhi); + } +} + +static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA, + const ValueToValueMapTy &VMap, + PhiToDefMap &MPhiMap, + bool CloneWasSimplified, + MemorySSA *MSSA) { + MemoryAccess *InsnDefining = MA; + if (MemoryDef *DefMUD = dyn_cast<MemoryDef>(InsnDefining)) { + if (!MSSA->isLiveOnEntryDef(DefMUD)) { + Instruction *DefMUDI = DefMUD->getMemoryInst(); + assert(DefMUDI && "Found MemoryUseOrDef with no Instruction."); + if (Instruction *NewDefMUDI = + cast_or_null<Instruction>(VMap.lookup(DefMUDI))) { + InsnDefining = MSSA->getMemoryAccess(NewDefMUDI); + if (!CloneWasSimplified) + assert(InsnDefining && "Defining instruction cannot be nullptr."); + else if (!InsnDefining || isa<MemoryUse>(InsnDefining)) { + // The clone was simplified, it's no longer a MemoryDef, look up. + auto DefIt = DefMUD->getDefsIterator(); + // Since simplified clones only occur in single block cloning, a + // previous definition must exist, otherwise NewDefMUDI would not + // have been found in VMap. + assert(DefIt != MSSA->getBlockDefs(DefMUD->getBlock())->begin() && + "Previous def must exist"); + InsnDefining = getNewDefiningAccessForClone( + &*(--DefIt), VMap, MPhiMap, CloneWasSimplified, MSSA); + } + } + } + } else { + MemoryPhi *DefPhi = cast<MemoryPhi>(InsnDefining); + if (MemoryAccess *NewDefPhi = MPhiMap.lookup(DefPhi)) + InsnDefining = NewDefPhi; } + assert(InsnDefining && "Defining instruction cannot be nullptr."); + return InsnDefining; } void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB, const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap, bool CloneWasSimplified) { - auto GetNewDefiningAccess = [&](MemoryAccess *MA) -> MemoryAccess * { - MemoryAccess *InsnDefining = MA; - if (MemoryUseOrDef *DefMUD = dyn_cast<MemoryUseOrDef>(InsnDefining)) { - if (!MSSA->isLiveOnEntryDef(DefMUD)) { - Instruction *DefMUDI = DefMUD->getMemoryInst(); - assert(DefMUDI && "Found MemoryUseOrDef with no Instruction."); - if (Instruction *NewDefMUDI = - cast_or_null<Instruction>(VMap.lookup(DefMUDI))) - InsnDefining = MSSA->getMemoryAccess(NewDefMUDI); - } - } else { - MemoryPhi *DefPhi = cast<MemoryPhi>(InsnDefining); - if (MemoryAccess *NewDefPhi = MPhiMap.lookup(DefPhi)) - InsnDefining = NewDefPhi; - } - assert(InsnDefining && "Defining instruction cannot be nullptr."); - return InsnDefining; - }; - const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB); if (!Acc) return; @@ -519,9 +599,13 @@ void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB, if (Instruction *NewInsn = dyn_cast_or_null<Instruction>(VMap.lookup(Insn))) { MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess( - NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()), - CloneWasSimplified ? 
nullptr : MUD); - MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End); + NewInsn, + getNewDefiningAccessForClone(MUD->getDefiningAccess(), VMap, + MPhiMap, CloneWasSimplified, MSSA), + /*Template=*/CloneWasSimplified ? nullptr : MUD, + /*CreationMustSucceed=*/CloneWasSimplified ? false : true); + if (NewUseOrDef) + MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End); } } } @@ -563,8 +647,7 @@ void MemorySSAUpdater::updatePhisWhenInsertingUniqueBackedgeBlock( // If NewMPhi is a trivial phi, remove it. Its use in the header MPhi will be // replaced with the unique value. - if (HasUniqueIncomingValue) - removeMemoryAccess(NewMPhi); + tryRemoveTrivialPhi(NewMPhi); } void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, @@ -770,6 +853,9 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates, } else { // Single predecessor, BB cannot be dead. GetLastDef of Pred. assert(Count == 1 && Pred && "Single predecessor expected."); + // BB can be unreachable though, return LoE if that is the case. + if (!DT.getNode(BB)) + return MSSA->getLiveOnEntryDef(); BB = Pred; } }; @@ -1010,7 +1096,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates, for (; UI != E;) { Use &U = *UI; ++UI; - MemoryAccess *Usr = dyn_cast<MemoryAccess>(U.getUser()); + MemoryAccess *Usr = cast<MemoryAccess>(U.getUser()); if (MemoryPhi *UsrPhi = dyn_cast<MemoryPhi>(Usr)) { BasicBlock *DominatedBlock = UsrPhi->getIncomingBlock(U); if (!DT.dominates(DominatingBlock, DominatedBlock)) @@ -1052,9 +1138,9 @@ void MemorySSAUpdater::moveTo(MemoryUseOrDef *What, BasicBlock *BB, // Now reinsert it into the IR and do whatever fixups needed. if (auto *MD = dyn_cast<MemoryDef>(What)) - insertDef(MD); + insertDef(MD, /*RenameUses=*/true); else - insertUse(cast<MemoryUse>(What)); + insertUse(cast<MemoryUse>(What), /*RenameUses=*/true); // Clear dangling pointers. We added all MemoryPhi users, but not all // of them are removed by fixupDefs(). @@ -1073,7 +1159,13 @@ void MemorySSAUpdater::moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where) { void MemorySSAUpdater::moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where) { - return moveTo(What, BB, Where); + if (Where != MemorySSA::InsertionPlace::BeforeTerminator) + return moveTo(What, BB, Where); + + if (auto *Where = MSSA->getMemoryAccess(BB->getTerminator())) + return moveBefore(What, Where); + else + return moveTo(What, BB, MemorySSA::InsertionPlace::End); } // All accesses in To used to be in From. Move to end and update access lists. @@ -1084,25 +1176,32 @@ void MemorySSAUpdater::moveAllAccesses(BasicBlock *From, BasicBlock *To, if (!Accs) return; + assert(Start->getParent() == To && "Incorrect Start instruction"); MemoryAccess *FirstInNew = nullptr; for (Instruction &I : make_range(Start->getIterator(), To->end())) if ((FirstInNew = MSSA->getMemoryAccess(&I))) break; - if (!FirstInNew) - return; + if (FirstInNew) { + auto *MUD = cast<MemoryUseOrDef>(FirstInNew); + do { + auto NextIt = ++MUD->getIterator(); + MemoryUseOrDef *NextMUD = (!Accs || NextIt == Accs->end()) + ? nullptr + : cast<MemoryUseOrDef>(&*NextIt); + MSSA->moveTo(MUD, To, MemorySSA::End); + // Moving MUD from Accs in the moveTo above, may delete Accs, so we need + // to retrieve it again. 
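Several hunks in this file replace ad-hoc "single incoming value" checks with tryRemoveTrivialPhi. A phi is trivial when every operand is either the phi itself or one common value, so the phi can be replaced by that value. A standalone sketch of the test, using hypothetical toy types rather than the MemorySSA classes:

#include <vector>

struct Node; // hypothetical stand-in for a MemoryAccess

struct Phi {
  Node *Self;                   // identity of the phi as seen by its operands
  std::vector<Node *> Operands; // incoming accesses
};

// Return the unique value the phi forwards, or nullptr if a real phi is needed.
static Node *trivialPhiValue(const Phi &P) {
  Node *Same = nullptr;
  for (Node *Op : P.Operands) {
    if (Op == P.Self || Op == Same)
      continue;                 // self references and repeats are ignored
    if (Same)
      return nullptr;           // the phi merges at least two distinct values
    Same = Op;
  }
  return Same;
}

When a non-null value is returned, the caller replaces all uses of the phi with it and deletes the phi; tryRemoveTrivialPhi additionally recurses into users that may have become trivial in turn.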
+ Accs = MSSA->getWritableBlockAccesses(From); + MUD = NextMUD; + } while (MUD); + } - auto *MUD = cast<MemoryUseOrDef>(FirstInNew); - do { - auto NextIt = ++MUD->getIterator(); - MemoryUseOrDef *NextMUD = (!Accs || NextIt == Accs->end()) - ? nullptr - : cast<MemoryUseOrDef>(&*NextIt); - MSSA->moveTo(MUD, To, MemorySSA::End); - // Moving MUD from Accs in the moveTo above, may delete Accs, so we need to - // retrieve it again. - Accs = MSSA->getWritableBlockAccesses(From); - MUD = NextMUD; - } while (MUD); + // If all accesses were moved and only a trivial Phi remains, we try to remove + // that Phi. This is needed when From is going to be deleted. + auto *Defs = MSSA->getWritableBlockDefs(From); + if (Defs && !Defs->empty()) + if (auto *Phi = dyn_cast<MemoryPhi>(&*Defs->begin())) + tryRemoveTrivialPhi(Phi); } void MemorySSAUpdater::moveAllAfterSpliceBlocks(BasicBlock *From, @@ -1118,7 +1217,7 @@ void MemorySSAUpdater::moveAllAfterSpliceBlocks(BasicBlock *From, void MemorySSAUpdater::moveAllAfterMergeBlocks(BasicBlock *From, BasicBlock *To, Instruction *Start) { - assert(From->getSinglePredecessor() == To && + assert(From->getUniquePredecessor() == To && "From block is expected to have a single predecessor (To)."); moveAllAccesses(From, To, Start); for (BasicBlock *Succ : successors(From)) @@ -1173,8 +1272,7 @@ void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor( return false; }); Phi->addIncoming(NewPhi, New); - if (onlySingleValue(NewPhi)) - removeMemoryAccess(NewPhi); + tryRemoveTrivialPhi(NewPhi); } } @@ -1239,10 +1337,8 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA, bool OptimizePhis) { unsigned PhisSize = PhisToOptimize.size(); while (PhisSize-- > 0) if (MemoryPhi *MP = - cast_or_null<MemoryPhi>(PhisToOptimize.pop_back_val())) { - auto OperRange = MP->operands(); - tryRemoveTrivialPhi(MP, OperRange); - } + cast_or_null<MemoryPhi>(PhisToOptimize.pop_back_val())) + tryRemoveTrivialPhi(MP); } } @@ -1256,8 +1352,7 @@ void MemorySSAUpdater::removeBlocks( if (!DeadBlocks.count(Succ)) if (MemoryPhi *MP = MSSA->getMemoryAccess(Succ)) { MP->unorderedDeleteIncomingBlock(BB); - if (MP->getNumIncomingValues() == 1) - removeMemoryAccess(MP); + tryRemoveTrivialPhi(MP); } // Drop all references of all accesses in BB if (MemorySSA::AccessList *Acc = MSSA->getWritableBlockAccesses(BB)) @@ -1281,10 +1376,8 @@ void MemorySSAUpdater::removeBlocks( void MemorySSAUpdater::tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs) { for (auto &VH : UpdatedPHIs) - if (auto *MPhi = cast_or_null<MemoryPhi>(VH)) { - auto OperRange = MPhi->operands(); - tryRemoveTrivialPhi(MPhi, OperRange); - } + if (auto *MPhi = cast_or_null<MemoryPhi>(VH)) + tryRemoveTrivialPhi(MPhi); } void MemorySSAUpdater::changeToUnreachable(const Instruction *I) { diff --git a/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp index 519242759824..52b884fb88e0 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Passes.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index e25eb290a665..8a1206f49c21 
100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -44,6 +44,7 @@ #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" +#include "llvm/InitializePasses.h" #include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Object/SymbolicFile.h" #include "llvm/Pass.h" @@ -319,7 +320,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, auto *CalledValue = CS.getCalledValue(); auto *CalledFunction = CS.getCalledFunction(); if (CalledValue && !CalledFunction) { - CalledValue = CalledValue->stripPointerCastsNoFollowAliases(); + CalledValue = CalledValue->stripPointerCasts(); // Stripping pointer casts can reveal a called function. CalledFunction = dyn_cast<Function>(CalledValue); } @@ -466,8 +467,9 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, F.hasFnAttribute(Attribute::NoRecurse), F.returnDoesNotAlias(), // FIXME: refactor this to use the same code that inliner is using. // Don't try to import functions with noinline attribute. - F.getAttributes().hasFnAttribute(Attribute::NoInline)}; - auto FuncSummary = llvm::make_unique<FunctionSummary>( + F.getAttributes().hasFnAttribute(Attribute::NoInline), + F.hasFnAttribute(Attribute::AlwaysInline)}; + auto FuncSummary = std::make_unique<FunctionSummary>( Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs), CallGraphEdges.takeVector(), TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(), @@ -598,7 +600,7 @@ static void computeVariableSummary(ModuleSummaryIndex &Index, !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() && !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass(); GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized, CanBeInternalized); - auto GVarSummary = llvm::make_unique<GlobalVarSummary>(Flags, VarFlags, + auto GVarSummary = std::make_unique<GlobalVarSummary>(Flags, VarFlags, RefEdges.takeVector()); if (NonRenamableLocal) CantBePromoted.insert(V.getGUID()); @@ -616,7 +618,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal, /* Live = */ false, A.isDSOLocal(), A.hasLinkOnceODRLinkage() && A.hasGlobalUnnamedAddr()); - auto AS = llvm::make_unique<AliasSummary>(Flags); + auto AS = std::make_unique<AliasSummary>(Flags); auto *Aliasee = A.getBaseObject(); auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID()); assert(AliaseeVI && "Alias expects aliasee summary to be available"); @@ -696,14 +698,15 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( // Create the appropriate summary type. 
if (Function *F = dyn_cast<Function>(GV)) { std::unique_ptr<FunctionSummary> Summary = - llvm::make_unique<FunctionSummary>( + std::make_unique<FunctionSummary>( GVFlags, /*InstCount=*/0, FunctionSummary::FFlags{ F->hasFnAttribute(Attribute::ReadNone), F->hasFnAttribute(Attribute::ReadOnly), F->hasFnAttribute(Attribute::NoRecurse), F->returnDoesNotAlias(), - /* NoInline = */ false}, + /* NoInline = */ false, + F->hasFnAttribute(Attribute::AlwaysInline)}, /*EntryCount=*/0, ArrayRef<ValueInfo>{}, ArrayRef<FunctionSummary::EdgeTy>{}, ArrayRef<GlobalValue::GUID>{}, @@ -714,7 +717,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( Index.addGlobalValueSummary(*GV, std::move(Summary)); } else { std::unique_ptr<GlobalVarSummary> Summary = - llvm::make_unique<GlobalVarSummary>( + std::make_unique<GlobalVarSummary>( GVFlags, GlobalVarSummary::GVarFlags(false, false), ArrayRef<ValueInfo>{}); Index.addGlobalValueSummary(*GV, std::move(Summary)); @@ -741,7 +744,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( else if (F.hasProfileData()) { LoopInfo LI{DT}; BranchProbabilityInfo BPI{F, LI}; - BFIPtr = llvm::make_unique<BlockFrequencyInfo>(F, BPI, LI); + BFIPtr = std::make_unique<BlockFrequencyInfo>(F, BPI, LI); BFI = BFIPtr.get(); } @@ -813,11 +816,11 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( if (!ModuleSummaryDotFile.empty()) { std::error_code EC; - raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::F_None); + raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::OF_None); if (EC) report_fatal_error(Twine("Failed to open dot file ") + ModuleSummaryDotFile + ": " + EC.message() + "\n"); - Index.exportToDot(OSDot); + Index.exportToDot(OSDot, {}); } return Index; diff --git a/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp b/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp index b616cd6f762b..952c2cbfec4e 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp @@ -7,20 +7,27 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/MustExecute.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; +#define DEBUG_TYPE "must-execute" + const DenseMap<BasicBlock *, ColorVector> & LoopSafetyInfo::getBlockColors() const { return BlockColors; @@ -306,6 +313,17 @@ namespace { } bool runOnFunction(Function &F) override; }; + struct MustBeExecutedContextPrinter : public ModulePass { + static char ID; + + MustBeExecutedContextPrinter() : ModulePass(ID) { + initializeMustBeExecutedContextPrinterPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + bool runOnModule(Module &M) override; + }; } char MustExecutePrinter::ID = 0; @@ -320,6 +338,57 @@ FunctionPass *llvm::createMustExecutePrinter() { return new MustExecutePrinter(); } +char MustBeExecutedContextPrinter::ID = 0; 
+INITIALIZE_PASS_BEGIN( + MustBeExecutedContextPrinter, "print-must-be-executed-contexts", + "print the must-be-executed-contexed for all instructions", false, true) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(MustBeExecutedContextPrinter, + "print-must-be-executed-contexts", + "print the must-be-executed-contexed for all instructions", + false, true) + +ModulePass *llvm::createMustBeExecutedContextPrinter() { + return new MustBeExecutedContextPrinter(); +} + +bool MustBeExecutedContextPrinter::runOnModule(Module &M) { + // We provide non-PM analysis here because the old PM doesn't like to query + // function passes from a module pass. + SmallVector<PostDominatorTree *, 8> PDTs; + SmallVector<DominatorTree *, 8> DTs; + SmallVector<LoopInfo *, 8> LIs; + + GetterTy<LoopInfo> LIGetter = [&](const Function &F) { + DominatorTree *DT = new DominatorTree(const_cast<Function &>(F)); + LoopInfo *LI = new LoopInfo(*DT); + DTs.push_back(DT); + LIs.push_back(LI); + return LI; + }; + GetterTy<PostDominatorTree> PDTGetter = [&](const Function &F) { + PostDominatorTree *PDT = new PostDominatorTree(const_cast<Function &>(F)); + PDTs.push_back(PDT); + return PDT; + }; + MustBeExecutedContextExplorer Explorer(true, LIGetter, PDTGetter); + for (Function &F : M) { + for (Instruction &I : instructions(F)) { + dbgs() << "-- Explore context of: " << I << "\n"; + for (const Instruction *CI : Explorer.range(&I)) + dbgs() << " [F: " << CI->getFunction()->getName() << "] " << *CI + << "\n"; + } + } + + DeleteContainerPointers(PDTs); + DeleteContainerPointers(LIs); + DeleteContainerPointers(DTs); + return false; +} + static bool isMustExecuteIn(const Instruction &I, Loop *L, DominatorTree *DT) { // TODO: merge these two routines. For the moment, we display the best // result obtained by *either* implementation. This is a bit unfair since no @@ -396,3 +465,248 @@ bool MustExecutePrinter::runOnFunction(Function &F) { return false; } + +/// Return true if \p L might be an endless loop. +static bool maybeEndlessLoop(const Loop &L) { + if (L.getHeader()->getParent()->hasFnAttribute(Attribute::WillReturn)) + return false; + // TODO: Actually try to prove it is not. + // TODO: If maybeEndlessLoop is going to be expensive, cache it. + return true; +} + +static bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI) { + if (!LI) + return false; + using RPOTraversal = ReversePostOrderTraversal<const Function *>; + RPOTraversal FuncRPOT(&F); + return !containsIrreducibleCFG<const BasicBlock *, const RPOTraversal, + const LoopInfo>(FuncRPOT, *LI); +} + +/// Lookup \p Key in \p Map and return the result, potentially after +/// initializing the optional through \p Fn(\p args). +template <typename K, typename V, typename FnTy, typename... ArgsTy> +static V getOrCreateCachedOptional(K Key, DenseMap<K, Optional<V>> &Map, + FnTy &&Fn, ArgsTy&&... args) { + Optional<V> &OptVal = Map[Key]; + if (!OptVal.hasValue()) + OptVal = Fn(std::forward<ArgsTy>(args)...); + return OptVal.getValue(); +} + +const BasicBlock * +MustBeExecutedContextExplorer::findForwardJoinPoint(const BasicBlock *InitBB) { + const LoopInfo *LI = LIGetter(*InitBB->getParent()); + const PostDominatorTree *PDT = PDTGetter(*InitBB->getParent()); + + LLVM_DEBUG(dbgs() << "\tFind forward join point for " << InitBB->getName() + << (LI ? " [LI]" : "") << (PDT ? 
" [PDT]" : "")); + + const Function &F = *InitBB->getParent(); + const Loop *L = LI ? LI->getLoopFor(InitBB) : nullptr; + const BasicBlock *HeaderBB = L ? L->getHeader() : InitBB; + bool WillReturnAndNoThrow = (F.hasFnAttribute(Attribute::WillReturn) || + (L && !maybeEndlessLoop(*L))) && + F.doesNotThrow(); + LLVM_DEBUG(dbgs() << (L ? " [in loop]" : "") + << (WillReturnAndNoThrow ? " [WillReturn] [NoUnwind]" : "") + << "\n"); + + // Determine the adjacent blocks in the given direction but exclude (self) + // loops under certain circumstances. + SmallVector<const BasicBlock *, 8> Worklist; + for (const BasicBlock *SuccBB : successors(InitBB)) { + bool IsLatch = SuccBB == HeaderBB; + // Loop latches are ignored in forward propagation if the loop cannot be + // endless and may not throw: control has to go somewhere. + if (!WillReturnAndNoThrow || !IsLatch) + Worklist.push_back(SuccBB); + } + LLVM_DEBUG(dbgs() << "\t\t#Worklist: " << Worklist.size() << "\n"); + + // If there are no other adjacent blocks, there is no join point. + if (Worklist.empty()) + return nullptr; + + // If there is one adjacent block, it is the join point. + if (Worklist.size() == 1) + return Worklist[0]; + + // Try to determine a join block through the help of the post-dominance + // tree. If no tree was provided, we perform simple pattern matching for one + // block conditionals and one block loops only. + const BasicBlock *JoinBB = nullptr; + if (PDT) + if (const auto *InitNode = PDT->getNode(InitBB)) + if (const auto *IDomNode = InitNode->getIDom()) + JoinBB = IDomNode->getBlock(); + + if (!JoinBB && Worklist.size() == 2) { + const BasicBlock *Succ0 = Worklist[0]; + const BasicBlock *Succ1 = Worklist[1]; + const BasicBlock *Succ0UniqueSucc = Succ0->getUniqueSuccessor(); + const BasicBlock *Succ1UniqueSucc = Succ1->getUniqueSuccessor(); + if (Succ0UniqueSucc == InitBB) { + // InitBB -> Succ0 -> InitBB + // InitBB -> Succ1 = JoinBB + JoinBB = Succ1; + } else if (Succ1UniqueSucc == InitBB) { + // InitBB -> Succ1 -> InitBB + // InitBB -> Succ0 = JoinBB + JoinBB = Succ0; + } else if (Succ0 == Succ1UniqueSucc) { + // InitBB -> Succ0 = JoinBB + // InitBB -> Succ1 -> Succ0 = JoinBB + JoinBB = Succ0; + } else if (Succ1 == Succ0UniqueSucc) { + // InitBB -> Succ0 -> Succ1 = JoinBB + // InitBB -> Succ1 = JoinBB + JoinBB = Succ1; + } else if (Succ0UniqueSucc == Succ1UniqueSucc) { + // InitBB -> Succ0 -> JoinBB + // InitBB -> Succ1 -> JoinBB + JoinBB = Succ0UniqueSucc; + } + } + + if (!JoinBB && L) + JoinBB = L->getUniqueExitBlock(); + + if (!JoinBB) + return nullptr; + + LLVM_DEBUG(dbgs() << "\t\tJoin block candidate: " << JoinBB->getName() << "\n"); + + // In forward direction we check if control will for sure reach JoinBB from + // InitBB, thus it can not be "stopped" along the way. Ways to "stop" control + // are: infinite loops and instructions that do not necessarily transfer + // execution to their successor. To check for them we traverse the CFG from + // the adjacent blocks to the JoinBB, looking at all intermediate blocks. + + // If we know the function is "will-return" and "no-throw" there is no need + // for futher checks. 
+ if (!F.hasFnAttribute(Attribute::WillReturn) || !F.doesNotThrow()) { + + auto BlockTransfersExecutionToSuccessor = [](const BasicBlock *BB) { + return isGuaranteedToTransferExecutionToSuccessor(BB); + }; + + SmallPtrSet<const BasicBlock *, 16> Visited; + while (!Worklist.empty()) { + const BasicBlock *ToBB = Worklist.pop_back_val(); + if (ToBB == JoinBB) + continue; + + // Make sure all loops in-between are finite. + if (!Visited.insert(ToBB).second) { + if (!F.hasFnAttribute(Attribute::WillReturn)) { + if (!LI) + return nullptr; + + bool MayContainIrreducibleControl = getOrCreateCachedOptional( + &F, IrreducibleControlMap, mayContainIrreducibleControl, F, LI); + if (MayContainIrreducibleControl) + return nullptr; + + const Loop *L = LI->getLoopFor(ToBB); + if (L && maybeEndlessLoop(*L)) + return nullptr; + } + + continue; + } + + // Make sure the block has no instructions that could stop control + // transfer. + bool TransfersExecution = getOrCreateCachedOptional( + ToBB, BlockTransferMap, BlockTransfersExecutionToSuccessor, ToBB); + if (!TransfersExecution) + return nullptr; + + for (const BasicBlock *AdjacentBB : successors(ToBB)) + Worklist.push_back(AdjacentBB); + } + } + + LLVM_DEBUG(dbgs() << "\tJoin block: " << JoinBB->getName() << "\n"); + return JoinBB; +} + +const Instruction * +MustBeExecutedContextExplorer::getMustBeExecutedNextInstruction( + MustBeExecutedIterator &It, const Instruction *PP) { + if (!PP) + return PP; + LLVM_DEBUG(dbgs() << "Find next instruction for " << *PP << "\n"); + + // If we explore only inside a given basic block we stop at terminators. + if (!ExploreInterBlock && PP->isTerminator()) { + LLVM_DEBUG(dbgs() << "\tReached terminator in intra-block mode, done\n"); + return nullptr; + } + + // If we do not traverse the call graph we check if we can make progress in + // the current function. First, check if the instruction is guaranteed to + // transfer execution to the successor. + bool TransfersExecution = isGuaranteedToTransferExecutionToSuccessor(PP); + if (!TransfersExecution) + return nullptr; + + // If this is not a terminator we know that there is a single instruction + // after this one that is executed next if control is transfered. If not, + // we can try to go back to a call site we entered earlier. If none exists, we + // do not know any instruction that has to be executd next. + if (!PP->isTerminator()) { + const Instruction *NextPP = PP->getNextNode(); + LLVM_DEBUG(dbgs() << "\tIntermediate instruction does transfer control\n"); + return NextPP; + } + + // Finally, we have to handle terminators, trivial ones first. + assert(PP->isTerminator() && "Expected a terminator!"); + + // A terminator without a successor is not handled yet. + if (PP->getNumSuccessors() == 0) { + LLVM_DEBUG(dbgs() << "\tUnhandled terminator\n"); + return nullptr; + } + + // A terminator with a single successor, we will continue at the beginning of + // that one. + if (PP->getNumSuccessors() == 1) { + LLVM_DEBUG( + dbgs() << "\tUnconditional terminator, continue with successor\n"); + return &PP->getSuccessor(0)->front(); + } + + // Multiple successors mean we need to find the join point where control flow + // converges again. We use the findForwardJoinPoint helper function with + // information about the function and helper analyses, if available. 
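When no post-dominator tree is available, findForwardJoinPoint (invoked just below) falls back to pattern matching over a block with two interesting successors: self-loop, triangle, and diamond shapes. A standalone sketch of those cases over a toy CFG; the Block type and its getUniqueSuccessor stand-in are assumptions simplified from the BasicBlock API:

#include <vector>

struct Block {
  std::vector<Block *> Succs;
  // Return the sole successor, or nullptr if there is none or more than one.
  const Block *getUniqueSuccessor() const {
    return Succs.size() == 1 ? Succs[0] : nullptr;
  }
};

// Given a block with exactly two interesting successors, find the block where
// control flow joins again, or nullptr if the shape is not recognized.
static const Block *matchJoinBlock(const Block *Init, const Block *Succ0,
                                   const Block *Succ1) {
  const Block *Succ0Unique = Succ0->getUniqueSuccessor();
  const Block *Succ1Unique = Succ1->getUniqueSuccessor();
  if (Succ0Unique == Init)   // Init -> Succ0 -> Init, so Init -> Succ1 joins
    return Succ1;
  if (Succ1Unique == Init)   // Init -> Succ1 -> Init, so Init -> Succ0 joins
    return Succ0;
  if (Succ0 == Succ1Unique)  // triangle: Succ1 falls through into Succ0
    return Succ0;
  if (Succ1 == Succ0Unique)  // triangle: Succ0 falls through into Succ1
    return Succ1;
  if (Succ0Unique && Succ0Unique == Succ1Unique)
    return Succ0Unique;      // diamond: both arms meet in a single block
  return nullptr;
}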
+ if (const BasicBlock *JoinBB = findForwardJoinPoint(PP->getParent())) + return &JoinBB->front(); + + LLVM_DEBUG(dbgs() << "\tNo join point found\n"); + return nullptr; +} + +MustBeExecutedIterator::MustBeExecutedIterator( + MustBeExecutedContextExplorer &Explorer, const Instruction *I) + : Explorer(Explorer), CurInst(I) { + reset(I); +} + +void MustBeExecutedIterator::reset(const Instruction *I) { + CurInst = I; + Visited.clear(); + Visited.insert(I); +} + +const Instruction *MustBeExecutedIterator::advance() { + assert(CurInst && "Cannot advance an end iterator!"); + const Instruction *Next = + Explorer.getMustBeExecutedNextInstruction(*this, CurInst); + if (Next && !Visited.insert(Next).second) + Next = nullptr; + return Next; +} diff --git a/contrib/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp b/contrib/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp index 72c40a0be232..44e6637f6337 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/OptimizationRemarkEmitter.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/InitializePasses.h" using namespace llvm; @@ -39,7 +40,7 @@ OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F) BPI.calculate(*F, LI); // Finally compute BFI. - OwnedBFI = llvm::make_unique<BlockFrequencyInfo>(*F, BPI, LI); + OwnedBFI = std::make_unique<BlockFrequencyInfo>(*F, BPI, LI); BFI = OwnedBFI.get(); } @@ -97,7 +98,7 @@ bool OptimizationRemarkEmitterWrapperPass::runOnFunction(Function &Fn) { else BFI = nullptr; - ORE = llvm::make_unique<OptimizationRemarkEmitter>(&Fn, BFI); + ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn, BFI); return false; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/OrderedInstructions.cpp b/contrib/llvm-project/llvm/lib/Analysis/OrderedInstructions.cpp index 458c0a7de6c2..e947e5e388a8 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/OrderedInstructions.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/OrderedInstructions.cpp @@ -21,7 +21,7 @@ bool OrderedInstructions::localDominates(const Instruction *InstA, const BasicBlock *IBB = InstA->getParent(); auto OBB = OBBMap.find(IBB); if (OBB == OBBMap.end()) - OBB = OBBMap.insert({IBB, make_unique<OrderedBasicBlock>(IBB)}).first; + OBB = OBBMap.insert({IBB, std::make_unique<OrderedBasicBlock>(IBB)}).first; return OBB->second->dominates(InstA, InstB); } diff --git a/contrib/llvm-project/llvm/lib/Analysis/PhiValues.cpp b/contrib/llvm-project/llvm/lib/Analysis/PhiValues.cpp index 49749bc44746..198647dafbef 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/PhiValues.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/PhiValues.cpp @@ -10,6 +10,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Instructions.h" +#include "llvm/InitializePasses.h" using namespace llvm; @@ -47,25 +48,28 @@ bool PhiValues::invalidate(Function &, const PreservedAnalyses &PA, // we're ultimately interested in, and all of the reachable values, i.e. // including phis, as that makes invalidateValue easier. void PhiValues::processPhi(const PHINode *Phi, - SmallVector<const PHINode *, 8> &Stack) { + SmallVectorImpl<const PHINode *> &Stack) { // Initialize the phi with the next depth number. 
assert(DepthMap.lookup(Phi) == 0); assert(NextDepthNumber != UINT_MAX); - unsigned int DepthNumber = ++NextDepthNumber; - DepthMap[Phi] = DepthNumber; + unsigned int RootDepthNumber = ++NextDepthNumber; + DepthMap[Phi] = RootDepthNumber; // Recursively process the incoming phis of this phi. TrackedValues.insert(PhiValuesCallbackVH(const_cast<PHINode *>(Phi), this)); for (Value *PhiOp : Phi->incoming_values()) { if (PHINode *PhiPhiOp = dyn_cast<PHINode>(PhiOp)) { // Recurse if the phi has not yet been visited. - if (DepthMap.lookup(PhiPhiOp) == 0) + unsigned int OpDepthNumber = DepthMap.lookup(PhiPhiOp); + if (OpDepthNumber == 0) { processPhi(PhiPhiOp, Stack); - assert(DepthMap.lookup(PhiPhiOp) != 0); + OpDepthNumber = DepthMap.lookup(PhiPhiOp); + assert(OpDepthNumber != 0); + } // If the phi did not become part of a component then this phi and that // phi are part of the same component, so adjust the depth number. - if (!ReachableMap.count(DepthMap[PhiPhiOp])) - DepthMap[Phi] = std::min(DepthMap[Phi], DepthMap[PhiPhiOp]); + if (!ReachableMap.count(OpDepthNumber)) + DepthMap[Phi] = std::min(DepthMap[Phi], OpDepthNumber); } else { TrackedValues.insert(PhiValuesCallbackVH(PhiOp, this)); } @@ -76,48 +80,59 @@ void PhiValues::processPhi(const PHINode *Phi, // If the depth number has not changed then we've finished collecting the phis // of a strongly connected component. - if (DepthMap[Phi] == DepthNumber) { + if (DepthMap[Phi] == RootDepthNumber) { // Collect the reachable values for this component. The phis of this - // component will be those on top of the depth stach with the same or + // component will be those on top of the depth stack with the same or // greater depth number. - ConstValueSet Reachable; - while (!Stack.empty() && DepthMap[Stack.back()] >= DepthNumber) { + ConstValueSet &Reachable = ReachableMap[RootDepthNumber]; + while (true) { const PHINode *ComponentPhi = Stack.pop_back_val(); Reachable.insert(ComponentPhi); - DepthMap[ComponentPhi] = DepthNumber; + for (Value *Op : ComponentPhi->incoming_values()) { if (PHINode *PhiOp = dyn_cast<PHINode>(Op)) { // If this phi is not part of the same component then that component // is guaranteed to have been completed before this one. Therefore we // can just add its reachable values to the reachable values of this // component. - auto It = ReachableMap.find(DepthMap[PhiOp]); - if (It != ReachableMap.end()) - Reachable.insert(It->second.begin(), It->second.end()); - } else { + unsigned int OpDepthNumber = DepthMap[PhiOp]; + if (OpDepthNumber != RootDepthNumber) { + auto It = ReachableMap.find(OpDepthNumber); + if (It != ReachableMap.end()) + Reachable.insert(It->second.begin(), It->second.end()); + } + } else Reachable.insert(Op); - } } + + if (Stack.empty()) + break; + + unsigned int &ComponentDepthNumber = DepthMap[Stack.back()]; + if (ComponentDepthNumber < RootDepthNumber) + break; + + ComponentDepthNumber = RootDepthNumber; } - ReachableMap.insert({DepthNumber,Reachable}); // Filter out phis to get the non-phi reachable values. 
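PhiValues answers the question "which non-phi values can this phi ultimately carry?". The rework above changes how strongly connected components of phis share a cached answer, but the underlying computation is a reachability walk over the phi graph, as in this simplified standalone sketch (hypothetical toy types, no per-component caching):

#include <set>
#include <vector>

struct TValue {
  bool IsPhi = false;
  std::vector<TValue *> Incoming; // only meaningful when IsPhi is true
};

// Collect every non-phi value reachable by following incoming edges through
// chains (and cycles) of phis starting at Root.
static std::set<TValue *> underlyingValues(TValue *Root) {
  std::set<TValue *> Result, VisitedPhis;
  std::vector<TValue *> Worklist{Root};
  while (!Worklist.empty()) {
    TValue *V = Worklist.back();
    Worklist.pop_back();
    if (!V->IsPhi) {
      Result.insert(V);
      continue;
    }
    if (!VisitedPhis.insert(V).second)
      continue;                   // this phi was already expanded (cycle)
    for (TValue *In : V->Incoming)
      Worklist.push_back(In);
  }
  return Result;
}

The depth numbers in the real code exist so that every phi in one strongly connected component is filed under a single RootDepthNumber, letting the reachable sets be built and cached once per component rather than once per phi.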
- ValueSet NonPhi; + ValueSet &NonPhi = NonPhiReachableMap[RootDepthNumber]; for (const Value *V : Reachable) if (!isa<PHINode>(V)) - NonPhi.insert(const_cast<Value*>(V)); - NonPhiReachableMap.insert({DepthNumber,NonPhi}); + NonPhi.insert(const_cast<Value *>(V)); } } const PhiValues::ValueSet &PhiValues::getValuesForPhi(const PHINode *PN) { - if (DepthMap.count(PN) == 0) { + unsigned int DepthNumber = DepthMap.lookup(PN); + if (DepthNumber == 0) { SmallVector<const PHINode *, 8> Stack; processPhi(PN, Stack); + DepthNumber = DepthMap.lookup(PN); assert(Stack.empty()); + assert(DepthNumber != 0); } - assert(DepthMap.lookup(PN) != 0); - return NonPhiReachableMap[DepthMap[PN]]; + return NonPhiReachableMap[DepthNumber]; } void PhiValues::invalidateValue(const Value *V) { diff --git a/contrib/llvm-project/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm-project/llvm/lib/Analysis/PostDominators.cpp index 4afe22bd5342..f01d51504d7c 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/PostDominators.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/PostDominators.cpp @@ -12,7 +12,9 @@ #include "llvm/Analysis/PostDominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" @@ -32,6 +34,11 @@ static constexpr bool ExpensiveChecksEnabled = false; char PostDominatorTreeWrapperPass::ID = 0; +PostDominatorTreeWrapperPass::PostDominatorTreeWrapperPass() + : FunctionPass(ID) { + initializePostDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()); +} + INITIALIZE_PASS(PostDominatorTreeWrapperPass, "postdomtree", "Post-Dominator Tree Construction", true, true) @@ -44,6 +51,28 @@ bool PostDominatorTree::invalidate(Function &F, const PreservedAnalyses &PA, PAC.preservedSet<CFGAnalyses>()); } +bool PostDominatorTree::dominates(const Instruction *I1, + const Instruction *I2) const { + assert(I1 && I2 && "Expecting valid I1 and I2"); + + const BasicBlock *BB1 = I1->getParent(); + const BasicBlock *BB2 = I2->getParent(); + + if (BB1 != BB2) + return Base::dominates(BB1, BB2); + + // PHINodes in a block are unordered. + if (isa<PHINode>(I1) && isa<PHINode>(I2)) + return false; + + // Loop through the basic block until we find I1 or I2. 
+ BasicBlock::const_iterator I = BB1->begin(); + for (; &*I != I1 && &*I != I2; ++I) + /*empty*/; + + return &*I == I2; +} + bool PostDominatorTreeWrapperPass::runOnFunction(Function &F) { DT.recalculate(F); return false; diff --git a/contrib/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp index dce19d6d546e..911d39d9a263 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -18,6 +18,8 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ProfileSummary.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; // The following two parameters determine the threshold for a count to be @@ -45,6 +47,13 @@ static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold( " blocks required to reach the -profile-summary-cutoff-hot" " percentile exceeds this count.")); +static cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold( + "profile-summary-large-working-set-size-threshold", cl::Hidden, + cl::init(12500), cl::ZeroOrMore, + cl::desc("The code working set size is considered large if the number of" + " blocks required to reach the -profile-summary-cutoff-hot" + " percentile exceeds this count.")); + // The next two options override the counts derived from summary computation and // are useful for debugging purposes. static cl::opt<int> ProfileSummaryHotCount( @@ -186,6 +195,31 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F, return true; } +// Like isFunctionHotInCallGraph but for a given cutoff. +bool ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile( + int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) { + if (!F || !computeSummary()) + return false; + if (auto FunctionCount = F->getEntryCount()) + if (isHotCountNthPercentile(PercentileCutoff, FunctionCount.getCount())) + return true; + + if (hasSampleProfile()) { + uint64_t TotalCallCount = 0; + for (const auto &BB : *F) + for (const auto &I : BB) + if (isa<CallInst>(I) || isa<InvokeInst>(I)) + if (auto CallCount = getProfileCount(&I, nullptr)) + TotalCallCount += CallCount.getValue(); + if (isHotCountNthPercentile(PercentileCutoff, TotalCallCount)) + return true; + } + for (const auto &BB : *F) + if (isHotBlockNthPercentile(PercentileCutoff, &BB, &BFI)) + return true; + return false; +} + /// Returns true if the function's entry is a cold. If it returns false, it /// either means it is not cold or it is unknown whether it is cold or not (for /// example, no profile data is available). 
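The new isHotCountNthPercentile/computeThreshold pair generalizes the fixed hot/cold thresholds: for a requested (scaled) percentile cutoff, the matching MinCount is looked up in the profile's detailed summary and cached. A standalone sketch of that lookup and the resulting hotness test, using a hypothetical SummaryEntry layout:

#include <cstdint>
#include <map>
#include <optional>
#include <utility>
#include <vector>

// Hypothetical mirror of one detailed-summary entry: counts of at least
// MinCount together cover at least `Cutoff` of the total profile weight.
struct SummaryEntry {
  uint64_t Cutoff;   // scaled percentile, ascending across the summary
  uint64_t MinCount; // smallest count still inside that percentile
};

class PercentileThresholds {
  std::vector<SummaryEntry> DetailedSummary;         // sorted by Cutoff
  std::map<uint64_t, std::optional<uint64_t>> Cache; // cutoff -> threshold

public:
  explicit PercentileThresholds(std::vector<SummaryEntry> S)
      : DetailedSummary(std::move(S)) {}

  // Threshold count taken from the first entry whose cutoff covers the request.
  std::optional<uint64_t> thresholdFor(uint64_t PercentileCutoff) {
    auto It = Cache.find(PercentileCutoff);
    if (It != Cache.end())
      return It->second;
    std::optional<uint64_t> Result;
    for (const SummaryEntry &E : DetailedSummary)
      if (E.Cutoff >= PercentileCutoff) {
        Result = E.MinCount;
        break;
      }
    Cache[PercentileCutoff] = Result;
    return Result;
  }

  // A count is "hot at this percentile" when it reaches the threshold.
  bool isHotCountNthPercentile(uint64_t PercentileCutoff, uint64_t Count) {
    auto T = thresholdFor(PercentileCutoff);
    return T && Count >= *T;
  }
};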
@@ -222,6 +256,23 @@ void ProfileSummaryInfo::computeThresholds() { "Cold count threshold cannot exceed hot count threshold!"); HasHugeWorkingSetSize = HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold; + HasLargeWorkingSetSize = + HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold; +} + +Optional<uint64_t> ProfileSummaryInfo::computeThreshold(int PercentileCutoff) { + if (!computeSummary()) + return None; + auto iter = ThresholdCache.find(PercentileCutoff); + if (iter != ThresholdCache.end()) { + return iter->second; + } + auto &DetailedSummary = Summary->getDetailedSummary(); + auto &Entry = + getEntryForPercentile(DetailedSummary, PercentileCutoff); + uint64_t CountThreshold = Entry.MinCount; + ThresholdCache[PercentileCutoff] = CountThreshold; + return CountThreshold; } bool ProfileSummaryInfo::hasHugeWorkingSetSize() { @@ -230,6 +281,12 @@ bool ProfileSummaryInfo::hasHugeWorkingSetSize() { return HasHugeWorkingSetSize && HasHugeWorkingSetSize.getValue(); } +bool ProfileSummaryInfo::hasLargeWorkingSetSize() { + if (!HasLargeWorkingSetSize) + computeThresholds(); + return HasLargeWorkingSetSize && HasLargeWorkingSetSize.getValue(); +} + bool ProfileSummaryInfo::isHotCount(uint64_t C) { if (!HotCountThreshold) computeThresholds(); @@ -242,6 +299,11 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) { return ColdCountThreshold && C <= ColdCountThreshold.getValue(); } +bool ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff, uint64_t C) { + auto CountThreshold = computeThreshold(PercentileCutoff); + return CountThreshold && C >= CountThreshold.getValue(); +} + uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() { if (!HotCountThreshold) computeThresholds(); @@ -265,6 +327,13 @@ bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB, return Count && isColdCount(*Count); } +bool ProfileSummaryInfo::isHotBlockNthPercentile(int PercentileCutoff, + const BasicBlock *BB, + BlockFrequencyInfo *BFI) { + auto Count = BFI->getBlockProfileCount(BB); + return Count && isHotCountNthPercentile(PercentileCutoff, *Count); +} + bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI) { auto C = getProfileCount(CS.getInstruction(), BFI); diff --git a/contrib/llvm-project/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/RegionInfo.cpp index 8ba38adfb0d2..88629517d484 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/RegionInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/RegionInfo.cpp @@ -10,6 +10,7 @@ #include "llvm/Analysis/RegionInfo.h" #include "llvm/ADT/Statistic.h" +#include "llvm/InitializePasses.h" #ifndef NDEBUG #include "llvm/Analysis/RegionPrinter.h" #endif diff --git a/contrib/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp index 5bdcb31fbe99..020ff85d1b98 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/RegionPrinter.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp index bc2cfd6fcc42..26a9a5ddf1ea 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp +++ 
b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp @@ -112,6 +112,7 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -148,6 +149,7 @@ STATISTIC(NumBruteForceTripCountsComputed, static cl::opt<unsigned> MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, + cl::ZeroOrMore, cl::desc("Maximum number of iterations SCEV will " "symbolically execute a constant " "derived loop"), @@ -157,6 +159,9 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, static cl::opt<bool> VerifySCEV( "verify-scev", cl::Hidden, cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); +static cl::opt<bool> VerifySCEVStrict( + "verify-scev-strict", cl::Hidden, + cl::desc("Enable stricter verification with -verify-scev is passed")); static cl::opt<bool> VerifySCEVMap("verify-scev-maps", cl::Hidden, cl::desc("Verify no dangling value in ScalarEvolution's " @@ -216,6 +221,12 @@ static cl::opt<unsigned> cl::desc("Size of the expression which is considered huge"), cl::init(4096)); +static cl::opt<bool> +ClassifyExpressions("scalar-evolution-classify-expressions", + cl::Hidden, cl::init(true), + cl::desc("When printing analysis, include information on every instruction")); + + //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// @@ -1707,7 +1718,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. - const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); + const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); if (!isa<SCEVCouldNotCompute>(MaxBECount)) { // Manually compute the final value for AR, checking for // overflow. @@ -2051,7 +2062,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. - const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); + const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); if (!isa<SCEVCouldNotCompute>(MaxBECount)) { // Manually compute the final value for AR, checking for // overflow. @@ -3421,7 +3432,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X } - // It's tempting to want to call getMaxBackedgeTakenCount count here and + // It's tempting to want to call getConstantMaxBackedgeTakenCount count here and // use that information to infer NUW and NSW flags. However, computing a // BE count requires calling getAddRecExpr, so we may not yet have a // meaningful BE count at this point (and if we don't, we'd be stuck @@ -3484,7 +3495,7 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand()); // getSCEV(Base)->getType() has the same address space as Base->getType() // because SCEV::getType() preserves the address space. 
- Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType()); + Type *IntIdxTy = getEffectiveSCEVType(BaseExpr->getType()); // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP // instruction to its SCEV, because the Instruction may be guarded by control // flow and the no-overflow bits may not be valid for the expression in any @@ -3493,7 +3504,7 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap; - const SCEV *TotalOffset = getZero(IntPtrTy); + const SCEV *TotalOffset = getZero(IntIdxTy); // The array size is unimportant. The first thing we do on CurTy is getting // its element type. Type *CurTy = ArrayType::get(GEP->getSourceElementType(), 0); @@ -3503,7 +3514,7 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, // For a struct, add the member offset. ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue(); unsigned FieldNo = Index->getZExtValue(); - const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo); + const SCEV *FieldOffset = getOffsetOfExpr(IntIdxTy, STy, FieldNo); // Add the field offset to the running total offset. TotalOffset = getAddExpr(TotalOffset, FieldOffset); @@ -3514,9 +3525,9 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, // Update CurTy to its element type. CurTy = cast<SequentialType>(CurTy)->getElementType(); // For an array, add the element offset, explicitly scaled. - const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, CurTy); + const SCEV *ElementSize = getSizeOfExpr(IntIdxTy, CurTy); // Getelementptr indices are signed. - IndexExpr = getTruncateOrSignExtend(IndexExpr, IntPtrTy); + IndexExpr = getTruncateOrSignExtend(IndexExpr, IntIdxTy); // Multiply the index by the element size to compute the element offset. const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap); @@ -3775,7 +3786,7 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { /// Return a type with the same bitwidth as the given type and which represents /// how SCEV will treat the given type, for which isSCEVable must return -/// true. For pointer types, this is the pointer-sized integer type. +/// true. For pointer types, this is the pointer index sized integer type. Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); @@ -3784,7 +3795,7 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { // The only other support type is pointer. assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); - return getDataLayout().getIntPtrType(Ty); + return getDataLayout().getIndexType(Ty); } Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const { @@ -4564,6 +4575,12 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) { break; } + // Recognise intrinsic loop.decrement.reg, and as this has exactly the same + // semantics as a Sub, return a binary sub expression. + if (auto *II = dyn_cast<IntrinsicInst>(V)) + if (II->getIntrinsicID() == Intrinsic::loop_decrement_reg) + return BinaryOp(Instruction::Sub, II->getOperand(0), II->getOperand(1)); + return None; } @@ -4991,7 +5008,7 @@ const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN, // overflow. 
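The IntPtrTy to IntIdxTy renaming in the hunks above tracks a semantic change: getEffectiveSCEVType now models pointers with the pointer index type (DataLayout::getIndexType) rather than the pointer-sized integer type, and the two differ on targets whose index width is narrower than the pointer width. A small illustrative snippet; the data layout string with a 64-bit pointer and 32-bit index width is an assumption chosen only to make the difference visible:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// With "p:64:64:64:32" pointers occupy 64 bits but are indexed with 32 bits,
// so the two queries below return different integer types.
static void showIndexVsPointerWidth() {
  LLVMContext Ctx;
  DataLayout DL("p:64:64:64:32");              // size:abi:pref:index (assumed)
  Type *PtrTy = Type::getInt8PtrTy(Ctx);
  IntegerType *IntPtr = DL.getIntPtrType(Ctx); // i64: pointer-sized integer
  Type *IntIdx = DL.getIndexType(PtrTy);       // i32: index-sized integer
  (void)IntPtr;
  (void)IntIdx;
}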
if (auto *BEInst = dyn_cast<Instruction>(BEValueV)) if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L)) - (void)getAddRecExpr(getAddExpr(StartVal, Accum, Flags), Accum, L, Flags); + (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); return PHISCEV; } @@ -5549,6 +5566,7 @@ ScalarEvolution::getRangeRef(const SCEV *S, unsigned BitWidth = getTypeSizeInBits(S->getType()); ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); + using OBO = OverflowingBinaryOperator; // If the value has known zeros, the maximum value will have those known zeros // as well. @@ -5566,8 +5584,14 @@ ScalarEvolution::getRangeRef(const SCEV *S, if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { ConstantRange X = getRangeRef(Add->getOperand(0), SignHint); + unsigned WrapType = OBO::AnyWrap; + if (Add->hasNoSignedWrap()) + WrapType |= OBO::NoSignedWrap; + if (Add->hasNoUnsignedWrap()) + WrapType |= OBO::NoUnsignedWrap; for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) - X = X.add(getRangeRef(Add->getOperand(i), SignHint)); + X = X.addWithNoWrap(getRangeRef(Add->getOperand(i), SignHint), + WrapType, RangeType); return setRange(Add, SignHint, ConservativeResult.intersectWith(X, RangeType)); } @@ -5596,6 +5620,22 @@ ScalarEvolution::getRangeRef(const SCEV *S, ConservativeResult.intersectWith(X, RangeType)); } + if (const SCEVSMinExpr *SMin = dyn_cast<SCEVSMinExpr>(S)) { + ConstantRange X = getRangeRef(SMin->getOperand(0), SignHint); + for (unsigned i = 1, e = SMin->getNumOperands(); i != e; ++i) + X = X.smin(getRangeRef(SMin->getOperand(i), SignHint)); + return setRange(SMin, SignHint, + ConservativeResult.intersectWith(X, RangeType)); + } + + if (const SCEVUMinExpr *UMin = dyn_cast<SCEVUMinExpr>(S)) { + ConstantRange X = getRangeRef(UMin->getOperand(0), SignHint); + for (unsigned i = 1, e = UMin->getNumOperands(); i != e; ++i) + X = X.umin(getRangeRef(UMin->getOperand(i), SignHint)); + return setRange(UMin, SignHint, + ConservativeResult.intersectWith(X, RangeType)); + } + if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint); ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint); @@ -5627,34 +5667,43 @@ ScalarEvolution::getRangeRef(const SCEV *S, if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { // If there's no unsigned wrap, the value will never be less than its // initial value. - if (AddRec->hasNoUnsignedWrap()) - if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart())) - if (!C->getValue()->isZero()) - ConservativeResult = ConservativeResult.intersectWith( - ConstantRange(C->getAPInt(), APInt(BitWidth, 0)), RangeType); - - // If there's no signed wrap, and all the operands have the same sign or - // zero, the value won't ever change sign. + if (AddRec->hasNoUnsignedWrap()) { + APInt UnsignedMinValue = getUnsignedRangeMin(AddRec->getStart()); + if (!UnsignedMinValue.isNullValue()) + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(UnsignedMinValue, APInt(BitWidth, 0)), RangeType); + } + + // If there's no signed wrap, and all the operands except initial value have + // the same sign or zero, the value won't ever be: + // 1: smaller than initial value if operands are non negative, + // 2: bigger than initial value if operands are non positive. + // For both cases, value can not cross signed min/max boundary. 
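The getRangeRef change above folds the add expression's nuw/nsw flags into the range arithmetic via ConstantRange::addWithNoWrap, which can stay tighter than a plain add when the unflagged sum could wrap. A minimal sketch of the call shape for the unsigned case; only the addWithNoWrap call itself is taken from the hunk, the helper around it is an assumption:

#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Operator.h"

using namespace llvm;

// Add two ranges under the assumption that the underlying IR add is 'nuw':
// results that would wrap past the unsigned maximum are excluded, so the
// returned range can be narrower than X.add(Y).
static ConstantRange addRangesNoUnsignedWrap(const ConstantRange &X,
                                             const ConstantRange &Y) {
  using OBO = OverflowingBinaryOperator;
  return X.addWithNoWrap(Y, OBO::NoUnsignedWrap);
}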
if (AddRec->hasNoSignedWrap()) { bool AllNonNeg = true; bool AllNonPos = true; - for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { - if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false; - if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false; + for (unsigned i = 1, e = AddRec->getNumOperands(); i != e; ++i) { + if (!isKnownNonNegative(AddRec->getOperand(i))) + AllNonNeg = false; + if (!isKnownNonPositive(AddRec->getOperand(i))) + AllNonPos = false; } if (AllNonNeg) ConservativeResult = ConservativeResult.intersectWith( - ConstantRange(APInt(BitWidth, 0), - APInt::getSignedMinValue(BitWidth)), RangeType); + ConstantRange::getNonEmpty(getSignedRangeMin(AddRec->getStart()), + APInt::getSignedMinValue(BitWidth)), + RangeType); else if (AllNonPos) ConservativeResult = ConservativeResult.intersectWith( - ConstantRange(APInt::getSignedMinValue(BitWidth), - APInt(BitWidth, 1)), RangeType); + ConstantRange::getNonEmpty( + APInt::getSignedMinValue(BitWidth), + getSignedRangeMax(AddRec->getStart()) + 1), + RangeType); } // TODO: non-affine addrec if (AddRec->isAffine()) { - const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); + const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(AddRec->getLoop()); if (!isa<SCEVCouldNotCompute>(MaxBECount) && getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { auto RangeFromAffine = getRangeForAffineAR( @@ -5690,14 +5739,26 @@ ScalarEvolution::getRangeRef(const SCEV *S, if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) { // For a SCEVUnknown, ask ValueTracking. KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT); - if (Known.One != ~Known.Zero + 1) - ConservativeResult = - ConservativeResult.intersectWith( - ConstantRange(Known.One, ~Known.Zero + 1), RangeType); + if (Known.getBitWidth() != BitWidth) + Known = Known.zextOrTrunc(BitWidth, true); + // If Known does not result in full-set, intersect with it. + if (Known.getMinValue() != Known.getMaxValue() + 1) + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(Known.getMinValue(), Known.getMaxValue() + 1), + RangeType); } else { assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED && "generalize as needed!"); unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT); + // If the pointer size is larger than the index size type, this can cause + // NS to be larger than BitWidth. So compensate for this. + if (U->getType()->isPointerTy()) { + unsigned ptrSize = DL.getPointerTypeSizeInBits(U->getType()); + int ptrIdxDiff = ptrSize - BitWidth; + if (ptrIdxDiff > 0 && ptrSize > BitWidth && NS > (unsigned)ptrIdxDiff) + NS -= ptrIdxDiff; + } + if (NS > 1) ConservativeResult = ConservativeResult.intersectWith( ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), @@ -6523,7 +6584,7 @@ unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L, unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) { const auto *MaxExitCount = - dyn_cast<SCEVConstant>(getMaxBackedgeTakenCount(L)); + dyn_cast<SCEVConstant>(getConstantMaxBackedgeTakenCount(L)); return getConstantTripCount(MaxExitCount); } @@ -6579,12 +6640,16 @@ ScalarEvolution::getSmallConstantTripMultiple(const Loop *L, return (unsigned)Result->getZExtValue(); } -/// Get the expression for the number of loop iterations for which this loop is -/// guaranteed not to exit via ExitingBlock. Otherwise return -/// SCEVCouldNotCompute. 
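To make the tightened addrec bound above concrete: for an nsw recurrence such as {5,+,1}, no value can drop below the start, so the signed range can be clamped to [5, SINT_MAX] rather than merely [0, SINT_MAX]. A small sketch using the same ConstantRange::getNonEmpty helper the hunk uses; the function name and example values are invented:

  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/ConstantRange.h"
  using namespace llvm;

  // Lower bound for an nsw addrec whose step operands are all known
  // non-negative: nothing below the signed minimum of the start is reachable.
  ConstantRange nonNegAddRecBound(const APInt &StartSignedMin) {
    // getNonEmpty degrades to the full set if the two endpoints happen to meet.
    return ConstantRange::getNonEmpty(
        StartSignedMin,
        APInt::getSignedMinValue(StartSignedMin.getBitWidth()));
  }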
const SCEV *ScalarEvolution::getExitCount(const Loop *L, - BasicBlock *ExitingBlock) { - return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); + BasicBlock *ExitingBlock, + ExitCountKind Kind) { + switch (Kind) { + case Exact: + return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); + case ConstantMaximum: + return getBackedgeTakenInfo(L).getMax(ExitingBlock, this); + }; + llvm_unreachable("Invalid ExitCountKind!"); } const SCEV * @@ -6593,14 +6658,15 @@ ScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L, return getPredicatedBackedgeTakenInfo(L).getExact(L, this, &Preds); } -const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) { - return getBackedgeTakenInfo(L).getExact(L, this); -} - -/// Similar to getBackedgeTakenCount, except return the least SCEV value that is -/// known never to be less than the actual backedge taken count. -const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { - return getBackedgeTakenInfo(L).getMax(this); +const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L, + ExitCountKind Kind) { + switch (Kind) { + case Exact: + return getBackedgeTakenInfo(L).getExact(L, this); + case ConstantMaximum: + return getBackedgeTakenInfo(L).getMax(this); + }; + llvm_unreachable("Invalid ExitCountKind!"); } bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) { @@ -6909,6 +6975,16 @@ ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, return SE->getCouldNotCompute(); } +const SCEV * +ScalarEvolution::BackedgeTakenInfo::getMax(BasicBlock *ExitingBlock, + ScalarEvolution *SE) const { + for (auto &ENT : ExitNotTaken) + if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate()) + return ENT.MaxNotTaken; + + return SE->getCouldNotCompute(); +} + /// getMax - Get the max backedge taken count for the loop. const SCEV * ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { @@ -7000,13 +7076,15 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( BasicBlock *ExitBB = EEI.first; const ExitLimit &EL = EEI.second; if (EL.Predicates.empty()) - return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, nullptr); + return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken, + nullptr); std::unique_ptr<SCEVUnionPredicate> Predicate(new SCEVUnionPredicate); for (auto *Pred : EL.Predicates) Predicate->add(Pred); - return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, std::move(Predicate)); + return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken, + std::move(Predicate)); }); assert((isa<SCEVCouldNotCompute>(MaxCount) || isa<SCEVConstant>(MaxCount)) && "No point in having a non-constant max backedge taken count!"); @@ -7038,6 +7116,17 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L, // Do a union of all the predicates here. for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitBB = ExitingBlocks[i]; + + // We canonicalize untaken exits to br (constant), ignore them so that + // proving an exit untaken doesn't negatively impact our ability to reason + // about the loop as whole. 
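A hedged usage sketch of the per-exit ExitCountKind query introduced above; SE, L and ExitingBB stand for a ScalarEvolution instance, a loop and one of its exiting blocks that a caller would already have, and the helper name is invented:

  #include "llvm/Analysis/ScalarEvolution.h"
  using namespace llvm;

  const SCEV *bestExitBound(ScalarEvolution &SE, const Loop *L,
                            BasicBlock *ExitingBB) {
    // Prefer the exact not-taken count for this exit when SCEV can prove one.
    const SCEV *Exact = SE.getExitCount(L, ExitingBB, ScalarEvolution::Exact);
    if (!isa<SCEVCouldNotCompute>(Exact))
      return Exact;
    // Otherwise fall back to the new per-exit constant upper bound.
    return SE.getExitCount(L, ExitingBB, ScalarEvolution::ConstantMaximum);
  }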
+ if (auto *BI = dyn_cast<BranchInst>(ExitBB->getTerminator())) + if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { + bool ExitIfTrue = !L->contains(BI->getSuccessor(0)); + if ((ExitIfTrue && CI->isZero()) || (!ExitIfTrue && CI->isOne())) + continue; + } + ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates); assert((AllowPredicates || EL.Predicates.empty()) && @@ -7197,6 +7286,11 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( ExitLimit EL1 = computeExitLimitFromCondCached( Cache, L, BO->getOperand(1), ExitIfTrue, ControlsExit && !EitherMayExit, AllowPredicates); + // Be robust against unsimplified IR for the form "and i1 X, true" + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) + return CI->isOne() ? EL0 : EL1; + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(0))) + return CI->isOne() ? EL1 : EL0; const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { @@ -7245,6 +7339,11 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( ExitLimit EL1 = computeExitLimitFromCondCached( Cache, L, BO->getOperand(1), ExitIfTrue, ControlsExit && !EitherMayExit, AllowPredicates); + // Be robust against unsimplified IR for the form "or i1 X, true" + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) + return CI->isZero() ? EL0 : EL1; + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(0))) + return CI->isZero() ? EL1 : EL0; const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { @@ -9833,6 +9932,10 @@ Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More, // We avoid subtracting expressions here because this function is usually // fairly deep in the call stack (i.e. is called many times). + // X - X = 0. + if (More == Less) + return APInt(getTypeSizeInBits(More->getType()), 0); + if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) { const auto *LAR = cast<SCEVAddRecExpr>(Less); const auto *MAR = cast<SCEVAddRecExpr>(More); @@ -10314,10 +10417,43 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred, return false; } +static bool isKnownPredicateExtendIdiom(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // zext x u<= sext x, sext x s<= zext x + switch (Pred) { + case ICmpInst::ICMP_SGE: + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_SLE: { + // If operand >=s 0 then ZExt == SExt. If operand <s 0 then SExt <s ZExt. + const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(LHS); + const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(RHS); + if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand()) + return true; + break; + } + case ICmpInst::ICMP_UGE: + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_ULE: { + // If operand >=s 0 then ZExt == SExt. If operand <s 0 then ZExt <u SExt. 
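A concrete instance of the extend idiom being matched by this helper, spelled out at an illustrative width of i8: for x = -1, zext to i16 gives 255 while sext gives 0xFFFF, which is 65535 read unsigned but -1 read signed, so zext(x) u<= sext(x) and sext(x) s<= zext(x) hold for every x. The same check with APInt; the function name is invented:

  #include "llvm/ADT/APInt.h"
  using namespace llvm;

  void extendIdiomExample() {
    APInt X = APInt::getAllOnesValue(8); // i8 -1, i.e. 0xFF
    APInt Z = X.zext(16);                // 0x00FF = 255
    APInt S = X.sext(16);                // 0xFFFF = 65535 unsigned, -1 signed
    bool ZextULESext = Z.ule(S);         // true for every x, not just this one
    bool SextSLEZext = S.sle(Z);         // true for every x, not just this one
    (void)ZextULESext;
    (void)SextSLEZext;
  }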
+ const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS); + const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(RHS); + if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand()) + return true; + break; + } + default: + break; + }; + return false; +} + bool ScalarEvolution::isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { - return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) || + return isKnownPredicateExtendIdiom(Pred, LHS, RHS) || + isKnownPredicateViaConstantRanges(Pred, LHS, RHS) || IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) || IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) || isKnownPredicateViaNoOverflow(Pred, LHS, RHS); @@ -10919,7 +11055,7 @@ struct SCEVCollectAddRecMultiplies { } else if (Unknown) { HasAddRec = true; } else { - bool ContainsAddRec; + bool ContainsAddRec = false; SCEVHasAddRec ContiansAddRec(ContainsAddRec); visitAll(Op, ContiansAddRec); HasAddRec |= ContainsAddRec; @@ -11434,8 +11570,8 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; - if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) { - OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L); + if (!isa<SCEVCouldNotCompute>(SE->getConstantMaxBackedgeTakenCount(L))) { + OS << "max backedge-taken count is " << *SE->getConstantMaxBackedgeTakenCount(L); if (SE->isBackedgeTakenCountMaxOrZero(L)) OS << ", actual taken count either this or zero."; } else { @@ -11487,77 +11623,79 @@ void ScalarEvolution::print(raw_ostream &OS) const { // const isn't dangerous. ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); - OS << "Classifying expressions for: "; - F.printAsOperand(OS, /*PrintType=*/false); - OS << "\n"; - for (Instruction &I : instructions(F)) - if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) { - OS << I << '\n'; - OS << " --> "; - const SCEV *SV = SE.getSCEV(&I); - SV->print(OS); - if (!isa<SCEVCouldNotCompute>(SV)) { - OS << " U: "; - SE.getUnsignedRange(SV).print(OS); - OS << " S: "; - SE.getSignedRange(SV).print(OS); - } - - const Loop *L = LI.getLoopFor(I.getParent()); - - const SCEV *AtUse = SE.getSCEVAtScope(SV, L); - if (AtUse != SV) { + if (ClassifyExpressions) { + OS << "Classifying expressions for: "; + F.printAsOperand(OS, /*PrintType=*/false); + OS << "\n"; + for (Instruction &I : instructions(F)) + if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) { + OS << I << '\n'; OS << " --> "; - AtUse->print(OS); - if (!isa<SCEVCouldNotCompute>(AtUse)) { + const SCEV *SV = SE.getSCEV(&I); + SV->print(OS); + if (!isa<SCEVCouldNotCompute>(SV)) { OS << " U: "; - SE.getUnsignedRange(AtUse).print(OS); + SE.getUnsignedRange(SV).print(OS); OS << " S: "; - SE.getSignedRange(AtUse).print(OS); + SE.getSignedRange(SV).print(OS); } - } - if (L) { - OS << "\t\t" "Exits: "; - const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); - if (!SE.isLoopInvariant(ExitValue, L)) { - OS << "<<Unknown>>"; - } else { - OS << *ExitValue; + const Loop *L = LI.getLoopFor(I.getParent()); + + const SCEV *AtUse = SE.getSCEVAtScope(SV, L); + if (AtUse != SV) { + OS << " --> "; + AtUse->print(OS); + if (!isa<SCEVCouldNotCompute>(AtUse)) { + OS << " U: "; + SE.getUnsignedRange(AtUse).print(OS); + OS << " S: "; + SE.getSignedRange(AtUse).print(OS); + } } - bool First = true; - for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()) { - if (First) { - OS << "\t\t" "LoopDispositions: { "; - First = false; + if (L) { + OS << 
"\t\t" "Exits: "; + const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); + if (!SE.isLoopInvariant(ExitValue, L)) { + OS << "<<Unknown>>"; } else { - OS << ", "; + OS << *ExitValue; } - Iter->getHeader()->printAsOperand(OS, /*PrintType=*/false); - OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter)); - } + bool First = true; + for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()) { + if (First) { + OS << "\t\t" "LoopDispositions: { "; + First = false; + } else { + OS << ", "; + } - for (auto *InnerL : depth_first(L)) { - if (InnerL == L) - continue; - if (First) { - OS << "\t\t" "LoopDispositions: { "; - First = false; - } else { - OS << ", "; + Iter->getHeader()->printAsOperand(OS, /*PrintType=*/false); + OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter)); + } + + for (auto *InnerL : depth_first(L)) { + if (InnerL == L) + continue; + if (First) { + OS << "\t\t" "LoopDispositions: { "; + First = false; + } else { + OS << ", "; + } + + InnerL->getHeader()->printAsOperand(OS, /*PrintType=*/false); + OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, InnerL)); } - InnerL->getHeader()->printAsOperand(OS, /*PrintType=*/false); - OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, InnerL)); + OS << " }"; } - OS << " }"; + OS << "\n"; } - - OS << "\n"; - } + } OS << "Determining loop execution counts for: "; F.printAsOperand(OS, /*PrintType=*/false); @@ -11901,14 +12039,14 @@ void ScalarEvolution::verify() const { SE.getTypeSizeInBits(NewBECount->getType())) CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType()); - auto *ConstantDelta = - dyn_cast<SCEVConstant>(SE2.getMinusSCEV(CurBECount, NewBECount)); + const SCEV *Delta = SE2.getMinusSCEV(CurBECount, NewBECount); - if (ConstantDelta && ConstantDelta->getAPInt() != 0) { - dbgs() << "Trip Count Changed!\n"; + // Unless VerifySCEVStrict is set, we only compare constant deltas. 
+ if ((VerifySCEVStrict || isa<SCEVConstant>(Delta)) && !Delta->isZero()) { + dbgs() << "Trip Count for " << *L << " Changed!\n"; dbgs() << "Old: " << *CurBECount << "\n"; dbgs() << "New: " << *NewBECount << "\n"; - dbgs() << "Delta: " << *ConstantDelta << "\n"; + dbgs() << "Delta: " << *Delta << "\n"; std::abort(); } } @@ -11937,6 +12075,12 @@ ScalarEvolution ScalarEvolutionAnalysis::run(Function &F, } PreservedAnalyses +ScalarEvolutionVerifierPass::run(Function &F, FunctionAnalysisManager &AM) { + AM.getResult<ScalarEvolutionAnalysis>(F).verify(); + return PreservedAnalyses::all(); +} + +PreservedAnalyses ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { AM.getResult<ScalarEvolutionAnalysis>(F).print(OS); return PreservedAnalyses::all(); @@ -11959,7 +12103,7 @@ ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) { bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) { SE.reset(new ScalarEvolution( - F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), + F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F), getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), getAnalysis<DominatorTreeWrapperPass>().getDomTree(), getAnalysis<LoopInfoWrapperPass>().getLoopInfo())); @@ -12405,7 +12549,7 @@ PredicatedScalarEvolution::PredicatedScalarEvolution( const PredicatedScalarEvolution &Init) : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds), Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) { - for (const auto &I : Init.FlagsMap) + for (auto I : Init.FlagsMap) FlagsMap.insert(I); } diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index 96da0a24cddd..79640256f695 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -19,6 +19,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/InitializePasses.h" using namespace llvm; AliasResult SCEVAAResult::alias(const MemoryLocation &LocA, diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index e8a95d35482c..dc5d02aa3a3c 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -240,9 +240,6 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, /// division. If so, update S with Factor divided out and return true. /// S need not be evenly divisible if a reasonable remainder can be /// computed. -/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made -/// unnecessary; in its place, just signed-divide Ops[i] by the scale and -/// check to see if the divide was folded. static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder, const SCEV *Factor, ScalarEvolution &SE, const DataLayout &DL) { @@ -417,7 +414,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // without the other. SplitAddRecs(Ops, Ty, SE); - Type *IntPtrTy = DL.getIntPtrType(PTy); + Type *IntIdxTy = DL.getIndexType(PTy); // Descend down the pointer's type and attempt to convert the other // operands into GEP indices, at each level. 
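Stepping back to the ScalarEvolutionVerifierPass added in this hunk: a hedged sketch of wiring it into a new-pass-manager function pipeline; the wrapper function is invented, the pass itself is what the hunk defines:

  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/IR/PassManager.h"
  using namespace llvm;

  void addSCEVVerification(FunctionPassManager &FPM) {
    // Gets the ScalarEvolution analysis result for each function and runs its
    // verify(), which rebuilds backedge-taken counts with a fresh SE and
    // aborts on a mismatch.
    FPM.addPass(ScalarEvolutionVerifierPass());
  }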
The first index in a GEP @@ -429,7 +426,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // array indexing. SmallVector<const SCEV *, 8> ScaledOps; if (ElTy->isSized()) { - const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy); + const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy); if (!ElSize->isZero()) { SmallVector<const SCEV *, 8> NewOps; for (const SCEV *Op : Ops) { @@ -1486,7 +1483,18 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { } Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { - if (!CanonicalMode) return expandAddRecExprLiterally(S); + // In canonical mode we compute the addrec as an expression of a canonical IV + // using evaluateAtIteration and expand the resulting SCEV expression. This + // way we avoid introducing new IVs to carry on the comutation of the addrec + // throughout the loop. + // + // For nested addrecs evaluateAtIteration might need a canonical IV of a + // type wider than the addrec itself. Emitting a canonical IV of the + // proper type might produce non-legal types, for example expanding an i64 + // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall + // back to non-canonical mode for nested addrecs. + if (!CanonicalMode || (S->getNumOperands() > 2)) + return expandAddRecExprLiterally(S); Type *Ty = SE.getEffectiveSCEVType(S->getType()); const Loop *L = S->getLoop(); @@ -2094,11 +2102,10 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At, for (BasicBlock *BB : ExitingBlocks) { ICmpInst::Predicate Pred; Instruction *LHS, *RHS; - BasicBlock *TrueBB, *FalseBB; if (!match(BB->getTerminator(), m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), - TrueBB, FalseBB))) + m_BasicBlock(), m_BasicBlock()))) continue; if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At)) diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp index 094e4a3d5dc8..8928678d6ab2 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ScopedNoAliasAA.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp index 4cf235db86eb..7f5bedabbd80 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -13,6 +13,8 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -333,8 +335,8 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(const Value *Ptr, UseInfo &US) { // FIXME: consult devirt? // Do not follow aliases, otherwise we could inadvertently follow // dso_preemptable aliases or aliases with interposable linkage. 
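The i65 remark in the visitAddRecExpr comment above can be made concrete. Evaluating {0,+,2,+,1}<L> at iteration N gives the closed form 2*N + N*(N-1)/2, and the intermediate product N*(N-1) of an i64 N may need 65 bits before the halving, which is why the expander now falls back to literal expansion for such nested addrecs. A small APInt sketch of that closed form; the helper name is invented and the 65-bit width is taken from the comment:

  #include "llvm/ADT/APInt.h"
  using namespace llvm;

  APInt evalQuadraticAddRec(uint64_t N) {
    // Evaluate {0,+,2,+,1} at step N in 65 bits so N*(N-1) cannot overflow.
    APInt X(65, N);
    APInt One(65, 1), Two(65, 2);
    APInt Linear = X * Two;                    // 2*N
    APInt Quadratic = (X * (X - One)).lshr(1); // N*(N-1)/2
    return Linear + Quadratic;
  }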
- const GlobalValue *Callee = dyn_cast<GlobalValue>( - CS.getCalledValue()->stripPointerCastsNoFollowAliases()); + const GlobalValue *Callee = + dyn_cast<GlobalValue>(CS.getCalledValue()->stripPointerCasts()); if (!Callee) { US.updateRange(UnknownRange); return false; diff --git a/contrib/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp index 3cf248a31142..8447dc87069d 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp @@ -218,9 +218,11 @@ struct DivergencePropagator { template <typename SuccessorIterable> std::unique_ptr<ConstBlockSet> computeJoinPoints(const BasicBlock &RootBlock, - SuccessorIterable NodeSuccessors, const Loop *ParentLoop, const BasicBlock * PdBoundBlock) { + SuccessorIterable NodeSuccessors, const Loop *ParentLoop) { assert(JoinBlocks); + LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints. Parent loop: " << (ParentLoop ? ParentLoop->getName() : "<null>") << "\n" ); + // bootstrap with branch targets for (const auto *SuccBlock : NodeSuccessors) { DefMap.emplace(SuccBlock, SuccBlock); @@ -228,13 +230,19 @@ struct DivergencePropagator { if (ParentLoop && !ParentLoop->contains(SuccBlock)) { // immediate loop exit from node. ReachedLoopExits.insert(SuccBlock); - continue; } else { // regular successor PendingUpdates.insert(SuccBlock); } } + LLVM_DEBUG( + dbgs() << "SDA: rpo order:\n"; + for (const auto * RpoBlock : FuncRPOT) { + dbgs() << "- " << RpoBlock->getName() << "\n"; + } + ); + auto ItBeginRPO = FuncRPOT.begin(); // skip until term (TODO RPOT won't let us start at @term directly) @@ -245,16 +253,18 @@ struct DivergencePropagator { // propagate definitions at the immediate successors of the node in RPO auto ItBlockRPO = ItBeginRPO; - while (++ItBlockRPO != ItEndRPO && *ItBlockRPO != PdBoundBlock) { + while ((++ItBlockRPO != ItEndRPO) && + !PendingUpdates.empty()) { const auto *Block = *ItBlockRPO; + LLVM_DEBUG(dbgs() << "SDA::joins. visiting " << Block->getName() << "\n"); - // skip @block if not pending update + // skip Block if not pending update auto ItPending = PendingUpdates.find(Block); if (ItPending == PendingUpdates.end()) continue; PendingUpdates.erase(ItPending); - // propagate definition at @block to its successors + // propagate definition at Block to its successors auto ItDef = DefMap.find(Block); const auto *DefBlock = ItDef->second; assert(DefBlock); @@ -278,6 +288,8 @@ struct DivergencePropagator { } } + LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs())); + // We need to know the definition at the parent loop header to decide // whether the definition at the header is different from the definition at // the loop exits, which would indicate a divergent loop exits. @@ -292,24 +304,17 @@ struct DivergencePropagator { // | // proper exit from both loops // - // D post-dominates B as it is the only proper exit from the "A loop". - // If C has a divergent branch, propagation will therefore stop at D. - // That implies that B will never receive a definition. - // But that definition can only be the same as at D (D itself in thise case) - // because all paths to anywhere have to pass through D. - // - const BasicBlock *ParentLoopHeader = - ParentLoop ? 
ParentLoop->getHeader() : nullptr; - if (ParentLoop && ParentLoop->contains(PdBoundBlock)) { - DefMap[ParentLoopHeader] = DefMap[PdBoundBlock]; - } - // analyze reached loop exits if (!ReachedLoopExits.empty()) { + const BasicBlock *ParentLoopHeader = + ParentLoop ? ParentLoop->getHeader() : nullptr; + assert(ParentLoop); - const auto *HeaderDefBlock = DefMap[ParentLoopHeader]; + auto ItHeaderDef = DefMap.find(ParentLoopHeader); + const auto *HeaderDefBlock = (ItHeaderDef == DefMap.end()) ? nullptr : ItHeaderDef->second; + LLVM_DEBUG(printDefs(dbgs())); - assert(HeaderDefBlock && "no definition in header of carrying loop"); + assert(HeaderDefBlock && "no definition at header of carrying loop"); for (const auto *ExitBlock : ReachedLoopExits) { auto ItExitDef = DefMap.find(ExitBlock); @@ -339,19 +344,10 @@ const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) { return *ItCached->second; } - // dont propagte beyond the immediate post dom of the loop - const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Loop.getHeader())); - const auto *IpdNode = PdNode->getIDom(); - const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr; - while (PdBoundBlock && Loop.contains(PdBoundBlock)) { - IpdNode = IpdNode->getIDom(); - PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr; - } - // compute all join points DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI}; auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>( - *Loop.getHeader(), LoopExits, Loop.getParentLoop(), PdBoundBlock); + *Loop.getHeader(), LoopExits, Loop.getParentLoop()); auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks)); assert(ItInserted.second); @@ -370,16 +366,11 @@ SyncDependenceAnalysis::join_blocks(const Instruction &Term) { if (ItCached != CachedBranchJoins.end()) return *ItCached->second; - // dont propagate beyond the immediate post dominator of the branch - const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Term.getParent())); - const auto *IpdNode = PdNode->getIDom(); - const auto *PdBoundBlock = IpdNode ? 
IpdNode->getBlock() : nullptr; - // compute all join points DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI}; const auto &TermBlock = *Term.getParent(); auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>( - TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock), PdBoundBlock); + TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock)); auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks)); assert(ItInserted.second); diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp index ef139d3257d2..c7238db43aab 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -13,6 +13,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -28,7 +29,8 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary( clEnumValN(TargetLibraryInfoImpl::SVML, "SVML", "Intel SVML library"))); -StringRef const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = { +StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = + { #define TLI_DEFINE_STRING #include "llvm/Analysis/TargetLibraryInfo.def" }; @@ -58,14 +60,14 @@ static bool hasBcmp(const Triple &TT) { return TT.isGNUEnvironment() || TT.isMusl(); // Both NetBSD and OpenBSD are planning to remove the function. Windows does // not have it. - return TT.isOSFreeBSD() || TT.isOSSolaris() || TT.isOSDarwin(); + return TT.isOSFreeBSD() || TT.isOSSolaris(); } /// Initialize the set of available library functions based on the specified /// target triple. This should be carefully written so that a missing target /// triple gets a sane set of defaults. static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, - ArrayRef<StringRef> StandardNames) { + ArrayRef<StringLiteral> StandardNames) { // Verify that the StandardNames array is in alphabetical order. assert(std::is_sorted(StandardNames.begin(), StandardNames.end(), [](StringRef LHS, StringRef RHS) { @@ -104,19 +106,10 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setShouldSignExtI32Param(ShouldSignExtI32Param); if (T.getArch() == Triple::r600 || - T.getArch() == Triple::amdgcn) { - TLI.setUnavailable(LibFunc_ldexp); - TLI.setUnavailable(LibFunc_ldexpf); - TLI.setUnavailable(LibFunc_ldexpl); - TLI.setUnavailable(LibFunc_exp10); - TLI.setUnavailable(LibFunc_exp10f); - TLI.setUnavailable(LibFunc_exp10l); - TLI.setUnavailable(LibFunc_log10); - TLI.setUnavailable(LibFunc_log10f); - TLI.setUnavailable(LibFunc_log10l); - } + T.getArch() == Triple::amdgcn) + TLI.disableAllFunctions(); - // There are no library implementations of mempcy and memset for AMD gpus and + // There are no library implementations of memcpy and memset for AMD gpus and // these can be difficult to lower in the backend. 
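Related to the hasBcmp change above: whether a transform may emit a given library routine is ultimately queried through TargetLibraryInfo. A hedged one-liner sketch; the helper name is invented and TLI would come from the caller's analysis results:

  #include "llvm/Analysis/TargetLibraryInfo.h"
  using namespace llvm;

  bool canEmitBcmp(const TargetLibraryInfo &TLI) {
    // True only when bcmp is both recognized and available on this target.
    return TLI.has(LibFunc_bcmp);
  }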
if (T.getArch() == Triple::r600 || T.getArch() == Triple::amdgcn) { @@ -386,10 +379,8 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, case Triple::TvOS: case Triple::WatchOS: TLI.setUnavailable(LibFunc_exp10l); - if (!T.isWatchOS() && (T.isOSVersionLT(7, 0) || - (T.isOSVersionLT(9, 0) && - (T.getArch() == Triple::x86 || - T.getArch() == Triple::x86_64)))) { + if (!T.isWatchOS() && + (T.isOSVersionLT(7, 0) || (T.isOSVersionLT(9, 0) && T.isX86()))) { TLI.setUnavailable(LibFunc_exp10); TLI.setUnavailable(LibFunc_exp10f); } else { @@ -623,19 +614,14 @@ static StringRef sanitizeFunctionName(StringRef funcName) { return GlobalValue::dropLLVMManglingEscape(funcName); } -bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, - LibFunc &F) const { - StringRef const *Start = &StandardNames[0]; - StringRef const *End = &StandardNames[NumLibFuncs]; - +bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, LibFunc &F) const { funcName = sanitizeFunctionName(funcName); if (funcName.empty()) return false; - StringRef const *I = std::lower_bound( - Start, End, funcName, [](StringRef LHS, StringRef RHS) { - return LHS < RHS; - }); + const auto *Start = std::begin(StandardNames); + const auto *End = std::end(StandardNames); + const auto *I = std::lower_bound(Start, End, funcName); if (I != End && *I == funcName) { F = (LibFunc)(I - Start); return true; @@ -1481,6 +1467,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return false; } case LibFunc::NumLibFuncs: + case LibFunc::NotLibFunc: break; } @@ -1599,30 +1586,12 @@ StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F, return I->ScalarFnName; } -TargetLibraryInfo TargetLibraryAnalysis::run(Module &M, - ModuleAnalysisManager &) { - if (PresetInfoImpl) - return TargetLibraryInfo(*PresetInfoImpl); - - return TargetLibraryInfo(lookupInfoImpl(Triple(M.getTargetTriple()))); -} - -TargetLibraryInfo TargetLibraryAnalysis::run(Function &F, +TargetLibraryInfo TargetLibraryAnalysis::run(const Function &F, FunctionAnalysisManager &) { - if (PresetInfoImpl) - return TargetLibraryInfo(*PresetInfoImpl); - - return TargetLibraryInfo( - lookupInfoImpl(Triple(F.getParent()->getTargetTriple()))); -} - -TargetLibraryInfoImpl &TargetLibraryAnalysis::lookupInfoImpl(const Triple &T) { - std::unique_ptr<TargetLibraryInfoImpl> &Impl = - Impls[T.normalize()]; - if (!Impl) - Impl.reset(new TargetLibraryInfoImpl(T)); - - return *Impl; + if (!BaselineInfoImpl) + BaselineInfoImpl = + TargetLibraryInfoImpl(Triple(F.getParent()->getTargetTriple())); + return TargetLibraryInfo(*BaselineInfoImpl, &F); } unsigned TargetLibraryInfoImpl::getWCharSize(const Module &M) const { @@ -1633,18 +1602,18 @@ unsigned TargetLibraryInfoImpl::getWCharSize(const Module &M) const { } TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass() - : ImmutablePass(ID), TLIImpl(), TLI(TLIImpl) { + : ImmutablePass(ID), TLA(TargetLibraryInfoImpl()) { initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); } TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass(const Triple &T) - : ImmutablePass(ID), TLIImpl(T), TLI(TLIImpl) { + : ImmutablePass(ID), TLA(TargetLibraryInfoImpl(T)) { initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); } TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass( const TargetLibraryInfoImpl &TLIImpl) - : ImmutablePass(ID), TLIImpl(TLIImpl), TLI(this->TLIImpl) { + : ImmutablePass(ID), TLA(TLIImpl) { 
initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); } @@ -1656,3 +1625,19 @@ INITIALIZE_PASS(TargetLibraryInfoWrapperPass, "targetlibinfo", char TargetLibraryInfoWrapperPass::ID = 0; void TargetLibraryInfoWrapperPass::anchor() {} + +unsigned TargetLibraryInfoImpl::getWidestVF(StringRef ScalarF) const { + ScalarF = sanitizeFunctionName(ScalarF); + if (ScalarF.empty()) + return 1; + + unsigned VF = 1; + std::vector<VecDesc>::const_iterator I = + llvm::lower_bound(VectorDescs, ScalarF, compareWithScalarFnName); + while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == ScalarF) { + if (I->VectorizationFactor > VF) + VF = I->VectorizationFactor; + ++I; + } + return VF; +} diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp index eb04c34453fb..f2c63f789d89 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -7,7 +7,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instruction.h" @@ -16,10 +19,9 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/LoopIterator.h" #include <utility> using namespace llvm; @@ -59,11 +61,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, SmallVector<BasicBlock *, 4> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), - IE = ExitingBlocks.end(); - I != IE; ++I) { - BasicBlock *BB = *I; - + for (BasicBlock *BB : ExitingBlocks) { // If we pass the updated counter back through a phi, we need to know // which latch the updated value will be coming from. if (!L->isLoopLatch(BB)) { @@ -97,13 +95,11 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, // For this to be true, we must dominate all blocks with backedges. Such // blocks are in-loop predecessors to the header block. bool NotAlways = false; - for (pred_iterator PI = pred_begin(L->getHeader()), - PIE = pred_end(L->getHeader()); - PI != PIE; ++PI) { - if (!L->contains(*PI)) + for (BasicBlock *Pred : predecessors(L->getHeader())) { + if (!L->contains(Pred)) continue; - if (!DT.dominates(*I, *PI)) { + if (!DT.dominates(BB, Pred)) { NotAlways = true; break; } @@ -127,7 +123,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, // Note that this block may not be the loop latch block, even if the loop // has a latch block. 
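For the getWidestVF helper added to TargetLibraryInfoImpl above, a brief hedged usage sketch of the intended query, e.g. from a vectorizer deciding how wide a call replacement can be; the wrapper name is invented:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/Analysis/TargetLibraryInfo.h"
  using namespace llvm;

  unsigned widestVectorVariant(const TargetLibraryInfoImpl &TLII,
                               StringRef ScalarName) {
    // Returns 1 when no vector mapping is registered for ScalarName.
    return TLII.getWidestVF(ScalarName);
  }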
- ExitBlock = *I; + ExitBlock = BB; ExitCount = EC; break; } @@ -199,9 +195,10 @@ int TargetTransformInfo::getIntrinsicCost( } unsigned -TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI, - unsigned &JTSize) const { - return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize); +TargetTransformInfo::getEstimatedNumberOfCaseClusters( + const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI) const { + return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI); } int TargetTransformInfo::getUserCost(const User *U, @@ -227,6 +224,16 @@ unsigned TargetTransformInfo::getFlatAddressSpace() const { return TTIImpl->getFlatAddressSpace(); } +bool TargetTransformInfo::collectFlatAddressOperands( + SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const { + return TTIImpl->collectFlatAddressOperands(OpIndexes, IID); +} + +bool TargetTransformInfo::rewriteIntrinsicWithAddressSpace( + IntrinsicInst *II, Value *OldV, Value *NewV) const { + return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV); +} + bool TargetTransformInfo::isLoweredToCall(const Function *F) const { return TTIImpl->isLoweredToCall(F); } @@ -237,6 +244,12 @@ bool TargetTransformInfo::isHardwareLoopProfitable( return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); } +bool TargetTransformInfo::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, + ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, + DominatorTree *DT, const LoopAccessInfo *LAI) const { + return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); +} + void TargetTransformInfo::getUnrollingPreferences( Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const { return TTIImpl->getUnrollingPreferences(L, SE, UP); @@ -283,30 +296,33 @@ bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const { return TTIImpl->shouldFavorBackedgeIndex(L); } -bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const { - return TTIImpl->isLegalMaskedStore(DataType); +bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, + MaybeAlign Alignment) const { + return TTIImpl->isLegalMaskedStore(DataType, Alignment); } -bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const { - return TTIImpl->isLegalMaskedLoad(DataType); +bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType, + MaybeAlign Alignment) const { + return TTIImpl->isLegalMaskedLoad(DataType, Alignment); } bool TargetTransformInfo::isLegalNTStore(Type *DataType, - unsigned Alignment) const { + Align Alignment) const { return TTIImpl->isLegalNTStore(DataType, Alignment); } -bool TargetTransformInfo::isLegalNTLoad(Type *DataType, - unsigned Alignment) const { +bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const { return TTIImpl->isLegalNTLoad(DataType, Alignment); } -bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const { - return TTIImpl->isLegalMaskedGather(DataType); +bool TargetTransformInfo::isLegalMaskedGather(Type *DataType, + MaybeAlign Alignment) const { + return TTIImpl->isLegalMaskedGather(DataType, Alignment); } -bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const { - return TTIImpl->isLegalMaskedScatter(DataType); +bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType, + MaybeAlign Alignment) const { + return TTIImpl->isLegalMaskedScatter(DataType, Alignment); } bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const { @@ -359,14 +375,6 @@ bool 
TargetTransformInfo::isTypeLegal(Type *Ty) const { return TTIImpl->isTypeLegal(Ty); } -unsigned TargetTransformInfo::getJumpBufAlignment() const { - return TTIImpl->getJumpBufAlignment(); -} - -unsigned TargetTransformInfo::getJumpBufSize() const { - return TTIImpl->getJumpBufSize(); -} - bool TargetTransformInfo::shouldBuildLookupTables() const { return TTIImpl->shouldBuildLookupTables(); } @@ -456,22 +464,30 @@ int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { return Cost; } -int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) const { - int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty); +int TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) const { + int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } -int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) const { - int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty); +int TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) const { + int Cost = TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } -unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { - return TTIImpl->getNumberOfRegisters(Vector); +unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const { + return TTIImpl->getNumberOfRegisters(ClassID); +} + +unsigned TargetTransformInfo::getRegisterClassForType(bool Vector, Type *Ty) const { + return TTIImpl->getRegisterClassForType(Vector, Ty); +} + +const char* TargetTransformInfo::getRegisterClassName(unsigned ClassID) const { + return TTIImpl->getRegisterClassName(ClassID); } unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { @@ -578,10 +594,10 @@ TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) { int TargetTransformInfo::getArithmeticInstrCost( unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, - OperandValueProperties Opd2PropInfo, - ArrayRef<const Value *> Args) const { - int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo, Args); + OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, + const Instruction *CxtI) const { + int Cost = TTIImpl->getArithmeticInstrCost( + Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } @@ -633,7 +649,7 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, } int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, + MaybeAlign Alignment, unsigned AddressSpace, const Instruction *I) const { assert ((I == nullptr || I->getOpcode() == Opcode) && @@ -1169,7 +1185,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { Op2VK = getOperandInfo(I->getOperand(1), Op2VP); SmallVector<const Value *, 2> Operands(I->operand_values()); return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK, - Op1VP, Op2VP, Operands); + Op1VP, Op2VP, Operands, I); } case Instruction::FNeg: { TargetTransformInfo::OperandValueKind Op1VK, Op2VK; @@ -1179,7 +1195,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) 
const { Op2VP = OP_None; SmallVector<const Value *, 2> Operands(I->operand_values()); return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK, - Op1VP, Op2VP, Operands); + Op1VP, Op2VP, Operands, I); } case Instruction::Select: { const SelectInst *SI = cast<SelectInst>(I); @@ -1195,14 +1211,14 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { const StoreInst *SI = cast<StoreInst>(I); Type *ValTy = SI->getValueOperand()->getType(); return getMemoryOpCost(I->getOpcode(), ValTy, - SI->getAlignment(), - SI->getPointerAddressSpace(), I); + MaybeAlign(SI->getAlignment()), + SI->getPointerAddressSpace(), I); } case Instruction::Load: { const LoadInst *LI = cast<LoadInst>(I); return getMemoryOpCost(I->getOpcode(), I->getType(), - LI->getAlignment(), - LI->getPointerAddressSpace(), I); + MaybeAlign(LI->getAlignment()), + LI->getPointerAddressSpace(), I); } case Instruction::ZExt: case Instruction::SExt: @@ -1276,6 +1292,8 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { return getVectorInstrCost(I->getOpcode(), IE->getType(), Idx); } + case Instruction::ExtractValue: + return 0; // Model all ExtractValue nodes as free. case Instruction::ShuffleVector: { const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); Type *Ty = Shuffle->getType(); diff --git a/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index 3b9040aa0f52..da4520066b46 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -114,6 +114,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -520,23 +521,20 @@ static const MDNode *getLeastCommonType(const MDNode *A, const MDNode *B) { } void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const { - if (Merge) + if (Merge) { N.TBAA = MDNode::getMostGenericTBAA(N.TBAA, getMetadata(LLVMContext::MD_tbaa)); - else - N.TBAA = getMetadata(LLVMContext::MD_tbaa); - - if (Merge) + N.TBAAStruct = nullptr; N.Scope = MDNode::getMostGenericAliasScope( N.Scope, getMetadata(LLVMContext::MD_alias_scope)); - else - N.Scope = getMetadata(LLVMContext::MD_alias_scope); - - if (Merge) N.NoAlias = MDNode::intersect(N.NoAlias, getMetadata(LLVMContext::MD_noalias)); - else + } else { + N.TBAA = getMetadata(LLVMContext::MD_tbaa); + N.TBAAStruct = getMetadata(LLVMContext::MD_tbaa_struct); + N.Scope = getMetadata(LLVMContext::MD_alias_scope); N.NoAlias = getMetadata(LLVMContext::MD_noalias); + } } static const MDNode *createAccessTag(const MDNode *AccessType) { diff --git a/contrib/llvm-project/llvm/lib/Analysis/TypeMetadataUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/TypeMetadataUtils.cpp index 9311dfbc6eba..072d291f3f93 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/TypeMetadataUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/TypeMetadataUtils.cpp @@ -127,3 +127,35 @@ void llvm::findDevirtualizableCallsForTypeCheckedLoad( findCallsAtConstantOffset(DevirtCalls, &HasNonCallUses, LoadedPtr, Offset->getZExtValue(), CI, DT); } + +Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M) { + if (I->getType()->isPointerTy()) { + if (Offset == 0) + return I; + return nullptr; + } + + const DataLayout &DL = M.getDataLayout(); + + if 
(auto *C = dyn_cast<ConstantStruct>(I)) { + const StructLayout *SL = DL.getStructLayout(C->getType()); + if (Offset >= SL->getSizeInBytes()) + return nullptr; + + unsigned Op = SL->getElementContainingOffset(Offset); + return getPointerAtOffset(cast<Constant>(I->getOperand(Op)), + Offset - SL->getElementOffset(Op), M); + } + if (auto *C = dyn_cast<ConstantArray>(I)) { + ArrayType *VTableTy = C->getType(); + uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType()); + + unsigned Op = Offset / ElemSize; + if (Op >= C->getNumOperands()) + return nullptr; + + return getPointerAtOffset(cast<Constant>(I->getOperand(Op)), + Offset % ElemSize, M); + } + return nullptr; +} diff --git a/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp b/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp new file mode 100644 index 000000000000..a331b95e818b --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp @@ -0,0 +1,430 @@ +//===- VFABIDemangling.cpp - Vector Function ABI demangling utilities. ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Analysis/VectorUtils.h" + +using namespace llvm; + +namespace { +/// Utilities for the Vector Function ABI name parser. + +/// Return types for the parser functions. +enum class ParseRet { + OK, // Found. + None, // Not found. + Error // Syntax error. +}; + +/// Extracts the `<isa>` information from the mangled string, and +/// sets the `ISA` accordingly. +ParseRet tryParseISA(StringRef &MangledName, VFISAKind &ISA) { + if (MangledName.empty()) + return ParseRet::Error; + + if (MangledName.startswith(VFABI::_LLVM_)) { + MangledName = MangledName.drop_front(strlen(VFABI::_LLVM_)); + ISA = VFISAKind::LLVM; + } else { + ISA = StringSwitch<VFISAKind>(MangledName.take_front(1)) + .Case("n", VFISAKind::AdvancedSIMD) + .Case("s", VFISAKind::SVE) + .Case("b", VFISAKind::SSE) + .Case("c", VFISAKind::AVX) + .Case("d", VFISAKind::AVX2) + .Case("e", VFISAKind::AVX512) + .Default(VFISAKind::Unknown); + MangledName = MangledName.drop_front(1); + } + + return ParseRet::OK; +} + +/// Extracts the `<mask>` information from the mangled string, and +/// sets `IsMasked` accordingly. The input string `MangledName` is +/// left unmodified. +ParseRet tryParseMask(StringRef &MangledName, bool &IsMasked) { + if (MangledName.consume_front("M")) { + IsMasked = true; + return ParseRet::OK; + } + + if (MangledName.consume_front("N")) { + IsMasked = false; + return ParseRet::OK; + } + + return ParseRet::Error; +} + +/// Extract the `<vlen>` information from the mangled string, and +/// sets `VF` accordingly. A `<vlen> == "x"` token is interpreted as a scalable +/// vector length. On success, the `<vlen>` token is removed from +/// the input string `ParseString`. 
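Returning to the getPointerAtOffset helper added to TypeMetadataUtils.cpp above: it recursively walks constant struct and array initializers by byte offset until it reaches a pointer. A hedged usage sketch for the typical vtable case; the helper name is invented:

  #include "llvm/Analysis/TypeMetadataUtils.h"
  #include "llvm/IR/GlobalVariable.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  Constant *functionAtVTableOffset(GlobalVariable &VTable, uint64_t ByteOffset,
                                   Module &M) {
    if (!VTable.hasInitializer())
      return nullptr;
    // Walks struct layouts and array element sizes from M's data layout and
    // returns the pointer constant stored ByteOffset bytes into the initializer.
    return getPointerAtOffset(VTable.getInitializer(), ByteOffset, M);
  }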
+/// +ParseRet tryParseVLEN(StringRef &ParseString, unsigned &VF, bool &IsScalable) { + if (ParseString.consume_front("x")) { + VF = 0; + IsScalable = true; + return ParseRet::OK; + } + + if (ParseString.consumeInteger(10, VF)) + return ParseRet::Error; + + IsScalable = false; + return ParseRet::OK; +} + +/// The function looks for the following strings at the beginning of +/// the input string `ParseString`: +/// +/// <token> <number> +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `Pos` to +/// <number>, and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +/// +/// The function expects <token> to be one of "ls", "Rs", "Us" or +/// "Ls". +ParseRet tryParseLinearTokenWithRuntimeStep(StringRef &ParseString, + VFParamKind &PKind, int &Pos, + const StringRef Token) { + if (ParseString.consume_front(Token)) { + PKind = VFABI::getVFParamKindFromString(Token); + if (ParseString.consumeInteger(10, Pos)) + return ParseRet::Error; + return ParseRet::OK; + } + + return ParseRet::None; +} + +/// The function looks for the following stringt at the beginning of +/// the input string `ParseString`: +/// +/// <token> <number> +/// +/// <token> is one of "ls", "Rs", "Us" or "Ls". +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `StepOrPos` to +/// <number>, and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +ParseRet tryParseLinearWithRuntimeStep(StringRef &ParseString, + VFParamKind &PKind, int &StepOrPos) { + ParseRet Ret; + + // "ls" <RuntimeStepPos> + Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "ls"); + if (Ret != ParseRet::None) + return Ret; + + // "Rs" <RuntimeStepPos> + Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "Rs"); + if (Ret != ParseRet::None) + return Ret; + + // "Ls" <RuntimeStepPos> + Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "Ls"); + if (Ret != ParseRet::None) + return Ret; + + // "Us" <RuntimeStepPos> + Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "Us"); + if (Ret != ParseRet::None) + return Ret; + + return ParseRet::None; +} + +/// The function looks for the following strings at the beginning of +/// the input string `ParseString`: +/// +/// <token> {"n"} <number> +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `LinearStep` to +/// <number>, and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +/// +/// The function expects <token> to be one of "l", "R", "U" or +/// "L". 
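The token parsers above all follow the same StringRef idiom: consume_front to strip a fixed token, then consumeInteger for the trailing number. A tiny self-contained sketch for the made-up token "ls2" (linear, runtime step held in argument position 2); the helper name is invented:

  #include "llvm/ADT/StringRef.h"
  using namespace llvm;

  bool parseLsToken(StringRef S, int &Pos) {
    if (!S.consume_front("ls"))
      return false;
    // consumeInteger returns true on a parse failure, hence the negation.
    return !S.consumeInteger(10, Pos); // "ls2" -> Pos == 2
  }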
+ParseRet tryParseCompileTimeLinearToken(StringRef &ParseString, + VFParamKind &PKind, int &LinearStep, + const StringRef Token) { + if (ParseString.consume_front(Token)) { + PKind = VFABI::getVFParamKindFromString(Token); + const bool Negate = ParseString.consume_front("n"); + if (ParseString.consumeInteger(10, LinearStep)) + LinearStep = 1; + if (Negate) + LinearStep *= -1; + return ParseRet::OK; + } + + return ParseRet::None; +} + +/// The function looks for the following strings at the beginning of +/// the input string `ParseString`: +/// +/// ["l" | "R" | "U" | "L"] {"n"} <number> +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `LinearStep` to +/// <number>, and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +ParseRet tryParseLinearWithCompileTimeStep(StringRef &ParseString, + VFParamKind &PKind, int &StepOrPos) { + // "l" {"n"} <CompileTimeStep> + if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "l") == + ParseRet::OK) + return ParseRet::OK; + + // "R" {"n"} <CompileTimeStep> + if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "R") == + ParseRet::OK) + return ParseRet::OK; + + // "L" {"n"} <CompileTimeStep> + if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "L") == + ParseRet::OK) + return ParseRet::OK; + + // "U" {"n"} <CompileTimeStep> + if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "U") == + ParseRet::OK) + return ParseRet::OK; + + return ParseRet::None; +} + +/// The function looks for the following strings at the beginning of +/// the input string `ParseString`: +/// +/// "u" <number> +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `Pos` to +/// <number>, and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +ParseRet tryParseUniform(StringRef &ParseString, VFParamKind &PKind, int &Pos) { + // "u" <Pos> + const char *UniformToken = "u"; + if (ParseString.consume_front(UniformToken)) { + PKind = VFABI::getVFParamKindFromString(UniformToken); + if (ParseString.consumeInteger(10, Pos)) + return ParseRet::Error; + + return ParseRet::OK; + } + return ParseRet::None; +} + +/// Looks into the <parameters> part of the mangled name in search +/// for valid paramaters at the beginning of the string +/// `ParseString`. +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `StepOrPos` +/// accordingly, and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. 
+ParseRet tryParseParameter(StringRef &ParseString, VFParamKind &PKind, + int &StepOrPos) { + if (ParseString.consume_front("v")) { + PKind = VFParamKind::Vector; + StepOrPos = 0; + return ParseRet::OK; + } + + const ParseRet HasLinearRuntime = + tryParseLinearWithRuntimeStep(ParseString, PKind, StepOrPos); + if (HasLinearRuntime != ParseRet::None) + return HasLinearRuntime; + + const ParseRet HasLinearCompileTime = + tryParseLinearWithCompileTimeStep(ParseString, PKind, StepOrPos); + if (HasLinearCompileTime != ParseRet::None) + return HasLinearCompileTime; + + const ParseRet HasUniform = tryParseUniform(ParseString, PKind, StepOrPos); + if (HasUniform != ParseRet::None) + return HasUniform; + + return ParseRet::None; +} + +/// Looks into the <parameters> part of the mangled name in search +/// of a valid 'aligned' clause. The function should be invoked +/// after parsing a parameter via `tryParseParameter`. +/// +/// On success, it removes the parsed parameter from `ParseString`, +/// sets `PKind` to the correspondent enum value, sets `StepOrPos` +/// accordingly, and return success. On a syntax error, it return a +/// parsing error. If nothing is parsed, it returns None. +ParseRet tryParseAlign(StringRef &ParseString, Align &Alignment) { + uint64_t Val; + // "a" <number> + if (ParseString.consume_front("a")) { + if (ParseString.consumeInteger(10, Val)) + return ParseRet::Error; + + if (!isPowerOf2_64(Val)) + return ParseRet::Error; + + Alignment = Align(Val); + + return ParseRet::OK; + } + + return ParseRet::None; +} +} // namespace + +// Format of the ABI name: +// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)] +Optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName) { + const StringRef OriginalName = MangledName; + // Assume there is no custom name <redirection>, and therefore the + // vector name consists of + // _ZGV<isa><mask><vlen><parameters>_<scalarname>. + StringRef VectorName = MangledName; + + // Parse the fixed size part of the manled name + if (!MangledName.consume_front("_ZGV")) + return None; + + // Extract ISA. An unknow ISA is also supported, so we accept all + // values. + VFISAKind ISA; + if (tryParseISA(MangledName, ISA) != ParseRet::OK) + return None; + + // Extract <mask>. + bool IsMasked; + if (tryParseMask(MangledName, IsMasked) != ParseRet::OK) + return None; + + // Parse the variable size, starting from <vlen>. + unsigned VF; + bool IsScalable; + if (tryParseVLEN(MangledName, VF, IsScalable) != ParseRet::OK) + return None; + + // Parse the <parameters>. + ParseRet ParamFound; + SmallVector<VFParameter, 8> Parameters; + do { + const unsigned ParameterPos = Parameters.size(); + VFParamKind PKind; + int StepOrPos; + ParamFound = tryParseParameter(MangledName, PKind, StepOrPos); + + // Bail off if there is a parsing error in the parsing of the parameter. + if (ParamFound == ParseRet::Error) + return None; + + if (ParamFound == ParseRet::OK) { + Align Alignment; + // Look for the alignment token "a <number>". + const ParseRet AlignFound = tryParseAlign(MangledName, Alignment); + // Bail off if there is a syntax error in the align token. + if (AlignFound == ParseRet::Error) + return None; + + // Add the parameter. + Parameters.push_back({ParameterPos, PKind, StepOrPos, Alignment}); + } + } while (ParamFound == ParseRet::OK); + + // A valid MangledName must have at least one valid entry in the + // <parameters>. 
+ if (Parameters.empty()) + return None; + + // Check for the <scalarname> and the optional <redirection>, which + // are separated from the prefix with "_" + if (!MangledName.consume_front("_")) + return None; + + // The rest of the string must be in the format: + // <scalarname>[(<redirection>)] + const StringRef ScalarName = + MangledName.take_while([](char In) { return In != '('; }); + + if (ScalarName.empty()) + return None; + + // Reduce MangledName to [(<redirection>)]. + MangledName = MangledName.ltrim(ScalarName); + // Find the optional custom name redirection. + if (MangledName.consume_front("(")) { + if (!MangledName.consume_back(")")) + return None; + // Update the vector variant with the one specified by the user. + VectorName = MangledName; + // If the vector name is missing, bail out. + if (VectorName.empty()) + return None; + } + + // LLVM internal mapping via the TargetLibraryInfo (TLI) must be + // redirected to an existing name. + if (ISA == VFISAKind::LLVM && VectorName == OriginalName) + return None; + + // When <mask> is "M", we need to add a parameter that is used as + // global predicate for the function. + if (IsMasked) { + const unsigned Pos = Parameters.size(); + Parameters.push_back({Pos, VFParamKind::GlobalPredicate}); + } + + // Asserts for parameters of type `VFParamKind::GlobalPredicate`, as + // prescribed by the Vector Function ABI specifications supported by + // this parser: + // 1. Uniqueness. + // 2. Must be the last in the parameter list. + const auto NGlobalPreds = std::count_if( + Parameters.begin(), Parameters.end(), [](const VFParameter PK) { + return PK.ParamKind == VFParamKind::GlobalPredicate; + }); + assert(NGlobalPreds < 2 && "Cannot have more than one global predicate."); + if (NGlobalPreds) + assert(Parameters.back().ParamKind == VFParamKind::GlobalPredicate && + "The global predicate must be the last parameter"); + + const VFShape Shape({VF, IsScalable, Parameters}); + return VFInfo({Shape, ScalarName, VectorName, ISA}); +} + +VFParamKind VFABI::getVFParamKindFromString(const StringRef Token) { + const VFParamKind ParamKind = StringSwitch<VFParamKind>(Token) + .Case("v", VFParamKind::Vector) + .Case("l", VFParamKind::OMP_Linear) + .Case("R", VFParamKind::OMP_LinearRef) + .Case("L", VFParamKind::OMP_LinearVal) + .Case("U", VFParamKind::OMP_LinearUVal) + .Case("ls", VFParamKind::OMP_LinearPos) + .Case("Ls", VFParamKind::OMP_LinearValPos) + .Case("Rs", VFParamKind::OMP_LinearRefPos) + .Case("Us", VFParamKind::OMP_LinearUValPos) + .Case("u", VFParamKind::OMP_Uniform) + .Default(VFParamKind::Unknown); + + if (ParamKind != VFParamKind::Unknown) + return ParamKind; + + // This function should never be invoked with an invalid input. 
+ llvm_unreachable("This fuction should be invoken only on parameters" + " that have a textual representation in the mangled name" + " of the Vector Function ABI"); +} diff --git a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp index c70906dcc629..ad6765e2514b 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp @@ -51,6 +51,8 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -88,7 +90,7 @@ static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { if (unsigned BitWidth = Ty->getScalarSizeInBits()) return BitWidth; - return DL.getIndexTypeSizeInBits(Ty); + return DL.getPointerTypeSizeInBits(Ty); } namespace { @@ -558,17 +560,89 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv, return true; } - // The context comes first, but they're both in the same block. Make sure - // there is nothing in between that might interrupt the control flow. - for (BasicBlock::const_iterator I = - std::next(BasicBlock::const_iterator(CxtI)), IE(Inv); - I != IE; ++I) - if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I)) + // Don't let an assume affect itself - this would cause the problems + // `isEphemeralValueOf` is trying to prevent, and it would also make + // the loop below go out of bounds. + if (Inv == CxtI) + return false; + + // The context comes first, but they're both in the same block. + // Make sure there is nothing in between that might interrupt + // the control flow, not even CxtI itself. + for (BasicBlock::const_iterator I(CxtI), IE(Inv); I != IE; ++I) + if (!isGuaranteedToTransferExecutionToSuccessor(&*I)) return false; return !isEphemeralValueOf(Inv, CxtI); } +static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) { + // Use of assumptions is context-sensitive. If we don't have a context, we + // cannot use them! + if (!Q.AC || !Q.CxtI) + return false; + + // Note that the patterns below need to be kept in sync with the code + // in AssumptionCache::updateAffectedValues. + + auto CmpExcludesZero = [V](ICmpInst *Cmp) { + auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V))); + + Value *RHS; + CmpInst::Predicate Pred; + if (!match(Cmp, m_c_ICmp(Pred, m_V, m_Value(RHS)))) + return false; + // Canonicalize 'v' to be on the LHS of the comparison. + if (Cmp->getOperand(1) != RHS) + Pred = CmpInst::getSwappedPredicate(Pred); + + // assume(v u> y) -> assume(v != 0) + if (Pred == ICmpInst::ICMP_UGT) + return true; + + // assume(v != 0) + // We special-case this one to ensure that we handle `assume(v != null)`. + if (Pred == ICmpInst::ICMP_NE) + return match(RHS, m_Zero()); + + // All other predicates - rely on generic ConstantRange handling. 
+ ConstantInt *CI; + if (!match(RHS, m_ConstantInt(CI))) + return false; + ConstantRange RHSRange(CI->getValue()); + ConstantRange TrueValues = + ConstantRange::makeAllowedICmpRegion(Pred, RHSRange); + return !TrueValues.contains(APInt::getNullValue(CI->getBitWidth())); + }; + + for (auto &AssumeVH : Q.AC->assumptionsFor(V)) { + if (!AssumeVH) + continue; + CallInst *I = cast<CallInst>(AssumeVH); + assert(I->getFunction() == Q.CxtI->getFunction() && + "Got assumption for the wrong function!"); + if (Q.isExcluded(I)) + continue; + + // Warning: This loop can end up being somewhat performance sensitive. + // We're running this loop for once for each value queried resulting in a + // runtime of ~O(#assumes * #values). + + assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && + "must be an assume intrinsic"); + + Value *Arg = I->getArgOperand(0); + ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg); + if (!Cmp) + continue; + + if (CmpExcludesZero(Cmp) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) + return true; + } + + return false; +} + static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, unsigned Depth, const Query &Q) { // Use of assumptions is context-sensitive. If we don't have a context, we @@ -909,7 +983,7 @@ static void computeKnownBitsFromShiftOperator( // If the shift amount could be greater than or equal to the bit-width of the // LHS, the value could be poison, but bail out because the check below is // expensive. TODO: Should we just carry on? - if ((~Known.Zero).uge(BitWidth)) { + if (Known.getMaxValue().uge(BitWidth)) { Known.resetAll(); return; } @@ -1049,7 +1123,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, break; } case Instruction::Select: { - const Value *LHS, *RHS; + const Value *LHS = nullptr, *RHS = nullptr; SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor; if (SelectPatternResult::isMinOrMax(SPF)) { computeKnownBits(RHS, Known, Depth + 1, Q); @@ -1095,7 +1169,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, // RHS from matchSelectPattern returns the negation part of abs pattern. // If the negate has an NSW flag we can assume the sign bit of the result // will be 0 because that makes abs(INT_MIN) undefined. - if (Q.IIQ.hasNoSignedWrap(cast<Instruction>(RHS))) + if (match(RHS, m_Neg(m_Specific(LHS))) && + Q.IIQ.hasNoSignedWrap(cast<Instruction>(RHS))) MaxHighZeros = 1; } @@ -1128,7 +1203,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, // which fall through here. Type *ScalarTy = SrcTy->getScalarType(); SrcBitWidth = ScalarTy->isPointerTy() ? - Q.DL.getIndexTypeSizeInBits(ScalarTy) : + Q.DL.getPointerTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy); assert(SrcBitWidth && "SrcBitWidth can't be zero"); @@ -1346,6 +1421,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, for (unsigned i = 0; i != 2; ++i) { Value *L = P->getIncomingValue(i); Value *R = P->getIncomingValue(!i); + Instruction *RInst = P->getIncomingBlock(!i)->getTerminator(); + Instruction *LInst = P->getIncomingBlock(i)->getTerminator(); Operator *LU = dyn_cast<Operator>(L); if (!LU) continue; @@ -1366,14 +1443,23 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, else if (LR == I) L = LL; else - break; + continue; // Check for recurrence with L and R flipped. + + // Change the context instruction to the "edge" that flows into the + // phi. 
This is important because that is where the value is actually + // "evaluated" even though it is used later somewhere else. (see also + // D69571). + Query RecQ = Q; + // Ok, we have a PHI of the form L op= R. Check for low // zero bits. - computeKnownBits(R, Known2, Depth + 1, Q); + RecQ.CxtI = RInst; + computeKnownBits(R, Known2, Depth + 1, RecQ); // We need to take the minimum number of known bits KnownBits Known3(Known); - computeKnownBits(L, Known3, Depth + 1, Q); + RecQ.CxtI = LInst; + computeKnownBits(L, Known3, Depth + 1, RecQ); Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(), Known3.countMinTrailingZeros())); @@ -1429,14 +1515,22 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, Known.Zero.setAllBits(); Known.One.setAllBits(); - for (Value *IncValue : P->incoming_values()) { + for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) { + Value *IncValue = P->getIncomingValue(u); // Skip direct self references. if (IncValue == P) continue; + // Change the context instruction to the "edge" that flows into the + // phi. This is important because that is where the value is actually + // "evaluated" even though it is used later somewhere else. (see also + // D69571). + Query RecQ = Q; + RecQ.CxtI = P->getIncomingBlock(u)->getTerminator(); + Known2 = KnownBits(BitWidth); // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. - computeKnownBits(IncValue, Known2, MaxDepth - 1, Q); + computeKnownBits(IncValue, Known2, MaxDepth - 1, RecQ); Known.Zero &= Known2.Zero; Known.One &= Known2.One; // If all bits have been ruled out, there's no need to check @@ -1636,7 +1730,7 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, Type *ScalarTy = V->getType()->getScalarType(); unsigned ExpectedWidth = ScalarTy->isPointerTy() ? - Q.DL.getIndexTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy); + Q.DL.getPointerTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy); assert(ExpectedWidth == BitWidth && "V and Known should have same BitWidth"); (void)BitWidth; (void)ExpectedWidth; @@ -1714,9 +1808,9 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, // Aligned pointers have trailing zeros - refine Known.Zero set if (V->getType()->isPointerTy()) { - unsigned Align = V->getPointerAlignment(Q.DL); + const MaybeAlign Align = V->getPointerAlignment(Q.DL); if (Align) - Known.Zero.setLowBits(countTrailingZeros(Align)); + Known.Zero.setLowBits(countTrailingZeros(Align->value())); } // computeKnownBitsFromAssume strictly refines Known. @@ -1895,8 +1989,8 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth, static bool isKnownNonNullFromDominatingCondition(const Value *V, const Instruction *CtxI, const DominatorTree *DT) { - assert(V->getType()->isPointerTy() && "V must be pointer type"); - assert(!isa<ConstantData>(V) && "Did not expect ConstantPointerNull"); + if (isa<Constant>(V)) + return false; if (!CtxI || !DT) return false; @@ -1917,6 +2011,15 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, Arg.hasNonNullAttr() && DT->dominates(CS.getInstruction(), CtxI)) return true; + // If the value is used as a load/store, then the pointer must be non null. 
+ if (V == getLoadStorePointerOperand(U)) { + const Instruction *I = cast<Instruction>(U); + if (!NullPointerIsDefined(I->getFunction(), + V->getType()->getPointerAddressSpace()) && + DT->dominates(I, CtxI)) + return true; + } + // Consider only compare instructions uniquely controlling a branch CmpInst::Predicate Pred; if (!match(const_cast<User *>(U), @@ -2043,6 +2146,9 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { } } + if (isKnownNonZeroFromAssume(V, Q)) + return true; + // Some of the tests below are recursive, so bail out if we hit the limit. if (Depth++ >= MaxDepth) return false; @@ -2066,17 +2172,16 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { if (const auto *Call = dyn_cast<CallBase>(V)) { if (Call->isReturnNonNull()) return true; - if (const auto *RP = getArgumentAliasingToReturnedPointer(Call)) + if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) return isKnownNonZero(RP, Depth, Q); } } + if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT)) + return true; // Check for recursive pointer simplifications. if (V->getType()->isPointerTy()) { - if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT)) - return true; - // Look through bitcast operations, GEPs, and int2ptr instructions as they // do not alter the value, or at least not the nullness property of the // value, e.g., int2ptr is allowed to zero/sign extend the value. @@ -2300,7 +2405,7 @@ static bool isSignedMinMaxClamp(const Value *Select, const Value *&In, cast<Operator>(Select)->getOpcode() == Instruction::Select && "Input should be a Select!"); - const Value *LHS, *RHS, *LHS2, *RHS2; + const Value *LHS = nullptr, *RHS = nullptr; SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor; if (SPF != SPF_SMAX && SPF != SPF_SMIN) return false; @@ -2308,6 +2413,7 @@ static bool isSignedMinMaxClamp(const Value *Select, const Value *&In, if (!match(RHS, m_APInt(CLow))) return false; + const Value *LHS2 = nullptr, *RHS2 = nullptr; SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor; if (getInverseMinMaxFlavor(SPF) != SPF2) return false; @@ -2372,7 +2478,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, Type *ScalarTy = V->getType()->getScalarType(); unsigned TyBits = ScalarTy->isPointerTy() ? - Q.DL.getIndexTypeSizeInBits(ScalarTy) : + Q.DL.getPointerTypeSizeInBits(ScalarTy) : Q.DL.getTypeSizeInBits(ScalarTy); unsigned Tmp, Tmp2; @@ -2384,253 +2490,256 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, if (Depth == MaxDepth) return 1; // Limit search depth. - const Operator *U = dyn_cast<Operator>(V); - switch (Operator::getOpcode(V)) { - default: break; - case Instruction::SExt: - Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); - return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp; + if (auto *U = dyn_cast<Operator>(V)) { + switch (Operator::getOpcode(V)) { + default: break; + case Instruction::SExt: + Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); + return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp; - case Instruction::SDiv: { - const APInt *Denominator; - // sdiv X, C -> adds log(C) sign bits. - if (match(U->getOperand(1), m_APInt(Denominator))) { + case Instruction::SDiv: { + const APInt *Denominator; + // sdiv X, C -> adds log(C) sign bits. + if (match(U->getOperand(1), m_APInt(Denominator))) { - // Ignore non-positive denominator. 
- if (!Denominator->isStrictlyPositive()) - break; + // Ignore non-positive denominator. + if (!Denominator->isStrictlyPositive()) + break; - // Calculate the incoming numerator bits. - unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + // Calculate the incoming numerator bits. + unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - // Add floor(log(C)) bits to the numerator bits. - return std::min(TyBits, NumBits + Denominator->logBase2()); + // Add floor(log(C)) bits to the numerator bits. + return std::min(TyBits, NumBits + Denominator->logBase2()); + } + break; } - break; - } - case Instruction::SRem: { - const APInt *Denominator; - // srem X, C -> we know that the result is within [-C+1,C) when C is a - // positive constant. This let us put a lower bound on the number of sign - // bits. - if (match(U->getOperand(1), m_APInt(Denominator))) { - - // Ignore non-positive denominator. - if (!Denominator->isStrictlyPositive()) - break; + case Instruction::SRem: { + const APInt *Denominator; + // srem X, C -> we know that the result is within [-C+1,C) when C is a + // positive constant. This let us put a lower bound on the number of sign + // bits. + if (match(U->getOperand(1), m_APInt(Denominator))) { - // Calculate the incoming numerator bits. SRem by a positive constant - // can't lower the number of sign bits. - unsigned NumrBits = - ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - - // Calculate the leading sign bit constraints by examining the - // denominator. Given that the denominator is positive, there are two - // cases: - // - // 1. the numerator is positive. The result range is [0,C) and [0,C) u< - // (1 << ceilLogBase2(C)). - // - // 2. the numerator is negative. Then the result range is (-C,0] and - // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)). - // - // Thus a lower bound on the number of sign bits is `TyBits - - // ceilLogBase2(C)`. + // Ignore non-positive denominator. + if (!Denominator->isStrictlyPositive()) + break; - unsigned ResBits = TyBits - Denominator->ceilLogBase2(); - return std::max(NumrBits, ResBits); + // Calculate the incoming numerator bits. SRem by a positive constant + // can't lower the number of sign bits. + unsigned NumrBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + + // Calculate the leading sign bit constraints by examining the + // denominator. Given that the denominator is positive, there are two + // cases: + // + // 1. the numerator is positive. The result range is [0,C) and [0,C) u< + // (1 << ceilLogBase2(C)). + // + // 2. the numerator is negative. Then the result range is (-C,0] and + // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)). + // + // Thus a lower bound on the number of sign bits is `TyBits - + // ceilLogBase2(C)`. + + unsigned ResBits = TyBits - Denominator->ceilLogBase2(); + return std::max(NumrBits, ResBits); + } + break; } - break; - } - case Instruction::AShr: { - Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - // ashr X, C -> adds C sign bits. Vectors too. - const APInt *ShAmt; - if (match(U->getOperand(1), m_APInt(ShAmt))) { - if (ShAmt->uge(TyBits)) - break; // Bad shift. - unsigned ShAmtLimited = ShAmt->getZExtValue(); - Tmp += ShAmtLimited; - if (Tmp > TyBits) Tmp = TyBits; - } - return Tmp; - } - case Instruction::Shl: { - const APInt *ShAmt; - if (match(U->getOperand(1), m_APInt(ShAmt))) { - // shl destroys sign bits. 
+ case Instruction::AShr: { Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (ShAmt->uge(TyBits) || // Bad shift. - ShAmt->uge(Tmp)) break; // Shifted all sign bits out. - Tmp2 = ShAmt->getZExtValue(); - return Tmp - Tmp2; + // ashr X, C -> adds C sign bits. Vectors too. + const APInt *ShAmt; + if (match(U->getOperand(1), m_APInt(ShAmt))) { + if (ShAmt->uge(TyBits)) + break; // Bad shift. + unsigned ShAmtLimited = ShAmt->getZExtValue(); + Tmp += ShAmtLimited; + if (Tmp > TyBits) Tmp = TyBits; + } + return Tmp; } - break; - } - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: // NOT is handled here. - // Logical binary ops preserve the number of sign bits at the worst. - Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (Tmp != 1) { - Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - FirstAnswer = std::min(Tmp, Tmp2); - // We computed what we know about the sign bits as our first - // answer. Now proceed to the generic code that uses - // computeKnownBits, and pick whichever answer is better. + case Instruction::Shl: { + const APInt *ShAmt; + if (match(U->getOperand(1), m_APInt(ShAmt))) { + // shl destroys sign bits. + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (ShAmt->uge(TyBits) || // Bad shift. + ShAmt->uge(Tmp)) break; // Shifted all sign bits out. + Tmp2 = ShAmt->getZExtValue(); + return Tmp - Tmp2; + } + break; } - break; - - case Instruction::Select: { - // If we have a clamp pattern, we know that the number of sign bits will be - // the minimum of the clamp min/max range. - const Value *X; - const APInt *CLow, *CHigh; - if (isSignedMinMaxClamp(U, X, CLow, CHigh)) - return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits()); - - Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (Tmp == 1) break; - Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q); - return std::min(Tmp, Tmp2); - } - - case Instruction::Add: - // Add can have at most one carry bit. Thus we know that the output - // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (Tmp == 1) break; - - // Special case decrementing a value (ADD X, -1): - if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1))) - if (CRHS->isAllOnesValue()) { - KnownBits Known(TyBits); - computeKnownBits(U->getOperand(0), Known, Depth + 1, Q); - - // If the input is known to be 0 or 1, the output is 0/-1, which is all - // sign bits set. - if ((Known.Zero | 1).isAllOnesValue()) - return TyBits; - - // If we are subtracting one from a positive number, there is no carry - // out of the result. - if (Known.isNonNegative()) - return Tmp; + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: // NOT is handled here. + // Logical binary ops preserve the number of sign bits at the worst. + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (Tmp != 1) { + Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + FirstAnswer = std::min(Tmp, Tmp2); + // We computed what we know about the sign bits as our first + // answer. Now proceed to the generic code that uses + // computeKnownBits, and pick whichever answer is better. } + break; - Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (Tmp2 == 1) break; - return std::min(Tmp, Tmp2)-1; + case Instruction::Select: { + // If we have a clamp pattern, we know that the number of sign bits will + // be the minimum of the clamp min/max range. 
+ const Value *X; + const APInt *CLow, *CHigh; + if (isSignedMinMaxClamp(U, X, CLow, CHigh)) + return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits()); + + Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + if (Tmp == 1) break; + Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q); + return std::min(Tmp, Tmp2); + } - case Instruction::Sub: - Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (Tmp2 == 1) break; - - // Handle NEG. - if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0))) - if (CLHS->isNullValue()) { - KnownBits Known(TyBits); - computeKnownBits(U->getOperand(1), Known, Depth + 1, Q); - // If the input is known to be 0 or 1, the output is 0/-1, which is all - // sign bits set. - if ((Known.Zero | 1).isAllOnesValue()) - return TyBits; - - // If the input is known to be positive (the sign bit is known clear), - // the output of the NEG has the same number of sign bits as the input. - if (Known.isNonNegative()) - return Tmp2; - - // Otherwise, we treat this like a SUB. - } + case Instruction::Add: + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (Tmp == 1) break; + + // Special case decrementing a value (ADD X, -1): + if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1))) + if (CRHS->isAllOnesValue()) { + KnownBits Known(TyBits); + computeKnownBits(U->getOperand(0), Known, Depth + 1, Q); + + // If the input is known to be 0 or 1, the output is 0/-1, which is + // all sign bits set. + if ((Known.Zero | 1).isAllOnesValue()) + return TyBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (Known.isNonNegative()) + return Tmp; + } - // Sub can have at most one carry bit. Thus we know that the output - // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (Tmp == 1) break; - return std::min(Tmp, Tmp2)-1; + Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + if (Tmp2 == 1) break; + return std::min(Tmp, Tmp2) - 1; - case Instruction::Mul: { - // The output of the Mul can be at most twice the valid bits in the inputs. - unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - if (SignBitsOp0 == 1) break; - unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); - if (SignBitsOp1 == 1) break; - unsigned OutValidBits = - (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1); - return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1; - } + case Instruction::Sub: + Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + if (Tmp2 == 1) break; + + // Handle NEG. + if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0))) + if (CLHS->isNullValue()) { + KnownBits Known(TyBits); + computeKnownBits(U->getOperand(1), Known, Depth + 1, Q); + // If the input is known to be 0 or 1, the output is 0/-1, which is + // all sign bits set. + if ((Known.Zero | 1).isAllOnesValue()) + return TyBits; + + // If the input is known to be positive (the sign bit is known clear), + // the output of the NEG has the same number of sign bits as the + // input. + if (Known.isNonNegative()) + return Tmp2; + + // Otherwise, we treat this like a SUB. + } - case Instruction::PHI: { - const PHINode *PN = cast<PHINode>(U); - unsigned NumIncomingValues = PN->getNumIncomingValues(); - // Don't analyze large in-degree PHIs. 
- if (NumIncomingValues > 4) break; - // Unreachable blocks may have zero-operand PHI nodes. - if (NumIncomingValues == 0) break; - - // Take the minimum of all incoming values. This can't infinitely loop - // because of our depth threshold. - Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q); - for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) { - if (Tmp == 1) return Tmp; - Tmp = std::min( - Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q)); + // Sub can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (Tmp == 1) break; + return std::min(Tmp, Tmp2) - 1; + + case Instruction::Mul: { + // The output of the Mul can be at most twice the valid bits in the + // inputs. + unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (SignBitsOp0 == 1) break; + unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q); + if (SignBitsOp1 == 1) break; + unsigned OutValidBits = + (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1); + return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1; } - return Tmp; - } - case Instruction::Trunc: - // FIXME: it's tricky to do anything useful for this, but it is an important - // case for targets like X86. - break; + case Instruction::PHI: { + const PHINode *PN = cast<PHINode>(U); + unsigned NumIncomingValues = PN->getNumIncomingValues(); + // Don't analyze large in-degree PHIs. + if (NumIncomingValues > 4) break; + // Unreachable blocks may have zero-operand PHI nodes. + if (NumIncomingValues == 0) break; + + // Take the minimum of all incoming values. This can't infinitely loop + // because of our depth threshold. + Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q); + for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) { + if (Tmp == 1) return Tmp; + Tmp = std::min( + Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q)); + } + return Tmp; + } - case Instruction::ExtractElement: - // Look through extract element. At the moment we keep this simple and skip - // tracking the specific element. But at least we might find information - // valid for all elements of the vector (for example if vector is sign - // extended, shifted, etc). - return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); - - case Instruction::ShuffleVector: { - // TODO: This is copied almost directly from the SelectionDAG version of - // ComputeNumSignBits. It would be better if we could share common - // code. If not, make sure that changes are translated to the DAG. - - // Collect the minimum number of sign bits that are shared by every vector - // element referenced by the shuffle. - auto *Shuf = cast<ShuffleVectorInst>(U); - int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements(); - int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements(); - APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); - for (int i = 0; i != NumMaskElts; ++i) { - int M = Shuf->getMaskValue(i); - assert(M < NumElts * 2 && "Invalid shuffle mask constant"); - // For undef elements, we don't know anything about the common state of - // the shuffle result. - if (M == -1) - return 1; - if (M < NumElts) - DemandedLHS.setBit(M % NumElts); - else - DemandedRHS.setBit(M % NumElts); + case Instruction::Trunc: + // FIXME: it's tricky to do anything useful for this, but it is an + // important case for targets like X86. 
+ break; + + case Instruction::ExtractElement: + // Look through extract element. At the moment we keep this simple and + // skip tracking the specific element. But at least we might find + // information valid for all elements of the vector (for example if vector + // is sign extended, shifted, etc). + return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + + case Instruction::ShuffleVector: { + // TODO: This is copied almost directly from the SelectionDAG version of + // ComputeNumSignBits. It would be better if we could share common + // code. If not, make sure that changes are translated to the DAG. + + // Collect the minimum number of sign bits that are shared by every vector + // element referenced by the shuffle. + auto *Shuf = cast<ShuffleVectorInst>(U); + int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements(); + int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements(); + APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); + for (int i = 0; i != NumMaskElts; ++i) { + int M = Shuf->getMaskValue(i); + assert(M < NumElts * 2 && "Invalid shuffle mask constant"); + // For undef elements, we don't know anything about the common state of + // the shuffle result. + if (M == -1) + return 1; + if (M < NumElts) + DemandedLHS.setBit(M % NumElts); + else + DemandedRHS.setBit(M % NumElts); + } + Tmp = std::numeric_limits<unsigned>::max(); + if (!!DemandedLHS) + Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q); + if (!!DemandedRHS) { + Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q); + Tmp = std::min(Tmp, Tmp2); + } + // If we don't know anything, early out and try computeKnownBits + // fall-back. + if (Tmp == 1) + break; + assert(Tmp <= V->getType()->getScalarSizeInBits() && + "Failed to determine minimum sign bits"); + return Tmp; } - Tmp = std::numeric_limits<unsigned>::max(); - if (!!DemandedLHS) - Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q); - if (!!DemandedRHS) { - Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q); - Tmp = std::min(Tmp, Tmp2); } - // If we don't know anything, early out and try computeKnownBits fall-back. - if (Tmp == 1) - break; - assert(Tmp <= V->getType()->getScalarSizeInBits() && - "Failed to determine minimum sign bits"); - return Tmp; - } } // Finally, if we can prove that the top bits of the result are 0's or 1's, @@ -2655,8 +2764,6 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, /// through SExt instructions only if LookThroughSExt is true. bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, bool LookThroughSExt, unsigned Depth) { - const unsigned MaxDepth = 6; - assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); assert(V->getType()->isIntegerTy() && "Not integer or pointer type!"); @@ -3086,6 +3193,58 @@ bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) { return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0); } +bool llvm::isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI, + unsigned Depth) { + assert(V->getType()->isFPOrFPVectorTy() && "Querying for Inf on non-FP type"); + + // If we're told that infinities won't happen, assume they won't. + if (auto *FPMathOp = dyn_cast<FPMathOperator>(V)) + if (FPMathOp->hasNoInfs()) + return true; + + // Handle scalar constants. 
+ if (auto *CFP = dyn_cast<ConstantFP>(V)) + return !CFP->isInfinity(); + + if (Depth == MaxDepth) + return false; + + if (auto *Inst = dyn_cast<Instruction>(V)) { + switch (Inst->getOpcode()) { + case Instruction::Select: { + return isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1) && + isKnownNeverInfinity(Inst->getOperand(2), TLI, Depth + 1); + } + case Instruction::UIToFP: + // If the input type fits into the floating type the result is finite. + return ilogb(APFloat::getLargest( + Inst->getType()->getScalarType()->getFltSemantics())) >= + (int)Inst->getOperand(0)->getType()->getScalarSizeInBits(); + default: + break; + } + } + + // Bail out for constant expressions, but try to handle vector constants. + if (!V->getType()->isVectorTy() || !isa<Constant>(V)) + return false; + + // For vectors, verify that each element is not infinity. + unsigned NumElts = V->getType()->getVectorNumElements(); + for (unsigned i = 0; i != NumElts; ++i) { + Constant *Elt = cast<Constant>(V)->getAggregateElement(i); + if (!Elt) + return false; + if (isa<UndefValue>(Elt)) + continue; + auto *CElt = dyn_cast<ConstantFP>(Elt); + if (!CElt || CElt->isInfinity()) + return false; + } + // All elements were confirmed non-infinity or undefined. + return true; +} + bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, unsigned Depth) { assert(V->getType()->isFPOrFPVectorTy() && "Querying for NaN on non-FP type"); @@ -3105,13 +3264,26 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, if (auto *Inst = dyn_cast<Instruction>(V)) { switch (Inst->getOpcode()) { case Instruction::FAdd: - case Instruction::FMul: case Instruction::FSub: + // Adding positive and negative infinity produces NaN. + return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1) && + isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) && + (isKnownNeverInfinity(Inst->getOperand(0), TLI, Depth + 1) || + isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1)); + + case Instruction::FMul: + // Zero multiplied with infinity produces NaN. + // FIXME: If neither side can be zero fmul never produces NaN. + return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1) && + isKnownNeverInfinity(Inst->getOperand(0), TLI, Depth + 1) && + isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) && + isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1); + case Instruction::FDiv: - case Instruction::FRem: { - // TODO: Need isKnownNeverInfinity + case Instruction::FRem: + // FIXME: Only 0/0, Inf/Inf, Inf REM x and x REM 0 produce NaN. return false; - } + case Instruction::Select: { return isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) && isKnownNeverNaN(Inst->getOperand(2), TLI, Depth + 1); @@ -3651,23 +3823,28 @@ uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) { return Len == ~0ULL ? 1 : Len; } -const Value *llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call) { +const Value * +llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call, + bool MustPreserveNullness) { assert(Call && "getArgumentAliasingToReturnedPointer only works on nonnull calls"); if (const Value *RV = Call->getReturnedArgOperand()) return RV; // This can be used only as a aliasing property. 
- if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call)) + if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( + Call, MustPreserveNullness)) return Call->getArgOperand(0); return nullptr; } bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( - const CallBase *Call) { + const CallBase *Call, bool MustPreserveNullness) { return Call->getIntrinsicID() == Intrinsic::launder_invariant_group || Call->getIntrinsicID() == Intrinsic::strip_invariant_group || Call->getIntrinsicID() == Intrinsic::aarch64_irg || - Call->getIntrinsicID() == Intrinsic::aarch64_tagp; + Call->getIntrinsicID() == Intrinsic::aarch64_tagp || + (!MustPreserveNullness && + Call->getIntrinsicID() == Intrinsic::ptrmask); } /// \p PN defines a loop-variant pointer to an object. Check if the @@ -3725,7 +3902,7 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, // because it should be in sync with CaptureTracking. Not using it may // cause weird miscompilations where 2 aliasing pointers are assumed to // noalias. - if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) { + if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) { V = RP; continue; } @@ -3865,6 +4042,18 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { return true; } +bool llvm::mustSuppressSpeculation(const LoadInst &LI) { + if (!LI.isUnordered()) + return true; + const Function &F = *LI.getFunction(); + // Speculative load may create a race that did not exist in the source. + return F.hasFnAttribute(Attribute::SanitizeThread) || + // Speculative load may load data from dirty regions. + F.hasFnAttribute(Attribute::SanitizeAddress) || + F.hasFnAttribute(Attribute::SanitizeHWAddress); +} + + bool llvm::isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI, const DominatorTree *DT) { @@ -3909,17 +4098,12 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, } case Instruction::Load: { const LoadInst *LI = cast<LoadInst>(Inst); - if (!LI->isUnordered() || - // Speculative load may create a race that did not exist in the source. - LI->getFunction()->hasFnAttribute(Attribute::SanitizeThread) || - // Speculative load may load data from dirty regions. - LI->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || - LI->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress)) + if (mustSuppressSpeculation(*LI)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); - return isDereferenceableAndAlignedPointer(LI->getPointerOperand(), - LI->getType(), LI->getAlignment(), - DL, CtxI, DT); + return isDereferenceableAndAlignedPointer( + LI->getPointerOperand(), LI->getType(), MaybeAlign(LI->getAlignment()), + DL, CtxI, DT); } case Instruction::Call: { auto *CI = cast<const CallInst>(Inst); @@ -4201,6 +4385,20 @@ bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO, return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch); } +bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V) { + // If the value is a freeze instruction, then it can never + // be undef or poison. + if (isa<FreezeInst>(V)) + return true; + // TODO: Some instructions are guaranteed to return neither undef + // nor poison if their arguments are not poison/undef. + + // TODO: Deal with other Constant subclasses. 
+ if (isa<ConstantInt>(V) || isa<GlobalVariable>(V)) + return true; + + return false; +} OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add, const DataLayout &DL, @@ -4221,22 +4419,9 @@ OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS, } bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { - // A memory operation returns normally if it isn't volatile. A volatile - // operation is allowed to trap. - // - // An atomic operation isn't guaranteed to return in a reasonable amount of - // time because it's possible for another thread to interfere with it for an + // Note: An atomic operation isn't guaranteed to return in a reasonable amount + // of time because it's possible for another thread to interfere with it for an // arbitrary length of time, but programs aren't allowed to rely on that. - if (const LoadInst *LI = dyn_cast<LoadInst>(I)) - return !LI->isVolatile(); - if (const StoreInst *SI = dyn_cast<StoreInst>(I)) - return !SI->isVolatile(); - if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I)) - return !CXI->isVolatile(); - if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) - return !RMWI->isVolatile(); - if (const MemIntrinsic *MII = dyn_cast<MemIntrinsic>(I)) - return !MII->isVolatile(); // If there is no successor, then execution can't transfer to it. if (const auto *CRI = dyn_cast<CleanupReturnInst>(I)) @@ -4277,10 +4462,7 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { // FIXME: This isn't aggressive enough; a call which only writes to a global // is guaranteed to return. - return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() || - match(I, m_Intrinsic<Intrinsic::assume>()) || - match(I, m_Intrinsic<Intrinsic::sideeffect>()) || - match(I, m_Intrinsic<Intrinsic::experimental_widenable_condition>()); + return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory(); } // Other instructions return normally. @@ -4572,12 +4754,12 @@ static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, // TODO: Allow FP min/max with nnan/nsz. assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison"); - Value *A, *B; + Value *A = nullptr, *B = nullptr; SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1); if (!SelectPatternResult::isMinOrMax(L.Flavor)) return {SPF_UNKNOWN, SPNB_NA, false}; - Value *C, *D; + Value *C = nullptr, *D = nullptr; SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1); if (L.Flavor != R.Flavor) return {SPF_UNKNOWN, SPNB_NA, false}; @@ -5627,8 +5809,8 @@ static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower, } static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower, - APInt &Upper) { - const Value *LHS, *RHS; + APInt &Upper, const InstrInfoQuery &IIQ) { + const Value *LHS = nullptr, *RHS = nullptr; SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS); if (R.Flavor == SPF_UNKNOWN) return; @@ -5640,7 +5822,8 @@ static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower, // then the result of abs(X) is [0..SIGNED_MAX], // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN. 
Lower = APInt::getNullValue(BitWidth); - if (cast<Instruction>(RHS)->hasNoSignedWrap()) + if (match(RHS, m_Neg(m_Specific(LHS))) && + IIQ.hasNoSignedWrap(cast<Instruction>(RHS))) Upper = APInt::getSignedMaxValue(BitWidth) + 1; else Upper = APInt::getSignedMinValue(BitWidth) + 1; @@ -5694,7 +5877,7 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) { else if (auto *II = dyn_cast<IntrinsicInst>(V)) setLimitsForIntrinsic(*II, Lower, Upper); else if (auto *SI = dyn_cast<SelectInst>(V)) - setLimitsForSelectPattern(*SI, Lower, Upper); + setLimitsForSelectPattern(*SI, Lower, Upper, IIQ); ConstantRange CR = ConstantRange::getNonEmpty(Lower, Upper); @@ -5704,3 +5887,111 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) { return CR; } + +static Optional<int64_t> +getOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, const DataLayout &DL) { + // Skip over the first indices. + gep_type_iterator GTI = gep_type_begin(GEP); + for (unsigned i = 1; i != Idx; ++i, ++GTI) + /*skip along*/; + + // Compute the offset implied by the rest of the indices. + int64_t Offset = 0; + for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { + ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i)); + if (!OpC) + return None; + if (OpC->isZero()) + continue; // No offset. + + // Handle struct indices, which add their field offset to the pointer. + if (StructType *STy = GTI.getStructTypeOrNull()) { + Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + continue; + } + + // Otherwise, we have a sequential type like an array or vector. Multiply + // the index by the ElementSize. + uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); + Offset += Size * OpC->getSExtValue(); + } + + return Offset; +} + +Optional<int64_t> llvm::isPointerOffset(const Value *Ptr1, const Value *Ptr2, + const DataLayout &DL) { + Ptr1 = Ptr1->stripPointerCasts(); + Ptr2 = Ptr2->stripPointerCasts(); + + // Handle the trivial case first. + if (Ptr1 == Ptr2) { + return 0; + } + + const GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1); + const GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2); + + // If one pointer is a GEP see if the GEP is a constant offset from the base, + // as in "P" and "gep P, 1". + // Also do this iteratively to handle the the following case: + // Ptr_t1 = GEP Ptr1, c1 + // Ptr_t2 = GEP Ptr_t1, c2 + // Ptr2 = GEP Ptr_t2, c3 + // where we will return c1+c2+c3. + // TODO: Handle the case when both Ptr1 and Ptr2 are GEPs of some common base + // -- replace getOffsetFromBase with getOffsetAndBase, check that the bases + // are the same, and return the difference between offsets. + auto getOffsetFromBase = [&DL](const GEPOperator *GEP, + const Value *Ptr) -> Optional<int64_t> { + const GEPOperator *GEP_T = GEP; + int64_t OffsetVal = 0; + bool HasSameBase = false; + while (GEP_T) { + auto Offset = getOffsetFromIndex(GEP_T, 1, DL); + if (!Offset) + return None; + OffsetVal += *Offset; + auto Op0 = GEP_T->getOperand(0)->stripPointerCasts(); + if (Op0 == Ptr) { + HasSameBase = true; + break; + } + GEP_T = dyn_cast<GEPOperator>(Op0); + } + if (!HasSameBase) + return None; + return OffsetVal; + }; + + if (GEP1) { + auto Offset = getOffsetFromBase(GEP1, Ptr2); + if (Offset) + return -*Offset; + } + if (GEP2) { + auto Offset = getOffsetFromBase(GEP2, Ptr1); + if (Offset) + return Offset; + } + + // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical + // base. 
After that base, they may have some number of common (and + // potentially variable) indices. After that they handle some constant + // offset, which determines their offset from each other. At this point, we + // handle no other case. + if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0)) + return None; + + // Skip any common indices and track the GEP types. + unsigned Idx = 1; + for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx) + if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) + break; + + auto Offset1 = getOffsetFromIndex(GEP1, Idx, DL); + auto Offset2 = getOffsetFromIndex(GEP2, Idx, DL); + if (!Offset1 || !Offset2) + return None; + return *Offset2 - *Offset1; +} diff --git a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp index 986756eb2627..c45ab941a142 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Value.h" +#include "llvm/Support/CommandLine.h" #define DEBUG_TYPE "vectorutils" @@ -56,6 +57,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::smul_fix: case Intrinsic::smul_fix_sat: case Intrinsic::umul_fix: + case Intrinsic::umul_fix_sat: case Intrinsic::sqrt: // Begin floating-point. case Intrinsic::sin: case Intrinsic::cos: @@ -98,6 +100,7 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, case Intrinsic::smul_fix: case Intrinsic::smul_fix_sat: case Intrinsic::umul_fix: + case Intrinsic::umul_fix_sat: return (ScalarOpdIdx == 2); default: return false; @@ -830,15 +833,15 @@ void InterleavedAccessInfo::collectConstStrideAccesses( /*Assume=*/true, /*ShouldCheckWrap=*/false); const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); - PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType()); + PointerType *PtrTy = cast<PointerType>(Ptr->getType()); uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType()); // An alignment of 0 means target ABI alignment. - unsigned Align = getLoadStoreAlignment(&I); - if (!Align) - Align = DL.getABITypeAlignment(PtrTy->getElementType()); + MaybeAlign Alignment = MaybeAlign(getLoadStoreAlignment(&I)); + if (!Alignment) + Alignment = Align(DL.getABITypeAlignment(PtrTy->getElementType())); - AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align); + AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, *Alignment); } } @@ -925,7 +928,7 @@ void InterleavedAccessInfo::analyzeInterleaving( if (!Group) { LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B << '\n'); - Group = createInterleaveGroup(B, DesB.Stride, DesB.Align); + Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment); } if (B->mayWriteToMemory()) StoreGroups.insert(Group); @@ -964,6 +967,10 @@ void InterleavedAccessInfo::analyzeInterleaving( // instructions that precede it. if (isInterleaved(A)) { InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A); + + LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to " + "dependence between " << *A << " and "<< *B << '\n'); + StoreGroups.remove(StoreGroup); releaseGroup(StoreGroup); } @@ -1028,7 +1035,7 @@ void InterleavedAccessInfo::analyzeInterleaving( Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size); // Try to insert A into B's group. 
- if (Group->insertMember(A, IndexA, DesA.Align)) { + if (Group->insertMember(A, IndexA, DesA.Alignment)) { LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n' << " into the interleave group with" << *B << '\n'); @@ -1153,3 +1160,69 @@ void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const { propagateMetadata(NewInst, VL); } } + +void VFABI::getVectorVariantNames( + const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) { + const StringRef S = + CI.getAttribute(AttributeList::FunctionIndex, VFABI::MappingsAttrName) + .getValueAsString(); + if (S.empty()) + return; + + SmallVector<StringRef, 8> ListAttr; + S.split(ListAttr, ","); + + for (auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) { +#ifndef NDEBUG + Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S); + assert(Info.hasValue() && "Invalid name for a VFABI variant."); + assert(CI.getModule()->getFunction(Info.getValue().VectorName) && + "Vector function is missing."); +#endif + VariantMappings.push_back(S); + } +} + +bool VFShape::hasValidParameterList() const { + for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams; + ++Pos) { + assert(Parameters[Pos].ParamPos == Pos && "Broken parameter list."); + + switch (Parameters[Pos].ParamKind) { + default: // Nothing to check. + break; + case VFParamKind::OMP_Linear: + case VFParamKind::OMP_LinearRef: + case VFParamKind::OMP_LinearVal: + case VFParamKind::OMP_LinearUVal: + // Compile time linear steps must be non-zero. + if (Parameters[Pos].LinearStepOrPos == 0) + return false; + break; + case VFParamKind::OMP_LinearPos: + case VFParamKind::OMP_LinearRefPos: + case VFParamKind::OMP_LinearValPos: + case VFParamKind::OMP_LinearUValPos: + // The runtime linear step must be referring to some other + // parameters in the signature. + if (Parameters[Pos].LinearStepOrPos >= int(NumParams)) + return false; + // The linear step parameter must be marked as uniform. + if (Parameters[Parameters[Pos].LinearStepOrPos].ParamKind != + VFParamKind::OMP_Uniform) + return false; + // The linear step parameter can't point at itself. + if (Parameters[Pos].LinearStepOrPos == int(Pos)) + return false; + break; + case VFParamKind::GlobalPredicate: + // The global predicate must be the unique. Can be placed anywhere in the + // signature. + for (unsigned NextPos = Pos + 1; NextPos < NumParams; ++NextPos) + if (Parameters[NextPos].ParamKind == VFParamKind::GlobalPredicate) + return false; + break; + } + } + return true; +} |
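
Editorial note on the VFABI demangler added above: the mangled-name grammar
_ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)] is easiest to
follow with a worked example. The sketch below is not part of the patch; it
assumes the VFABI declarations (VFABI::tryDemangleForVFABI, VFInfo, VFShape,
VFParamKind) are exposed through llvm/Analysis/VectorUtils.h in this revision,
and that the ISA, mask, and vlen tokens "n", "N", "2" are handled by the
tryParseISA/tryParseMask/tryParseVLEN helpers (not shown in these hunks) as
AArch64 Advanced SIMD, unmasked, VF = 2. Field names come from the code in the
diff; everything else is illustrative.

// Minimal sketch: demangle "_ZGVnN2vl8_foo", i.e. an unmasked, 2-lane
// variant of foo whose first argument is vectorized and whose second
// argument is linear with a compile-time step of 8.
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/VectorUtils.h" // assumed home of the VFABI helpers
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void demangleExample() {
  Optional<VFInfo> Info = VFABI::tryDemangleForVFABI("_ZGVnN2vl8_foo");
  if (!Info)
    return; // Syntax error or unsupported mangled name.

  // Pieces: "_ZGV" "n" (<isa>) "N" (<mask>, unmasked) "2" (<vlen>)
  //         "v" "l8" (<parameters>) "_" "foo" (<scalarname>).
  const VFShape &Shape = Info->Shape;
  outs() << "scalar: " << Info->ScalarName  // "foo"
         << " vector: " << Info->VectorName // full mangled name, no redirection
         << " VF: " << Shape.VF              // 2, and Shape.IsScalable is false
         << " #params: " << Shape.Parameters.size() << "\n"; // 2

  // Parameter 0 has ParamKind == VFParamKind::Vector; parameter 1 has
  // ParamKind == VFParamKind::OMP_Linear with LinearStepOrPos == 8.
  // No trailing GlobalPredicate parameter is appended because <mask>
  // is "N" rather than "M".
}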

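A second, smaller editorial sketch, again not part of the patch and assuming
the same VectorUtils.h declarations: VFShape::hasValidParameterList() above
rejects a compile-time linear parameter whose step is zero, so a hand-built
shape like the following fails validation.

#include "llvm/Analysis/VectorUtils.h"
#include <cassert>

using namespace llvm;

static void validateExample() {
  VFShape Shape;
  Shape.VF = 4;
  Shape.IsScalable = false;
  // ParamPos must match the index in the list (the function asserts this).
  Shape.Parameters.push_back({0, VFParamKind::Vector, 0, Align()});
  // An OMP_Linear parameter with a compile-time step of 0 is invalid
  // per the checks in hasValidParameterList().
  Shape.Parameters.push_back(
      {1, VFParamKind::OMP_Linear, /*LinearStepOrPos=*/0, Align()});
  assert(!Shape.hasValidParameterList());
}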