author    Dimitry Andric <dim@FreeBSD.org>    2017-01-02 19:17:04 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2017-01-02 19:17:04 +0000
commit    b915e9e0fc85ba6f398b3fab0db6a81a8913af94
tree      98b8f811c7aff2547cab8642daf372d6c59502fb
parent    6421cca32f69ac849537a3cff78c352195e99f1b
Diffstat (limited to 'lib/Analysis')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp | 29
-rw-r--r--  lib/Analysis/AliasAnalysisEvaluator.cpp | 2
-rw-r--r--  lib/Analysis/AliasAnalysisSummary.cpp | 34
-rw-r--r--  lib/Analysis/AliasAnalysisSummary.h | 56
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp | 351
-rw-r--r--  lib/Analysis/Analysis.cpp | 10
-rw-r--r--  lib/Analysis/AssumptionCache.cpp | 4
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 143
-rw-r--r--  lib/Analysis/BlockFrequencyInfo.cpp | 32
-rw-r--r--  lib/Analysis/BlockFrequencyInfoImpl.cpp | 17
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp | 40
-rw-r--r--  lib/Analysis/CFGPrinter.cpp | 111
-rw-r--r--  lib/Analysis/CFLAndersAliasAnalysis.cpp | 484
-rw-r--r--  lib/Analysis/CFLGraph.h | 128
-rw-r--r--  lib/Analysis/CFLSteensAliasAnalysis.cpp | 78
-rw-r--r--  lib/Analysis/CGSCCPassManager.cpp | 501
-rw-r--r--  lib/Analysis/CMakeLists.txt | 6
-rw-r--r--  lib/Analysis/CallGraph.cpp | 4
-rw-r--r--  lib/Analysis/CallGraphSCCPass.cpp | 12
-rw-r--r--  lib/Analysis/CodeMetrics.cpp | 66
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 478
-rw-r--r--  lib/Analysis/CostModel.cpp | 47
-rw-r--r--  lib/Analysis/DemandedBits.cpp | 13
-rw-r--r--  lib/Analysis/DependenceAnalysis.cpp | 2
-rw-r--r--  lib/Analysis/DominanceFrontier.cpp | 2
-rw-r--r--  lib/Analysis/EHPersonalities.cpp | 25
-rw-r--r--  lib/Analysis/GlobalsModRef.cpp | 24
-rw-r--r--  lib/Analysis/IVUsers.cpp | 6
-rw-r--r--  lib/Analysis/InlineCost.cpp | 266
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 1570
-rw-r--r--  lib/Analysis/IteratedDominanceFrontier.cpp | 2
-rw-r--r--  lib/Analysis/LLVMBuild.txt | 2
-rw-r--r--  lib/Analysis/LazyBlockFrequencyInfo.cpp | 15
-rw-r--r--  lib/Analysis/LazyBranchProbabilityInfo.cpp | 63
-rw-r--r--  lib/Analysis/LazyCallGraph.cpp | 1007
-rw-r--r--  lib/Analysis/LazyValueInfo.cpp | 990
-rw-r--r--  lib/Analysis/Lint.cpp | 37
-rw-r--r--  lib/Analysis/Loads.cpp | 24
-rw-r--r--  lib/Analysis/LoopAccessAnalysis.cpp | 188
-rw-r--r--  lib/Analysis/LoopInfo.cpp | 124
-rw-r--r--  lib/Analysis/LoopPass.cpp | 28
-rw-r--r--  lib/Analysis/LoopPassManager.cpp | 22
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp | 129
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp | 172
-rw-r--r--  lib/Analysis/ModuleDebugInfoPrinter.cpp | 13
-rw-r--r--  lib/Analysis/ModuleSummaryAnalysis.cpp | 345
-rw-r--r--  lib/Analysis/ObjCARCAliasAnalysis.cpp | 2
-rw-r--r--  lib/Analysis/ObjCARCInstKind.cpp | 71
-rw-r--r--  lib/Analysis/OptimizationDiagnosticInfo.cpp | 189
-rw-r--r--  lib/Analysis/PHITransAddr.cpp | 14
-rw-r--r--  lib/Analysis/PostDominators.cpp | 2
-rw-r--r--  lib/Analysis/ProfileSummaryInfo.cpp | 72
-rw-r--r--  lib/Analysis/RegionInfo.cpp | 9
-rw-r--r--  lib/Analysis/RegionPass.cpp | 6
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 1484
-rw-r--r--  lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 4
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 96
-rw-r--r--  lib/Analysis/ScopedNoAliasAA.cpp | 17
-rw-r--r--  lib/Analysis/StratifiedSets.h | 13
-rw-r--r--  lib/Analysis/TargetLibraryInfo.cpp | 161
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp | 61
-rw-r--r--  lib/Analysis/TypeBasedAliasAnalysis.cpp | 173
-rw-r--r--  lib/Analysis/TypeMetadataUtils.cpp | 3
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 647
-rw-r--r--  lib/Analysis/VectorUtils.cpp | 16
65 files changed, 6919 insertions, 3823 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index f931b6fc6523..84da76be98bb 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -53,7 +53,8 @@ using namespace llvm;
static cl::opt<bool> DisableBasicAA("disable-basicaa", cl::Hidden,
cl::init(false));
-AAResults::AAResults(AAResults &&Arg) : TLI(Arg.TLI), AAs(std::move(Arg.AAs)) {
+AAResults::AAResults(AAResults &&Arg)
+ : TLI(Arg.TLI), AAs(std::move(Arg.AAs)), AADeps(std::move(Arg.AADeps)) {
for (auto &AA : AAs)
AA->setAAResults(this);
}
@@ -69,6 +70,22 @@ AAResults::~AAResults() {
#endif
}
+bool AAResults::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // Check if the AA manager itself has been invalidated.
+ auto PAC = PA.getChecker<AAManager>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
+ return true; // The manager needs to be blown away, clear everything.
+
+ // Check all of the dependencies registered.
+ for (AnalysisKey *ID : AADeps)
+ if (Inv.invalidate(ID, F, PA))
+ return true;
+
+ // Everything we depend on is still fine, so are we. Nothing to invalidate.
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Default chaining methods
//===----------------------------------------------------------------------===//
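The invalidate() hook added above follows the new pass manager's standard idiom: a cached result survives a pass unless its own analysis was dropped from the preserved set, or one of the analyses it holds handles to was itself invalidated. A minimal standalone sketch of that idiom (hypothetical MyAnalysis/MyResult names; BasicAAResult::invalidate later in this diff is a real instance):

    bool MyResult::invalidate(Function &F, const PreservedAnalyses &PA,
                              FunctionAnalysisManager::Invalidator &Inv) {
      // Invalid if our own analysis was not preserved...
      auto PAC = PA.getChecker<MyAnalysis>();
      if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
        return true;
      // ...or if a dependency we rely on was invalidated.
      return Inv.invalidate<DominatorTreeAnalysis>(F, PA);
    }
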
@@ -141,7 +158,8 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
// Try to refine the mod-ref info further using other API entry points to the
// aggregate set of AA results.
auto MRB = getModRefBehavior(CS);
- if (MRB == FMRB_DoesNotAccessMemory)
+ if (MRB == FMRB_DoesNotAccessMemory ||
+ MRB == FMRB_OnlyAccessesInaccessibleMem)
return MRI_NoModRef;
if (onlyReadsMemory(MRB))
@@ -149,7 +167,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
else if (doesNotReadMemory(MRB))
Result = ModRefInfo(Result & MRI_Mod);
- if (onlyAccessesArgPointees(MRB)) {
+ if (onlyAccessesArgPointees(MRB) || onlyAccessesInaccessibleOrArgMem(MRB)) {
bool DoesAlias = false;
ModRefInfo AllArgsMask = MRI_NoModRef;
if (doesAccessArgPointees(MRB)) {
@@ -459,7 +477,8 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
+ (!CS.doesNotCapture(ArgNo) &&
+ ArgNo < CS.getNumArgOperands() && !CS.isByValArgument(ArgNo)))
continue;
// If this is a no-capture pointer argument, see if we can tell that it
@@ -512,7 +531,7 @@ bool AAResults::canInstructionRangeModRef(const Instruction &I1,
AAResults::Concept::~Concept() {}
// Provide a definition for the static object used to identify passes.
-char AAManager::PassID;
+AnalysisKey AAManager::Key;
namespace {
/// A wrapper pass for external alias analyses. This just squirrels away the
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index baf8f3f881db..4d6a6c9a30aa 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -88,7 +88,7 @@ static inline bool isInterestingPointer(Value *V) {
&& !isa<ConstantPointerNull>(V);
}
-PreservedAnalyses AAEvaluator::run(Function &F, AnalysisManager<Function> &AM) {
+PreservedAnalyses AAEvaluator::run(Function &F, FunctionAnalysisManager &AM) {
runInternal(F, AM.getResult<AAManager>(F));
return PreservedAnalyses::all();
}
diff --git a/lib/Analysis/AliasAnalysisSummary.cpp b/lib/Analysis/AliasAnalysisSummary.cpp
index f3f13df283db..2b4879453beb 100644
--- a/lib/Analysis/AliasAnalysisSummary.cpp
+++ b/lib/Analysis/AliasAnalysisSummary.cpp
@@ -7,25 +7,23 @@ namespace llvm {
namespace cflaa {
namespace {
-LLVM_CONSTEXPR unsigned AttrEscapedIndex = 0;
-LLVM_CONSTEXPR unsigned AttrUnknownIndex = 1;
-LLVM_CONSTEXPR unsigned AttrGlobalIndex = 2;
-LLVM_CONSTEXPR unsigned AttrCallerIndex = 3;
-LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 4;
-LLVM_CONSTEXPR unsigned AttrLastArgIndex = NumAliasAttrs;
-LLVM_CONSTEXPR unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex;
+const unsigned AttrEscapedIndex = 0;
+const unsigned AttrUnknownIndex = 1;
+const unsigned AttrGlobalIndex = 2;
+const unsigned AttrCallerIndex = 3;
+const unsigned AttrFirstArgIndex = 4;
+const unsigned AttrLastArgIndex = NumAliasAttrs;
+const unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex;
-// NOTE: These aren't AliasAttrs because bitsets don't have a constexpr
-// ctor for some versions of MSVC that we support. We could maybe refactor,
-// but...
+// It would be *slightly* prettier if we changed these to AliasAttrs, but it
+// seems that both GCC and MSVC emit dynamic initializers for const bitsets.
using AliasAttr = unsigned;
-LLVM_CONSTEXPR AliasAttr AttrNone = 0;
-LLVM_CONSTEXPR AliasAttr AttrEscaped = 1 << AttrEscapedIndex;
-LLVM_CONSTEXPR AliasAttr AttrUnknown = 1 << AttrUnknownIndex;
-LLVM_CONSTEXPR AliasAttr AttrGlobal = 1 << AttrGlobalIndex;
-LLVM_CONSTEXPR AliasAttr AttrCaller = 1 << AttrCallerIndex;
-LLVM_CONSTEXPR AliasAttr ExternalAttrMask =
- AttrEscaped | AttrUnknown | AttrGlobal;
+const AliasAttr AttrNone = 0;
+const AliasAttr AttrEscaped = 1 << AttrEscapedIndex;
+const AliasAttr AttrUnknown = 1 << AttrUnknownIndex;
+const AliasAttr AttrGlobal = 1 << AttrGlobalIndex;
+const AliasAttr AttrCaller = 1 << AttrCallerIndex;
+const AliasAttr ExternalAttrMask = AttrEscaped | AttrUnknown | AttrGlobal;
}
AliasAttrs getAttrNone() { return AttrNone; }
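The dynamic-initializer concern in the new comment is about static initialization cost and ordering: on some toolchains LLVM supported at the time, std::bitset's constructor was not usable in constant expressions, so a namespace-scope const bitset is constructed by code run at program startup, while a plain integer constant is folded at compile time. An illustrative snippet (not part of the patch):

    #include <bitset>

    // May be emitted with a dynamic initializer on toolchains whose
    // std::bitset constructor is not constexpr:
    const std::bitset<32> AttrsAsBitset(0x7);

    // Always statically initialized:
    const unsigned AttrsAsUnsigned = 0x7;
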
@@ -91,7 +89,7 @@ instantiateExternalRelation(ExternalRelation ERelation, CallSite CS) {
auto To = instantiateInterfaceValue(ERelation.To, CS);
if (!To)
return None;
- return InstantiatedRelation{*From, *To};
+ return InstantiatedRelation{*From, *To, ERelation.Offset};
}
Optional<InstantiatedAttr> instantiateExternalAttribute(ExternalAttribute EAttr,
diff --git a/lib/Analysis/AliasAnalysisSummary.h b/lib/Analysis/AliasAnalysisSummary.h
index 43c0d4cb14f9..51a85f4e7061 100644
--- a/lib/Analysis/AliasAnalysisSummary.h
+++ b/lib/Analysis/AliasAnalysisSummary.h
@@ -99,7 +99,7 @@ AliasAttrs getExternallyVisibleAttrs(AliasAttrs);
//===----------------------------------------------------------------------===//
/// The maximum number of arguments we can put into a summary.
-LLVM_CONSTEXPR static unsigned MaxSupportedArgsInSummary = 50;
+static const unsigned MaxSupportedArgsInSummary = 50;
/// We use InterfaceValue to describe parameters/return value, as well as
/// potential memory locations that are pointed to by parameters/return value,
@@ -120,13 +120,66 @@ inline bool operator==(InterfaceValue LHS, InterfaceValue RHS) {
inline bool operator!=(InterfaceValue LHS, InterfaceValue RHS) {
return !(LHS == RHS);
}
+inline bool operator<(InterfaceValue LHS, InterfaceValue RHS) {
+ return LHS.Index < RHS.Index ||
+ (LHS.Index == RHS.Index && LHS.DerefLevel < RHS.DerefLevel);
+}
+inline bool operator>(InterfaceValue LHS, InterfaceValue RHS) {
+ return RHS < LHS;
+}
+inline bool operator<=(InterfaceValue LHS, InterfaceValue RHS) {
+ return !(RHS < LHS);
+}
+inline bool operator>=(InterfaceValue LHS, InterfaceValue RHS) {
+ return !(LHS < RHS);
+}
+
+// We use UnknownOffset to represent pointer offsets that cannot be determined
+// at compile time. Note that MemoryLocation::UnknownSize cannot be used here
+// because we require a signed value.
+static const int64_t UnknownOffset = INT64_MAX;
+
+inline int64_t addOffset(int64_t LHS, int64_t RHS) {
+ if (LHS == UnknownOffset || RHS == UnknownOffset)
+ return UnknownOffset;
+ // FIXME: Do we need to guard against integer overflow here?
+ return LHS + RHS;
+}
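UnknownOffset acts as an absorbing element for this arithmetic: once either operand is unknown, the sum is unknown. A tiny self-contained check of the helper's contract (duplicating the function above purely for illustration):

    #include <cassert>
    #include <cstdint>

    static const int64_t UnknownOffset = INT64_MAX;

    int64_t addOffset(int64_t LHS, int64_t RHS) {
      if (LHS == UnknownOffset || RHS == UnknownOffset)
        return UnknownOffset;
      return LHS + RHS;
    }

    int main() {
      assert(addOffset(4, 8) == 12);
      // Unknown absorbs in both directions:
      assert(addOffset(4, UnknownOffset) == UnknownOffset);
      assert(addOffset(UnknownOffset, 8) == UnknownOffset);
    }
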
/// We use ExternalRelation to describe an externally visible aliasing relations
/// between parameters/return value of a function.
struct ExternalRelation {
InterfaceValue From, To;
+ int64_t Offset;
};
+inline bool operator==(ExternalRelation LHS, ExternalRelation RHS) {
+ return LHS.From == RHS.From && LHS.To == RHS.To && LHS.Offset == RHS.Offset;
+}
+inline bool operator!=(ExternalRelation LHS, ExternalRelation RHS) {
+ return !(LHS == RHS);
+}
+inline bool operator<(ExternalRelation LHS, ExternalRelation RHS) {
+ if (LHS.From < RHS.From)
+ return true;
+ if (LHS.From > RHS.From)
+ return false;
+ if (LHS.To < RHS.To)
+ return true;
+ if (LHS.To > RHS.To)
+ return false;
+ return LHS.Offset < RHS.Offset;
+}
+inline bool operator>(ExternalRelation LHS, ExternalRelation RHS) {
+ return RHS < LHS;
+}
+inline bool operator<=(ExternalRelation LHS, ExternalRelation RHS) {
+ return !(RHS < LHS);
+}
+inline bool operator>=(ExternalRelation LHS, ExternalRelation RHS) {
+ return !(LHS < RHS);
+}
+
/// We use ExternalAttribute to describe an externally visible AliasAttrs
/// for parameters/return value.
struct ExternalAttribute {
@@ -174,6 +227,7 @@ inline bool operator>=(InstantiatedValue LHS, InstantiatedValue RHS) {
/// callsite
struct InstantiatedRelation {
InstantiatedValue From, To;
+ int64_t Offset;
};
Optional<InstantiatedRelation> instantiateExternalRelation(ExternalRelation,
CallSite);
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index d349ac51a9b9..701b0e1a5925 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -26,12 +26,19 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<unsigned>
+ SaturationThreshold("alias-set-saturation-threshold", cl::Hidden,
+ cl::init(250),
+ cl::desc("The maximum number of pointers may-alias "
+ "sets may contain before degradation"));
+
/// mergeSetIn - Merge the specified alias set into this alias set.
///
void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
assert(!AS.Forward && "Alias set is already forwarding!");
assert(!Forward && "This set is a forwarding set!!");
+ bool WasMustAlias = (Alias == SetMustAlias);
// Update the alias and access types of this set...
Access |= AS.Access;
Alias |= AS.Alias;
@@ -52,6 +59,13 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
Alias = SetMayAlias;
}
+ if (Alias == SetMayAlias) {
+ if (WasMustAlias)
+ AST.TotalMayAliasSetSize += size();
+ if (AS.Alias == SetMustAlias)
+ AST.TotalMayAliasSetSize += AS.size();
+ }
+
bool ASHadUnknownInsts = !AS.UnknownInsts.empty();
if (UnknownInsts.empty()) { // Merge call sites...
if (ASHadUnknownInsts) {
@@ -63,11 +77,13 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
AS.UnknownInsts.clear();
}
- AS.Forward = this; // Forward across AS now...
- addRef(); // AS is now pointing to us...
+ AS.Forward = this; // Forward across AS now...
+ addRef(); // AS is now pointing to us...
// Merge the list of constituent pointers...
if (AS.PtrList) {
+ SetSize += AS.size();
+ AS.SetSize = 0;
*PtrListEnd = AS.PtrList;
AS.PtrList->setPrevInList(PtrListEnd);
PtrListEnd = AS.PtrListEnd;
@@ -85,7 +101,12 @@ void AliasSetTracker::removeAliasSet(AliasSet *AS) {
Fwd->dropRef(*this);
AS->Forward = nullptr;
}
+
+ if (AS->Alias == AliasSet::SetMayAlias)
+ TotalMayAliasSetSize -= AS->size();
+
AliasSets.erase(AS);
+
}
void AliasSet::removeFromTracker(AliasSetTracker &AST) {
@@ -105,10 +126,13 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
AliasResult Result =
AA.alias(MemoryLocation(P->getValue(), P->getSize(), P->getAAInfo()),
MemoryLocation(Entry.getValue(), Size, AAInfo));
- if (Result != MustAlias)
+ if (Result != MustAlias) {
Alias = SetMayAlias;
- else // First entry of must alias must have maximum size!
+ AST.TotalMayAliasSetSize += size();
+ } else {
+ // First entry of must alias must have maximum size!
P->updateSizeAndAAInfo(Size, AAInfo);
+ }
assert(Result != NoAlias && "Cannot be part of must set!");
}
@@ -116,11 +140,16 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
Entry.updateSizeAndAAInfo(Size, AAInfo);
// Add it to the end of the list...
+ ++SetSize;
assert(*PtrListEnd == nullptr && "End of list is not null?");
*PtrListEnd = &Entry;
PtrListEnd = Entry.setPrevInList(PtrListEnd);
assert(*PtrListEnd == nullptr && "End of list is not null?");
- addRef(); // Entry points to alias set.
+ // Entry points to alias set.
+ addRef();
+
+ if (Alias == SetMayAlias)
+ AST.TotalMayAliasSetSize++;
}
void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
@@ -145,6 +174,9 @@ void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
const AAMDNodes &AAInfo,
AliasAnalysis &AA) const {
+ if (AliasAny)
+ return true;
+
if (Alias == SetMustAlias) {
assert(UnknownInsts.empty() && "Illegal must alias set!");
@@ -177,6 +209,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
AliasAnalysis &AA) const {
+
+ if (AliasAny)
+ return true;
+
if (!Inst->mayReadOrWriteMemory())
return false;
@@ -229,17 +265,6 @@ AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr,
return FoundSet;
}
-/// containsPointer - Return true if the specified location is represented by
-/// this alias set, false otherwise. This does not modify the AST object or
-/// alias sets.
-bool AliasSetTracker::containsPointer(const Value *Ptr, uint64_t Size,
- const AAMDNodes &AAInfo) const {
- for (const AliasSet &AS : *this)
- if (!AS.Forward && AS.aliasesPointer(Ptr, Size, AAInfo, AA))
- return true;
- return false;
-}
-
bool AliasSetTracker::containsUnknown(const Instruction *Inst) const {
for (const AliasSet &AS : *this)
if (!AS.Forward && AS.aliasesUnknownInst(Inst, AA))
@@ -261,16 +286,28 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
return FoundSet;
}
-
-
-
/// getAliasSetForPointer - Return the alias set that the specified pointer
/// lives in.
AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
- const AAMDNodes &AAInfo,
- bool *New) {
+ const AAMDNodes &AAInfo) {
AliasSet::PointerRec &Entry = getEntryFor(Pointer);
+ if (AliasAnyAS) {
+ // At this point, the AST is saturated, so we only have one active alias
+ // set. That means we already know which alias set we want to return, and
+ // just need to add the pointer to that set to keep the data structure
+ // consistent.
+ // This, of course, means that we will never need a merge here.
+ if (Entry.hasAliasSet()) {
+ Entry.updateSizeAndAAInfo(Size, AAInfo);
+ assert(Entry.getAliasSet(*this) == AliasAnyAS &&
+ "Entry in saturated AST must belong to only alias set");
+ } else {
+ AliasAnyAS->addPointer(*this, Entry, Size, AAInfo);
+ }
+ return *AliasAnyAS;
+ }
+
// Check to see if the pointer is already known.
if (Entry.hasAliasSet()) {
// If the size changed, we may need to merge several alias sets.
@@ -290,68 +327,55 @@ AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
return *AS;
}
- if (New) *New = true;
// Otherwise create a new alias set to hold the loaded pointer.
AliasSets.push_back(new AliasSet());
AliasSets.back().addPointer(*this, Entry, Size, AAInfo);
return AliasSets.back();
}
-bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
- bool NewPtr;
- addPointer(Ptr, Size, AAInfo, AliasSet::NoAccess, NewPtr);
- return NewPtr;
+void AliasSetTracker::add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
+ addPointer(Ptr, Size, AAInfo, AliasSet::NoAccess);
}
-
-bool AliasSetTracker::add(LoadInst *LI) {
+void AliasSetTracker::add(LoadInst *LI) {
if (isStrongerThanMonotonic(LI->getOrdering())) return addUnknown(LI);
AAMDNodes AAInfo;
LI->getAAMetadata(AAInfo);
AliasSet::AccessLattice Access = AliasSet::RefAccess;
- bool NewPtr;
const DataLayout &DL = LI->getModule()->getDataLayout();
AliasSet &AS = addPointer(LI->getOperand(0),
- DL.getTypeStoreSize(LI->getType()),
- AAInfo, Access, NewPtr);
+ DL.getTypeStoreSize(LI->getType()), AAInfo, Access);
if (LI->isVolatile()) AS.setVolatile();
- return NewPtr;
}
-bool AliasSetTracker::add(StoreInst *SI) {
+void AliasSetTracker::add(StoreInst *SI) {
if (isStrongerThanMonotonic(SI->getOrdering())) return addUnknown(SI);
AAMDNodes AAInfo;
SI->getAAMetadata(AAInfo);
AliasSet::AccessLattice Access = AliasSet::ModAccess;
- bool NewPtr;
const DataLayout &DL = SI->getModule()->getDataLayout();
Value *Val = SI->getOperand(0);
- AliasSet &AS = addPointer(SI->getOperand(1),
- DL.getTypeStoreSize(Val->getType()),
- AAInfo, Access, NewPtr);
+ AliasSet &AS = addPointer(
+ SI->getOperand(1), DL.getTypeStoreSize(Val->getType()), AAInfo, Access);
if (SI->isVolatile()) AS.setVolatile();
- return NewPtr;
}
-bool AliasSetTracker::add(VAArgInst *VAAI) {
+void AliasSetTracker::add(VAArgInst *VAAI) {
AAMDNodes AAInfo;
VAAI->getAAMetadata(AAInfo);
- bool NewPtr;
addPointer(VAAI->getOperand(0), MemoryLocation::UnknownSize, AAInfo,
- AliasSet::ModRefAccess, NewPtr);
- return NewPtr;
+ AliasSet::ModRefAccess);
}
-bool AliasSetTracker::add(MemSetInst *MSI) {
+void AliasSetTracker::add(MemSetInst *MSI) {
AAMDNodes AAInfo;
MSI->getAAMetadata(AAInfo);
- bool NewPtr;
uint64_t Len;
if (ConstantInt *C = dyn_cast<ConstantInt>(MSI->getLength()))
@@ -360,30 +384,61 @@ bool AliasSetTracker::add(MemSetInst *MSI) {
Len = MemoryLocation::UnknownSize;
AliasSet &AS =
- addPointer(MSI->getRawDest(), Len, AAInfo, AliasSet::ModAccess, NewPtr);
+ addPointer(MSI->getRawDest(), Len, AAInfo, AliasSet::ModAccess);
if (MSI->isVolatile())
AS.setVolatile();
- return NewPtr;
}
-bool AliasSetTracker::addUnknown(Instruction *Inst) {
- if (isa<DbgInfoIntrinsic>(Inst))
- return true; // Ignore DbgInfo Intrinsics.
+void AliasSetTracker::add(MemTransferInst *MTI) {
+ AAMDNodes AAInfo;
+ MTI->getAAMetadata(AAInfo);
+
+ uint64_t Len;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ Len = C->getZExtValue();
+ else
+ Len = MemoryLocation::UnknownSize;
+
+ AliasSet &ASSrc =
+ addPointer(MTI->getRawSource(), Len, AAInfo, AliasSet::RefAccess);
+ if (MTI->isVolatile())
+ ASSrc.setVolatile();
+
+ AliasSet &ASDst =
+ addPointer(MTI->getRawDest(), Len, AAInfo, AliasSet::ModAccess);
+ if (MTI->isVolatile())
+ ASDst.setVolatile();
+}
+
+void AliasSetTracker::addUnknown(Instruction *Inst) {
+ if (isa<DbgInfoIntrinsic>(Inst))
+ return; // Ignore DbgInfo Intrinsics.
+
+ if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // These intrinsics will show up as affecting memory, but they are just
+ // markers.
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ // FIXME: Add lifetime/invariant intrinsics (See: PR30807).
+ case Intrinsic::assume:
+ return;
+ }
+ }
if (!Inst->mayReadOrWriteMemory())
- return true; // doesn't alias anything
+ return; // doesn't alias anything
AliasSet *AS = findAliasSetForUnknownInst(Inst);
if (AS) {
AS->addUnknownInst(Inst, AA);
- return false;
+ return;
}
AliasSets.push_back(new AliasSet());
AS = &AliasSets.back();
AS->addUnknownInst(Inst, AA);
- return true;
}
-bool AliasSetTracker::add(Instruction *I) {
+void AliasSetTracker::add(Instruction *I) {
// Dispatch to one of the other add methods.
if (LoadInst *LI = dyn_cast<LoadInst>(I))
return add(LI);
@@ -393,8 +448,9 @@ bool AliasSetTracker::add(Instruction *I) {
return add(VAAI);
if (MemSetInst *MSI = dyn_cast<MemSetInst>(I))
return add(MSI);
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I))
+ return add(MTI);
return addUnknown(I);
- // FIXME: add support of memcpy and memmove.
}
void AliasSetTracker::add(BasicBlock &BB) {
@@ -418,134 +474,15 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
add(AS.UnknownInsts[i]);
// Loop over all of the pointers in this alias set.
- bool X;
for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
- AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(),
- ASI.getAAInfo(),
- (AliasSet::AccessLattice)AS.Access, X);
+ AliasSet &NewAS =
+ addPointer(ASI.getPointer(), ASI.getSize(), ASI.getAAInfo(),
+ (AliasSet::AccessLattice)AS.Access);
if (AS.isVolatile()) NewAS.setVolatile();
}
}
}
-/// remove - Remove the specified (potentially non-empty) alias set from the
-/// tracker.
-void AliasSetTracker::remove(AliasSet &AS) {
- // Drop all call sites.
- if (!AS.UnknownInsts.empty())
- AS.dropRef(*this);
- AS.UnknownInsts.clear();
-
- // Clear the alias set.
- unsigned NumRefs = 0;
- while (!AS.empty()) {
- AliasSet::PointerRec *P = AS.PtrList;
-
- Value *ValToRemove = P->getValue();
-
- // Unlink and delete entry from the list of values.
- P->eraseFromList();
-
- // Remember how many references need to be dropped.
- ++NumRefs;
-
- // Finally, remove the entry.
- PointerMap.erase(ValToRemove);
- }
-
- // Stop using the alias set, removing it.
- AS.RefCount -= NumRefs;
- if (AS.RefCount == 0)
- AS.removeFromTracker(*this);
-}
-
-bool
-AliasSetTracker::remove(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
- AliasSet *AS = mergeAliasSetsForPointer(Ptr, Size, AAInfo);
- if (!AS) return false;
- remove(*AS);
- return true;
-}
-
-bool AliasSetTracker::remove(LoadInst *LI) {
- const DataLayout &DL = LI->getModule()->getDataLayout();
- uint64_t Size = DL.getTypeStoreSize(LI->getType());
-
- AAMDNodes AAInfo;
- LI->getAAMetadata(AAInfo);
-
- AliasSet *AS = mergeAliasSetsForPointer(LI->getOperand(0), Size, AAInfo);
- if (!AS) return false;
- remove(*AS);
- return true;
-}
-
-bool AliasSetTracker::remove(StoreInst *SI) {
- const DataLayout &DL = SI->getModule()->getDataLayout();
- uint64_t Size = DL.getTypeStoreSize(SI->getOperand(0)->getType());
-
- AAMDNodes AAInfo;
- SI->getAAMetadata(AAInfo);
-
- AliasSet *AS = mergeAliasSetsForPointer(SI->getOperand(1), Size, AAInfo);
- if (!AS) return false;
- remove(*AS);
- return true;
-}
-
-bool AliasSetTracker::remove(VAArgInst *VAAI) {
- AAMDNodes AAInfo;
- VAAI->getAAMetadata(AAInfo);
-
- AliasSet *AS = mergeAliasSetsForPointer(VAAI->getOperand(0),
- MemoryLocation::UnknownSize, AAInfo);
- if (!AS) return false;
- remove(*AS);
- return true;
-}
-
-bool AliasSetTracker::remove(MemSetInst *MSI) {
- AAMDNodes AAInfo;
- MSI->getAAMetadata(AAInfo);
- uint64_t Len;
-
- if (ConstantInt *C = dyn_cast<ConstantInt>(MSI->getLength()))
- Len = C->getZExtValue();
- else
- Len = MemoryLocation::UnknownSize;
-
- AliasSet *AS = mergeAliasSetsForPointer(MSI->getRawDest(), Len, AAInfo);
- if (!AS)
- return false;
- remove(*AS);
- return true;
-}
-
-bool AliasSetTracker::removeUnknown(Instruction *I) {
- if (!I->mayReadOrWriteMemory())
- return false; // doesn't alias anything
-
- AliasSet *AS = findAliasSetForUnknownInst(I);
- if (!AS) return false;
- remove(*AS);
- return true;
-}
-
-bool AliasSetTracker::remove(Instruction *I) {
- // Dispatch to one of the other remove methods...
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- return remove(LI);
- if (StoreInst *SI = dyn_cast<StoreInst>(I))
- return remove(SI);
- if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
- return remove(VAAI);
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(I))
- return remove(MSI);
- return removeUnknown(I);
- // FIXME: add support of memcpy and memmove.
-}
-
-
// deleteValue method - This method is used to remove a pointer value from the
// AliasSetTracker entirely. It should be used when an instruction is deleted
// from the program to update the AST. If you don't use this, you would have
@@ -575,6 +512,11 @@ void AliasSetTracker::deleteValue(Value *PtrVal) {
// Unlink and delete from the list of values.
PtrValEnt->eraseFromList();
+
+ if (AS->Alias == AliasSet::SetMayAlias) {
+ AS->SetSize--;
+ TotalMayAliasSetSize--;
+ }
// Stop using the alias set.
AS->dropRef(*this);
@@ -597,15 +539,68 @@ void AliasSetTracker::copyValue(Value *From, Value *To) {
AliasSet::PointerRec &Entry = getEntryFor(To);
if (Entry.hasAliasSet()) return; // Already in the tracker!
- // Add it to the alias set it aliases...
+ // getEntryFor above may invalidate iterator \c I, so reinitialize it.
I = PointerMap.find_as(From);
+ // Add it to the alias set it aliases...
AliasSet *AS = I->second->getAliasSet(*this);
AS->addPointer(*this, Entry, I->second->getSize(),
I->second->getAAInfo(),
true);
}
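The re-lookup of I above guards against a classic hash-map hazard: getEntryFor may insert into PointerMap, and an insertion can grow the table and invalidate outstanding iterators. A generic standalone sketch of the same defensive pattern (hypothetical; uses std::unordered_map in place of LLVM's DenseMap, which likewise invalidates iterators when it grows):

    #include <unordered_map>

    void copyEntry(std::unordered_map<int, int> &M, int From, int To) {
      auto I = M.find(From);
      M.emplace(To, 0);   // may rehash: I must not be trusted afterwards
      I = M.find(From);   // reinitialize before dereferencing
      if (I != M.end())
        M[To] = I->second;
    }
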
+AliasSet &AliasSetTracker::mergeAllAliasSets() {
+ assert(!AliasAnyAS && (TotalMayAliasSetSize > SaturationThreshold) &&
+ "Full merge should happen once, when the saturation threshold is "
+ "reached");
+
+ // Collect all alias sets, so that we can drop references with impunity
+ // without worrying about iterator invalidation.
+ std::vector<AliasSet *> ASVector;
+ ASVector.reserve(SaturationThreshold);
+ for (iterator I = begin(), E = end(); I != E; I++)
+ ASVector.push_back(&*I);
+
+ // Copy all instructions and pointers into a new set, and forward all other
+ // sets to it.
+ AliasSets.push_back(new AliasSet());
+ AliasAnyAS = &AliasSets.back();
+ AliasAnyAS->Alias = AliasSet::SetMayAlias;
+ AliasAnyAS->Access = AliasSet::ModRefAccess;
+ AliasAnyAS->AliasAny = true;
+
+ for (auto Cur : ASVector) {
+
+ // If Cur was already forwarding, just forward to the new AS instead.
+ AliasSet *FwdTo = Cur->Forward;
+ if (FwdTo) {
+ Cur->Forward = AliasAnyAS;
+ AliasAnyAS->addRef();
+ FwdTo->dropRef(*this);
+ continue;
+ }
+
+ // Otherwise, perform the actual merge.
+ AliasAnyAS->mergeSetIn(*Cur, *this);
+ }
+
+ return *AliasAnyAS;
+}
+
+AliasSet &AliasSetTracker::addPointer(Value *P, uint64_t Size,
+ const AAMDNodes &AAInfo,
+ AliasSet::AccessLattice E) {
+
+ AliasSet &AS = getAliasSetForPointer(P, Size, AAInfo);
+ AS.Access |= E;
+
+ if (!AliasAnyAS && (TotalMayAliasSetSize > SaturationThreshold)) {
+ // The AST is now saturated. From here on, we conservatively consider all
+ // pointers to alias each-other.
+ return mergeAllAliasSets();
+ }
+ return AS;
+}
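Together with the -alias-set-saturation-threshold option added at the top of this file, this gives the tracker a graceful degradation mode: once the combined size of all may-alias sets exceeds the threshold (250 by default), every pointer is folded into a single AliasAny set that conservatively answers every query with "may alias", trading precision for bounded compile time. A condensed sketch of the saturation bookkeeping (hypothetical, simplified from the code above):

    #include <cstdint>

    struct SaturatingTracker {
      uint64_t TotalMayAliasSetSize = 0;
      bool Saturated = false;
      static const uint64_t Threshold = 250; // -alias-set-saturation-threshold

      void notePointerInMayAliasSet() {
        if (Saturated)
          return;                  // already one big set; stop counting
        if (++TotalMayAliasSetSize > Threshold)
          Saturated = true;        // fold everything into one AliasAny set
      }
    };
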
//===----------------------------------------------------------------------===//
// AliasSet/AliasSetTracker Printing Support
@@ -700,7 +695,7 @@ namespace {
bool runOnFunction(Function &F) override {
auto &AAWP = getAnalysis<AAResultsWrapperPass>();
Tracker = new AliasSetTracker(AAWP.getAAResults());
-
+ errs() << "Alias sets for function '" << F.getName() << "':\n";
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
Tracker->add(&*I);
Tracker->print(errs());
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index c04447ca58c9..0e7cf402cdb5 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -30,10 +30,10 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCallGraphPrinterLegacyPassPass(Registry);
initializeCallGraphViewerPass(Registry);
initializeCostModelAnalysisPass(Registry);
- initializeCFGViewerPass(Registry);
- initializeCFGPrinterPass(Registry);
- initializeCFGOnlyViewerPass(Registry);
- initializeCFGOnlyPrinterPass(Registry);
+ initializeCFGViewerLegacyPassPass(Registry);
+ initializeCFGPrinterLegacyPassPass(Registry);
+ initializeCFGOnlyViewerLegacyPassPass(Registry);
+ initializeCFGOnlyPrinterLegacyPassPass(Registry);
initializeCFLAndersAAWrapperPassPass(Registry);
initializeCFLSteensAAWrapperPassPass(Registry);
initializeDependenceAnalysisWrapperPassPass(Registry);
@@ -54,6 +54,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeIVUsersWrapperPassPass(Registry);
initializeInstCountPass(Registry);
initializeIntervalPartitionPass(Registry);
+ initializeLazyBranchProbabilityInfoPassPass(Registry);
initializeLazyBlockFrequencyInfoPassPass(Registry);
initializeLazyValueInfoWrapperPassPass(Registry);
initializeLintPass(Registry);
@@ -76,6 +77,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeTargetTransformInfoWrapperPassPass(Registry);
initializeTypeBasedAAWrapperPassPass(Registry);
initializeScopedNoAliasAAWrapperPassPass(Registry);
+ initializeLCSSAVerificationPassPass(Registry);
}
void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp
index ca71644757f0..3c518034ba62 100644
--- a/lib/Analysis/AssumptionCache.cpp
+++ b/lib/Analysis/AssumptionCache.cpp
@@ -74,10 +74,10 @@ void AssumptionCache::registerAssumption(CallInst *CI) {
#endif
}
-char AssumptionAnalysis::PassID;
+AnalysisKey AssumptionAnalysis::Key;
PreservedAnalyses AssumptionPrinterPass::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
OS << "Cached assumptions for function: " << F.getName() << "\n";
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 43d5c3ccf907..e8d86bdbca5b 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -63,6 +63,21 @@ const unsigned MaxNumPhiBBsValueReachabilityCheck = 20;
// depth otherwise the algorithm in aliasGEP will assert.
static const unsigned MaxLookupSearchDepth = 6;
+bool BasicAAResult::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // We don't care if this analysis itself is preserved, it has no state. But
+ // we need to check that the analyses it depends on have been. Note that we
+ // may be created without handles to some analyses and in that case don't
+ // depend on them.
+ if (Inv.invalidate<AssumptionAnalysis>(F, PA) ||
+ (DT && Inv.invalidate<DominatorTreeAnalysis>(F, PA)) ||
+ (LI && Inv.invalidate<LoopAnalysis>(F, PA)))
+ return true;
+
+ // Otherwise this analysis result remains valid.
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Useful predicates
//===----------------------------------------------------------------------===//
@@ -227,7 +242,7 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
Offset = 0;
return V;
}
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
case Instruction::Add:
V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits,
SExtBits, DL, Depth + 1, AC, DT, NSW, NUW);
@@ -275,7 +290,7 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
GetLinearExpression(CastOp, Scale, Offset, ZExtBits, SExtBits, DL,
Depth + 1, AC, DT, NSW, NUW);
- // zext(zext(%x)) == zext(%x), and similiarly for sext; we'll handle this
+ // zext(zext(%x)) == zext(%x), and similarly for sext; we'll handle this
// by just incrementing the number of bits we've extended by.
unsigned ExtendedBy = NewWidth - SmallWidth;
@@ -409,11 +424,13 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
gep_type_iterator GTI = gep_type_begin(GEPOp);
unsigned PointerSize = DL.getPointerSizeInBits(AS);
+ // Assume all GEP operands are constants until proven otherwise.
+ bool GepHasConstantOffset = true;
for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
- I != E; ++I) {
+ I != E; ++I, ++GTI) {
const Value *Index = *I;
// Compute the (potentially symbolic) offset in bytes for this index.
- if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
if (FieldNo == 0)
@@ -429,11 +446,13 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
if (CIdx->isZero())
continue;
Decomposed.OtherOffset +=
- DL.getTypeAllocSize(*GTI) * CIdx->getSExtValue();
+ DL.getTypeAllocSize(GTI.getIndexedType()) * CIdx->getSExtValue();
continue;
}
- uint64_t Scale = DL.getTypeAllocSize(*GTI);
+ GepHasConstantOffset = false;
+
+ uint64_t Scale = DL.getTypeAllocSize(GTI.getIndexedType());
unsigned ZExtBits = 0, SExtBits = 0;
// If the integer type is smaller than the pointer size, it is implicitly
@@ -458,7 +477,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// A[x][x] -> x*16 + x*4 -> x*20
// This also ensures that 'x' only appears in the index list once.
for (unsigned i = 0, e = Decomposed.VarIndices.size(); i != e; ++i) {
- if (Decomposed.VarIndices[i].V == Index &&
+ if (Decomposed.VarIndices[i].V == Index &&
Decomposed.VarIndices[i].ZExtBits == ZExtBits &&
Decomposed.VarIndices[i].SExtBits == SExtBits) {
Scale += Decomposed.VarIndices[i].Scale;
@@ -479,10 +498,12 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
}
// Take care of wrap-arounds
- Decomposed.StructOffset =
- adjustToPointerSize(Decomposed.StructOffset, PointerSize);
- Decomposed.OtherOffset =
- adjustToPointerSize(Decomposed.OtherOffset, PointerSize);
+ if (GepHasConstantOffset) {
+ Decomposed.StructOffset =
+ adjustToPointerSize(Decomposed.StructOffset, PointerSize);
+ Decomposed.OtherOffset =
+ adjustToPointerSize(Decomposed.OtherOffset, PointerSize);
+ }
// Analyze the base pointer next.
V = GEPOp->getOperand(0);
@@ -603,6 +624,10 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
if (F->onlyAccessesArgMemory())
Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
+ else if (F->onlyAccessesInaccessibleMemory())
+ Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleMem);
+ else if (F->onlyAccessesInaccessibleMemOrArgMem())
+ Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleOrArgMem);
return Min;
}
@@ -732,7 +757,8 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- (!CS.doesNotCapture(OperandNo) && !CS.isByValArgument(OperandNo)))
+ (!CS.doesNotCapture(OperandNo) &&
+ OperandNo < CS.getNumArgOperands() && !CS.isByValArgument(OperandNo)))
continue;
// If this is a no-capture pointer argument, see if we can tell that it
@@ -765,6 +791,31 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
return MRI_NoModRef;
}
+ // The semantics of memcpy intrinsics forbid overlap between their respective
+ // operands, i.e., source and destination of any given memcpy must no-alias.
+ // If Loc must-aliases either one of these two locations, then it necessarily
+ // no-aliases the other.
+ if (auto *Inst = dyn_cast<MemCpyInst>(CS.getInstruction())) {
+ AliasResult SrcAA, DestAA;
+
+ if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst),
+ Loc)) == MustAlias)
+ // Loc is exactly the memcpy source thus disjoint from memcpy dest.
+ return MRI_Ref;
+ if ((DestAA = getBestAAResults().alias(MemoryLocation::getForDest(Inst),
+ Loc)) == MustAlias)
+ // The converse case.
+ return MRI_Mod;
+
+ // It's also possible for Loc to alias both src and dest, or neither.
+ ModRefInfo rv = MRI_NoModRef;
+ if (SrcAA != NoAlias)
+ rv = static_cast<ModRefInfo>(rv | MRI_Ref);
+ if (DestAA != NoAlias)
+ rv = static_cast<ModRefInfo>(rv | MRI_Mod);
+ return rv;
+ }
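A standalone restatement of the rule implemented above: because memcpy's source and destination may not overlap, a location that must-alias one operand is disjoint from the other, and only the remaining may-alias operands contribute Ref/Mod bits (hypothetical sketch with local enum definitions standing in for LLVM's):

    enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };
    enum ModRefInfo { MRI_NoModRef = 0, MRI_Ref = 1, MRI_Mod = 2,
                      MRI_ModRef = MRI_Ref | MRI_Mod };

    ModRefInfo memcpyModRef(AliasResult SrcAA, AliasResult DestAA) {
      if (SrcAA == MustAlias)
        return MRI_Ref;  // Loc is exactly the source: only read.
      if (DestAA == MustAlias)
        return MRI_Mod;  // Loc is exactly the destination: only written.
      int RV = MRI_NoModRef;
      if (SrcAA != NoAlias)
        RV |= MRI_Ref;   // may overlap the source
      if (DestAA != NoAlias)
        RV |= MRI_Mod;   // may overlap the destination
      return static_cast<ModRefInfo>(RV);
    }
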
+
// While the assume intrinsic is marked as arbitrarily writing so that
// proper control dependencies will be maintained, it never aliases any
// particular memory location.
@@ -781,6 +832,32 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
if (isIntrinsicCall(CS, Intrinsic::experimental_guard))
return MRI_Ref;
+ // Like assumes, invariant.start intrinsics were also marked as arbitrarily
+ // writing so that proper control dependencies are maintained but they never
+ // mod any particular memory location visible to the IR.
+ // *Unlike* assumes (which are now modeled as NoModRef), invariant.start
+ // intrinsic is now modeled as reading memory. This prevents hoisting the
+ // invariant.start intrinsic over stores. Consider:
+ // *ptr = 40;
+ // *ptr = 50;
+ // invariant_start(ptr)
+ // int val = *ptr;
+ // print(val);
+ //
+ // This cannot be transformed to:
+ //
+ // *ptr = 40;
+ // invariant_start(ptr)
+ // *ptr = 50;
+ // int val = *ptr;
+ // print(val);
+ //
+ // The transformation will cause the second store to be ignored (based on
+ // rules of invariant.start) and print 40, while the first program always
+ // prints 50.
+ if (isIntrinsicCall(CS, Intrinsic::invariant_start))
+ return MRI_Ref;
+
// The AAResultBase base class has some smarts, lets use them.
return AAResultBase::getModRefInfo(CS, Loc);
}
@@ -1114,7 +1191,8 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
return MayAlias;
AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize,
- AAMDNodes(), V2, V2Size, V2AAInfo);
+ AAMDNodes(), V2, V2Size, V2AAInfo,
+ nullptr, UnderlyingV2);
if (R != MustAlias)
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
@@ -1251,7 +1329,8 @@ static AliasResult MergeAliasResults(AliasResult A, AliasResult B) {
AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize,
const AAMDNodes &SIAAInfo,
const Value *V2, uint64_t V2Size,
- const AAMDNodes &V2AAInfo) {
+ const AAMDNodes &V2AAInfo,
+ const Value *UnderV2) {
// If the values are Selects with the same condition, we can do a more precise
// check: just check for aliases between the values on corresponding arms.
if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
@@ -1269,12 +1348,14 @@ AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize,
// If both arms of the Select node NoAlias or MustAlias V2, then returns
// NoAlias / MustAlias. Otherwise, returns MayAlias.
AliasResult Alias =
- aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(),
+ SISize, SIAAInfo, UnderV2);
if (Alias == MayAlias)
return MayAlias;
AliasResult ThisAlias =
- aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo,
+ UnderV2);
return MergeAliasResults(ThisAlias, Alias);
}
@@ -1282,8 +1363,8 @@ AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize,
/// another.
AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
const AAMDNodes &PNAAInfo, const Value *V2,
- uint64_t V2Size,
- const AAMDNodes &V2AAInfo) {
+ uint64_t V2Size, const AAMDNodes &V2AAInfo,
+ const Value *UnderV2) {
// Track phi nodes we have visited. We use this information when we determine
// value equivalence.
VisitedPhiBBs.insert(PN->getParent());
@@ -1362,7 +1443,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
PNSize = MemoryLocation::UnknownSize;
AliasResult Alias =
- aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], PNSize, PNAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0],
+ PNSize, PNAAInfo, UnderV2);
// Early exit if the check of the first PHI source against V2 is MayAlias.
// Other results are not possible.
@@ -1375,7 +1457,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
Value *V = V1Srcs[i];
AliasResult ThisAlias =
- aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo, UnderV2);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == MayAlias)
break;
@@ -1388,7 +1470,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
/// array references.
AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
AAMDNodes V1AAInfo, const Value *V2,
- uint64_t V2Size, AAMDNodes V2AAInfo) {
+ uint64_t V2Size, AAMDNodes V2AAInfo,
+ const Value *O1, const Value *O2) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are.
if (V1Size == 0 || V2Size == 0)
@@ -1416,8 +1499,11 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
return NoAlias; // Scalars cannot alias each other
// Figure out what objects these things are pointing to if we can.
- const Value *O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);
- const Value *O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);
+ if (O1 == nullptr)
+ O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);
+
+ if (O2 == nullptr)
+ O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);
// Null values in the default address space don't point to any object, so they
// don't alias any other pointer.
@@ -1500,23 +1586,26 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
std::swap(V1, V2);
+ std::swap(O1, O2);
std::swap(V1Size, V2Size);
std::swap(V1AAInfo, V2AAInfo);
}
if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
- AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo);
+ AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo,
+ V2, V2Size, V2AAInfo, O2);
if (Result != MayAlias)
return AliasCache[Locs] = Result;
}
if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
std::swap(V1, V2);
+ std::swap(O1, O2);
std::swap(V1Size, V2Size);
std::swap(V1AAInfo, V2AAInfo);
}
if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
AliasResult Result =
- aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo);
+ aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2);
if (Result != MayAlias)
return AliasCache[Locs] = Result;
}
@@ -1667,9 +1756,9 @@ bool BasicAAResult::constantOffsetHeuristic(
// BasicAliasAnalysis Pass
//===----------------------------------------------------------------------===//
-char BasicAA::PassID;
+AnalysisKey BasicAA::Key;
-BasicAAResult BasicAA::run(Function &F, AnalysisManager<Function> &AM) {
+BasicAAResult BasicAA::run(Function &F, FunctionAnalysisManager &AM) {
return BasicAAResult(F.getParent()->getDataLayout(),
AM.getResult<TargetLibraryAnalysis>(F),
AM.getResult<AssumptionAnalysis>(F),
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 1dd8f4fdfcfe..4cdbe4d0fcf6 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -39,8 +39,7 @@ static cl::opt<GVDAGType> ViewBlockFreqPropagationDAG(
"display a graph using the raw "
"integer fractional block frequency representation."),
clEnumValN(GVDT_Count, "count", "display a graph using the real "
- "profile count if available."),
- clEnumValEnd));
+ "profile count if available.")));
cl::opt<std::string>
ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden,
@@ -60,24 +59,22 @@ namespace llvm {
template <>
struct GraphTraits<BlockFrequencyInfo *> {
- typedef const BasicBlock NodeType;
+ typedef const BasicBlock *NodeRef;
typedef succ_const_iterator ChildIteratorType;
- typedef Function::const_iterator nodes_iterator;
+ typedef pointer_iterator<Function::const_iterator> nodes_iterator;
- static inline const NodeType *getEntryNode(const BlockFrequencyInfo *G) {
+ static NodeRef getEntryNode(const BlockFrequencyInfo *G) {
return &G->getFunction()->front();
}
- static ChildIteratorType child_begin(const NodeType *N) {
+ static ChildIteratorType child_begin(const NodeRef N) {
return succ_begin(N);
}
- static ChildIteratorType child_end(const NodeType *N) {
- return succ_end(N);
- }
+ static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
static nodes_iterator nodes_begin(const BlockFrequencyInfo *G) {
- return G->getFunction()->begin();
+ return nodes_iterator(G->getFunction()->begin());
}
static nodes_iterator nodes_end(const BlockFrequencyInfo *G) {
- return G->getFunction()->end();
+ return nodes_iterator(G->getFunction()->end());
}
};
@@ -162,6 +159,13 @@ BlockFrequencyInfo::getBlockProfileCount(const BasicBlock *BB) const {
return BFI->getBlockProfileCount(*getFunction(), BB);
}
+Optional<uint64_t>
+BlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const {
+ if (!BFI)
+ return None;
+ return BFI->getProfileCountFromFreq(*getFunction(), Freq);
+}
+
void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, uint64_t Freq) {
assert(BFI && "Expected analysis to be available");
BFI->setBlockFreq(BB, Freq);
@@ -248,9 +252,9 @@ bool BlockFrequencyInfoWrapperPass::runOnFunction(Function &F) {
return false;
}
-char BlockFrequencyAnalysis::PassID;
+AnalysisKey BlockFrequencyAnalysis::Key;
BlockFrequencyInfo BlockFrequencyAnalysis::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
BlockFrequencyInfo BFI;
BFI.calculate(F, AM.getResult<BranchProbabilityAnalysis>(F),
AM.getResult<LoopAnalysis>(F));
@@ -258,7 +262,7 @@ BlockFrequencyInfo BlockFrequencyAnalysis::run(Function &F,
}
PreservedAnalyses
-BlockFrequencyPrinterPass::run(Function &F, AnalysisManager<Function> &AM) {
+BlockFrequencyPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
OS << "Printing analysis results of BFI for function "
<< "'" << F.getName() << "':"
<< "\n";
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index c2039e1dec2b..9850e02fca22 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -533,12 +533,18 @@ BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
Optional<uint64_t>
BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F,
const BlockNode &Node) const {
+ return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency());
+}
+
+Optional<uint64_t>
+BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
+ uint64_t Freq) const {
auto EntryCount = F.getEntryCount();
if (!EntryCount)
return None;
// Use 128 bit APInt to do the arithmetic to avoid overflow.
APInt BlockCount(128, EntryCount.getValue());
- APInt BlockFreq(128, getBlockFreq(Node).getFrequency());
+ APInt BlockFreq(128, Freq);
APInt EntryFreq(128, getEntryFreq());
BlockCount *= BlockFreq;
BlockCount = BlockCount.udiv(EntryFreq);
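The arithmetic here is Count = EntryCount * Freq / EntryFreq, with the intermediate product widened to 128 bits so it cannot overflow uint64_t. A standalone sketch of the same computation (hypothetical; uses the GCC/Clang __int128 extension where LLVM uses APInt):

    #include <cstdint>

    uint64_t profileCountFromFreq(uint64_t EntryCount, uint64_t Freq,
                                  uint64_t EntryFreq) {
      // Widen before multiplying: EntryCount * Freq can exceed 64 bits.
      unsigned __int128 Product = (unsigned __int128)EntryCount * Freq;
      return (uint64_t)(Product / EntryFreq);
    }

    // E.g. EntryCount = 1000, Freq = 8, EntryFreq = 16 yields a count of 500.
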
@@ -622,15 +628,12 @@ namespace llvm {
template <> struct GraphTraits<IrreducibleGraph> {
typedef bfi_detail::IrreducibleGraph GraphT;
- typedef const GraphT::IrrNode NodeType;
typedef const GraphT::IrrNode *NodeRef;
typedef GraphT::IrrNode::iterator ChildIteratorType;
- static const NodeType *getEntryNode(const GraphT &G) {
- return G.StartIrr;
- }
- static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); }
- static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); }
+ static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
};
} // end namespace llvm
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index d802552d4e29..3eabb780398c 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -162,12 +162,12 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
return true;
}
- BranchProbability UnreachableProb(UR_TAKEN_WEIGHT,
- (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
- UnreachableEdges.size());
- BranchProbability ReachableProb(UR_NONTAKEN_WEIGHT,
- (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
- ReachableEdges.size());
+ auto UnreachableProb = BranchProbability::getBranchProbability(
+ UR_TAKEN_WEIGHT, (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
+ uint64_t(UnreachableEdges.size()));
+ auto ReachableProb = BranchProbability::getBranchProbability(
+ UR_NONTAKEN_WEIGHT,
+ (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * uint64_t(ReachableEdges.size()));
for (unsigned SuccIdx : UnreachableEdges)
setEdgeProbability(BB, SuccIdx, UnreachableProb);
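The uint64_t casts are the substance of this change: the weight sum times the edge count was previously evaluated in 32-bit arithmetic and could wrap for blocks with very many successors before BranchProbability ever saw it. A minimal demonstration of the wrap (hypothetical weight values):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t WeightSum = 1u << 20; // e.g. taken + non-taken weight sum
      uint32_t NumEdges  = 1u << 13; // a switch with many successors
      uint32_t Narrow = WeightSum * NumEdges;         // wraps mod 2^32 -> 0
      uint64_t Wide = uint64_t(WeightSum) * NumEdges; // 2^33, as intended
      printf("narrow=%u wide=%llu\n", Narrow, (unsigned long long)Wide);
      return 0;
    }
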
@@ -279,6 +279,16 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
}
}
+ if (auto *II = dyn_cast<InvokeInst>(TI)) {
+ // If the terminator is an InvokeInst, consider only the normal destination
+ // block.
+ if (PostDominatedByColdCall.count(II->getNormalDest()))
+ PostDominatedByColdCall.insert(BB);
+ // Return false here so that edge weights for InvokeInst could be decided
+ // in calcInvokeHeuristics().
+ return false;
+ }
+
// Skip probabilities if this block has a single successor.
if (TI->getNumSuccessors() == 1 || ColdEdges.empty())
return false;
@@ -290,12 +300,12 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
return true;
}
- BranchProbability ColdProb(CC_TAKEN_WEIGHT,
- (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) *
- ColdEdges.size());
- BranchProbability NormalProb(CC_NONTAKEN_WEIGHT,
- (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) *
- NormalEdges.size());
+ auto ColdProb = BranchProbability::getBranchProbability(
+ CC_TAKEN_WEIGHT,
+ (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(ColdEdges.size()));
+ auto NormalProb = BranchProbability::getBranchProbability(
+ CC_NONTAKEN_WEIGHT,
+ (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(NormalEdges.size()));
for (unsigned SuccIdx : ColdEdges)
setEdgeProbability(BB, SuccIdx, ColdProb);
@@ -701,16 +711,16 @@ void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS,
BPI.print(OS);
}
-char BranchProbabilityAnalysis::PassID;
+AnalysisKey BranchProbabilityAnalysis::Key;
BranchProbabilityInfo
-BranchProbabilityAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
+BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
BranchProbabilityInfo BPI;
BPI.calculate(F, AM.getResult<LoopAnalysis>(F));
return BPI;
}
PreservedAnalyses
-BranchProbabilityPrinterPass::run(Function &F, AnalysisManager<Function> &AM) {
+BranchProbabilityPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
OS << "Printing analysis results of BPI for function "
<< "'" << F.getName() << "':"
<< "\n";
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index c86f1f55954b..a85af6c9c93f 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -23,10 +23,10 @@
using namespace llvm;
namespace {
- struct CFGViewer : public FunctionPass {
+ struct CFGViewerLegacyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGViewer() : FunctionPass(ID) {
- initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+ CFGViewerLegacyPass() : FunctionPass(ID) {
+ initializeCFGViewerLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
@@ -42,14 +42,21 @@ namespace {
};
}
-char CFGViewer::ID = 0;
-INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true)
+char CFGViewerLegacyPass::ID = 0;
+INITIALIZE_PASS(CFGViewerLegacyPass, "view-cfg", "View CFG of function", false, true)
+
+PreservedAnalyses CFGViewerPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ F.viewCFG();
+ return PreservedAnalyses::all();
+}
+
namespace {
- struct CFGOnlyViewer : public FunctionPass {
+ struct CFGOnlyViewerLegacyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGOnlyViewer() : FunctionPass(ID) {
- initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+ CFGOnlyViewerLegacyPass() : FunctionPass(ID) {
+ initializeCFGOnlyViewerLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
@@ -65,29 +72,39 @@ namespace {
};
}
-char CFGOnlyViewer::ID = 0;
-INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only",
+char CFGOnlyViewerLegacyPass::ID = 0;
+INITIALIZE_PASS(CFGOnlyViewerLegacyPass, "view-cfg-only",
"View CFG of function (with no function bodies)", false, true)
+PreservedAnalyses CFGOnlyViewerPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ F.viewCFGOnly();
+ return PreservedAnalyses::all();
+}
+
+static void writeCFGToDotFile(Function &F) {
+ std::string Filename = ("cfg." + F.getName() + ".dot").str();
+ errs() << "Writing '" << Filename << "'...";
+
+ std::error_code EC;
+ raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+
+ if (!EC)
+ WriteGraph(File, (const Function*)&F);
+ else
+ errs() << " error opening file for writing!";
+ errs() << "\n";
+}
+
namespace {
- struct CFGPrinter : public FunctionPass {
+ struct CFGPrinterLegacyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGPrinter() : FunctionPass(ID) {
- initializeCFGPrinterPass(*PassRegistry::getPassRegistry());
+ CFGPrinterLegacyPass() : FunctionPass(ID) {
+ initializeCFGPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
- std::string Filename = ("cfg." + F.getName() + ".dot").str();
- errs() << "Writing '" << Filename << "'...";
-
- std::error_code EC;
- raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
-
- if (!EC)
- WriteGraph(File, (const Function*)&F);
- else
- errs() << " error opening file for writing!";
- errs() << "\n";
+ writeCFGToDotFile(F);
return false;
}
@@ -99,29 +116,25 @@ namespace {
};
}
-char CFGPrinter::ID = 0;
-INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file",
+char CFGPrinterLegacyPass::ID = 0;
+INITIALIZE_PASS(CFGPrinterLegacyPass, "dot-cfg", "Print CFG of function to 'dot' file",
false, true)
+PreservedAnalyses CFGPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ writeCFGToDotFile(F);
+ return PreservedAnalyses::all();
+}
+
namespace {
- struct CFGOnlyPrinter : public FunctionPass {
+ struct CFGOnlyPrinterLegacyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGOnlyPrinter() : FunctionPass(ID) {
- initializeCFGOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ CFGOnlyPrinterLegacyPass() : FunctionPass(ID) {
+ initializeCFGOnlyPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
- std::string Filename = ("cfg." + F.getName() + ".dot").str();
- errs() << "Writing '" << Filename << "'...";
-
- std::error_code EC;
- raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
-
- if (!EC)
- WriteGraph(File, (const Function*)&F, true);
- else
- errs() << " error opening file for writing!";
- errs() << "\n";
+ writeCFGToDotFile(F);
return false;
}
void print(raw_ostream &OS, const Module* = nullptr) const override {}
@@ -132,11 +145,17 @@ namespace {
};
}
-char CFGOnlyPrinter::ID = 0;
-INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only",
+char CFGOnlyPrinterLegacyPass::ID = 0;
+INITIALIZE_PASS(CFGOnlyPrinterLegacyPass, "dot-cfg-only",
"Print CFG of function to 'dot' file (with no function bodies)",
false, true)
+PreservedAnalyses CFGOnlyPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ writeCFGToDotFile(F);
+ return PreservedAnalyses::all();
+}
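+
+// Minimal usage sketch for the new-PM printer/viewer passes above (assumes
+// a FunctionPassManager configured elsewhere; illustrative only):
+//   FunctionPassManager FPM;
+//   FPM.addPass(CFGPrinterPass());  // writes cfg.<function>.dot
+//   FPM.addPass(CFGViewerPass());   // opens a viewer window if available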
+
/// viewCFG - This function is meant for use from the debugger. You can just
/// say 'call F->viewCFG()' and a ghostview window should pop up from the
/// program, displaying the CFG of the current function. This depends on there
@@ -155,11 +174,11 @@ void Function::viewCFGOnly() const {
ViewGraph(this, "cfg" + getName(), true);
}
-FunctionPass *llvm::createCFGPrinterPass () {
- return new CFGPrinter();
+FunctionPass *llvm::createCFGPrinterLegacyPassPass () {
+ return new CFGPrinterLegacyPass();
}
-FunctionPass *llvm::createCFGOnlyPrinterPass () {
- return new CFGOnlyPrinter();
+FunctionPass *llvm::createCFGOnlyPrinterLegacyPassPass () {
+ return new CFGOnlyPrinterLegacyPass();
}
diff --git a/lib/Analysis/CFLAndersAliasAnalysis.cpp b/lib/Analysis/CFLAndersAliasAnalysis.cpp
index 7d5bd94133a7..e48ff230f43c 100644
--- a/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -27,12 +27,23 @@
// codes: all we do here is to selectively expand the transitive closure by
// discarding edges that are not recognized by the state machine.
//
-// There is one difference between our current implementation and the one
-// described in the paper: out algorithm eagerly computes all alias pairs after
-// the CFLGraph is built, while in the paper the authors did the computation in
-// a demand-driven fashion. We did not implement the demand-driven algorithm due
-// to the additional coding complexity and higher memory profile, but if we
-// found it necessary we may switch to it eventually.
+// There are two differences between our current implementation and the one
+// described in the paper:
+// - Our algorithm eagerly computes all alias pairs after the CFLGraph is built,
+// while in the paper the authors did the computation in a demand-driven
+// fashion. We did not implement the demand-driven algorithm due to the
+// additional coding complexity and higher memory profile, but if we found it
+// necessary we may switch to it eventually.
+// - In the paper the authors use a state machine that does not distinguish
+// value reads from value writes. For example, if Y is reachable from X at state
+// S3, it may be the case that X is written into Y, or it may be the case that
+// there's a third value Z that writes into both X and Y. To make that
+// distinction (which is crucial in building function summary as well as
+// retrieving mod-ref info), we choose to duplicate some of the states in the
+// paper's proposed state machine. The duplication does not change the set the
+// machine accepts. Given a pair of reachable values, it only provides more
+// detailed information on which value is being written into and which is being
+// read from.
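+//
+// As a hedged illustration of the third-value case (the names below are
+// invented for exposition, not taken from the paper):
+//   int *Z = ...;
+//   int *X = Z; // Z is written into X
+//   int *Y = Z; // Z is written into Y
+// Here X and Y alias through Z, yet neither is written into the other; the
+// duplicated states let the analysis tell these two situations apart.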
//
//===----------------------------------------------------------------------===//
@@ -71,16 +82,65 @@ static const Function *parentFunctionOfValue(const Value *Val) {
namespace {
enum class MatchState : uint8_t {
- FlowFrom = 0, // S1 in the paper
- FlowFromMemAlias, // S2 in the paper
- FlowTo, // S3 in the paper
- FlowToMemAlias // S4 in the paper
+ // The following state represents S1 in the paper.
+ FlowFromReadOnly = 0,
+ // The following two states together represent S2 in the paper.
+ // The 'NoReadWrite' suffix indicates that there exists an alias path that
+  // contains neither assignment nor reverse assignment edges.
+ // The 'ReadOnly' suffix indicates that there exists an alias path that
+ // contains reverse assignment edges only.
+ FlowFromMemAliasNoReadWrite,
+ FlowFromMemAliasReadOnly,
+ // The following two states together represent S3 in the paper.
+ // The 'WriteOnly' suffix indicates that there exists an alias path that
+ // contains assignment edges only.
+ // The 'ReadWrite' suffix indicates that there exists an alias path that
+ // contains both assignment and reverse assignment edges. Note that if X and Y
+  // are reachable at 'ReadWrite' state, it does NOT mean that X is both read
+  // from and written into Y. Instead, it means that some third value Z is
+  // written into both X and Y.
+ FlowToWriteOnly,
+ FlowToReadWrite,
+ // The following two states together represent S4 in the paper.
+ FlowToMemAliasWriteOnly,
+ FlowToMemAliasReadWrite,
};
+typedef std::bitset<7> StateSet;
+const unsigned ReadOnlyStateMask =
+ (1U << static_cast<uint8_t>(MatchState::FlowFromReadOnly)) |
+ (1U << static_cast<uint8_t>(MatchState::FlowFromMemAliasReadOnly));
+const unsigned WriteOnlyStateMask =
+ (1U << static_cast<uint8_t>(MatchState::FlowToWriteOnly)) |
+ (1U << static_cast<uint8_t>(MatchState::FlowToMemAliasWriteOnly));
+
+// A pair that consists of a value and an offset
+struct OffsetValue {
+ const Value *Val;
+ int64_t Offset;
+};
+
+bool operator==(OffsetValue LHS, OffsetValue RHS) {
+ return LHS.Val == RHS.Val && LHS.Offset == RHS.Offset;
+}
+bool operator<(OffsetValue LHS, OffsetValue RHS) {
+ return std::less<const Value *>()(LHS.Val, RHS.Val) ||
+ (LHS.Val == RHS.Val && LHS.Offset < RHS.Offset);
+}
+
+// A pair that consists of an InstantiatedValue and an offset
+struct OffsetInstantiatedValue {
+ InstantiatedValue IVal;
+ int64_t Offset;
+};
+
+bool operator==(OffsetInstantiatedValue LHS, OffsetInstantiatedValue RHS) {
+ return LHS.IVal == RHS.IVal && LHS.Offset == RHS.Offset;
+}
+
// We use ReachabilitySet to keep track of value aliases (The nonterminal "V" in
// the paper) during the analysis.
class ReachabilitySet {
- typedef std::bitset<4> StateSet;
typedef DenseMap<InstantiatedValue, StateSet> ValueStateMap;
typedef DenseMap<InstantiatedValue, ValueStateMap> ValueReachMap;
ValueReachMap ReachMap;
@@ -91,6 +151,7 @@ public:
// Insert edge 'From->To' at state 'State'
bool insert(InstantiatedValue From, InstantiatedValue To, MatchState State) {
+ assert(From != To);
auto &States = ReachMap[To][From];
auto Idx = static_cast<size_t>(State);
if (!States.test(Idx)) {
@@ -150,8 +211,6 @@ public:
typedef MapType::const_iterator const_iterator;
bool add(InstantiatedValue V, AliasAttrs Attr) {
- if (Attr.none())
- return false;
auto &OldAttr = AttrMap[V];
auto NewAttr = OldAttr | Attr;
if (OldAttr == NewAttr)
@@ -178,6 +237,57 @@ struct WorkListItem {
InstantiatedValue To;
MatchState State;
};
+
+struct ValueSummary {
+ struct Record {
+ InterfaceValue IValue;
+ unsigned DerefLevel;
+ };
+ SmallVector<Record, 4> FromRecords, ToRecords;
+};
+}
+
+namespace llvm {
+// Specialize DenseMapInfo for OffsetValue.
+template <> struct DenseMapInfo<OffsetValue> {
+ static OffsetValue getEmptyKey() {
+ return OffsetValue{DenseMapInfo<const Value *>::getEmptyKey(),
+ DenseMapInfo<int64_t>::getEmptyKey()};
+ }
+ static OffsetValue getTombstoneKey() {
+ return OffsetValue{DenseMapInfo<const Value *>::getTombstoneKey(),
+ DenseMapInfo<int64_t>::getEmptyKey()};
+ }
+ static unsigned getHashValue(const OffsetValue &OVal) {
+ return DenseMapInfo<std::pair<const Value *, int64_t>>::getHashValue(
+ std::make_pair(OVal.Val, OVal.Offset));
+ }
+ static bool isEqual(const OffsetValue &LHS, const OffsetValue &RHS) {
+ return LHS == RHS;
+ }
+};
+
+// Specialize DenseMapInfo for OffsetInstantiatedValue.
+template <> struct DenseMapInfo<OffsetInstantiatedValue> {
+ static OffsetInstantiatedValue getEmptyKey() {
+ return OffsetInstantiatedValue{
+ DenseMapInfo<InstantiatedValue>::getEmptyKey(),
+ DenseMapInfo<int64_t>::getEmptyKey()};
+ }
+ static OffsetInstantiatedValue getTombstoneKey() {
+ return OffsetInstantiatedValue{
+ DenseMapInfo<InstantiatedValue>::getTombstoneKey(),
+ DenseMapInfo<int64_t>::getEmptyKey()};
+ }
+ static unsigned getHashValue(const OffsetInstantiatedValue &OVal) {
+ return DenseMapInfo<std::pair<InstantiatedValue, int64_t>>::getHashValue(
+ std::make_pair(OVal.IVal, OVal.Offset));
+ }
+ static bool isEqual(const OffsetInstantiatedValue &LHS,
+ const OffsetInstantiatedValue &RHS) {
+ return LHS == RHS;
+ }
+};
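+
+// With the specializations above, both types can be used directly as DenseMap
+// keys, e.g. (illustrative only): DenseMap<OffsetValue, unsigned> UseCounts;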
}
class CFLAndersAAResult::FunctionInfo {
@@ -185,7 +295,7 @@ class CFLAndersAAResult::FunctionInfo {
/// Since the alias relation is symmetric, to save some space we assume values
/// are properly ordered: if a and b alias each other, and a < b, then b is in
/// AliasMap[a] but not vice versa.
- DenseMap<const Value *, std::vector<const Value *>> AliasMap;
+ DenseMap<const Value *, std::vector<OffsetValue>> AliasMap;
/// Map a value to its corresponding AliasAttrs
DenseMap<const Value *, AliasAttrs> AttrMap;
@@ -193,27 +303,56 @@ class CFLAndersAAResult::FunctionInfo {
/// Summary of externally visible effects.
AliasSummary Summary;
- AliasAttrs getAttrs(const Value *) const;
+ Optional<AliasAttrs> getAttrs(const Value *) const;
public:
- FunctionInfo(const ReachabilitySet &, AliasAttrMap);
+ FunctionInfo(const Function &, const SmallVectorImpl<Value *> &,
+ const ReachabilitySet &, AliasAttrMap);
- bool mayAlias(const Value *LHS, const Value *RHS) const;
+ bool mayAlias(const Value *, uint64_t, const Value *, uint64_t) const;
const AliasSummary &getAliasSummary() const { return Summary; }
};
-CFLAndersAAResult::FunctionInfo::FunctionInfo(const ReachabilitySet &ReachSet,
- AliasAttrMap AMap) {
- // Populate AttrMap
+static bool hasReadOnlyState(StateSet Set) {
+ return (Set & StateSet(ReadOnlyStateMask)).any();
+}
+
+static bool hasWriteOnlyState(StateSet Set) {
+ return (Set & StateSet(WriteOnlyStateMask)).any();
+}
+
+static Optional<InterfaceValue>
+getInterfaceValue(InstantiatedValue IValue,
+ const SmallVectorImpl<Value *> &RetVals) {
+ auto Val = IValue.Val;
+
+ Optional<unsigned> Index;
+ if (auto Arg = dyn_cast<Argument>(Val))
+ Index = Arg->getArgNo() + 1;
+ else if (is_contained(RetVals, Val))
+ Index = 0;
+
+ if (Index)
+ return InterfaceValue{*Index, IValue.DerefLevel};
+ return None;
+}
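+
+// Index convention sketch (hypothetical signature): given
+// "int *f(int *A, int *B)", the return value maps to interface index 0, A to
+// index 1, and B to index 2, with DerefLevel carried over unchanged.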
+
+static void populateAttrMap(DenseMap<const Value *, AliasAttrs> &AttrMap,
+ const AliasAttrMap &AMap) {
for (const auto &Mapping : AMap.mappings()) {
auto IVal = Mapping.first;
+ // Insert IVal into the map
+ auto &Attr = AttrMap[IVal.Val];
// AttrMap only cares about top-level values
if (IVal.DerefLevel == 0)
- AttrMap[IVal.Val] = Mapping.second;
+ Attr |= Mapping.second;
}
+}
- // Populate AliasMap
+static void
+populateAliasMap(DenseMap<const Value *, std::vector<OffsetValue>> &AliasMap,
+ const ReachabilitySet &ReachSet) {
for (const auto &OuterMapping : ReachSet.value_mappings()) {
// AliasMap only cares about top-level values
if (OuterMapping.first.DerefLevel > 0)
@@ -224,48 +363,202 @@ CFLAndersAAResult::FunctionInfo::FunctionInfo(const ReachabilitySet &ReachSet,
for (const auto &InnerMapping : OuterMapping.second) {
// Again, AliasMap only cares about top-level values
if (InnerMapping.first.DerefLevel == 0)
- AliasList.push_back(InnerMapping.first.Val);
+ AliasList.push_back(OffsetValue{InnerMapping.first.Val, UnknownOffset});
}
// Sort AliasList for faster lookup
- std::sort(AliasList.begin(), AliasList.end(), std::less<const Value *>());
+ std::sort(AliasList.begin(), AliasList.end());
}
+}
- // TODO: Populate function summary here
+static void populateExternalRelations(
+ SmallVectorImpl<ExternalRelation> &ExtRelations, const Function &Fn,
+ const SmallVectorImpl<Value *> &RetVals, const ReachabilitySet &ReachSet) {
+ // If a function only returns one of its argument X, then X will be both an
+ // argument and a return value at the same time. This is an edge case that
+ // needs special handling here.
+ for (const auto &Arg : Fn.args()) {
+ if (is_contained(RetVals, &Arg)) {
+ auto ArgVal = InterfaceValue{Arg.getArgNo() + 1, 0};
+ auto RetVal = InterfaceValue{0, 0};
+ ExtRelations.push_back(ExternalRelation{ArgVal, RetVal, 0});
+ }
+ }
+
+ // Below is the core summary construction logic.
+ // A naive solution of adding only the value aliases that are parameters or
+ // return values in ReachSet to the summary won't work: It is possible that a
+ // parameter P is written into an intermediate value I, and the function
+  // subsequently returns *I. In that case, *I does not value alias anything
+ // in ReachSet, and the naive solution will miss a summary edge from (P, 1) to
+ // (I, 1).
+ // To account for the aforementioned case, we need to check each non-parameter
+ // and non-return value for the possibility of acting as an intermediate.
+ // 'ValueMap' here records, for each value, which InterfaceValues read from or
+ // write into it. If both the read list and the write list of a given value
+  // are non-empty, we know that the value is an intermediate and we
+ // need to add summary edges from the writes to the reads.
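+  //
+  // A hedged sketch of the intermediate case (names invented for exposition):
+  //   int *g(int *P) {
+  //     int **I = ...;  // some local value, neither parameter nor return
+  //     *I = P;         // P is written into *I
+  //     return *I;      // ...and *I flows out through the return value
+  //   }
+  // The summary must still relate (P, 1) to the return value even though *I
+  // itself never value-aliases a parameter or return value.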
+ DenseMap<Value *, ValueSummary> ValueMap;
+ for (const auto &OuterMapping : ReachSet.value_mappings()) {
+ if (auto Dst = getInterfaceValue(OuterMapping.first, RetVals)) {
+ for (const auto &InnerMapping : OuterMapping.second) {
+ // If Src is a param/return value, we get a same-level assignment.
+ if (auto Src = getInterfaceValue(InnerMapping.first, RetVals)) {
+ // This may happen if both Dst and Src are return values
+ if (*Dst == *Src)
+ continue;
+
+ if (hasReadOnlyState(InnerMapping.second))
+ ExtRelations.push_back(ExternalRelation{*Dst, *Src, UnknownOffset});
+ // No need to check for WriteOnly state, since ReachSet is symmetric
+ } else {
+ // If Src is not a param/return, add it to ValueMap
+ auto SrcIVal = InnerMapping.first;
+ if (hasReadOnlyState(InnerMapping.second))
+ ValueMap[SrcIVal.Val].FromRecords.push_back(
+ ValueSummary::Record{*Dst, SrcIVal.DerefLevel});
+ if (hasWriteOnlyState(InnerMapping.second))
+ ValueMap[SrcIVal.Val].ToRecords.push_back(
+ ValueSummary::Record{*Dst, SrcIVal.DerefLevel});
+ }
+ }
+ }
+ }
+
+ for (const auto &Mapping : ValueMap) {
+ for (const auto &FromRecord : Mapping.second.FromRecords) {
+ for (const auto &ToRecord : Mapping.second.ToRecords) {
+ auto ToLevel = ToRecord.DerefLevel;
+ auto FromLevel = FromRecord.DerefLevel;
+ // Same-level assignments should have already been processed by now
+ if (ToLevel == FromLevel)
+ continue;
+
+ auto SrcIndex = FromRecord.IValue.Index;
+ auto SrcLevel = FromRecord.IValue.DerefLevel;
+ auto DstIndex = ToRecord.IValue.Index;
+ auto DstLevel = ToRecord.IValue.DerefLevel;
+ if (ToLevel > FromLevel)
+ SrcLevel += ToLevel - FromLevel;
+ else
+ DstLevel += FromLevel - ToLevel;
+
+ ExtRelations.push_back(ExternalRelation{
+ InterfaceValue{SrcIndex, SrcLevel},
+ InterfaceValue{DstIndex, DstLevel}, UnknownOffset});
+ }
+ }
+ }
+
+ // Remove duplicates in ExtRelations
+ std::sort(ExtRelations.begin(), ExtRelations.end());
+ ExtRelations.erase(std::unique(ExtRelations.begin(), ExtRelations.end()),
+ ExtRelations.end());
+}
+
+static void populateExternalAttributes(
+ SmallVectorImpl<ExternalAttribute> &ExtAttributes, const Function &Fn,
+ const SmallVectorImpl<Value *> &RetVals, const AliasAttrMap &AMap) {
+ for (const auto &Mapping : AMap.mappings()) {
+ if (auto IVal = getInterfaceValue(Mapping.first, RetVals)) {
+ auto Attr = getExternallyVisibleAttrs(Mapping.second);
+ if (Attr.any())
+ ExtAttributes.push_back(ExternalAttribute{*IVal, Attr});
+ }
+ }
+}
+
+CFLAndersAAResult::FunctionInfo::FunctionInfo(
+ const Function &Fn, const SmallVectorImpl<Value *> &RetVals,
+ const ReachabilitySet &ReachSet, AliasAttrMap AMap) {
+ populateAttrMap(AttrMap, AMap);
+ populateExternalAttributes(Summary.RetParamAttributes, Fn, RetVals, AMap);
+ populateAliasMap(AliasMap, ReachSet);
+ populateExternalRelations(Summary.RetParamRelations, Fn, RetVals, ReachSet);
}
-AliasAttrs CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const {
+Optional<AliasAttrs>
+CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const {
assert(V != nullptr);
- AliasAttrs Attr;
auto Itr = AttrMap.find(V);
if (Itr != AttrMap.end())
- Attr = Itr->second;
- return Attr;
+ return Itr->second;
+ return None;
}
bool CFLAndersAAResult::FunctionInfo::mayAlias(const Value *LHS,
- const Value *RHS) const {
+ uint64_t LHSSize,
+ const Value *RHS,
+ uint64_t RHSSize) const {
assert(LHS && RHS);
+ // Check if we've seen LHS and RHS before. Sometimes LHS or RHS can be created
+ // after the analysis gets executed, and we want to be conservative in those
+ // cases.
+ auto MaybeAttrsA = getAttrs(LHS);
+ auto MaybeAttrsB = getAttrs(RHS);
+ if (!MaybeAttrsA || !MaybeAttrsB)
+ return true;
+
+ // Check AliasAttrs before AliasMap lookup since it's cheaper
+ auto AttrsA = *MaybeAttrsA;
+ auto AttrsB = *MaybeAttrsB;
+ if (hasUnknownOrCallerAttr(AttrsA))
+ return AttrsB.any();
+ if (hasUnknownOrCallerAttr(AttrsB))
+ return AttrsA.any();
+ if (isGlobalOrArgAttr(AttrsA))
+ return isGlobalOrArgAttr(AttrsB);
+ if (isGlobalOrArgAttr(AttrsB))
+ return isGlobalOrArgAttr(AttrsA);
+
+ // At this point both LHS and RHS should point to locally allocated objects
+
auto Itr = AliasMap.find(LHS);
if (Itr != AliasMap.end()) {
- if (std::binary_search(Itr->second.begin(), Itr->second.end(), RHS,
- std::less<const Value *>()))
- return true;
- }
- // Even if LHS and RHS are not reachable, they may still alias due to their
- // AliasAttrs
- auto AttrsA = getAttrs(LHS);
- auto AttrsB = getAttrs(RHS);
+ // Find out all (X, Offset) where X == RHS
+ auto Comparator = [](OffsetValue LHS, OffsetValue RHS) {
+ return std::less<const Value *>()(LHS.Val, RHS.Val);
+ };
+#ifdef EXPENSIVE_CHECKS
+ assert(std::is_sorted(Itr->second.begin(), Itr->second.end(), Comparator));
+#endif
+ auto RangePair = std::equal_range(Itr->second.begin(), Itr->second.end(),
+ OffsetValue{RHS, 0}, Comparator);
+
+ if (RangePair.first != RangePair.second) {
+ // Be conservative about UnknownSize
+ if (LHSSize == MemoryLocation::UnknownSize ||
+ RHSSize == MemoryLocation::UnknownSize)
+ return true;
+
+ for (const auto &OVal : make_range(RangePair)) {
+ // Be conservative about UnknownOffset
+ if (OVal.Offset == UnknownOffset)
+ return true;
+
+ // We know that LHS aliases (RHS + OVal.Offset) if the control flow
+ // reaches here. The may-alias query essentially becomes integer
+ // range-overlap queries over two ranges [OVal.Offset, OVal.Offset +
+ // LHSSize) and [0, RHSSize).
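+      //
+      // Worked example with illustrative numbers: if OVal.Offset == 4 and
+      // LHSSize == RHSSize == 8, the two ranges are [4, 12) and [0, 8);
+      // they overlap, so we conservatively answer may-alias.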
+
+ // Try to be conservative on super large offsets
+ if (LLVM_UNLIKELY(LHSSize > INT64_MAX || RHSSize > INT64_MAX))
+ return true;
+
+ auto LHSStart = OVal.Offset;
+ // FIXME: Do we need to guard against integer overflow?
+ auto LHSEnd = OVal.Offset + static_cast<int64_t>(LHSSize);
+ auto RHSStart = 0;
+ auto RHSEnd = static_cast<int64_t>(RHSSize);
+ if (LHSEnd > RHSStart && LHSStart < RHSEnd)
+ return true;
+ }
+ }
+ }
- if (AttrsA.none() || AttrsB.none())
- return false;
- if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB))
- return true;
- if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB))
- return true;
return false;
}
@@ -292,8 +585,10 @@ static void initializeWorkList(std::vector<WorkListItem> &WorkList,
// If there's an assignment edge from X to Y, it means Y is reachable from
// X at S2 and X is reachable from Y at S1
for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) {
- propagate(Edge.Other, Src, MatchState::FlowFrom, ReachSet, WorkList);
- propagate(Src, Edge.Other, MatchState::FlowTo, ReachSet, WorkList);
+ propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet,
+ WorkList);
+ propagate(Src, Edge.Other, MatchState::FlowToWriteOnly, ReachSet,
+ WorkList);
}
}
}
@@ -328,16 +623,21 @@ static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph,
auto ToNodeBelow = getNodeBelow(Graph, ToNode);
if (FromNodeBelow && ToNodeBelow &&
MemSet.insert(*FromNodeBelow, *ToNodeBelow)) {
- propagate(*FromNodeBelow, *ToNodeBelow, MatchState::FlowFromMemAlias,
- ReachSet, WorkList);
+ propagate(*FromNodeBelow, *ToNodeBelow,
+ MatchState::FlowFromMemAliasNoReadWrite, ReachSet, WorkList);
for (const auto &Mapping : ReachSet.reachableValueAliases(*FromNodeBelow)) {
auto Src = Mapping.first;
- if (Mapping.second.test(static_cast<size_t>(MatchState::FlowFrom)))
- propagate(Src, *ToNodeBelow, MatchState::FlowFromMemAlias, ReachSet,
- WorkList);
- if (Mapping.second.test(static_cast<size_t>(MatchState::FlowTo)))
- propagate(Src, *ToNodeBelow, MatchState::FlowToMemAlias, ReachSet,
- WorkList);
+ auto MemAliasPropagate = [&](MatchState FromState, MatchState ToState) {
+ if (Mapping.second.test(static_cast<size_t>(FromState)))
+ propagate(Src, *ToNodeBelow, ToState, ReachSet, WorkList);
+ };
+
+ MemAliasPropagate(MatchState::FlowFromReadOnly,
+ MatchState::FlowFromMemAliasReadOnly);
+ MemAliasPropagate(MatchState::FlowToWriteOnly,
+ MatchState::FlowToMemAliasWriteOnly);
+ MemAliasPropagate(MatchState::FlowToReadWrite,
+ MatchState::FlowToMemAliasReadWrite);
}
}
@@ -349,45 +649,54 @@ static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph,
// - If *X and *Y are memory aliases, then X and Y are value aliases
// - If Y is an alias of X, then reverse assignment edges (if there is any)
// should precede any assignment edges on the path from X to Y.
- switch (Item.State) {
- case MatchState::FlowFrom: {
- for (const auto &RevAssignEdge : NodeInfo->ReverseEdges)
- propagate(FromNode, RevAssignEdge.Other, MatchState::FlowFrom, ReachSet,
- WorkList);
+ auto NextAssignState = [&](MatchState State) {
for (const auto &AssignEdge : NodeInfo->Edges)
- propagate(FromNode, AssignEdge.Other, MatchState::FlowTo, ReachSet,
- WorkList);
+ propagate(FromNode, AssignEdge.Other, State, ReachSet, WorkList);
+ };
+ auto NextRevAssignState = [&](MatchState State) {
+ for (const auto &RevAssignEdge : NodeInfo->ReverseEdges)
+ propagate(FromNode, RevAssignEdge.Other, State, ReachSet, WorkList);
+ };
+ auto NextMemState = [&](MatchState State) {
if (auto AliasSet = MemSet.getMemoryAliases(ToNode)) {
for (const auto &MemAlias : *AliasSet)
- propagate(FromNode, MemAlias, MatchState::FlowFromMemAlias, ReachSet,
- WorkList);
+ propagate(FromNode, MemAlias, State, ReachSet, WorkList);
}
+ };
+
+ switch (Item.State) {
+ case MatchState::FlowFromReadOnly: {
+ NextRevAssignState(MatchState::FlowFromReadOnly);
+ NextAssignState(MatchState::FlowToReadWrite);
+ NextMemState(MatchState::FlowFromMemAliasReadOnly);
break;
}
- case MatchState::FlowFromMemAlias: {
- for (const auto &RevAssignEdge : NodeInfo->ReverseEdges)
- propagate(FromNode, RevAssignEdge.Other, MatchState::FlowFrom, ReachSet,
- WorkList);
- for (const auto &AssignEdge : NodeInfo->Edges)
- propagate(FromNode, AssignEdge.Other, MatchState::FlowTo, ReachSet,
- WorkList);
+ case MatchState::FlowFromMemAliasNoReadWrite: {
+ NextRevAssignState(MatchState::FlowFromReadOnly);
+ NextAssignState(MatchState::FlowToWriteOnly);
break;
}
- case MatchState::FlowTo: {
- for (const auto &AssignEdge : NodeInfo->Edges)
- propagate(FromNode, AssignEdge.Other, MatchState::FlowTo, ReachSet,
- WorkList);
- if (auto AliasSet = MemSet.getMemoryAliases(ToNode)) {
- for (const auto &MemAlias : *AliasSet)
- propagate(FromNode, MemAlias, MatchState::FlowToMemAlias, ReachSet,
- WorkList);
- }
+ case MatchState::FlowFromMemAliasReadOnly: {
+ NextRevAssignState(MatchState::FlowFromReadOnly);
+ NextAssignState(MatchState::FlowToReadWrite);
break;
}
- case MatchState::FlowToMemAlias: {
- for (const auto &AssignEdge : NodeInfo->Edges)
- propagate(FromNode, AssignEdge.Other, MatchState::FlowTo, ReachSet,
- WorkList);
+ case MatchState::FlowToWriteOnly: {
+ NextAssignState(MatchState::FlowToWriteOnly);
+ NextMemState(MatchState::FlowToMemAliasWriteOnly);
+ break;
+ }
+ case MatchState::FlowToReadWrite: {
+ NextAssignState(MatchState::FlowToReadWrite);
+ NextMemState(MatchState::FlowToMemAliasReadWrite);
+ break;
+ }
+ case MatchState::FlowToMemAliasWriteOnly: {
+ NextAssignState(MatchState::FlowToWriteOnly);
+ break;
+ }
+ case MatchState::FlowToMemAliasReadWrite: {
+ NextAssignState(MatchState::FlowToReadWrite);
break;
}
}
@@ -465,7 +774,8 @@ CFLAndersAAResult::buildInfoFrom(const Function &Fn) {
// to it
auto IValueAttrMap = buildAttrMap(Graph, ReachSet);
- return FunctionInfo(ReachSet, std::move(IValueAttrMap));
+ return FunctionInfo(Fn, GraphBuilder.getReturnValues(), ReachSet,
+ std::move(IValueAttrMap));
}
void CFLAndersAAResult::scan(const Function &Fn) {
@@ -530,7 +840,7 @@ AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA,
auto &FunInfo = ensureCached(*Fn);
// AliasMap lookup
- if (FunInfo->mayAlias(ValA, ValB))
+ if (FunInfo->mayAlias(ValA, LocA.Size, ValB, LocB.Size))
return MayAlias;
return NoAlias;
}
@@ -555,9 +865,9 @@ AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA,
return QueryResult;
}
-char CFLAndersAA::PassID;
+AnalysisKey CFLAndersAA::Key;
-CFLAndersAAResult CFLAndersAA::run(Function &F, AnalysisManager<Function> &AM) {
+CFLAndersAAResult CFLAndersAA::run(Function &F, FunctionAnalysisManager &AM) {
return CFLAndersAAResult(AM.getResult<TargetLibraryAnalysis>(F));
}
diff --git a/lib/Analysis/CFLGraph.h b/lib/Analysis/CFLGraph.h
index bc6e794d0b2a..e526e0e16aa7 100644
--- a/lib/Analysis/CFLGraph.h
+++ b/lib/Analysis/CFLGraph.h
@@ -40,6 +40,7 @@ public:
struct Edge {
Node Other;
+ int64_t Offset;
};
typedef std::vector<Edge> EdgeList;
@@ -107,8 +108,8 @@ public:
auto *ToInfo = getNode(To);
assert(ToInfo != nullptr);
- FromInfo->Edges.push_back(Edge{To});
- ToInfo->ReverseEdges.push_back(Edge{From});
+ FromInfo->Edges.push_back(Edge{To, Offset});
+ ToInfo->ReverseEdges.push_back(Edge{From, Offset});
}
const NodeInfo *getNode(Node N) const {
@@ -151,6 +152,7 @@ template <typename CFLAA> class CFLGraphBuilder {
/// Gets the edges our graph should have, based on an Instruction*
class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
CFLAA &AA;
+ const DataLayout &DL;
const TargetLibraryInfo &TLI;
CFLGraph &Graph;
@@ -225,8 +227,8 @@ template <typename CFLAA> class CFLGraphBuilder {
void addStoreEdge(Value *From, Value *To) { addDerefEdge(From, To, false); }
public:
- GetEdgesVisitor(CFLGraphBuilder &Builder)
- : AA(Builder.Analysis), TLI(Builder.TLI), Graph(Builder.Graph),
+ GetEdgesVisitor(CFLGraphBuilder &Builder, const DataLayout &DL)
+ : AA(Builder.Analysis), DL(DL), TLI(Builder.TLI), Graph(Builder.Graph),
ReturnValues(Builder.ReturnedValues) {}
void visitInstruction(Instruction &) {
@@ -281,9 +283,20 @@ template <typename CFLAA> class CFLGraphBuilder {
addAssignEdge(Val, &Inst);
}
+ void visitGEP(GEPOperator &GEPOp) {
+    int64_t Offset = UnknownOffset;
+ APInt APOffset(DL.getPointerSizeInBits(GEPOp.getPointerAddressSpace()),
+ 0);
+ if (GEPOp.accumulateConstantOffset(DL, APOffset))
+ Offset = APOffset.getSExtValue();
+
+ auto *Op = GEPOp.getPointerOperand();
+ addAssignEdge(Op, &GEPOp, Offset);
+ }
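+  // For example (hypothetical IR, assuming i32 is 4 bytes in the module's
+  // DataLayout):
+  //   %q = getelementptr i32, i32* %p, i64 2
+  // accumulates a constant offset of 8, so the edge from %p to %q carries
+  // Offset = 8; GEPs with non-constant indices fall back to UnknownOffset.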
+
void visitGetElementPtrInst(GetElementPtrInst &Inst) {
- auto *Op = Inst.getPointerOperand();
- addAssignEdge(Op, &Inst);
+ auto *GEPOp = cast<GEPOperator>(&Inst);
+ visitGEP(*GEPOp);
}
void visitSelectInst(SelectInst &Inst) {
@@ -321,7 +334,8 @@ template <typename CFLAA> class CFLGraphBuilder {
// For now, we'll handle this like a landingpad instruction (by placing
// the
// result in its own group, and having that group alias externals).
- addNode(&Inst, getAttrUnknown());
+ if (Inst.getType()->isPointerTy())
+ addNode(&Inst, getAttrUnknown());
}
static bool isFunctionExternal(Function *Fn) {
@@ -444,7 +458,8 @@ template <typename CFLAA> class CFLGraphBuilder {
// Exceptions come from "nowhere", from our analysis' perspective.
      // So we place the instruction in its own group, noting that said group may
// alias externals
- addNode(&Inst, getAttrUnknown());
+ if (Inst.getType()->isPointerTy())
+ addNode(&Inst, getAttrUnknown());
}
void visitInsertValueInst(InsertValueInst &Inst) {
@@ -468,14 +483,97 @@ template <typename CFLAA> class CFLGraphBuilder {
void visitConstantExpr(ConstantExpr *CE) {
switch (CE->getOpcode()) {
+ case Instruction::GetElementPtr: {
+ auto GEPOp = cast<GEPOperator>(CE);
+ visitGEP(*GEPOp);
+ break;
+ }
+ case Instruction::PtrToInt: {
+ auto *Ptr = CE->getOperand(0);
+ addNode(Ptr, getAttrEscaped());
+ break;
+ }
+ case Instruction::IntToPtr: {
+ addNode(CE, getAttrUnknown());
+ break;
+ }
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI: {
+ auto *Src = CE->getOperand(0);
+ addAssignEdge(Src, CE);
+ break;
+ }
+ case Instruction::Select: {
+ auto *TrueVal = CE->getOperand(0);
+ auto *FalseVal = CE->getOperand(1);
+ addAssignEdge(TrueVal, CE);
+ addAssignEdge(FalseVal, CE);
+ break;
+ }
+ case Instruction::InsertElement: {
+ auto *Vec = CE->getOperand(0);
+ auto *Val = CE->getOperand(1);
+ addAssignEdge(Vec, CE);
+ addStoreEdge(Val, CE);
+ break;
+ }
+ case Instruction::ExtractElement: {
+ auto *Ptr = CE->getOperand(0);
+ addLoadEdge(Ptr, CE);
+ break;
+ }
+ case Instruction::InsertValue: {
+ auto *Agg = CE->getOperand(0);
+ auto *Val = CE->getOperand(1);
+ addAssignEdge(Agg, CE);
+ addStoreEdge(Val, CE);
+ break;
+ }
+ case Instruction::ExtractValue: {
+ auto *Ptr = CE->getOperand(0);
+ addLoadEdge(Ptr, CE);
+    break;
+  }
+ case Instruction::ShuffleVector: {
+ auto *From1 = CE->getOperand(0);
+ auto *From2 = CE->getOperand(1);
+ addAssignEdge(From1, CE);
+ addAssignEdge(From2, CE);
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ addAssignEdge(CE->getOperand(0), CE);
+ addAssignEdge(CE->getOperand(1), CE);
+ break;
+ }
default:
llvm_unreachable("Unknown instruction type encountered!");
-// Build the switch statement using the Instruction.def file.
-#define HANDLE_INST(NUM, OPCODE, CLASS) \
- case Instruction::OPCODE: \
- this->visit##OPCODE(*(CLASS *)CE); \
- break;
-#include "llvm/IR/Instruction.def"
}
}
};
@@ -517,7 +615,7 @@ template <typename CFLAA> class CFLGraphBuilder {
// Builds the graph needed for constructing the StratifiedSets for the given
// function
void buildGraphFrom(Function &Fn) {
- GetEdgesVisitor Visitor(*this);
+ GetEdgesVisitor Visitor(*this, Fn.getParent()->getDataLayout());
for (auto &Bb : Fn.getBasicBlockList())
for (auto &Inst : Bb.getInstList())
diff --git a/lib/Analysis/CFLSteensAliasAnalysis.cpp b/lib/Analysis/CFLSteensAliasAnalysis.cpp
index d816822aaaea..dde24ef5fdd5 100644
--- a/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -153,7 +153,7 @@ CFLSteensAAResult::FunctionInfo::FunctionInfo(
if (Itr != InterfaceMap.end()) {
if (CurrValue != Itr->second)
Summary.RetParamRelations.push_back(
- ExternalRelation{CurrValue, Itr->second});
+ ExternalRelation{CurrValue, Itr->second, UnknownOffset});
break;
}
@@ -341,81 +341,9 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
return NoAlias;
}
-ModRefInfo CFLSteensAAResult::getArgModRefInfo(ImmutableCallSite CS,
- unsigned ArgIdx) {
- if (auto CalledFunc = CS.getCalledFunction()) {
- auto &MaybeInfo = ensureCached(const_cast<Function *>(CalledFunc));
- if (!MaybeInfo.hasValue())
- return MRI_ModRef;
- auto &RetParamAttributes = MaybeInfo->getAliasSummary().RetParamAttributes;
- auto &RetParamRelations = MaybeInfo->getAliasSummary().RetParamRelations;
-
- bool ArgAttributeIsWritten =
- std::any_of(RetParamAttributes.begin(), RetParamAttributes.end(),
- [ArgIdx](const ExternalAttribute &ExtAttr) {
- return ExtAttr.IValue.Index == ArgIdx + 1;
- });
- bool ArgIsAccessed =
- std::any_of(RetParamRelations.begin(), RetParamRelations.end(),
- [ArgIdx](const ExternalRelation &ExtRelation) {
- return ExtRelation.To.Index == ArgIdx + 1 ||
- ExtRelation.From.Index == ArgIdx + 1;
- });
-
- return (!ArgIsAccessed && !ArgAttributeIsWritten) ? MRI_NoModRef
- : MRI_ModRef;
- }
-
- return MRI_ModRef;
-}
-
-FunctionModRefBehavior
-CFLSteensAAResult::getModRefBehavior(ImmutableCallSite CS) {
- // If we know the callee, try analyzing it
- if (auto CalledFunc = CS.getCalledFunction())
- return getModRefBehavior(CalledFunc);
-
- // Otherwise, be conservative
- return FMRB_UnknownModRefBehavior;
-}
-
-FunctionModRefBehavior CFLSteensAAResult::getModRefBehavior(const Function *F) {
- assert(F != nullptr);
-
- // TODO: Remove the const_cast
- auto &MaybeInfo = ensureCached(const_cast<Function *>(F));
- if (!MaybeInfo.hasValue())
- return FMRB_UnknownModRefBehavior;
- auto &RetParamAttributes = MaybeInfo->getAliasSummary().RetParamAttributes;
- auto &RetParamRelations = MaybeInfo->getAliasSummary().RetParamRelations;
-
- // First, if any argument is marked Escpaed, Unknown or Global, anything may
- // happen to them and thus we can't draw any conclusion.
- if (!RetParamAttributes.empty())
- return FMRB_UnknownModRefBehavior;
-
- // Currently we don't (and can't) distinguish reads from writes in
- // RetParamRelations. All we can say is whether there may be memory access or
- // not.
- if (RetParamRelations.empty())
- return FMRB_DoesNotAccessMemory;
-
- // Check if something beyond argmem gets touched.
- bool AccessArgMemoryOnly =
- std::all_of(RetParamRelations.begin(), RetParamRelations.end(),
- [](const ExternalRelation &ExtRelation) {
- // Both DerefLevels has to be 0, since we don't know which
- // one is a read and which is a write.
- return ExtRelation.From.DerefLevel == 0 &&
- ExtRelation.To.DerefLevel == 0;
- });
- return AccessArgMemoryOnly ? FMRB_OnlyAccessesArgumentPointees
- : FMRB_UnknownModRefBehavior;
-}
-
-char CFLSteensAA::PassID;
+AnalysisKey CFLSteensAA::Key;
-CFLSteensAAResult CFLSteensAA::run(Function &F, AnalysisManager<Function> &AM) {
+CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) {
return CFLSteensAAResult(AM.getResult<TargetLibraryAnalysis>(F));
}
diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp
index f6f30bb927a5..054bdc45ad67 100644
--- a/lib/Analysis/CGSCCPassManager.cpp
+++ b/lib/Analysis/CGSCCPassManager.cpp
@@ -8,17 +8,506 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstIterator.h"
using namespace llvm;
-// Explicit instantiations for the core proxy templates.
+// Explicit template instantiations and specialization definitions for core
+// template typedefs.
namespace llvm {
-template class PassManager<LazyCallGraph::SCC>;
-template class AnalysisManager<LazyCallGraph::SCC>;
+
+// Explicit instantiations for the core proxy templates.
+template class AllAnalysesOn<LazyCallGraph::SCC>;
+template class AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>;
+template class PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager,
+ LazyCallGraph &, CGSCCUpdateResult &>;
template class InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>;
template class OuterAnalysisManagerProxy<ModuleAnalysisManager,
- LazyCallGraph::SCC>;
-template class InnerAnalysisManagerProxy<FunctionAnalysisManager,
- LazyCallGraph::SCC>;
+ LazyCallGraph::SCC, LazyCallGraph &>;
template class OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>;
+
+/// Explicitly specialize the pass manager run method to handle call graph
+/// updates.
+template <>
+PreservedAnalyses
+PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
+ CGSCCUpdateResult &>::run(LazyCallGraph::SCC &InitialC,
+ CGSCCAnalysisManager &AM,
+ LazyCallGraph &G, CGSCCUpdateResult &UR) {
+ PreservedAnalyses PA = PreservedAnalyses::all();
+
+ if (DebugLogging)
+ dbgs() << "Starting CGSCC pass manager run.\n";
+
+ // The SCC may be refined while we are running passes over it, so set up
+ // a pointer that we can update.
+ LazyCallGraph::SCC *C = &InitialC;
+
+ for (auto &Pass : Passes) {
+ if (DebugLogging)
+ dbgs() << "Running pass: " << Pass->name() << " on " << *C << "\n";
+
+ PreservedAnalyses PassPA = Pass->run(*C, AM, G, UR);
+
+ // Update the SCC if necessary.
+ C = UR.UpdatedC ? UR.UpdatedC : C;
+
+ // Check that we didn't miss any update scenario.
+ assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!");
+ assert(C->begin() != C->end() && "Cannot have an empty SCC!");
+
+ // Update the analysis manager as each pass runs and potentially
+ // invalidates analyses.
+ AM.invalidate(*C, PassPA);
+
+ // Finally, we intersect the final preserved analyses to compute the
+ // aggregate preserved set for this pass manager.
+ PA.intersect(std::move(PassPA));
+
+ // FIXME: Historically, the pass managers all called the LLVM context's
+ // yield function here. We don't have a generic way to acquire the
+ // context and it isn't yet clear what the right pattern is for yielding
+ // in the new pass manager so it is currently omitted.
+ // ...getContext().yield();
+ }
+
+  // Invalidation was handled after each pass in the above loop for the current
+ // SCC. Therefore, the remaining analysis results in the AnalysisManager are
+ // preserved. We mark this with a set so that we don't need to inspect each
+ // one individually.
+ PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
+
+ if (DebugLogging)
+ dbgs() << "Finished CGSCC pass manager run.\n";
+
+ return PA;
+}
+
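+// A minimal sketch of driving this specialization (assumes the usual new-PM
+// setup; 'SomeCGSCCPass' is an illustrative placeholder):
+//   CGSCCPassManager CGPM(/*DebugLogging=*/true);
+//   CGPM.addPass(SomeCGSCCPass());
+//   ModulePassManager MPM;
+//   MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+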
+bool CGSCCAnalysisManagerModuleProxy::Result::invalidate(
+ Module &M, const PreservedAnalyses &PA,
+ ModuleAnalysisManager::Invalidator &Inv) {
+ // If literally everything is preserved, we're done.
+ if (PA.areAllPreserved())
+ return false; // This is still a valid proxy.
+
+ // If this proxy or the call graph is going to be invalidated, we also need
+ // to clear all the keys coming from that analysis.
+ //
+ // We also directly invalidate the FAM's module proxy if necessary, and if
+ // that proxy isn't preserved we can't preserve this proxy either. We rely on
+ // it to handle module -> function analysis invalidation in the face of
+ // structural changes and so if it's unavailable we conservatively clear the
+ // entire SCC layer as well rather than trying to do invalidation ourselves.
+ auto PAC = PA.getChecker<CGSCCAnalysisManagerModuleProxy>();
+ if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Module>>()) ||
+ Inv.invalidate<LazyCallGraphAnalysis>(M, PA) ||
+ Inv.invalidate<FunctionAnalysisManagerModuleProxy>(M, PA)) {
+ InnerAM->clear();
+
+ // And the proxy itself should be marked as invalid so that we can observe
+ // the new call graph. This isn't strictly necessary because we cheat
+ // above, but is still useful.
+ return true;
+ }
+
+ // Directly check if the relevant set is preserved so we can short circuit
+ // invalidating SCCs below.
+ bool AreSCCAnalysesPreserved =
+ PA.allAnalysesInSetPreserved<AllAnalysesOn<LazyCallGraph::SCC>>();
+
+ // Ok, we have a graph, so we can propagate the invalidation down into it.
+ for (auto &RC : G->postorder_ref_sccs())
+ for (auto &C : RC) {
+ Optional<PreservedAnalyses> InnerPA;
+
+ // Check to see whether the preserved set needs to be adjusted based on
+ // module-level analysis invalidation triggering deferred invalidation
+ // for this SCC.
+ if (auto *OuterProxy =
+ InnerAM->getCachedResult<ModuleAnalysisManagerCGSCCProxy>(C))
+ for (const auto &OuterInvalidationPair :
+ OuterProxy->getOuterInvalidations()) {
+ AnalysisKey *OuterAnalysisID = OuterInvalidationPair.first;
+ const auto &InnerAnalysisIDs = OuterInvalidationPair.second;
+ if (Inv.invalidate(OuterAnalysisID, M, PA)) {
+ if (!InnerPA)
+ InnerPA = PA;
+ for (AnalysisKey *InnerAnalysisID : InnerAnalysisIDs)
+ InnerPA->abandon(InnerAnalysisID);
+ }
+ }
+
+ // Check if we needed a custom PA set. If so we'll need to run the inner
+ // invalidation.
+ if (InnerPA) {
+ InnerAM->invalidate(C, *InnerPA);
+ continue;
+ }
+
+ // Otherwise we only need to do invalidation if the original PA set didn't
+ // preserve all SCC analyses.
+ if (!AreSCCAnalysesPreserved)
+ InnerAM->invalidate(C, PA);
+ }
+
+ // Return false to indicate that this result is still a valid proxy.
+ return false;
+}
+
+template <>
+CGSCCAnalysisManagerModuleProxy::Result
+CGSCCAnalysisManagerModuleProxy::run(Module &M, ModuleAnalysisManager &AM) {
+ // Force the Function analysis manager to also be available so that it can
+ // be accessed in an SCC analysis and proxied onward to function passes.
+ // FIXME: It is pretty awkward to just drop the result here and assert that
+ // we can find it again later.
+ (void)AM.getResult<FunctionAnalysisManagerModuleProxy>(M);
+
+ return Result(*InnerAM, AM.getResult<LazyCallGraphAnalysis>(M));
+}
+
+AnalysisKey FunctionAnalysisManagerCGSCCProxy::Key;
+
+FunctionAnalysisManagerCGSCCProxy::Result
+FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC &C,
+ CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG) {
+ // Collect the FunctionAnalysisManager from the Module layer and use that to
+ // build the proxy result.
+ //
+  // This allows us to rely on the FunctionAnalysisManagerModuleProxy to
+ // invalidate the function analyses.
+ auto &MAM = AM.getResult<ModuleAnalysisManagerCGSCCProxy>(C, CG).getManager();
+ Module &M = *C.begin()->getFunction().getParent();
+ auto *FAMProxy = MAM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M);
+ assert(FAMProxy && "The CGSCC pass manager requires that the FAM module "
+ "proxy is run on the module prior to entering the CGSCC "
+ "walk.");
+
+ // Note that we special-case invalidation handling of this proxy in the CGSCC
+ // analysis manager's Module proxy. This avoids the need to do anything
+ // special here to recompute all of this if ever the FAM's module proxy goes
+ // away.
+ return Result(FAMProxy->getManager());
+}
+
+bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate(
+ LazyCallGraph::SCC &C, const PreservedAnalyses &PA,
+ CGSCCAnalysisManager::Invalidator &Inv) {
+ for (LazyCallGraph::Node &N : C)
+ FAM->invalidate(N.getFunction(), PA);
+
+ // This proxy doesn't need to handle invalidation itself. Instead, the
+ // module-level CGSCC proxy handles it above by ensuring that if the
+ // module-level FAM proxy becomes invalid the entire SCC layer, which
+ // includes this proxy, is cleared.
+ return false;
+}
+
+} // End llvm namespace
+
+namespace {
+/// Helper function to update both the \c CGSCCAnalysisManager \p AM and the \c
+/// CGSCCPassManager's \c CGSCCUpdateResult \p UR based on a range of newly
+/// added SCCs.
+///
+/// The range of new SCCs must be in postorder already. The SCC they were split
+/// out of must be provided as \p C. The current node being mutated and
+/// triggering updates must be passed as \p N.
+///
+/// This function returns the SCC containing \p N. This will be either \p C if
+/// no new SCCs have been split out, or it will be the new SCC containing \p N.
+template <typename SCCRangeT>
+LazyCallGraph::SCC *
+incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
+ LazyCallGraph::Node &N, LazyCallGraph::SCC *C,
+ CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
+ bool DebugLogging = false) {
+ typedef LazyCallGraph::SCC SCC;
+
+ if (NewSCCRange.begin() == NewSCCRange.end())
+ return C;
+
+ // Add the current SCC to the worklist as its shape has changed.
+ UR.CWorklist.insert(C);
+ if (DebugLogging)
+ dbgs() << "Enqueuing the existing SCC in the worklist:" << *C << "\n";
+
+ SCC *OldC = C;
+ (void)OldC;
+
+ // Update the current SCC. Note that if we have new SCCs, this must actually
+ // change the SCC.
+ assert(C != &*NewSCCRange.begin() &&
+ "Cannot insert new SCCs without changing current SCC!");
+ C = &*NewSCCRange.begin();
+ assert(G.lookupSCC(N) == C && "Failed to update current SCC!");
+
+ for (SCC &NewC :
+ reverse(make_range(std::next(NewSCCRange.begin()), NewSCCRange.end()))) {
+ assert(C != &NewC && "No need to re-visit the current SCC!");
+ assert(OldC != &NewC && "Already handled the original SCC!");
+ UR.CWorklist.insert(&NewC);
+ if (DebugLogging)
+ dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n";
+ }
+ return C;
+}
+}
+
+LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
+ LazyCallGraph &G, LazyCallGraph::SCC &InitialC, LazyCallGraph::Node &N,
+ CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, bool DebugLogging) {
+ typedef LazyCallGraph::Node Node;
+ typedef LazyCallGraph::Edge Edge;
+ typedef LazyCallGraph::SCC SCC;
+ typedef LazyCallGraph::RefSCC RefSCC;
+
+ RefSCC &InitialRC = InitialC.getOuterRefSCC();
+ SCC *C = &InitialC;
+ RefSCC *RC = &InitialRC;
+ Function &F = N.getFunction();
+
+ // Walk the function body and build up the set of retained, promoted, and
+ // demoted edges.
+ SmallVector<Constant *, 16> Worklist;
+ SmallPtrSet<Constant *, 16> Visited;
+ SmallPtrSet<Function *, 16> RetainedEdges;
+ SmallSetVector<Function *, 4> PromotedRefTargets;
+ SmallSetVector<Function *, 4> DemotedCallTargets;
+
+ // First walk the function and handle all called functions. We do this first
+ // because if there is a single call edge, whether there are ref edges is
+ // irrelevant.
+ for (Instruction &I : instructions(F))
+ if (auto CS = CallSite(&I))
+ if (Function *Callee = CS.getCalledFunction())
+ if (Visited.insert(Callee).second && !Callee->isDeclaration()) {
+ const Edge *E = N.lookup(*Callee);
+ // FIXME: We should really handle adding new calls. While it will
+ // make downstream usage more complex, there is no fundamental
+ // limitation and it will allow passes within the CGSCC to be a bit
+ // more flexible in what transforms they can do. Until then, we
+ // verify that new calls haven't been introduced.
+ assert(E && "No function transformations should introduce *new* "
+ "call edges! Any new calls should be modeled as "
+ "promoted existing ref edges!");
+ RetainedEdges.insert(Callee);
+ if (!E->isCall())
+ PromotedRefTargets.insert(Callee);
+ }
+
+ // Now walk all references.
+ for (Instruction &I : instructions(F))
+ for (Value *Op : I.operand_values())
+ if (Constant *C = dyn_cast<Constant>(Op))
+ if (Visited.insert(C).second)
+ Worklist.push_back(C);
+
+ LazyCallGraph::visitReferences(Worklist, Visited, [&](Function &Referee) {
+ const Edge *E = N.lookup(Referee);
+ // FIXME: Similarly to new calls, we also currently preclude
+ // introducing new references. See above for details.
+ assert(E && "No function transformations should introduce *new* ref "
+ "edges! Any new ref edges would require IPO which "
+ "function passes aren't allowed to do!");
+ RetainedEdges.insert(&Referee);
+ if (E->isCall())
+ DemotedCallTargets.insert(&Referee);
+ });
+
+ // First remove all of the edges that are no longer present in this function.
+ // We have to build a list of dead targets first and then remove them as the
+ // data structures will all be invalidated by removing them.
+ SmallVector<PointerIntPair<Node *, 1, Edge::Kind>, 4> DeadTargets;
+ for (Edge &E : N)
+ if (!RetainedEdges.count(&E.getFunction()))
+ DeadTargets.push_back({E.getNode(), E.getKind()});
+ for (auto DeadTarget : DeadTargets) {
+ Node &TargetN = *DeadTarget.getPointer();
+ bool IsCall = DeadTarget.getInt() == Edge::Call;
+ SCC &TargetC = *G.lookupSCC(TargetN);
+ RefSCC &TargetRC = TargetC.getOuterRefSCC();
+
+ if (&TargetRC != RC) {
+ RC->removeOutgoingEdge(N, TargetN);
+ if (DebugLogging)
+ dbgs() << "Deleting outgoing edge from '" << N << "' to '" << TargetN
+ << "'\n";
+ continue;
+ }
+ if (DebugLogging)
+ dbgs() << "Deleting internal " << (IsCall ? "call" : "ref")
+ << " edge from '" << N << "' to '" << TargetN << "'\n";
+
+ if (IsCall) {
+ if (C != &TargetC) {
+ // For separate SCCs this is trivial.
+ RC->switchTrivialInternalEdgeToRef(N, TargetN);
+ } else {
+ // Otherwise we may end up re-structuring the call graph. First,
+ // invalidate any SCC analyses. We have to do this before we split
+ // functions into new SCCs and lose track of where their analyses are
+ // cached.
+ // FIXME: We should accept a more precise preserved set here. For
+ // example, it might be possible to preserve some function analyses
+ // even as the SCC structure is changed.
+ AM.invalidate(*C, PreservedAnalyses::none());
+ // Now update the call graph.
+ C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G,
+ N, C, AM, UR, DebugLogging);
+ }
+ }
+
+ auto NewRefSCCs = RC->removeInternalRefEdge(N, TargetN);
+ if (!NewRefSCCs.empty()) {
+ // Note that we don't bother to invalidate analyses as ref-edge
+ // connectivity is not really observable in any way and is intended
+ // exclusively to be used for ordering of transforms rather than for
+ // analysis conclusions.
+
+ // The RC worklist is in reverse postorder, so we first enqueue the
+ // current RefSCC as it will remain the parent of all split RefSCCs, then
+ // we enqueue the new ones in RPO except for the one which contains the
+ // source node as that is the "bottom" we will continue processing in the
+ // bottom-up walk.
+ UR.RCWorklist.insert(RC);
+ if (DebugLogging)
+ dbgs() << "Enqueuing the existing RefSCC in the update worklist: "
+ << *RC << "\n";
+ // Update the RC to the "bottom".
+ assert(G.lookupSCC(N) == C && "Changed the SCC when splitting RefSCCs!");
+ RC = &C->getOuterRefSCC();
+ assert(G.lookupRefSCC(N) == RC && "Failed to update current RefSCC!");
+ assert(NewRefSCCs.front() == RC &&
+ "New current RefSCC not first in the returned list!");
+ for (RefSCC *NewRC : reverse(
+ make_range(std::next(NewRefSCCs.begin()), NewRefSCCs.end()))) {
+ assert(NewRC != RC && "Should not encounter the current RefSCC further "
+ "in the postorder list of new RefSCCs.");
+ UR.RCWorklist.insert(NewRC);
+ if (DebugLogging)
+ dbgs() << "Enqueuing a new RefSCC in the update worklist: " << *NewRC
+ << "\n";
+ }
+ }
+ }
+
+ // Next demote all the call edges that are now ref edges. This helps make
+ // the SCCs small which should minimize the work below as we don't want to
+ // form cycles that this would break.
+ for (Function *RefTarget : DemotedCallTargets) {
+ Node &TargetN = *G.lookup(*RefTarget);
+ SCC &TargetC = *G.lookupSCC(TargetN);
+ RefSCC &TargetRC = TargetC.getOuterRefSCC();
+
+ // The easy case is when the target RefSCC is not this RefSCC. This is
+ // only supported when the target RefSCC is a child of this RefSCC.
+ if (&TargetRC != RC) {
+ assert(RC->isAncestorOf(TargetRC) &&
+ "Cannot potentially form RefSCC cycles here!");
+ RC->switchOutgoingEdgeToRef(N, TargetN);
+ if (DebugLogging)
+ dbgs() << "Switch outgoing call edge to a ref edge from '" << N
+ << "' to '" << TargetN << "'\n";
+ continue;
+ }
+
+ // We are switching an internal call edge to a ref edge. This may split up
+ // some SCCs.
+ if (C != &TargetC) {
+ // For separate SCCs this is trivial.
+ RC->switchTrivialInternalEdgeToRef(N, TargetN);
+ continue;
+ }
+
+ // Otherwise we may end up re-structuring the call graph. First, invalidate
+ // any SCC analyses. We have to do this before we split functions into new
+ // SCCs and lose track of where their analyses are cached.
+ // FIXME: We should accept a more precise preserved set here. For example,
+ // it might be possible to preserve some function analyses even as the SCC
+ // structure is changed.
+ AM.invalidate(*C, PreservedAnalyses::none());
+ // Now update the call graph.
+ C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G,
+ N, C, AM, UR, DebugLogging);
+ }
+
+ // Now promote ref edges into call edges.
+ for (Function *CallTarget : PromotedRefTargets) {
+ Node &TargetN = *G.lookup(*CallTarget);
+ SCC &TargetC = *G.lookupSCC(TargetN);
+ RefSCC &TargetRC = TargetC.getOuterRefSCC();
+
+ // The easy case is when the target RefSCC is not this RefSCC. This is
+ // only supported when the target RefSCC is a child of this RefSCC.
+ if (&TargetRC != RC) {
+ assert(RC->isAncestorOf(TargetRC) &&
+ "Cannot potentially form RefSCC cycles here!");
+ RC->switchOutgoingEdgeToCall(N, TargetN);
+ if (DebugLogging)
+ dbgs() << "Switch outgoing ref edge to a call edge from '" << N
+ << "' to '" << TargetN << "'\n";
+ continue;
+ }
+ if (DebugLogging)
+ dbgs() << "Switch an internal ref edge to a call edge from '" << N
+ << "' to '" << TargetN << "'\n";
+
+ // Otherwise we are switching an internal ref edge to a call edge. This
+ // may merge away some SCCs, and we add those to the UpdateResult. We also
+ // need to make sure to update the worklist in the event SCCs have moved
+ // before the current one in the post-order sequence.
+ auto InitialSCCIndex = RC->find(*C) - RC->begin();
+ auto InvalidatedSCCs = RC->switchInternalEdgeToCall(N, TargetN);
+ if (!InvalidatedSCCs.empty()) {
+ C = &TargetC;
+ assert(G.lookupSCC(N) == C && "Failed to update current SCC!");
+
+ // Any analyses cached for this SCC are no longer precise as the shape
+ // has changed by introducing this cycle.
+ AM.invalidate(*C, PreservedAnalyses::none());
+
+ for (SCC *InvalidatedC : InvalidatedSCCs) {
+ assert(InvalidatedC != C && "Cannot invalidate the current SCC!");
+ UR.InvalidatedSCCs.insert(InvalidatedC);
+
+ // Also clear any cached analyses for the SCCs that are dead. This
+ // isn't really necessary for correctness but can release memory.
+ AM.clear(*InvalidatedC);
+ }
+ }
+ auto NewSCCIndex = RC->find(*C) - RC->begin();
+ if (InitialSCCIndex < NewSCCIndex) {
+ // Put our current SCC back onto the worklist as we'll visit other SCCs
+ // that are now definitively ordered prior to the current one in the
+ // post-order sequence, and may end up observing more precise context to
+ // optimize the current SCC.
+ UR.CWorklist.insert(C);
+ if (DebugLogging)
+ dbgs() << "Enqueuing the existing SCC in the worklist: " << *C << "\n";
+ // Enqueue in reverse order as we pop off the back of the worklist.
+ for (SCC &MovedC : reverse(make_range(RC->begin() + InitialSCCIndex,
+ RC->begin() + NewSCCIndex))) {
+ UR.CWorklist.insert(&MovedC);
+ if (DebugLogging)
+ dbgs() << "Enqueuing a newly earlier in post-order SCC: " << MovedC
+ << "\n";
+ }
+ }
+ }
+
+ assert(!UR.InvalidatedSCCs.count(C) && "Invalidated the current SCC!");
+ assert(!UR.InvalidatedRefSCCs.count(RC) && "Invalidated the current RefSCC!");
+ assert(&C->getOuterRefSCC() == RC && "Current SCC not in current RefSCC!");
+
+ // Record the current RefSCC and SCC for higher layers of the CGSCC pass
+ // manager now that all the updates have been applied.
+ if (RC != &InitialRC)
+ UR.UpdatedRC = RC;
+ if (C != &InitialC)
+ UR.UpdatedC = C;
+
+ return *C;
}
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 57ad437ef4fd..08d50c29dfc8 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -37,6 +37,7 @@ add_llvm_library(LLVMAnalysis
Interval.cpp
IntervalPartition.cpp
IteratedDominanceFrontier.cpp
+ LazyBranchProbabilityInfo.cpp
LazyBlockFrequencyInfo.cpp
LazyCallGraph.cpp
LazyValueInfo.cpp
@@ -82,6 +83,7 @@ add_llvm_library(LLVMAnalysis
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/Analysis
- )
-add_dependencies(LLVMAnalysis intrinsics_gen)
+ DEPENDS
+ intrinsics_gen
+ )
diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp
index 39cb86d2ccb1..458b7bfae959 100644
--- a/lib/Analysis/CallGraph.cpp
+++ b/lib/Analysis/CallGraph.cpp
@@ -258,10 +258,10 @@ void CallGraphNode::replaceCallEdge(CallSite CS,
}
// Provide an explicit template instantiation for the static ID.
-char CallGraphAnalysis::PassID;
+AnalysisKey CallGraphAnalysis::Key;
PreservedAnalyses CallGraphPrinterPass::run(Module &M,
- AnalysisManager<Module> &AM) {
+ ModuleAnalysisManager &AM) {
AM.getResult<CallGraphAnalysis>(M).print(OS);
return PreservedAnalyses::all();
}
diff --git a/lib/Analysis/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp
index 69d767354785..9cef78144150 100644
--- a/lib/Analysis/CallGraphSCCPass.cpp
+++ b/lib/Analysis/CallGraphSCCPass.cpp
@@ -67,9 +67,7 @@ public:
Info.setPreservesAll();
}
- const char *getPassName() const override {
- return "CallGraph Pass Manager";
- }
+ StringRef getPassName() const override { return "CallGraph Pass Manager"; }
PMDataManager *getAsPMDataManager() override { return this; }
Pass *getAsPass() override { return this; }
@@ -100,7 +98,7 @@ private:
bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
CallGraph &CG, bool &CallGraphUpToDate,
bool &DevirtualizedCall);
- bool RefreshCallGraph(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
bool IsCheckingMode);
};
@@ -175,8 +173,8 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
/// a function pass like GVN optimizes away stuff feeding the indirect call.
/// This never happens in checking mode.
///
-bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
- CallGraph &CG, bool CheckingMode) {
+bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
+ bool CheckingMode) {
DenseMap<Value*, CallGraphNode*> CallSites;
DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
@@ -450,7 +448,7 @@ bool CGPassManager::runOnModule(Module &M) {
// Copy the current SCC and increment past it so that the pass can hack
// on the SCC if it wants to without invalidating our iterator.
const std::vector<CallGraphNode *> &NodeVec = *CGI;
- CurSCC.initialize(NodeVec.data(), NodeVec.data() + NodeVec.size());
+ CurSCC.initialize(NodeVec);
++CGI;
// At the top level, we run all the passes in this pass manager on the
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index ed8370498dd0..bdffdd8eb270 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -27,36 +27,45 @@
using namespace llvm;
-static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet,
- SmallPtrSetImpl<const Value*> &EphValues) {
- SmallPtrSet<const Value *, 32> Visited;
-
- // Make sure that all of the items in WorkSet are in our EphValues set.
- EphValues.insert(WorkSet.begin(), WorkSet.end());
+static void
+appendSpeculatableOperands(const Value *V,
+ SmallPtrSetImpl<const Value *> &Visited,
+ SmallVectorImpl<const Value *> &Worklist) {
+ const User *U = dyn_cast<User>(V);
+ if (!U)
+ return;
+
+ for (const Value *Operand : U->operands())
+ if (Visited.insert(Operand).second)
+ if (isSafeToSpeculativelyExecute(Operand))
+ Worklist.push_back(Operand);
+}
+static void completeEphemeralValues(SmallPtrSetImpl<const Value *> &Visited,
+ SmallVectorImpl<const Value *> &Worklist,
+ SmallPtrSetImpl<const Value *> &EphValues) {
// Note: We don't speculate PHIs here, so we'll miss instruction chains kept
// alive only by ephemeral values.
- while (!WorkSet.empty()) {
- const Value *V = WorkSet.front();
- WorkSet.erase(WorkSet.begin());
+ // Walk the worklist using an index but without caching the size so we can
+ // append more entries as we process the worklist. This forms a queue without
+ // quadratic behavior by just leaving processed nodes at the head of the
+ // worklist forever.
+ for (int i = 0; i < (int)Worklist.size(); ++i) {
+ const Value *V = Worklist[i];
- if (!Visited.insert(V).second)
- continue;
+ assert(Visited.count(V) &&
+ "Failed to add a worklist entry to our visited set!");
// If all uses of this value are ephemeral, then so is this value.
- if (!std::all_of(V->user_begin(), V->user_end(),
- [&](const User *U) { return EphValues.count(U); }))
+ if (!all_of(V->users(), [&](const User *U) { return EphValues.count(U); }))
continue;
EphValues.insert(V);
DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n");
- if (const User *U = dyn_cast<User>(V))
- for (const Value *J : U->operands()) {
- if (isSafeToSpeculativelyExecute(J))
- WorkSet.push_back(J);
- }
+ // Append any more operands to consider.
+ appendSpeculatableOperands(V, Visited, Worklist);
}
}
@@ -64,29 +73,32 @@ static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet,
void CodeMetrics::collectEphemeralValues(
const Loop *L, AssumptionCache *AC,
SmallPtrSetImpl<const Value *> &EphValues) {
- SmallVector<const Value *, 16> WorkSet;
+ SmallPtrSet<const Value *, 32> Visited;
+ SmallVector<const Value *, 16> Worklist;
for (auto &AssumeVH : AC->assumptions()) {
if (!AssumeVH)
continue;
Instruction *I = cast<Instruction>(AssumeVH);
- // Filter out call sites outside of the loop so we don't to a function's
+ // Filter out call sites outside of the loop so we don't do a function's
// worth of work for each of its loops (and, in the common case, ephemeral
// values in the loop are likely due to @llvm.assume calls in the loop).
if (!L->contains(I->getParent()))
continue;
- WorkSet.push_back(I);
+ if (EphValues.insert(I).second)
+ appendSpeculatableOperands(I, Visited, Worklist);
}
- completeEphemeralValues(WorkSet, EphValues);
+ completeEphemeralValues(Visited, Worklist, EphValues);
}
void CodeMetrics::collectEphemeralValues(
const Function *F, AssumptionCache *AC,
SmallPtrSetImpl<const Value *> &EphValues) {
- SmallVector<const Value *, 16> WorkSet;
+ SmallPtrSet<const Value *, 32> Visited;
+ SmallVector<const Value *, 16> Worklist;
for (auto &AssumeVH : AC->assumptions()) {
if (!AssumeVH)
@@ -94,17 +106,19 @@ void CodeMetrics::collectEphemeralValues(
Instruction *I = cast<Instruction>(AssumeVH);
assert(I->getParent()->getParent() == F &&
"Found assumption for the wrong function!");
- WorkSet.push_back(I);
+
+ if (EphValues.insert(I).second)
+ appendSpeculatableOperands(I, Visited, Worklist);
}
- completeEphemeralValues(WorkSet, EphValues);
+ completeEphemeralValues(Visited, Worklist, EphValues);
}
/// Fill in the current structure with information gleaned from the specified
/// block.
void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
const TargetTransformInfo &TTI,
- SmallPtrSetImpl<const Value*> &EphValues) {
+ const SmallPtrSetImpl<const Value*> &EphValues) {
++NumBlocks;
unsigned NumInstsBeforeThisBB = NumInsts;
for (const Instruction &I : *BB) {
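[editor's note] The rewritten completeEphemeralValues above walks the worklist by index instead of erasing from the front. A standalone sketch of that shape (std::vector in place of SmallVector): processed entries are left in place and new work is appended during iteration, giving queue behavior without the quadratic erase-from-front cost.

#include <vector>

void drain(std::vector<int> &Worklist) {
  // Re-read size() every iteration: the body may grow the vector.
  for (unsigned i = 0; i < Worklist.size(); ++i) {
    int V = Worklist[i]; // copy out: push_back below may reallocate
    if (V > 1)
      Worklist.push_back(V / 2);
  }
}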
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index c9adaa7b111c..73867279abe4 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -17,29 +17,38 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/config.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cerrno>
#include <cfenv>
#include <cmath>
-#include <limits>
+#include <cstddef>
+#include <cstdint>
using namespace llvm;
@@ -49,6 +58,36 @@ namespace {
// Constant Folding internal helper functions
//===----------------------------------------------------------------------===//
+static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
+ Constant *C, Type *SrcEltTy,
+ unsigned NumSrcElts,
+ const DataLayout &DL) {
+ // Now that we know that the input value is a vector of integers, just shift
+ // and insert them into our result.
+ unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy);
+ for (unsigned i = 0; i != NumSrcElts; ++i) {
+ Constant *Element;
+ if (DL.isLittleEndian())
+ Element = C->getAggregateElement(NumSrcElts - i - 1);
+ else
+ Element = C->getAggregateElement(i);
+
+ if (Element && isa<UndefValue>(Element)) {
+ Result <<= BitShift;
+ continue;
+ }
+
+ auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
+ if (!ElementCI)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ Result <<= BitShift;
+ Result |= ElementCI->getValue().zextOrSelf(Result.getBitWidth());
+ }
+
+ return nullptr;
+}
+
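[editor's note] A worked example of foldConstVectorToAPInt's shift-and-insert loop, standalone with uint32_t in place of APInt: on a little-endian target element 0 of the vector occupies the low bits of the scalar, which is why the loop reads elements from the highest-addressed one down.

#include <cstdint>

// Bitcasting <2 x i16> <0x1111, 0x2222> to i32 with little-endian layout.
uint32_t foldVectorLE(uint16_t Elt0, uint16_t Elt1) {
  uint32_t Result = 0;
  Result = (Result << 16) | Elt1; // highest-addressed element first
  Result = (Result << 16) | Elt0; // element 0 lands in the low bits
  return Result;                  // 0x22221111 for <0x1111, 0x2222>
}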
/// Constant fold bitcast, symbolically evaluating it with DataLayout.
/// This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
@@ -60,45 +99,33 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
!DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types!
return Constant::getAllOnesValue(DestTy);
- // Handle a vector->integer cast.
- if (auto *IT = dyn_cast<IntegerType>(DestTy)) {
- auto *VTy = dyn_cast<VectorType>(C->getType());
- if (!VTy)
- return ConstantExpr::getBitCast(C, DestTy);
+ if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
+ // Handle a vector->scalar integer/fp cast.
+ if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) {
+ unsigned NumSrcElts = VTy->getNumElements();
+ Type *SrcEltTy = VTy->getElementType();
+
+ // If the vector is a vector of floating point, convert it to vector of int
+ // to simplify things.
+ if (SrcEltTy->isFloatingPointTy()) {
+ unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+ Type *SrcIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
+ // Ask IR to do the conversion now that #elts line up.
+ C = ConstantExpr::getBitCast(C, SrcIVTy);
+ }
- unsigned NumSrcElts = VTy->getNumElements();
- Type *SrcEltTy = VTy->getElementType();
-
- // If the vector is a vector of floating point, convert it to vector of int
- // to simplify things.
- if (SrcEltTy->isFloatingPointTy()) {
- unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
- Type *SrcIVTy =
- VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
- // Ask IR to do the conversion now that #elts line up.
- C = ConstantExpr::getBitCast(C, SrcIVTy);
- }
+ APInt Result(DL.getTypeSizeInBits(DestTy), 0);
+ if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C,
+ SrcEltTy, NumSrcElts, DL))
+ return CE;
- // Now that we know that the input value is a vector of integers, just shift
- // and insert them into our result.
- unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy);
- APInt Result(IT->getBitWidth(), 0);
- for (unsigned i = 0; i != NumSrcElts; ++i) {
- Constant *Element;
- if (DL.isLittleEndian())
- Element = C->getAggregateElement(NumSrcElts-i-1);
- else
- Element = C->getAggregateElement(i);
-
- auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
- if (!ElementCI)
- return ConstantExpr::getBitCast(C, DestTy);
+ if (isa<IntegerType>(DestTy))
+ return ConstantInt::get(DestTy, Result);
- Result <<= BitShift;
- Result |= ElementCI->getValue().zextOrSelf(IT->getBitWidth());
+ APFloat FP(DestTy->getFltSemantics(), Result);
+ return ConstantFP::get(DestTy->getContext(), FP);
}
-
- return ConstantInt::get(IT, Result);
}
// The code below only handles casts to vectors currently.
@@ -180,7 +207,11 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
Constant *Elt = Zero;
unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
for (unsigned j = 0; j != Ratio; ++j) {
- Constant *Src = dyn_cast<ConstantInt>(C->getAggregateElement(SrcElt++));
+ Constant *Src = C->getAggregateElement(SrcElt++);
+ if (Src && isa<UndefValue>(Src))
+ Src = Constant::getNullValue(C->getType()->getVectorElementType());
+ else
+ Src = dyn_cast_or_null<ConstantInt>(Src);
if (!Src) // Reject constantexpr elements.
return ConstantExpr::getBitCast(C, DestTy);
@@ -206,8 +237,19 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
// Loop over each source value, expanding into multiple results.
for (unsigned i = 0; i != NumSrcElt; ++i) {
- auto *Src = dyn_cast<ConstantInt>(C->getAggregateElement(i));
- if (!Src) // Reject constantexpr elements.
+ auto *Element = C->getAggregateElement(i);
+
+ if (!Element) // Reject constantexpr elements.
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ if (isa<UndefValue>(Element)) {
+      // Correctly propagate undef values.
+ Result.append(Ratio, UndefValue::get(DstEltTy));
+ continue;
+ }
+
+ auto *Src = dyn_cast<ConstantInt>(Element);
+ if (!Src)
return ConstantExpr::getBitCast(C, DestTy);
unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
@@ -333,7 +375,7 @@ bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
uint64_t CurEltOffset = SL->getElementOffset(Index);
ByteOffset -= CurEltOffset;
- while (1) {
+ while (true) {
// If the element access is to the element itself and not to tail padding,
// read the bytes from the element.
uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());
@@ -689,23 +731,27 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
/// If array indices are not pointer-sized integers, explicitly cast them so
/// that they aren't implicitly casted by the getelementptr.
Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
- Type *ResultTy, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
+ Type *ResultTy, Optional<unsigned> InRangeIndex,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
Type *IntPtrTy = DL.getIntPtrType(ResultTy);
+ Type *IntPtrScalarTy = IntPtrTy->getScalarType();
bool Any = false;
SmallVector<Constant*, 32> NewIdxs;
for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
if ((i == 1 ||
- !isa<StructType>(GetElementPtrInst::getIndexedType(SrcElemTy,
- Ops.slice(1, i - 1)))) &&
- Ops[i]->getType() != IntPtrTy) {
+ !isa<StructType>(GetElementPtrInst::getIndexedType(
+ SrcElemTy, Ops.slice(1, i - 1)))) &&
+ Ops[i]->getType()->getScalarType() != IntPtrScalarTy) {
Any = true;
+ Type *NewType = Ops[i]->getType()->isVectorTy()
+ ? IntPtrTy
+ : IntPtrTy->getScalarType();
NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
true,
- IntPtrTy,
+ NewType,
true),
- Ops[i], IntPtrTy));
+ Ops[i], NewType));
} else
NewIdxs.push_back(Ops[i]);
}
@@ -713,11 +759,10 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
if (!Any)
return nullptr;
- Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], NewIdxs);
- if (auto *CE = dyn_cast<ConstantExpr>(C)) {
- if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
- C = Folded;
- }
+ Constant *C = ConstantExpr::getGetElementPtr(
+ SrcElemTy, Ops[0], NewIdxs, /*InBounds=*/false, InRangeIndex);
+ if (Constant *Folded = ConstantFoldConstant(C, DL, TLI))
+ C = Folded;
return C;
}
@@ -744,13 +789,17 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
ArrayRef<Constant *> Ops,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
+ const GEPOperator *InnermostGEP = GEP;
+ bool InBounds = GEP->isInBounds();
+
Type *SrcElemTy = GEP->getSourceElementType();
Type *ResElemTy = GEP->getResultElementType();
Type *ResTy = GEP->getType();
if (!SrcElemTy->isSized())
return nullptr;
- if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy, DL, TLI))
+ if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy,
+ GEP->getInRangeIndex(), DL, TLI))
return C;
Constant *Ptr = Ops[0];
@@ -775,8 +824,8 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
Res = ConstantExpr::getSub(Res, CE->getOperand(1));
Res = ConstantExpr::getIntToPtr(Res, ResTy);
- if (auto *ResCE = dyn_cast<ConstantExpr>(Res))
- Res = ConstantFoldConstantExpression(ResCE, DL, TLI);
+ if (auto *FoldedRes = ConstantFoldConstant(Res, DL, TLI))
+ Res = FoldedRes;
return Res;
}
}
@@ -793,6 +842,9 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
// If this is a GEP of a GEP, fold it all into a single GEP.
while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
+ InnermostGEP = GEP;
+ InBounds &= GEP->isInBounds();
+
SmallVector<Value *, 4> NestedOps(GEP->op_begin() + 1, GEP->op_end());
     // Do not try to incorporate the sub-GEP if some index is not a number.
@@ -821,7 +873,9 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
}
}
- if (Ptr->isNullValue() || BasePtr != 0) {
+ auto *PTy = cast<PointerType>(Ptr->getType());
+ if ((Ptr->isNullValue() || BasePtr != 0) &&
+ !DL.isNonIntegralPointerType(PTy)) {
Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr);
return ConstantExpr::getIntToPtr(C, ResTy);
}
@@ -830,8 +884,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
// we eliminate over-indexing of the notional static type array bounds.
// This makes it easy to determine if the getelementptr is "inbounds".
// Also, this helps GlobalOpt do SROA on GlobalVariables.
- Type *Ty = Ptr->getType();
- assert(Ty->isPointerTy() && "Forming regular GEP of non-pointer type");
+ Type *Ty = PTy;
SmallVector<Constant *, 32> NewIdxs;
do {
@@ -897,8 +950,23 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
if (Offset != 0)
return nullptr;
+ // Preserve the inrange index from the innermost GEP if possible. We must
+ // have calculated the same indices up to and including the inrange index.
+ Optional<unsigned> InRangeIndex;
+ if (Optional<unsigned> LastIRIndex = InnermostGEP->getInRangeIndex())
+ if (SrcElemTy == InnermostGEP->getSourceElementType() &&
+ NewIdxs.size() > *LastIRIndex) {
+ InRangeIndex = LastIRIndex;
+ for (unsigned I = 0; I <= *LastIRIndex; ++I)
+ if (NewIdxs[I] != InnermostGEP->getOperand(I + 1)) {
+ InRangeIndex = None;
+ break;
+ }
+ }
+
// Create a GEP.
- Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs);
+ Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs,
+ InBounds, InRangeIndex);
assert(C->getType()->getPointerElementType() == Ty &&
"Computed GetElementPtr has unexpected type!");
@@ -916,15 +984,16 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
/// attempting to fold instructions like loads and stores, which have no
/// constant expression form.
///
-/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/etc
-/// information, due to only being passed an opcode and operands. Constant
+/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/inrange
+/// etc information, due to only being passed an opcode and operands. Constant
/// folding using this function strips this information.
///
-Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, Type *DestTy,
- unsigned Opcode,
+Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
ArrayRef<Constant *> Ops,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
+ Type *DestTy = InstOrCE->getType();
+
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode))
return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
@@ -936,10 +1005,14 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, Type *DestTy,
if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI))
return C;
- return ConstantExpr::getGetElementPtr(GEP->getSourceElementType(),
- Ops[0], Ops.slice(1));
+ return ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), Ops[0],
+ Ops.slice(1), GEP->isInBounds(),
+ GEP->getInRangeIndex());
}
+ if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE))
+ return CE->getWithOperands(Ops);
+
switch (Opcode) {
default: return nullptr;
case Instruction::ICmp:
@@ -966,12 +1039,58 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, Type *DestTy,
// Constant Folding public APIs
//===----------------------------------------------------------------------===//
+namespace {
+
+Constant *
+ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ SmallDenseMap<Constant *, Constant *> &FoldedOps) {
+ if (!isa<ConstantVector>(C) && !isa<ConstantExpr>(C))
+ return nullptr;
+
+ SmallVector<Constant *, 8> Ops;
+ for (const Use &NewU : C->operands()) {
+ auto *NewC = cast<Constant>(&NewU);
+ // Recursively fold the ConstantExpr's operands. If we have already folded
+ // a ConstantExpr, we don't have to process it again.
+ if (isa<ConstantVector>(NewC) || isa<ConstantExpr>(NewC)) {
+ auto It = FoldedOps.find(NewC);
+ if (It == FoldedOps.end()) {
+ if (auto *FoldedC =
+ ConstantFoldConstantImpl(NewC, DL, TLI, FoldedOps)) {
+          FoldedOps.insert({NewC, FoldedC});
+          NewC = FoldedC;
+ } else {
+ FoldedOps.insert({NewC, NewC});
+ }
+ } else {
+ NewC = It->second;
+ }
+ }
+ Ops.push_back(NewC);
+ }
+
+ if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+ if (CE->isCompare())
+ return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
+ DL, TLI);
+
+ return ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI);
+ }
+
+ assert(isa<ConstantVector>(C));
+ return ConstantVector::get(Ops);
+}
+
+} // end anonymous namespace
+
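[editor's note] The FoldedOps map threaded through ConstantFoldConstantImpl memoizes results across the constant-expression DAG, so a subexpression shared by many operands is folded only once, keyed by the original node. A minimal standalone sketch of the same shape (the Expr node and foldOnce step are assumptions for illustration, not LLVM API):

#include <unordered_map>

struct Expr { Expr *L = nullptr, *R = nullptr; };
Expr *foldOnce(Expr *E); // assumed: folds a single node or returns nullptr

Expr *foldRec(Expr *E, std::unordered_map<Expr *, Expr *> &Memo) {
  if (!E)
    return nullptr;
  auto It = Memo.find(E);
  if (It != Memo.end())
    return It->second;   // shared subexpression: reuse the cached fold
  E->L = foldRec(E->L, Memo);
  E->R = foldRec(E->R, Memo);
  Expr *F = foldOnce(E);
  Memo[E] = F ? F : E;   // cache keyed by the original node
  return Memo[E];
}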
Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Handle PHI nodes quickly here...
if (auto *PN = dyn_cast<PHINode>(I)) {
Constant *CommonValue = nullptr;
+ SmallDenseMap<Constant *, Constant *> FoldedOps;
for (Value *Incoming : PN->incoming_values()) {
// If the incoming value is undef then skip it. Note that while we could
// skip the value if it is equal to the phi node itself we choose not to
@@ -984,8 +1103,8 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
if (!C)
return nullptr;
// Fold the PHI's operands.
- if (auto *NewC = dyn_cast<ConstantExpr>(C))
- C = ConstantFoldConstantExpression(NewC, DL, TLI);
+ if (auto *FoldedC = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps))
+ C = FoldedC;
// If the incoming value is a different constant to
// the one we saw previously, then give up.
if (CommonValue && C != CommonValue)
@@ -993,7 +1112,6 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
CommonValue = C;
}
-
// If we reach here, all incoming values are the same constant or undef.
return CommonValue ? CommonValue : UndefValue::get(PN->getType());
}
@@ -1003,12 +1121,13 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
if (!all_of(I->operands(), [](Use &U) { return isa<Constant>(U); }))
return nullptr;
+ SmallDenseMap<Constant *, Constant *> FoldedOps;
SmallVector<Constant *, 8> Ops;
for (const Use &OpU : I->operands()) {
auto *Op = cast<Constant>(&OpU);
// Fold the Instruction's operands.
- if (auto *NewCE = dyn_cast<ConstantExpr>(Op))
- Op = ConstantFoldConstantExpression(NewCE, DL, TLI);
+ if (auto *FoldedOp = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps))
+ Op = FoldedOp;
Ops.push_back(Op);
}
@@ -1036,55 +1155,17 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
return ConstantFoldInstOperands(I, Ops, DL, TLI);
}
-namespace {
-
-Constant *
-ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- SmallPtrSetImpl<ConstantExpr *> &FoldedOps) {
- SmallVector<Constant *, 8> Ops;
- for (const Use &NewU : CE->operands()) {
- auto *NewC = cast<Constant>(&NewU);
- // Recursively fold the ConstantExpr's operands. If we have already folded
- // a ConstantExpr, we don't have to process it again.
- if (auto *NewCE = dyn_cast<ConstantExpr>(NewC)) {
- if (FoldedOps.insert(NewCE).second)
- NewC = ConstantFoldConstantExpressionImpl(NewCE, DL, TLI, FoldedOps);
- }
- Ops.push_back(NewC);
- }
-
- if (CE->isCompare())
- return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
- DL, TLI);
-
- return ConstantFoldInstOperandsImpl(CE, CE->getType(), CE->getOpcode(), Ops,
- DL, TLI);
-}
-
-} // end anonymous namespace
-
-Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- SmallPtrSet<ConstantExpr *, 4> FoldedOps;
- return ConstantFoldConstantExpressionImpl(CE, DL, TLI, FoldedOps);
+Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ SmallDenseMap<Constant *, Constant *> FoldedOps;
+ return ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
}
Constant *llvm::ConstantFoldInstOperands(Instruction *I,
ArrayRef<Constant *> Ops,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
- return ConstantFoldInstOperandsImpl(I, I->getType(), I->getOpcode(), Ops, DL,
- TLI);
-}
-
-Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
- ArrayRef<Constant *> Ops,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- assert(Opcode != Instruction::GetElementPtr && "Invalid for GEPs");
- return ConstantFoldInstOperandsImpl(nullptr, DestTy, Opcode, Ops, DL, TLI);
+ return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI);
}
Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
@@ -1350,6 +1431,8 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
Name == "log10f";
case 'p':
return Name == "pow" || Name == "powf";
+ case 'r':
+ return Name == "round" || Name == "roundf";
case 's':
return Name == "sin" || Name == "sinh" || Name == "sqrt" ||
Name == "sinf" || Name == "sinhf" || Name == "sqrtf";
@@ -1364,7 +1447,7 @@ Constant *GetConstantFoldFPValue(double V, Type *Ty) {
if (Ty->isHalfTy()) {
APFloat APF(V);
bool unused;
- APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused);
+ APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &unused);
return ConstantFP::get(Ty->getContext(), APF);
}
if (Ty->isFloatTy())
@@ -1455,7 +1538,7 @@ double getValueAsDouble(ConstantFP *Op) {
bool unused;
APFloat APF = Op->getValueAPF();
- APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused);
+ APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &unused);
return APF.convertToDouble();
}
@@ -1473,7 +1556,7 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
APFloat Val(Op->getValueAPF());
bool lost = false;
- Val.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &lost);
+ Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost);
return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
}
@@ -1614,6 +1697,10 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
}
}
break;
+  case 'r':
+    if ((Name == "round" && TLI->has(LibFunc::round)) ||
+        (Name == "roundf" && TLI->has(LibFunc::roundf)))
+      return ConstantFoldFP(round, V, Ty);
+    break;
case 's':
if ((Name == "sin" && TLI->has(LibFunc::sin)) ||
(Name == "sinf" && TLI->has(LibFunc::sinf)))
@@ -1648,7 +1735,7 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
case Intrinsic::bitreverse:
return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());
case Intrinsic::convert_from_fp16: {
- APFloat Val(APFloat::IEEEhalf, Op->getValue());
+ APFloat Val(APFloat::IEEEhalf(), Op->getValue());
bool lost = false;
APFloat::opStatus status = Val.convert(
@@ -1927,3 +2014,152 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI);
}
+
+bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
+ // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
+ // (and to some extent ConstantFoldScalarCall).
+ Function *F = CS.getCalledFunction();
+ if (!F)
+ return false;
+
+ LibFunc::Func Func;
+ if (!TLI || !TLI->getLibFunc(*F, Func))
+ return false;
+
+ if (CS.getNumArgOperands() == 1) {
+ if (ConstantFP *OpC = dyn_cast<ConstantFP>(CS.getArgOperand(0))) {
+ const APFloat &Op = OpC->getValueAPF();
+ switch (Func) {
+ case LibFunc::logl:
+ case LibFunc::log:
+ case LibFunc::logf:
+ case LibFunc::log2l:
+ case LibFunc::log2:
+ case LibFunc::log2f:
+ case LibFunc::log10l:
+ case LibFunc::log10:
+ case LibFunc::log10f:
+ return Op.isNaN() || (!Op.isZero() && !Op.isNegative());
+
+ case LibFunc::expl:
+ case LibFunc::exp:
+ case LibFunc::expf:
+ // FIXME: These boundaries are slightly conservative.
+ if (OpC->getType()->isDoubleTy())
+ return Op.compare(APFloat(-745.0)) != APFloat::cmpLessThan &&
+ Op.compare(APFloat(709.0)) != APFloat::cmpGreaterThan;
+ if (OpC->getType()->isFloatTy())
+ return Op.compare(APFloat(-103.0f)) != APFloat::cmpLessThan &&
+ Op.compare(APFloat(88.0f)) != APFloat::cmpGreaterThan;
+ break;
+
+ case LibFunc::exp2l:
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ // FIXME: These boundaries are slightly conservative.
+ if (OpC->getType()->isDoubleTy())
+ return Op.compare(APFloat(-1074.0)) != APFloat::cmpLessThan &&
+ Op.compare(APFloat(1023.0)) != APFloat::cmpGreaterThan;
+ if (OpC->getType()->isFloatTy())
+ return Op.compare(APFloat(-149.0f)) != APFloat::cmpLessThan &&
+ Op.compare(APFloat(127.0f)) != APFloat::cmpGreaterThan;
+ break;
+
+ case LibFunc::sinl:
+ case LibFunc::sin:
+ case LibFunc::sinf:
+ case LibFunc::cosl:
+ case LibFunc::cos:
+ case LibFunc::cosf:
+ return !Op.isInfinity();
+
+ case LibFunc::tanl:
+ case LibFunc::tan:
+ case LibFunc::tanf: {
+ // FIXME: Stop using the host math library.
+ // FIXME: The computation isn't done in the right precision.
+ Type *Ty = OpC->getType();
+ if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) {
+ double OpV = getValueAsDouble(OpC);
+ return ConstantFoldFP(tan, OpV, Ty) != nullptr;
+ }
+ break;
+ }
+
+ case LibFunc::asinl:
+ case LibFunc::asin:
+ case LibFunc::asinf:
+ case LibFunc::acosl:
+ case LibFunc::acos:
+ case LibFunc::acosf:
+ return Op.compare(APFloat(Op.getSemantics(), "-1")) !=
+ APFloat::cmpLessThan &&
+ Op.compare(APFloat(Op.getSemantics(), "1")) !=
+ APFloat::cmpGreaterThan;
+
+ case LibFunc::sinh:
+ case LibFunc::cosh:
+ case LibFunc::sinhf:
+ case LibFunc::coshf:
+ case LibFunc::sinhl:
+ case LibFunc::coshl:
+ // FIXME: These boundaries are slightly conservative.
+ if (OpC->getType()->isDoubleTy())
+ return Op.compare(APFloat(-710.0)) != APFloat::cmpLessThan &&
+ Op.compare(APFloat(710.0)) != APFloat::cmpGreaterThan;
+ if (OpC->getType()->isFloatTy())
+ return Op.compare(APFloat(-89.0f)) != APFloat::cmpLessThan &&
+ Op.compare(APFloat(89.0f)) != APFloat::cmpGreaterThan;
+ break;
+
+ case LibFunc::sqrtl:
+ case LibFunc::sqrt:
+ case LibFunc::sqrtf:
+ return Op.isNaN() || Op.isZero() || !Op.isNegative();
+
+ // FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p,
+ // maybe others?
+ default:
+ break;
+ }
+ }
+ }
+
+ if (CS.getNumArgOperands() == 2) {
+ ConstantFP *Op0C = dyn_cast<ConstantFP>(CS.getArgOperand(0));
+ ConstantFP *Op1C = dyn_cast<ConstantFP>(CS.getArgOperand(1));
+ if (Op0C && Op1C) {
+ const APFloat &Op0 = Op0C->getValueAPF();
+ const APFloat &Op1 = Op1C->getValueAPF();
+
+ switch (Func) {
+ case LibFunc::powl:
+ case LibFunc::pow:
+ case LibFunc::powf: {
+ // FIXME: Stop using the host math library.
+ // FIXME: The computation isn't done in the right precision.
+ Type *Ty = Op0C->getType();
+ if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) {
+ if (Ty == Op1C->getType()) {
+ double Op0V = getValueAsDouble(Op0C);
+ double Op1V = getValueAsDouble(Op1C);
+ return ConstantFoldBinaryFP(pow, Op0V, Op1V, Ty) != nullptr;
+ }
+ }
+ break;
+ }
+
+ case LibFunc::fmodl:
+ case LibFunc::fmod:
+ case LibFunc::fmodf:
+ return Op0.isNaN() || Op1.isNaN() ||
+ (!Op0.isInfinity() && !Op1.isZero());
+
+ default:
+ break;
+ }
+ }
+ }
+
+ return false;
+}
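[editor's note] Rough provenance of the exp() guard values above, which the FIXMEs call slightly conservative: 709 sits just under ln(DBL_MAX), and -745 is just past the natural log of the smallest denormal double, so between the two the host exp() stays finite and nonzero. A standalone check (assumes C++17's DBL_TRUE_MIN):

#include <cfloat>
#include <cmath>
#include <cstdio>

int main() {
  std::printf("ln(DBL_MAX)      = %.2f\n", std::log(DBL_MAX));      // ~709.78
  std::printf("ln(DBL_TRUE_MIN) = %.2f\n", std::log(DBL_TRUE_MIN)); // ~-744.44
}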
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index 68a4bea96baa..67d1773f0811 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -89,14 +90,35 @@ CostModelAnalysis::runOnFunction(Function &F) {
return false;
}
-static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) {
+static bool isReverseVectorMask(ArrayRef<int> Mask) {
for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
- if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i))
+ if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i))
return false;
return true;
}
-static bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) {
+static bool isSingleSourceVectorMask(ArrayRef<int> Mask) {
+ bool Vec0 = false;
+ bool Vec1 = false;
+ for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) {
+ if (Mask[i] >= 0) {
+ if ((unsigned)Mask[i] >= NumVecElts)
+ Vec1 = true;
+ else
+ Vec0 = true;
+ }
+ }
+ return !(Vec0 && Vec1);
+}
+
+static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) {
+ for (unsigned i = 0; i < Mask.size(); ++i)
+ if (Mask[i] > 0)
+ return false;
+ return true;
+}
+
+static bool isAlternateVectorMask(ArrayRef<int> Mask) {
bool isAlternate = true;
unsigned MaskSize = Mask.size();
@@ -123,7 +145,7 @@ static bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) {
static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
TargetTransformInfo::OperandValueKind OpInfo =
- TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OK_AnyValue;
  // Check for a splat of a constant or for a non-uniform vector of constants.
if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
@@ -132,6 +154,12 @@ static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
OpInfo = TargetTransformInfo::OK_UniformConstantValue;
}
+ // Check for a splat of a uniform value. This is not loop aware, so return
+ // true only for the obviously uniform cases (argument, globalvalue)
+ const Value *Splat = getSplatValue(V);
+ if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
+ OpInfo = TargetTransformInfo::OK_UniformValue;
+
return OpInfo;
}
@@ -494,6 +522,17 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
if (isAlternateVectorMask(Mask))
return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate,
VecTypOp0, 0, nullptr);
+
+ if (isZeroEltBroadcastVectorMask(Mask))
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast,
+ VecTypOp0, 0, nullptr);
+
+ if (isSingleSourceVectorMask(Mask))
+ return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
+ VecTypOp0, 0, nullptr);
+
+ return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
+ VecTypOp0, 0, nullptr);
}
return -1;
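[editor's note] Illustrative inputs for the new mask classifiers, with a standalone copy of the reverse check (std::vector in place of ArrayRef). Note the > 0 to >= 0 change above: a lane index of 0 is now checked like any other lane, and only -1 (undef) acts as a wildcard.

#include <cassert>
#include <vector>

static bool isReverse(const std::vector<int> &M) {
  for (unsigned i = 0, e = M.size(); i != e; ++i)
    if (M[i] >= 0 && M[i] != (int)(e - 1 - i))
      return false;
  return true;
}

int main() {
  std::vector<int> Rev = {3, 2, 1, 0};
  std::vector<int> RevUndef = {3, -1, 1, 0};
  std::vector<int> Sneaky = {0, 2, 1, 0};
  assert(isReverse(Rev));
  assert(isReverse(RevUndef)); // undef lanes are wildcards
  assert(!isReverse(Sneaky));  // lane 0 is wrong; the old > 0 test missed it
}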
diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp
index a3f8b7fda08a..688c1db534c1 100644
--- a/lib/Analysis/DemandedBits.cpp
+++ b/lib/Analysis/DemandedBits.cpp
@@ -280,10 +280,8 @@ void DemandedBits::performAnalysis() {
// add their operands to the work list (for integer values operands, mark
// all bits as live).
if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
- if (!AliveBits.count(&I)) {
- AliveBits[&I] = APInt(IT->getBitWidth(), 0);
+ if (AliveBits.try_emplace(&I, IT->getBitWidth(), 0).second)
Worklist.push_back(&I);
- }
continue;
}
@@ -363,8 +361,9 @@ APInt DemandedBits::getDemandedBits(Instruction *I) {
performAnalysis();
const DataLayout &DL = I->getParent()->getModule()->getDataLayout();
- if (AliveBits.count(I))
- return AliveBits[I];
+ auto Found = AliveBits.find(I);
+ if (Found != AliveBits.end())
+ return Found->second;
return APInt::getAllOnesValue(DL.getTypeSizeInBits(I->getType()));
}
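[editor's note] The two map idioms adopted above, standalone with std::unordered_map (try_emplace is C++17): insert-if-absent in a single lookup instead of count() plus operator[], and find() instead of count() plus a second lookup on the read side.

#include <unordered_map>
#include <vector>

void example(std::unordered_map<int, int> &Alive, std::vector<int> &Worklist) {
  // .second is true only when the key was newly inserted.
  if (Alive.try_emplace(42, 0).second)
    Worklist.push_back(42);

  auto Found = Alive.find(42); // one read-side lookup
  int Bits = Found != Alive.end() ? Found->second : -1;
  (void)Bits;
}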
@@ -387,10 +386,10 @@ FunctionPass *llvm::createDemandedBitsWrapperPass() {
return new DemandedBitsWrapperPass();
}
-char DemandedBitsAnalysis::PassID;
+AnalysisKey DemandedBitsAnalysis::Key;
DemandedBits DemandedBitsAnalysis::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
return DemandedBits(F, AC, DT);
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index eb4d925fea73..a332a07ce864 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -122,7 +122,7 @@ DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
return DependenceInfo(&F, &AA, &SE, &LI);
}
-char DependenceAnalysis::PassID;
+AnalysisKey DependenceAnalysis::Key;
INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da",
"Dependence Analysis", true, true)
diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp
index 4554374252a4..15856c3f8b7a 100644
--- a/lib/Analysis/DominanceFrontier.cpp
+++ b/lib/Analysis/DominanceFrontier.cpp
@@ -56,7 +56,7 @@ LLVM_DUMP_METHOD void DominanceFrontierWrapperPass::dump() const {
}
#endif
-char DominanceFrontierAnalysis::PassID;
+AnalysisKey DominanceFrontierAnalysis::Key;
DominanceFrontier DominanceFrontierAnalysis::run(Function &F,
FunctionAnalysisManager &AM) {
diff --git a/lib/Analysis/EHPersonalities.cpp b/lib/Analysis/EHPersonalities.cpp
index 5f951f5112e9..ebf0a370b0b0 100644
--- a/lib/Analysis/EHPersonalities.cpp
+++ b/lib/Analysis/EHPersonalities.cpp
@@ -40,6 +40,29 @@ EHPersonality llvm::classifyEHPersonality(const Value *Pers) {
.Default(EHPersonality::Unknown);
}
+StringRef llvm::getEHPersonalityName(EHPersonality Pers) {
+ switch (Pers) {
+ case EHPersonality::GNU_Ada: return "__gnat_eh_personality";
+ case EHPersonality::GNU_CXX: return "__gxx_personality_v0";
+ case EHPersonality::GNU_CXX_SjLj: return "__gxx_personality_sj0";
+ case EHPersonality::GNU_C: return "__gcc_personality_v0";
+ case EHPersonality::GNU_C_SjLj: return "__gcc_personality_sj0";
+ case EHPersonality::GNU_ObjC: return "__objc_personality_v0";
+ case EHPersonality::MSVC_X86SEH: return "_except_handler3";
+ case EHPersonality::MSVC_Win64SEH: return "__C_specific_handler";
+ case EHPersonality::MSVC_CXX: return "__CxxFrameHandler3";
+ case EHPersonality::CoreCLR: return "ProcessCLRException";
+ case EHPersonality::Rust: return "rust_eh_personality";
+ case EHPersonality::Unknown: llvm_unreachable("Unknown EHPersonality!");
+ }
+
+ llvm_unreachable("Invalid EHPersonality!");
+}
+
+EHPersonality llvm::getDefaultEHPersonality(const Triple &T) {
+ return EHPersonality::GNU_C;
+}
+
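[editor's note] The switch above deliberately has no default: case, so -Wswitch flags any EHPersonality enumerator added later, while the llvm_unreachable placed after the switch catches out-of-range values at runtime. The idiom in standalone form:

#include <cstdlib>

enum class Kind { A, B };

const char *name(Kind K) {
  switch (K) { // no default: the compiler warns if a Kind goes unhandled
  case Kind::A: return "a";
  case Kind::B: return "b";
  }
  std::abort(); // reachable only with a corrupt enum value
}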
bool llvm::canSimplifyInvokeNoUnwind(const Function *F) {
EHPersonality Personality = classifyEHPersonality(F->getPersonalityFn());
// We can't simplify any invokes to nounwind functions if the personality
@@ -82,7 +105,7 @@ DenseMap<BasicBlock *, ColorVector> llvm::colorEHFunclets(Function &F) {
}
// Note that this is a member of the given color.
ColorVector &Colors = BlockColors[Visiting];
- if (std::find(Colors.begin(), Colors.end(), Color) == Colors.end())
+ if (!is_contained(Colors, Color))
Colors.push_back(Color);
else
continue;
diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp
index a7d1e048e133..33f00cb19b26 100644
--- a/lib/Analysis/GlobalsModRef.cpp
+++ b/lib/Analysis/GlobalsModRef.cpp
@@ -78,7 +78,7 @@ class GlobalsAAResult::FunctionInfo {
return (AlignedMap *)P;
}
enum { NumLowBitsAvailable = 3 };
- static_assert(AlignOf<AlignedMap>::Alignment >= (1 << NumLowBitsAvailable),
+ static_assert(alignof(AlignedMap) >= (1 << NumLowBitsAvailable),
"AlignedMap insufficiently aligned to have enough low bits.");
};
@@ -366,6 +366,10 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
} else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
return true; // Allow comparison against null.
+ } else if (Constant *C = dyn_cast<Constant>(I)) {
+ // Ignore constants which don't have any live uses.
+ if (isa<GlobalValue>(C) || C->isConstantUsed())
+ return true;
} else {
return true;
}
@@ -521,7 +525,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// Can't say anything about it. However, if it is inside our SCC,
// then nothing needs to be done.
CallGraphNode *CalleeNode = CG[Callee];
- if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end())
+ if (!is_contained(SCC, CalleeNode))
KnowNothing = true;
}
} else {
@@ -857,22 +861,22 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
if (CS.doesNotAccessMemory())
return MRI_NoModRef;
ModRefInfo ConservativeResult = CS.onlyReadsMemory() ? MRI_Ref : MRI_ModRef;
-
+
// Iterate through all the arguments to the called function. If any argument
// is based on GV, return the conservative result.
for (auto &A : CS.args()) {
SmallVector<Value*, 4> Objects;
GetUnderlyingObjects(A, Objects, DL);
-
+
// All objects must be identified.
- if (!std::all_of(Objects.begin(), Objects.end(), isIdentifiedObject) &&
+ if (!all_of(Objects, isIdentifiedObject) &&
// Try ::alias to see if all objects are known not to alias GV.
- !std::all_of(Objects.begin(), Objects.end(), [&](Value *V) {
+ !all_of(Objects, [&](Value *V) {
return this->alias(MemoryLocation(V), MemoryLocation(GV)) == NoAlias;
- }))
+ }))
return ConservativeResult;
- if (std::find(Objects.begin(), Objects.end(), GV) != Objects.end())
+ if (is_contained(Objects, GV))
return ConservativeResult;
}
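[editor's note] The llvm::all_of and llvm::is_contained helpers adopted above are thin range wrappers over the std:: algorithms; roughly, in standalone form with stand-in element types and predicates:

#include <algorithm>
#include <vector>

bool allIdentified(const std::vector<int> &Objects) {
  return std::all_of(Objects.begin(), Objects.end(),
                     [](int O) { return O != 0; }); // stand-in predicate
}

bool containsValue(const std::vector<int> &Objects, int V) {
  return std::find(Objects.begin(), Objects.end(), V) != Objects.end();
}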
@@ -937,9 +941,9 @@ GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI,
return Result;
}
-char GlobalsAA::PassID;
+AnalysisKey GlobalsAA::Key;
-GlobalsAAResult GlobalsAA::run(Module &M, AnalysisManager<Module> &AM) {
+GlobalsAAResult GlobalsAA::run(Module &M, ModuleAnalysisManager &AM) {
return GlobalsAAResult::analyzeModule(M,
AM.getResult<TargetLibraryAnalysis>(M),
AM.getResult<CallGraphAnalysis>(M));
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 43c0ba17fe4a..76e2561b9da3 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -34,9 +34,9 @@ using namespace llvm;
#define DEBUG_TYPE "iv-users"
-char IVUsersAnalysis::PassID;
+AnalysisKey IVUsersAnalysis::Key;
-IVUsers IVUsersAnalysis::run(Loop &L, AnalysisManager<Loop> &AM) {
+IVUsers IVUsersAnalysis::run(Loop &L, LoopAnalysisManager &AM) {
const auto &FAM =
AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
Function *F = L.getHeader()->getParent();
@@ -47,7 +47,7 @@ IVUsers IVUsersAnalysis::run(Loop &L, AnalysisManager<Loop> &AM) {
FAM.getCachedResult<ScalarEvolutionAnalysis>(*F));
}
-PreservedAnalyses IVUsersPrinterPass::run(Loop &L, AnalysisManager<Loop> &AM) {
+PreservedAnalyses IVUsersPrinterPass::run(Loop &L, LoopAnalysisManager &AM) {
AM.getResult<IVUsersAnalysis>(L).print(OS);
return PreservedAnalyses::all();
}
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index dcb724abc02d..9b9faacd354c 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -40,18 +40,7 @@ using namespace llvm;
STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
-// Threshold to use when optsize is specified (and there is no
-// -inline-threshold).
-const int OptSizeThreshold = 75;
-
-// Threshold to use when -Oz is specified (and there is no -inline-threshold).
-const int OptMinSizeThreshold = 25;
-
-// Threshold to use when -O[34] is specified (and there is no
-// -inline-threshold).
-const int OptAggressiveThreshold = 275;
-
-static cl::opt<int> DefaultInlineThreshold(
+static cl::opt<int> InlineThreshold(
"inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
cl::desc("Control the amount of inlining to perform (default = 225)"));
@@ -66,6 +55,11 @@ static cl::opt<int> ColdThreshold(
"inlinecold-threshold", cl::Hidden, cl::init(225),
cl::desc("Threshold for inlining functions with cold attribute"));
+static cl::opt<int>
+ HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
+ cl::ZeroOrMore,
+                         cl::desc("Threshold for hot callsites"));
+
namespace {
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
@@ -75,20 +69,23 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// The TargetTransformInfo available for this compilation.
const TargetTransformInfo &TTI;
- /// The cache of @llvm.assume intrinsics.
- AssumptionCacheTracker *ACT;
+ /// Getter for the cache of @llvm.assume intrinsics.
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache;
/// Profile summary information.
ProfileSummaryInfo *PSI;
- // The called function.
+ /// The called function.
Function &F;
- // The candidate callsite being analyzed. Please do not use this to do
- // analysis in the caller function; we want the inline cost query to be
- // easily cacheable. Instead, use the cover function paramHasAttr.
+ /// The candidate callsite being analyzed. Please do not use this to do
+ /// analysis in the caller function; we want the inline cost query to be
+ /// easily cacheable. Instead, use the cover function paramHasAttr.
CallSite CandidateCS;
+ /// Tunable parameters that control the analysis.
+ const InlineParams &Params;
+
int Threshold;
int Cost;
@@ -107,25 +104,25 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
int FiftyPercentVectorBonus, TenPercentVectorBonus;
int VectorBonus;
- // While we walk the potentially-inlined instructions, we build up and
- // maintain a mapping of simplified values specific to this callsite. The
- // idea is to propagate any special information we have about arguments to
- // this call through the inlinable section of the function, and account for
- // likely simplifications post-inlining. The most important aspect we track
- // is CFG altering simplifications -- when we prove a basic block dead, that
- // can cause dramatic shifts in the cost of inlining a function.
+ /// While we walk the potentially-inlined instructions, we build up and
+ /// maintain a mapping of simplified values specific to this callsite. The
+ /// idea is to propagate any special information we have about arguments to
+ /// this call through the inlinable section of the function, and account for
+ /// likely simplifications post-inlining. The most important aspect we track
+ /// is CFG altering simplifications -- when we prove a basic block dead, that
+ /// can cause dramatic shifts in the cost of inlining a function.
DenseMap<Value *, Constant *> SimplifiedValues;
- // Keep track of the values which map back (through function arguments) to
- // allocas on the caller stack which could be simplified through SROA.
+ /// Keep track of the values which map back (through function arguments) to
+ /// allocas on the caller stack which could be simplified through SROA.
DenseMap<Value *, Value *> SROAArgValues;
- // The mapping of caller Alloca values to their accumulated cost savings. If
- // we have to disable SROA for one of the allocas, this tells us how much
- // cost must be added.
+ /// The mapping of caller Alloca values to their accumulated cost savings. If
+ /// we have to disable SROA for one of the allocas, this tells us how much
+ /// cost must be added.
DenseMap<Value *, int> SROAArgCosts;
- // Keep track of values which map to a pointer base and constant offset.
+ /// Keep track of values which map to a pointer base and constant offset.
DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
// Custom simplification helper routines.
@@ -203,20 +200,21 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitUnreachableInst(UnreachableInst &I);
public:
- CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
- ProfileSummaryInfo *PSI, Function &Callee, int Threshold,
- CallSite CSArg)
- : TTI(TTI), ACT(ACT), PSI(PSI), F(Callee), CandidateCS(CSArg),
- Threshold(Threshold), Cost(0), IsCallerRecursive(false),
- IsRecursiveCall(false), ExposesReturnsTwice(false),
- HasDynamicAlloca(false), ContainsNoDuplicateCall(false),
- HasReturn(false), HasIndirectBr(false), HasFrameEscape(false),
- AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
- FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
- NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
- NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
- NumInstructionsSimplified(0), SROACostSavings(0),
- SROACostSavingsLost(0) {}
+ CallAnalyzer(const TargetTransformInfo &TTI,
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg,
+ const InlineParams &Params)
+ : TTI(TTI), GetAssumptionCache(GetAssumptionCache), PSI(PSI), F(Callee),
+ CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
+ Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
+ ExposesReturnsTwice(false), HasDynamicAlloca(false),
+ ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
+ HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
+ NumVectorInstructions(0), FiftyPercentVectorBonus(0),
+ TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
+ NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
+ NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
+ SROACostSavings(0), SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
@@ -320,7 +318,7 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
continue;
// Handle a struct index, which adds its field offset to the pointer.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
unsigned ElementIdx = OpC->getZExtValue();
const StructLayout *SL = DL.getStructLayout(STy);
Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
@@ -620,42 +618,47 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
}
Function *Caller = CS.getCaller();
- if (DefaultInlineThreshold.getNumOccurrences() > 0) {
- // Explicitly specified -inline-threhold overrides the threshold passed to
- // CallAnalyzer's constructor.
- Threshold = DefaultInlineThreshold;
- } else {
- // If -inline-threshold is not given, listen to the optsize and minsize
- // attributes when they would decrease the threshold.
- if (Caller->optForMinSize() && OptMinSizeThreshold < Threshold)
- Threshold = OptMinSizeThreshold;
- else if (Caller->optForSize() && OptSizeThreshold < Threshold)
- Threshold = OptSizeThreshold;
- }
+
+ // return min(A, B) if B is valid.
+ auto MinIfValid = [](int A, Optional<int> B) {
+ return B ? std::min(A, B.getValue()) : A;
+ };
+
+ // return max(A, B) if B is valid.
+ auto MaxIfValid = [](int A, Optional<int> B) {
+ return B ? std::max(A, B.getValue()) : A;
+ };
+
+ // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
+ // and reduce the threshold if the caller has the necessary attribute.
+ if (Caller->optForMinSize())
+ Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
+ else if (Caller->optForSize())
+ Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
bool HotCallsite = false;
uint64_t TotalWeight;
- if (CS.getInstruction()->extractProfTotalWeight(TotalWeight) &&
- PSI->isHotCount(TotalWeight))
+ if (PSI && CS.getInstruction()->extractProfTotalWeight(TotalWeight) &&
+ PSI->isHotCount(TotalWeight)) {
HotCallsite = true;
+ }
// Listen to the inlinehint attribute or profile based hotness information
// when it would increase the threshold and the caller does not need to
// minimize its size.
bool InlineHint = Callee.hasFnAttribute(Attribute::InlineHint) ||
- PSI->isHotFunction(&Callee) ||
- HotCallsite;
- if (InlineHint && HintThreshold > Threshold && !Caller->optForMinSize())
- Threshold = HintThreshold;
-
- bool ColdCallee = PSI->isColdFunction(&Callee);
- // Command line argument for DefaultInlineThreshold will override the default
- // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
- // do not use the default cold threshold even if it is smaller.
- if ((DefaultInlineThreshold.getNumOccurrences() == 0 ||
- ColdThreshold.getNumOccurrences() > 0) &&
- ColdCallee && ColdThreshold < Threshold)
- Threshold = ColdThreshold;
+ (PSI && PSI->isFunctionEntryHot(&Callee));
+ if (InlineHint && !Caller->optForMinSize())
+ Threshold = MaxIfValid(Threshold, Params.HintThreshold);
+
+ if (HotCallsite && !Caller->optForMinSize())
+ Threshold = MaxIfValid(Threshold, Params.HotCallSiteThreshold);
+
+ bool ColdCallee = PSI && PSI->isFunctionEntryCold(&Callee);
+ // For cold callees, use the ColdThreshold knob if it is available and reduces
+ // the threshold.
+ if (ColdCallee)
+ Threshold = MinIfValid(Threshold, Params.ColdThreshold);
// Finally, take the target-specific inlining threshold multiplier into
// account.
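[editor's note] A standalone sketch of the Optional-aware clamping introduced above (std::optional in place of llvm::Optional): an unset knob leaves the threshold untouched, while a set knob can only tighten it (MaxIfValid is the mirror image for raising it).

#include <algorithm>
#include <optional>

static int minIfValid(int A, std::optional<int> B) {
  return B ? std::min(A, *B) : A;
}
// minIfValid(225, std::nullopt) == 225; minIfValid(225, 75) == 75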
@@ -957,8 +960,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.
- CallAnalyzer CA(TTI, ACT, PSI, *F, InlineConstants::IndirectCallThreshold,
- CS);
+ auto IndirectCallParams = Params;
+ IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
+ CallAnalyzer CA(TTI, GetAssumptionCache, PSI, *F, CS, IndirectCallParams);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// threshold to get the bonus we want to apply, but don't go below zero.
@@ -1251,13 +1255,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
Cost -= InlineConstants::InstrCost;
}
}
-
+ // The call instruction also disappears after inlining.
+ Cost -= InlineConstants::InstrCost + InlineConstants::CallPenalty;
+
// If there is only one call of the function, and it has internal linkage,
// the cost of inlining it drops dramatically.
bool OnlyOneCallAndLocalLinkage =
F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
if (OnlyOneCallAndLocalLinkage)
- Cost += InlineConstants::LastCallToStaticBonus;
+ Cost -= InlineConstants::LastCallToStaticBonus;
// If this function uses the coldcc calling convention, prefer not to inline
// it.
@@ -1312,8 +1318,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// the ephemeral values multiple times (and they're completely determined by
// the callee, so this is purely duplicate work).
SmallPtrSet<const Value *, 32> EphValues;
- CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F),
- EphValues);
+ CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
// The worklist of live basic blocks in the callee *after* inlining. We avoid
// adding basic blocks of the callee which can be proven to be dead for this
@@ -1444,32 +1449,19 @@ static bool functionsHaveCompatibleAttributes(Function *Caller,
AttributeFuncs::areInlineCompatible(*Caller, *Callee);
}
-InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold,
- TargetTransformInfo &CalleeTTI,
- AssumptionCacheTracker *ACT,
- ProfileSummaryInfo *PSI) {
- return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI,
- ACT, PSI);
-}
-
-int llvm::computeThresholdFromOptLevels(unsigned OptLevel,
- unsigned SizeOptLevel) {
- if (OptLevel > 2)
- return OptAggressiveThreshold;
- if (SizeOptLevel == 1) // -Os
- return OptSizeThreshold;
- if (SizeOptLevel == 2) // -Oz
- return OptMinSizeThreshold;
- return DefaultInlineThreshold;
+InlineCost llvm::getInlineCost(
+ CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ ProfileSummaryInfo *PSI) {
+ return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
+ GetAssumptionCache, PSI);
}
-int llvm::getDefaultInlineThreshold() { return DefaultInlineThreshold; }
-
-InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
- int DefaultThreshold,
- TargetTransformInfo &CalleeTTI,
- AssumptionCacheTracker *ACT,
- ProfileSummaryInfo *PSI) {
+InlineCost llvm::getInlineCost(
+ CallSite CS, Function *Callee, const InlineParams &Params,
+ TargetTransformInfo &CalleeTTI,
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ ProfileSummaryInfo *PSI) {
// Cannot inline indirect calls.
if (!Callee)
@@ -1494,7 +1486,7 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
// Don't inline functions which can be interposed at link-time. Don't inline
// functions marked noinline or call sites marked noinline.
- // Note: inlining non-exact non-interposable fucntions is fine, since we know
+ // Note: inlining non-exact non-interposable functions is fine, since we know
// we have *a* correct implementation of the source level function.
if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
CS.isNoInline())
@@ -1503,7 +1495,7 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
- CallAnalyzer CA(CalleeTTI, ACT, PSI, *Callee, DefaultThreshold, CS);
+ CallAnalyzer CA(CalleeTTI, GetAssumptionCache, PSI, *Callee, CS, Params);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());
@@ -1551,3 +1543,67 @@ bool llvm::isInlineViable(Function &F) {
return true;
}
+
+// APIs to create InlineParams based on command line flags and/or other
+// parameters.
+
+InlineParams llvm::getInlineParams(int Threshold) {
+ InlineParams Params;
+
+ // This field is the threshold to use for a callee by default. This is
+ // derived from one or more of:
+ // * optimization or size-optimization levels,
+ // * a value passed to createFunctionInliningPass function, or
+ // * the -inline-threshold flag.
+ // If the -inline-threshold flag is explicitly specified, that is used
+ // irrespective of anything else.
+ if (InlineThreshold.getNumOccurrences() > 0)
+ Params.DefaultThreshold = InlineThreshold;
+ else
+ Params.DefaultThreshold = Threshold;
+
+ // Set the HintThreshold knob from the -inlinehint-threshold.
+ Params.HintThreshold = HintThreshold;
+
+ // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
+ Params.HotCallSiteThreshold = HotCallSiteThreshold;
+
+ // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
+ // -inline-threshold commandline option is not explicitly given. If that
+ // option is present, then its value applies even for callees with size and
+ // minsize attributes.
+ // If the -inline-threshold is not specified, set the ColdThreshold from the
+ // -inlinecold-threshold even if it is not explicitly passed. If
+ // -inline-threshold is specified, then -inlinecold-threshold needs to be
+ // explicitly specified to set the ColdThreshold knob.
+ if (InlineThreshold.getNumOccurrences() == 0) {
+ Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
+ Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
+ Params.ColdThreshold = ColdThreshold;
+ } else if (ColdThreshold.getNumOccurrences() > 0) {
+ Params.ColdThreshold = ColdThreshold;
+ }
+ return Params;
+}
+
+InlineParams llvm::getInlineParams() {
+ return getInlineParams(InlineThreshold);
+}
+
+// Compute the default threshold for inlining based on the opt level and the
+// size opt level.
+static int computeThresholdFromOptLevels(unsigned OptLevel,
+ unsigned SizeOptLevel) {
+ if (OptLevel > 2)
+ return InlineConstants::OptAggressiveThreshold;
+ if (SizeOptLevel == 1) // -Os
+ return InlineConstants::OptSizeThreshold;
+ if (SizeOptLevel == 2) // -Oz
+ return InlineConstants::OptMinSizeThreshold;
+ return InlineThreshold;
+}
+
+InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
+ return getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
+}
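The raw-threshold entry points above are replaced by InlineParams-based overloads. A minimal caller sketch (not part of this patch; everything except getInlineParams/getInlineCost is an assumed name) of how the new API is meant to be driven:

    // Hypothetical glue code, sketched against the signatures added above.
    #include "llvm/Analysis/InlineCost.h"
    #include "llvm/Analysis/ProfileSummaryInfo.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/CallSite.h"
    #include <functional>

    using namespace llvm;

    static InlineCost costForCall(
        CallSite CS, TargetTransformInfo &CalleeTTI,
        std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
        ProfileSummaryInfo *PSI) {
      // Thresholds derived from -O2 with no size optimization; an explicit
      // -inline-threshold flag still overrides the computed default.
      InlineParams Params = getInlineParams(/*OptLevel=*/2, /*SizeOptLevel=*/0);
      return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, PSI);
    }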
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index aeaf9388579c..b4686a1ff175 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -67,9 +67,12 @@ static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &,
const Query &, unsigned);
static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &,
unsigned);
+static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse);
static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned);
static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned);
-static Value *SimplifyTruncInst(Value *, Type *, const Query &, unsigned);
+static Value *SimplifyCastInst(unsigned, Value *, Type *,
+ const Query &, unsigned);
/// For a boolean type, or a vector of boolean type, return false, or
/// a vector with every element false, as appropriate for the type.
@@ -679,9 +682,26 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
if (Op0 == Op1)
return Constant::getNullValue(Op0->getType());
- // 0 - X -> 0 if the sub is NUW.
- if (isNUW && match(Op0, m_Zero()))
- return Op0;
+ // Is this a negation?
+ if (match(Op0, m_Zero())) {
+ // 0 - X -> 0 if the sub is NUW.
+ if (isNUW)
+ return Op0;
+
+ unsigned BitWidth = Op1->getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (KnownZero == ~APInt::getSignBit(BitWidth)) {
+ // Op1 is either 0 or the minimum signed value. If the sub is NSW, then
+ // Op1 must be 0 because negating the minimum signed value is undefined.
+ if (isNSW)
+ return Op0;
+
+ // 0 - X -> X if X is 0 or the minimum signed value.
+ return Op1;
+ }
+ }
// (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
// For example, (X + Y) - Y -> X; (Y + X) - Y -> X
@@ -747,7 +767,8 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
// See if "V === X - Y" simplifies.
if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
// It does! Now see if "trunc V" simplifies.
- if (Value *W = SimplifyTruncInst(V, Op0->getType(), Q, MaxRecurse-1))
+ if (Value *W = SimplifyCastInst(Instruction::Trunc, V, Op0->getType(),
+ Q, MaxRecurse - 1))
// It does, return the simplified "trunc V".
return W;
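The new negation case in SimplifySubInst leans on a two's complement identity: when every bit except the sign bit is known zero, X is either 0 or the minimum signed value, and wrapping negation maps both to themselves. A standalone check of that fact (illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      // The two values the fold accepts: KnownZero == ~SignBit.
      for (int64_t X : {static_cast<int64_t>(INT32_MIN), int64_t(0)}) {
        // Modular negation at 32 bits, as LLVM's 'sub' computes it.
        uint32_t Neg = uint32_t(0) - static_cast<uint32_t>(X);
        assert(Neg == static_cast<uint32_t>(X)); // 0 - X == X for these inputs
      }
      return 0;
    }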
@@ -1106,6 +1127,10 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (match(Op1, m_Undef()))
return Op1;
+ // X / 1.0 -> X
+ if (match(Op1, m_FPOne()))
+ return Op0;
+
// 0 / X -> 0
// Requires that NaNs are off (X could be zero) and signed zeroes are
// ignored (X could be positive or negative, so the output sign is unknown).
@@ -1497,17 +1522,45 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
return nullptr;
}
-static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
- Type *ITy = Op0->getType();
+/// Commuted variants are assumed to be handled by calling this function again
+/// with the parameters swapped.
+static Value *simplifyAndOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
ICmpInst::Predicate Pred0, Pred1;
- ConstantInt *CI1, *CI2;
- Value *V;
+ Value *A, *B;
+ if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) ||
+ !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B))))
+ return nullptr;
+
+ // We have (icmp Pred0, A, B) & (icmp Pred1, A, B).
+ // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we
+ // can eliminate Op1 from this 'and'.
+ if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1))
+ return Op0;
+ // Check for any combination of predicates that are guaranteed to be disjoint.
+ if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) ||
+ (Pred0 == ICmpInst::ICMP_EQ && ICmpInst::isFalseWhenEqual(Pred1)) ||
+ (Pred0 == ICmpInst::ICMP_SLT && Pred1 == ICmpInst::ICMP_SGT) ||
+ (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_UGT))
+ return getFalse(Op0->getType());
+
+ return nullptr;
+}
+
+/// Commuted variants are assumed to be handled by calling this function again
+/// with the parameters swapped.
+static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true))
return X;
+ if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1))
+ return X;
+
// Look for this pattern: (icmp V, C0) & (icmp V, C1).
+ Type *ITy = Op0->getType();
+ ICmpInst::Predicate Pred0, Pred1;
const APInt *C0, *C1;
+ Value *V;
if (match(Op0, m_ICmp(Pred0, m_Value(V), m_APInt(C0))) &&
match(Op1, m_ICmp(Pred1, m_Specific(V), m_APInt(C1)))) {
// Make a constant range that's the intersection of the two icmp ranges.
@@ -1518,21 +1571,22 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
return getFalse(ITy);
}
- if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)),
- m_ConstantInt(CI2))))
+ // (icmp (add V, C0), C1) & (icmp V, C0)
+ if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1))))
return nullptr;
- if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Specific(CI1))))
+ if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value())))
return nullptr;
auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0));
+ if (AddInst->getOperand(1) != Op1->getOperand(1))
+ return nullptr;
+
bool isNSW = AddInst->hasNoSignedWrap();
bool isNUW = AddInst->hasNoUnsignedWrap();
- const APInt &CI1V = CI1->getValue();
- const APInt &CI2V = CI2->getValue();
- const APInt Delta = CI2V - CI1V;
- if (CI1V.isStrictlyPositive()) {
+ const APInt Delta = *C1 - *C0;
+ if (C0->isStrictlyPositive()) {
if (Delta == 2) {
if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_SGT)
return getFalse(ITy);
@@ -1546,7 +1600,7 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
return getFalse(ITy);
}
}
- if (CI1V.getBoolValue() && isNUW) {
+ if (C0->getBoolValue() && isNUW) {
if (Delta == 2)
if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_UGT)
return getFalse(ITy);
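The same-operand helper above folds an 'and' of two icmps over identical operands by predicate logic alone: if the predicates are disjoint, the result is constant false. A brute-force illustration of one such pair (illustrative only, not part of the patch):

    #include <cassert>

    int main() {
      for (int A = -4; A <= 4; ++A)
        for (int B = -4; B <= 4; ++B)
          // (icmp slt A, B) & (icmp sgt A, B) can never both hold.
          assert(!(A < B && A > B));
      return 0;
    }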
@@ -1680,33 +1734,61 @@ Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL,
RecursionLimit);
}
-/// Simplify (or (icmp ...) (icmp ...)) to true when we can tell that the union
-/// contains all possible values.
-static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+/// Commuted variants are assumed to be handled by calling this function again
+/// with the parameters swapped.
+static Value *simplifyOrOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
ICmpInst::Predicate Pred0, Pred1;
- ConstantInt *CI1, *CI2;
- Value *V;
+ Value *A, *B;
+ if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) ||
+ !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B))))
+ return nullptr;
+ // We have (icmp Pred0, A, B) | (icmp Pred1, A, B).
+ // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we
+ // can eliminate Op0 from this 'or'.
+ if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1))
+ return Op1;
+
+ // Check for any combination of predicates that cover the entire range of
+ // possibilities.
+ if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) ||
+ (Pred0 == ICmpInst::ICMP_NE && ICmpInst::isTrueWhenEqual(Pred1)) ||
+ (Pred0 == ICmpInst::ICMP_SLE && Pred1 == ICmpInst::ICMP_SGE) ||
+ (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_UGE))
+ return getTrue(Op0->getType());
+
+ return nullptr;
+}
+
+/// Commuted variants are assumed to be handled by calling this function again
+/// with the parameters swapped.
+static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false))
return X;
- if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)),
- m_ConstantInt(CI2))))
- return nullptr;
+ if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1))
+ return X;
- if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Specific(CI1))))
+ // (icmp (add V, C0), C1) | (icmp V, C0)
+ ICmpInst::Predicate Pred0, Pred1;
+ const APInt *C0, *C1;
+ Value *V;
+ if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1))))
return nullptr;
- Type *ITy = Op0->getType();
+ if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value())))
+ return nullptr;
auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0));
+ if (AddInst->getOperand(1) != Op1->getOperand(1))
+ return nullptr;
+
+ Type *ITy = Op0->getType();
bool isNSW = AddInst->hasNoSignedWrap();
bool isNUW = AddInst->hasNoUnsignedWrap();
- const APInt &CI1V = CI1->getValue();
- const APInt &CI2V = CI2->getValue();
- const APInt Delta = CI2V - CI1V;
- if (CI1V.isStrictlyPositive()) {
+ const APInt Delta = *C1 - *C0;
+ if (C0->isStrictlyPositive()) {
if (Delta == 2) {
if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_SLE)
return getTrue(ITy);
@@ -1720,7 +1802,7 @@ static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
return getTrue(ITy);
}
}
- if (CI1V.getBoolValue() && isNUW) {
+ if (C0->getBoolValue() && isNUW) {
if (Delta == 2)
if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_ULE)
return getTrue(ITy);
@@ -2102,8 +2184,8 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
GetUnderlyingObjects(RHS, RHSUObjs, DL);
// Is the set of underlying objects all noalias calls?
- auto IsNAC = [](SmallVectorImpl<Value *> &Objects) {
- return std::all_of(Objects.begin(), Objects.end(), isNoAliasCall);
+ auto IsNAC = [](ArrayRef<Value *> Objects) {
+ return all_of(Objects, isNoAliasCall);
};
// Is the set of underlying objects all things which must be disjoint from
@@ -2112,8 +2194,8 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
// live with the compared-to allocation). For globals, we exclude symbols
// that might be resolved lazily to symbols in another dynamically-loaded
// library (and, thus, could be malloc'ed by the implementation).
- auto IsAllocDisjoint = [](SmallVectorImpl<Value *> &Objects) {
- return std::all_of(Objects.begin(), Objects.end(), [](Value *V) {
+ auto IsAllocDisjoint = [](ArrayRef<Value *> Objects) {
+ return all_of(Objects, [](Value *V) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
return AI->getParent() && AI->getFunction() && AI->isStaticAlloca();
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
@@ -2150,470 +2232,275 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
return nullptr;
}
-/// Given operands for an ICmpInst, see if we can fold the result.
-/// If not, this returns null.
-static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
- CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
- assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
-
- if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
- if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI);
-
- // If we have a constant, make sure it is on the RHS.
- std::swap(LHS, RHS);
- Pred = CmpInst::getSwappedPredicate(Pred);
- }
-
+/// Fold an icmp when its operands have i1 scalar type.
+static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const Query &Q) {
Type *ITy = GetCompareTy(LHS); // The return type.
Type *OpTy = LHS->getType(); // The operand type.
+ if (!OpTy->getScalarType()->isIntegerTy(1))
+ return nullptr;
- // icmp X, X -> true/false
- // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false
- // because X could be 0.
- if (LHS == RHS || isa<UndefValue>(RHS))
- return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
-
- // Special case logic when the operands have i1 type.
- if (OpTy->getScalarType()->isIntegerTy(1)) {
- switch (Pred) {
- default: break;
- case ICmpInst::ICMP_EQ:
- // X == 1 -> X
- if (match(RHS, m_One()))
- return LHS;
- break;
- case ICmpInst::ICMP_NE:
- // X != 0 -> X
- if (match(RHS, m_Zero()))
- return LHS;
- break;
- case ICmpInst::ICMP_UGT:
- // X >u 0 -> X
- if (match(RHS, m_Zero()))
- return LHS;
- break;
- case ICmpInst::ICMP_UGE: {
- // X >=u 1 -> X
- if (match(RHS, m_One()))
- return LHS;
- if (isImpliedCondition(RHS, LHS, Q.DL).getValueOr(false))
- return getTrue(ITy);
- break;
- }
- case ICmpInst::ICMP_SGE: {
- /// For signed comparison, the values for an i1 are 0 and -1
- /// respectively. This maps into a truth table of:
- /// LHS | RHS | LHS >=s RHS | LHS implies RHS
- /// 0 | 0 | 1 (0 >= 0) | 1
- /// 0 | 1 | 1 (0 >= -1) | 1
- /// 1 | 0 | 0 (-1 >= 0) | 0
- /// 1 | 1 | 1 (-1 >= -1) | 1
- if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
- return getTrue(ITy);
- break;
- }
- case ICmpInst::ICMP_SLT:
- // X <s 0 -> X
- if (match(RHS, m_Zero()))
- return LHS;
- break;
- case ICmpInst::ICMP_SLE:
- // X <=s -1 -> X
- if (match(RHS, m_One()))
- return LHS;
- break;
- case ICmpInst::ICMP_ULE: {
- if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
- return getTrue(ITy);
- break;
- }
- }
- }
-
- // If we are comparing with zero then try hard since this is a common case.
- if (match(RHS, m_Zero())) {
- bool LHSKnownNonNegative, LHSKnownNegative;
- switch (Pred) {
- default: llvm_unreachable("Unknown ICmp predicate!");
- case ICmpInst::ICMP_ULT:
- return getFalse(ITy);
- case ICmpInst::ICMP_UGE:
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_EQ:
+ // X == 1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_NE:
+ // X != 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // X >u 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_UGE:
+ // X >=u 1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ if (isImpliedCondition(RHS, LHS, Q.DL).getValueOr(false))
return getTrue(ITy);
- case ICmpInst::ICMP_EQ:
- case ICmpInst::ICMP_ULE:
- if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
- return getFalse(ITy);
- break;
- case ICmpInst::ICMP_NE:
- case ICmpInst::ICMP_UGT:
- if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
- return getTrue(ITy);
- break;
- case ICmpInst::ICMP_SLT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
- return getTrue(ITy);
- if (LHSKnownNonNegative)
- return getFalse(ITy);
- break;
- case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
- return getTrue(ITy);
- if (LHSKnownNonNegative &&
- isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
- return getFalse(ITy);
- break;
- case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
- return getFalse(ITy);
- if (LHSKnownNonNegative)
- return getTrue(ITy);
- break;
- case ICmpInst::ICMP_SGT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
- return getFalse(ITy);
- if (LHSKnownNonNegative &&
- isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
- return getTrue(ITy);
- break;
- }
+ break;
+ case ICmpInst::ICMP_SGE:
+ /// For signed comparison, the values for an i1 are 0 and -1
+ /// respectively. This maps into a truth table of:
+ /// LHS | RHS | LHS >=s RHS | LHS implies RHS
+ /// 0 | 0 | 1 (0 >= 0) | 1
+ /// 0 | 1 | 1 (0 >= -1) | 1
+ /// 1 | 0 | 0 (-1 >= 0) | 0
+ /// 1 | 1 | 1 (-1 >= -1) | 1
+ if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
+ return getTrue(ITy);
+ break;
+ case ICmpInst::ICMP_SLT:
+ // X <s 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_SLE:
+ // X <=s -1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
+ return getTrue(ITy);
+ break;
}
- // See if we are doing a comparison with a constant integer.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- // Rule out tautological comparisons (eg., ult 0 or uge 0).
- ConstantRange RHS_CR = ICmpInst::makeConstantRange(Pred, CI->getValue());
- if (RHS_CR.isEmptySet())
- return ConstantInt::getFalse(CI->getContext());
- if (RHS_CR.isFullSet())
- return ConstantInt::getTrue(CI->getContext());
-
- // Many binary operators with constant RHS have easy to compute constant
- // range. Use them to check whether the comparison is a tautology.
- unsigned Width = CI->getBitWidth();
- APInt Lower = APInt(Width, 0);
- APInt Upper = APInt(Width, 0);
- ConstantInt *CI2;
- if (match(LHS, m_URem(m_Value(), m_ConstantInt(CI2)))) {
- // 'urem x, CI2' produces [0, CI2).
- Upper = CI2->getValue();
- } else if (match(LHS, m_SRem(m_Value(), m_ConstantInt(CI2)))) {
- // 'srem x, CI2' produces (-|CI2|, |CI2|).
- Upper = CI2->getValue().abs();
- Lower = (-Upper) + 1;
- } else if (match(LHS, m_UDiv(m_ConstantInt(CI2), m_Value()))) {
- // 'udiv CI2, x' produces [0, CI2].
- Upper = CI2->getValue() + 1;
- } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) {
- // 'udiv x, CI2' produces [0, UINT_MAX / CI2].
- APInt NegOne = APInt::getAllOnesValue(Width);
- if (!CI2->isZero())
- Upper = NegOne.udiv(CI2->getValue()) + 1;
- } else if (match(LHS, m_SDiv(m_ConstantInt(CI2), m_Value()))) {
- if (CI2->isMinSignedValue()) {
- // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
- Lower = CI2->getValue();
- Upper = Lower.lshr(1) + 1;
- } else {
- // 'sdiv CI2, x' produces [-|CI2|, |CI2|].
- Upper = CI2->getValue().abs() + 1;
- Lower = (-Upper) + 1;
- }
- } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) {
- APInt IntMin = APInt::getSignedMinValue(Width);
- APInt IntMax = APInt::getSignedMaxValue(Width);
- const APInt &Val = CI2->getValue();
- if (Val.isAllOnesValue()) {
- // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
- // where CI2 != -1 and CI2 != 0 and CI2 != 1
- Lower = IntMin + 1;
- Upper = IntMax + 1;
- } else if (Val.countLeadingZeros() < Width - 1) {
- // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2]
- // where CI2 != -1 and CI2 != 0 and CI2 != 1
- Lower = IntMin.sdiv(Val);
- Upper = IntMax.sdiv(Val);
- if (Lower.sgt(Upper))
- std::swap(Lower, Upper);
- Upper = Upper + 1;
- assert(Upper != Lower && "Upper part of range has wrapped!");
- }
- } else if (match(LHS, m_NUWShl(m_ConstantInt(CI2), m_Value()))) {
- // 'shl nuw CI2, x' produces [CI2, CI2 << CLZ(CI2)]
- Lower = CI2->getValue();
- Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
- } else if (match(LHS, m_NSWShl(m_ConstantInt(CI2), m_Value()))) {
- if (CI2->isNegative()) {
- // 'shl nsw CI2, x' produces [CI2 << CLO(CI2)-1, CI2]
- unsigned ShiftAmount = CI2->getValue().countLeadingOnes() - 1;
- Lower = CI2->getValue().shl(ShiftAmount);
- Upper = CI2->getValue() + 1;
- } else {
- // 'shl nsw CI2, x' produces [CI2, CI2 << CLZ(CI2)-1]
- unsigned ShiftAmount = CI2->getValue().countLeadingZeros() - 1;
- Lower = CI2->getValue();
- Upper = CI2->getValue().shl(ShiftAmount) + 1;
- }
- } else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) {
- // 'lshr x, CI2' produces [0, UINT_MAX >> CI2].
- APInt NegOne = APInt::getAllOnesValue(Width);
- if (CI2->getValue().ult(Width))
- Upper = NegOne.lshr(CI2->getValue()) + 1;
- } else if (match(LHS, m_LShr(m_ConstantInt(CI2), m_Value()))) {
- // 'lshr CI2, x' produces [CI2 >> (Width-1), CI2].
- unsigned ShiftAmount = Width - 1;
- if (!CI2->isZero() && cast<BinaryOperator>(LHS)->isExact())
- ShiftAmount = CI2->getValue().countTrailingZeros();
- Lower = CI2->getValue().lshr(ShiftAmount);
- Upper = CI2->getValue() + 1;
- } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) {
- // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2].
- APInt IntMin = APInt::getSignedMinValue(Width);
- APInt IntMax = APInt::getSignedMaxValue(Width);
- if (CI2->getValue().ult(Width)) {
- Lower = IntMin.ashr(CI2->getValue());
- Upper = IntMax.ashr(CI2->getValue()) + 1;
- }
- } else if (match(LHS, m_AShr(m_ConstantInt(CI2), m_Value()))) {
- unsigned ShiftAmount = Width - 1;
- if (!CI2->isZero() && cast<BinaryOperator>(LHS)->isExact())
- ShiftAmount = CI2->getValue().countTrailingZeros();
- if (CI2->isNegative()) {
- // 'ashr CI2, x' produces [CI2, CI2 >> (Width-1)]
- Lower = CI2->getValue();
- Upper = CI2->getValue().ashr(ShiftAmount) + 1;
- } else {
- // 'ashr CI2, x' produces [CI2 >> (Width-1), CI2]
- Lower = CI2->getValue().ashr(ShiftAmount);
- Upper = CI2->getValue() + 1;
- }
- } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) {
- // 'or x, CI2' produces [CI2, UINT_MAX].
- Lower = CI2->getValue();
- } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) {
- // 'and x, CI2' produces [0, CI2].
- Upper = CI2->getValue() + 1;
- } else if (match(LHS, m_NUWAdd(m_Value(), m_ConstantInt(CI2)))) {
- // 'add nuw x, CI2' produces [CI2, UINT_MAX].
- Lower = CI2->getValue();
- }
-
- ConstantRange LHS_CR = Lower != Upper ? ConstantRange(Lower, Upper)
- : ConstantRange(Width, true);
+ return nullptr;
+}
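The i1 folds above follow from the two-value domain: with only 0 and 1 (or 0 and -1 under sign extension), each comparison collapses to the operand itself or a constant. A brute-force check of three of them (illustrative only, not part of the patch):

    #include <cassert>

    int main() {
      for (int Bit = 0; Bit <= 1; ++Bit) {
        bool X = Bit != 0;
        int Signed = X ? -1 : 0;   // sign-extended value of an i1
        assert((X == true) == X);  // icmp eq X, 1  -> X
        assert((X != false) == X); // icmp ne X, 0  -> X
        assert((Signed < 0) == X); // icmp slt X, 0 -> X
      }
      return 0;
    }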
- if (auto *I = dyn_cast<Instruction>(LHS))
- if (auto *Ranges = I->getMetadata(LLVMContext::MD_range))
- LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges));
+/// Try hard to fold icmp with zero RHS because this is a common case.
+static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const Query &Q) {
+ if (!match(RHS, m_Zero()))
+ return nullptr;
- if (!LHS_CR.isFullSet()) {
- if (RHS_CR.contains(LHS_CR))
- return ConstantInt::getTrue(RHS->getContext());
- if (RHS_CR.inverse().contains(LHS_CR))
- return ConstantInt::getFalse(RHS->getContext());
- }
+ Type *ITy = GetCompareTy(LHS); // The return type.
+ bool LHSKnownNonNegative, LHSKnownNegative;
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unknown ICmp predicate!");
+ case ICmpInst::ICMP_ULT:
+ return getFalse(ITy);
+ case ICmpInst::ICMP_UGE:
+ return getTrue(ITy);
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_ULE:
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return getFalse(ITy);
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGT:
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return getTrue(ITy);
+ break;
+ case ICmpInst::ICMP_SLT:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (LHSKnownNegative)
+ return getTrue(ITy);
+ if (LHSKnownNonNegative)
+ return getFalse(ITy);
+ break;
+ case ICmpInst::ICMP_SLE:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (LHSKnownNegative)
+ return getTrue(ITy);
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return getFalse(ITy);
+ break;
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (LHSKnownNegative)
+ return getFalse(ITy);
+ if (LHSKnownNonNegative)
+ return getTrue(ITy);
+ break;
+ case ICmpInst::ICMP_SGT:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (LHSKnownNegative)
+ return getFalse(ITy);
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return getTrue(ITy);
+ break;
}
- // If both operands have range metadata, use the metadata
- // to simplify the comparison.
- if (isa<Instruction>(RHS) && isa<Instruction>(LHS)) {
- auto RHS_Instr = dyn_cast<Instruction>(RHS);
- auto LHS_Instr = dyn_cast<Instruction>(LHS);
-
- if (RHS_Instr->getMetadata(LLVMContext::MD_range) &&
- LHS_Instr->getMetadata(LLVMContext::MD_range)) {
- auto RHS_CR = getConstantRangeFromMetadata(
- *RHS_Instr->getMetadata(LLVMContext::MD_range));
- auto LHS_CR = getConstantRangeFromMetadata(
- *LHS_Instr->getMetadata(LLVMContext::MD_range));
+ return nullptr;
+}
- auto Satisfied_CR = ConstantRange::makeSatisfyingICmpRegion(Pred, RHS_CR);
- if (Satisfied_CR.contains(LHS_CR))
- return ConstantInt::getTrue(RHS->getContext());
+static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS) {
+ const APInt *C;
+ if (!match(RHS, m_APInt(C)))
+ return nullptr;
- auto InversedSatisfied_CR = ConstantRange::makeSatisfyingICmpRegion(
- CmpInst::getInversePredicate(Pred), RHS_CR);
- if (InversedSatisfied_CR.contains(LHS_CR))
- return ConstantInt::getFalse(RHS->getContext());
+ // Rule out tautological comparisons (e.g., ult 0 or uge 0).
+ ConstantRange RHS_CR = ConstantRange::makeExactICmpRegion(Pred, *C);
+ if (RHS_CR.isEmptySet())
+ return ConstantInt::getFalse(GetCompareTy(RHS));
+ if (RHS_CR.isFullSet())
+ return ConstantInt::getTrue(GetCompareTy(RHS));
+
+ // Many binary operators with constant RHS have easy to compute constant
+ // range. Use them to check whether the comparison is a tautology.
+ unsigned Width = C->getBitWidth();
+ APInt Lower = APInt(Width, 0);
+ APInt Upper = APInt(Width, 0);
+ const APInt *C2;
+ if (match(LHS, m_URem(m_Value(), m_APInt(C2)))) {
+ // 'urem x, C2' produces [0, C2).
+ Upper = *C2;
+ } else if (match(LHS, m_SRem(m_Value(), m_APInt(C2)))) {
+ // 'srem x, C2' produces (-|C2|, |C2|).
+ Upper = C2->abs();
+ Lower = (-Upper) + 1;
+ } else if (match(LHS, m_UDiv(m_APInt(C2), m_Value()))) {
+ // 'udiv C2, x' produces [0, C2].
+ Upper = *C2 + 1;
+ } else if (match(LHS, m_UDiv(m_Value(), m_APInt(C2)))) {
+ // 'udiv x, C2' produces [0, UINT_MAX / C2].
+ APInt NegOne = APInt::getAllOnesValue(Width);
+ if (*C2 != 0)
+ Upper = NegOne.udiv(*C2) + 1;
+ } else if (match(LHS, m_SDiv(m_APInt(C2), m_Value()))) {
+ if (C2->isMinSignedValue()) {
+ // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
+ Lower = *C2;
+ Upper = Lower.lshr(1) + 1;
+ } else {
+ // 'sdiv C2, x' produces [-|C2|, |C2|].
+ Upper = C2->abs() + 1;
+ Lower = (-Upper) + 1;
}
- }
-
- // Compare of cast, for example (zext X) != 0 -> X != 0
- if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
- Instruction *LI = cast<CastInst>(LHS);
- Value *SrcOp = LI->getOperand(0);
- Type *SrcTy = SrcOp->getType();
- Type *DstTy = LI->getType();
-
- // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
- // if the integer type is the same size as the pointer type.
- if (MaxRecurse && isa<PtrToIntInst>(LI) &&
- Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
- if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
- // Transfer the cast to the constant.
- if (Value *V = SimplifyICmpInst(Pred, SrcOp,
- ConstantExpr::getIntToPtr(RHSC, SrcTy),
- Q, MaxRecurse-1))
- return V;
- } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
- if (RI->getOperand(0)->getType() == SrcTy)
- // Compare without the cast.
- if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
- Q, MaxRecurse-1))
- return V;
- }
+ } else if (match(LHS, m_SDiv(m_Value(), m_APInt(C2)))) {
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ if (C2->isAllOnesValue()) {
+ // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
+ // where C2 != -1 and C2 != 0 and C2 != 1
+ Lower = IntMin + 1;
+ Upper = IntMax + 1;
+ } else if (C2->countLeadingZeros() < Width - 1) {
+ // 'sdiv x, C2' produces [INT_MIN / C2, INT_MAX / C2]
+ // where C2 != -1 and C2 != 0 and C2 != 1
+ Lower = IntMin.sdiv(*C2);
+ Upper = IntMax.sdiv(*C2);
+ if (Lower.sgt(Upper))
+ std::swap(Lower, Upper);
+ Upper = Upper + 1;
+ assert(Upper != Lower && "Upper part of range has wrapped!");
}
-
- if (isa<ZExtInst>(LHS)) {
- // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the
- // same type.
- if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) {
- if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
- // Compare X and Y. Note that signed predicates become unsigned.
- if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
- SrcOp, RI->getOperand(0), Q,
- MaxRecurse-1))
- return V;
- }
- // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended
- // too. If not, then try to deduce the result of the comparison.
- else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- // Compute the constant that would happen if we truncated to SrcTy then
- // reextended to DstTy.
- Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
- Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy);
-
- // If the re-extended constant didn't change then this is effectively
- // also a case of comparing two zero-extended values.
- if (RExt == CI && MaxRecurse)
- if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
- SrcOp, Trunc, Q, MaxRecurse-1))
- return V;
-
- // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
- // there. Use this to work out the result of the comparison.
- if (RExt != CI) {
- switch (Pred) {
- default: llvm_unreachable("Unknown ICmp predicate!");
- // LHS <u RHS.
- case ICmpInst::ICMP_EQ:
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE:
- return ConstantInt::getFalse(CI->getContext());
-
- case ICmpInst::ICMP_NE:
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE:
- return ConstantInt::getTrue(CI->getContext());
-
- // LHS is non-negative. If RHS is negative then LHS >s LHS. If RHS
- // is non-negative then LHS <s RHS.
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE:
- return CI->getValue().isNegative() ?
- ConstantInt::getTrue(CI->getContext()) :
- ConstantInt::getFalse(CI->getContext());
-
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE:
- return CI->getValue().isNegative() ?
- ConstantInt::getFalse(CI->getContext()) :
- ConstantInt::getTrue(CI->getContext());
- }
- }
- }
+ } else if (match(LHS, m_NUWShl(m_APInt(C2), m_Value()))) {
+ // 'shl nuw C2, x' produces [C2, C2 << CLZ(C2)]
+ Lower = *C2;
+ Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
+ } else if (match(LHS, m_NSWShl(m_APInt(C2), m_Value()))) {
+ if (C2->isNegative()) {
+ // 'shl nsw C2, x' produces [C2 << CLO(C2)-1, C2]
+ unsigned ShiftAmount = C2->countLeadingOnes() - 1;
+ Lower = C2->shl(ShiftAmount);
+ Upper = *C2 + 1;
+ } else {
+ // 'shl nsw C2, x' produces [C2, C2 << CLZ(C2)-1]
+ unsigned ShiftAmount = C2->countLeadingZeros() - 1;
+ Lower = *C2;
+ Upper = C2->shl(ShiftAmount) + 1;
}
+ } else if (match(LHS, m_LShr(m_Value(), m_APInt(C2)))) {
+ // 'lshr x, C2' produces [0, UINT_MAX >> C2].
+ APInt NegOne = APInt::getAllOnesValue(Width);
+ if (C2->ult(Width))
+ Upper = NegOne.lshr(*C2) + 1;
+ } else if (match(LHS, m_LShr(m_APInt(C2), m_Value()))) {
+ // 'lshr C2, x' produces [C2 >> (Width-1), C2].
+ unsigned ShiftAmount = Width - 1;
+ if (*C2 != 0 && cast<BinaryOperator>(LHS)->isExact())
+ ShiftAmount = C2->countTrailingZeros();
+ Lower = C2->lshr(ShiftAmount);
+ Upper = *C2 + 1;
+ } else if (match(LHS, m_AShr(m_Value(), m_APInt(C2)))) {
+ // 'ashr x, C2' produces [INT_MIN >> C2, INT_MAX >> C2].
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ if (C2->ult(Width)) {
+ Lower = IntMin.ashr(*C2);
+ Upper = IntMax.ashr(*C2) + 1;
+ }
+ } else if (match(LHS, m_AShr(m_APInt(C2), m_Value()))) {
+ unsigned ShiftAmount = Width - 1;
+ if (*C2 != 0 && cast<BinaryOperator>(LHS)->isExact())
+ ShiftAmount = C2->countTrailingZeros();
+ if (C2->isNegative()) {
+ // 'ashr C2, x' produces [C2, C2 >> (Width-1)]
+ Lower = *C2;
+ Upper = C2->ashr(ShiftAmount) + 1;
+ } else {
+ // 'ashr C2, x' produces [C2 >> (Width-1), C2]
+ Lower = C2->ashr(ShiftAmount);
+ Upper = *C2 + 1;
+ }
+ } else if (match(LHS, m_Or(m_Value(), m_APInt(C2)))) {
+ // 'or x, C2' produces [C2, UINT_MAX].
+ Lower = *C2;
+ } else if (match(LHS, m_And(m_Value(), m_APInt(C2)))) {
+ // 'and x, C2' produces [0, C2].
+ Upper = *C2 + 1;
+ } else if (match(LHS, m_NUWAdd(m_Value(), m_APInt(C2)))) {
+ // 'add nuw x, C2' produces [C2, UINT_MAX].
+ Lower = *C2;
+ }
- if (isa<SExtInst>(LHS)) {
- // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
- // same type.
- if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) {
- if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
- // Compare X and Y. Note that the predicate does not change.
- if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
- Q, MaxRecurse-1))
- return V;
- }
- // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
- // too. If not, then try to deduce the result of the comparison.
- else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- // Compute the constant that would happen if we truncated to SrcTy then
- // reextended to DstTy.
- Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
- Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy);
-
- // If the re-extended constant didn't change then this is effectively
- // also a case of comparing two sign-extended values.
- if (RExt == CI && MaxRecurse)
- if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1))
- return V;
-
- // Otherwise the upper bits of LHS are all equal, while RHS has varying
- // bits there. Use this to work out the result of the comparison.
- if (RExt != CI) {
- switch (Pred) {
- default: llvm_unreachable("Unknown ICmp predicate!");
- case ICmpInst::ICMP_EQ:
- return ConstantInt::getFalse(CI->getContext());
- case ICmpInst::ICMP_NE:
- return ConstantInt::getTrue(CI->getContext());
+ ConstantRange LHS_CR =
+ Lower != Upper ? ConstantRange(Lower, Upper) : ConstantRange(Width, true);
- // If RHS is non-negative then LHS <s RHS. If RHS is negative then
- // LHS >s RHS.
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE:
- return CI->getValue().isNegative() ?
- ConstantInt::getTrue(CI->getContext()) :
- ConstantInt::getFalse(CI->getContext());
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE:
- return CI->getValue().isNegative() ?
- ConstantInt::getFalse(CI->getContext()) :
- ConstantInt::getTrue(CI->getContext());
+ if (auto *I = dyn_cast<Instruction>(LHS))
+ if (auto *Ranges = I->getMetadata(LLVMContext::MD_range))
+ LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges));
- // If LHS is non-negative then LHS <u RHS. If LHS is negative then
- // LHS >u RHS.
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE:
- // Comparison is true iff the LHS <s 0.
- if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
- Constant::getNullValue(SrcTy),
- Q, MaxRecurse-1))
- return V;
- break;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE:
- // Comparison is true iff the LHS >=s 0.
- if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
- Constant::getNullValue(SrcTy),
- Q, MaxRecurse-1))
- return V;
- break;
- }
- }
- }
- }
+ if (!LHS_CR.isFullSet()) {
+ if (RHS_CR.contains(LHS_CR))
+ return ConstantInt::getTrue(GetCompareTy(RHS));
+ if (RHS_CR.inverse().contains(LHS_CR))
+ return ConstantInt::getFalse(GetCompareTy(RHS));
}
- // icmp eq|ne X, Y -> false|true if X != Y
- if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
- isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) {
- LLVMContext &Ctx = LHS->getType()->getContext();
- return Pred == ICmpInst::ICMP_NE ?
- ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
- }
+ return nullptr;
+}
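simplifyICmpWithConstant derives a constant range for the LHS from its defining operation and folds the compare when that range falls entirely inside (or outside) the region satisfying the predicate. For instance, 'urem x, 10' always lies in [0, 10), so 'icmp ult (urem x, 10), 10' is a tautology. A brute-force spot check (illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 1000; ++X)
        assert(X % 10 < 10); // [0,10) is contained in the 'ult 10' region
      return 0;
    }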
+
+static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const Query &Q,
+ unsigned MaxRecurse) {
+ Type *ITy = GetCompareTy(LHS); // The return type.
- // Special logic for binary operators.
BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
if (MaxRecurse && (LBO || RBO)) {
@@ -2622,35 +2509,39 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// LHS = A + B (or A and B are null); RHS = C + D (or C and D are null).
bool NoLHSWrapProblem = false, NoRHSWrapProblem = false;
if (LBO && LBO->getOpcode() == Instruction::Add) {
- A = LBO->getOperand(0); B = LBO->getOperand(1);
- NoLHSWrapProblem = ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap());
+ A = LBO->getOperand(0);
+ B = LBO->getOperand(1);
+ NoLHSWrapProblem =
+ ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap());
}
if (RBO && RBO->getOpcode() == Instruction::Add) {
- C = RBO->getOperand(0); D = RBO->getOperand(1);
- NoRHSWrapProblem = ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap());
+ C = RBO->getOperand(0);
+ D = RBO->getOperand(1);
+ NoRHSWrapProblem =
+ ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap());
}
// icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
if ((A == RHS || B == RHS) && NoLHSWrapProblem)
if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
- Constant::getNullValue(RHS->getType()),
- Q, MaxRecurse-1))
+ Constant::getNullValue(RHS->getType()), Q,
+ MaxRecurse - 1))
return V;
// icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
if ((C == LHS || D == LHS) && NoRHSWrapProblem)
- if (Value *V = SimplifyICmpInst(Pred,
- Constant::getNullValue(LHS->getType()),
- C == LHS ? D : C, Q, MaxRecurse-1))
+ if (Value *V =
+ SimplifyICmpInst(Pred, Constant::getNullValue(LHS->getType()),
+ C == LHS ? D : C, Q, MaxRecurse - 1))
return V;
// icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
- if (A && C && (A == C || A == D || B == C || B == D) &&
- NoLHSWrapProblem && NoRHSWrapProblem) {
+ if (A && C && (A == C || A == D || B == C || B == D) && NoLHSWrapProblem &&
+ NoRHSWrapProblem) {
// Determine Y and Z in the form icmp (X+Y), (X+Z).
Value *Y, *Z;
if (A == C) {
@@ -2671,7 +2562,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Y = A;
Z = C;
}
- if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse - 1))
return V;
}
}
@@ -2771,7 +2662,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
- // fall-through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
@@ -2782,7 +2673,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
- // fall-through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
@@ -2802,7 +2693,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
- // fall-through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
@@ -2813,7 +2704,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
- // fall-through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
@@ -2832,6 +2723,17 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(ITy);
}
+ // x >=u x >> y
+ // x >=u x udiv y.
+ if (RBO && (match(RBO, m_LShr(m_Specific(LHS), m_Value())) ||
+ match(RBO, m_UDiv(m_Specific(LHS), m_Value())))) {
+ // icmp pred X, (X op Y)
+ if (Pred == ICmpInst::ICMP_ULT)
+ return getFalse(ITy);
+ if (Pred == ICmpInst::ICMP_UGE)
+ return getTrue(ITy);
+ }
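The new fold above uses the fact that unsigned right shift and unsigned division never increase the dividend, so 'x ult (x >> y)' is always false and 'x uge (x >> y)' always true. A spot check over a small domain (illustrative only; the loop skips the poison shift-amount and divide-by-zero cases):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 64; ++X)
        for (uint32_t Y = 1; Y < 8; ++Y) {
          assert(X >= (X >> Y)); // icmp uge X, (lshr X, Y) -> true
          assert(X >= (X / Y));  // icmp uge X, (udiv X, Y) -> true
        }
      return 0;
    }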
+
// handle:
// CI2 << X == CI
// CI2 << X != CI
@@ -2870,18 +2772,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
LBO->getOperand(1) == RBO->getOperand(1)) {
switch (LBO->getOpcode()) {
- default: break;
+ default:
+ break;
case Instruction::UDiv:
case Instruction::LShr:
if (ICmpInst::isSigned(Pred))
break;
- // fall-through
+ LLVM_FALLTHROUGH;
case Instruction::SDiv:
case Instruction::AShr:
if (!LBO->isExact() || !RBO->isExact())
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
- RBO->getOperand(0), Q, MaxRecurse-1))
+ RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
case Instruction::Shl: {
@@ -2892,40 +2795,51 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (!NSW && ICmpInst::isSigned(Pred))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
- RBO->getOperand(0), Q, MaxRecurse-1))
+ RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
}
}
}
+ return nullptr;
+}
- // Simplify comparisons involving max/min.
+/// Simplify integer comparisons where at least one operand of the compare
+/// matches an integer min/max idiom.
+static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const Query &Q,
+ unsigned MaxRecurse) {
+ Type *ITy = GetCompareTy(LHS); // The return type.
Value *A, *B;
CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE;
CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B".
// Signed variants on "max(a,b)>=a -> true".
if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
- if (A != RHS) std::swap(A, B); // smax(A, B) pred A.
+ if (A != RHS)
+ std::swap(A, B); // smax(A, B) pred A.
EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
// We analyze this as smax(A, B) pred A.
P = Pred;
} else if (match(RHS, m_SMax(m_Value(A), m_Value(B))) &&
(A == LHS || B == LHS)) {
- if (A != LHS) std::swap(A, B); // A pred smax(A, B).
+ if (A != LHS)
+ std::swap(A, B); // A pred smax(A, B).
EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
// We analyze this as smax(A, B) swapped-pred A.
P = CmpInst::getSwappedPredicate(Pred);
} else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
(A == RHS || B == RHS)) {
- if (A != RHS) std::swap(A, B); // smin(A, B) pred A.
+ if (A != RHS)
+ std::swap(A, B); // smin(A, B) pred A.
EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
// We analyze this as smax(-A, -B) swapped-pred -A.
// Note that we do not need to actually form -A or -B thanks to EqP.
P = CmpInst::getSwappedPredicate(Pred);
} else if (match(RHS, m_SMin(m_Value(A), m_Value(B))) &&
(A == LHS || B == LHS)) {
- if (A != LHS) std::swap(A, B); // A pred smin(A, B).
+ if (A != LHS)
+ std::swap(A, B); // A pred smin(A, B).
EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
// We analyze this as smax(-A, -B) pred -A.
// Note that we do not need to actually form -A or -B thanks to EqP.
@@ -2946,7 +2860,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1))
return V;
break;
case CmpInst::ICMP_NE:
@@ -2960,7 +2874,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1))
return V;
break;
}
@@ -2976,26 +2890,30 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Unsigned variants on "max(a,b)>=a -> true".
P = CmpInst::BAD_ICMP_PREDICATE;
if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
- if (A != RHS) std::swap(A, B); // umax(A, B) pred A.
+ if (A != RHS)
+ std::swap(A, B); // umax(A, B) pred A.
EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
// We analyze this as umax(A, B) pred A.
P = Pred;
} else if (match(RHS, m_UMax(m_Value(A), m_Value(B))) &&
(A == LHS || B == LHS)) {
- if (A != LHS) std::swap(A, B); // A pred umax(A, B).
+ if (A != LHS)
+ std::swap(A, B); // A pred umax(A, B).
EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
// We analyze this as umax(A, B) swapped-pred A.
P = CmpInst::getSwappedPredicate(Pred);
} else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
(A == RHS || B == RHS)) {
- if (A != RHS) std::swap(A, B); // umin(A, B) pred A.
+ if (A != RHS)
+ std::swap(A, B); // umin(A, B) pred A.
EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
// We analyze this as umax(-A, -B) swapped-pred -A.
// Note that we do not need to actually form -A or -B thanks to EqP.
P = CmpInst::getSwappedPredicate(Pred);
} else if (match(RHS, m_UMin(m_Value(A), m_Value(B))) &&
(A == LHS || B == LHS)) {
- if (A != LHS) std::swap(A, B); // A pred umin(A, B).
+ if (A != LHS)
+ std::swap(A, B); // A pred umin(A, B).
EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
// We analyze this as umax(-A, -B) pred -A.
// Note that we do not need to actually form -A or -B thanks to EqP.
@@ -3016,7 +2934,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1))
return V;
break;
case CmpInst::ICMP_NE:
@@ -3030,7 +2948,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1))
return V;
break;
}
@@ -3087,11 +3005,254 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getFalse(ITy);
}
+ return nullptr;
+}
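The min/max folds factored out above all reduce to ordering facts such as "smax(A, B) sge A". A brute-force illustration (illustrative only, not part of the patch):

    #include <algorithm>
    #include <cassert>

    int main() {
      for (int A = -3; A <= 3; ++A)
        for (int B = -3; B <= 3; ++B) {
          assert(std::max(A, B) >= A); // smax(A, B) sge A -> true
          assert(std::min(A, B) <= A); // smin(A, B) sle A -> true
        }
      return 0;
    }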
+
+/// Given operands for an ICmpInst, see if we can fold the result.
+/// If not, this returns null.
+static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse) {
+ CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+ assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+ Type *ITy = GetCompareTy(LHS); // The return type.
+
+ // icmp X, X -> true/false
+ // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false
+ // because X could be 0.
+ if (LHS == RHS || isa<UndefValue>(RHS))
+ return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
+
+ if (Value *V = simplifyICmpOfBools(Pred, LHS, RHS, Q))
+ return V;
+
+ if (Value *V = simplifyICmpWithZero(Pred, LHS, RHS, Q))
+ return V;
+
+ if (Value *V = simplifyICmpWithConstant(Pred, LHS, RHS))
+ return V;
+
+ // If both operands have range metadata, use the metadata
+ // to simplify the comparison.
+ if (isa<Instruction>(RHS) && isa<Instruction>(LHS)) {
+ auto RHS_Instr = dyn_cast<Instruction>(RHS);
+ auto LHS_Instr = dyn_cast<Instruction>(LHS);
+
+ if (RHS_Instr->getMetadata(LLVMContext::MD_range) &&
+ LHS_Instr->getMetadata(LLVMContext::MD_range)) {
+ auto RHS_CR = getConstantRangeFromMetadata(
+ *RHS_Instr->getMetadata(LLVMContext::MD_range));
+ auto LHS_CR = getConstantRangeFromMetadata(
+ *LHS_Instr->getMetadata(LLVMContext::MD_range));
+
+ auto Satisfied_CR = ConstantRange::makeSatisfyingICmpRegion(Pred, RHS_CR);
+ if (Satisfied_CR.contains(LHS_CR))
+ return ConstantInt::getTrue(RHS->getContext());
+
+ auto InversedSatisfied_CR = ConstantRange::makeSatisfyingICmpRegion(
+ CmpInst::getInversePredicate(Pred), RHS_CR);
+ if (InversedSatisfied_CR.contains(LHS_CR))
+ return ConstantInt::getFalse(RHS->getContext());
+ }
+ }
+
+ // Compare of cast, for example (zext X) != 0 -> X != 0
+ if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
+ Instruction *LI = cast<CastInst>(LHS);
+ Value *SrcOp = LI->getOperand(0);
+ Type *SrcTy = SrcOp->getType();
+ Type *DstTy = LI->getType();
+
+ // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
+ // if the integer type is the same size as the pointer type.
+ if (MaxRecurse && isa<PtrToIntInst>(LI) &&
+ Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // Transfer the cast to the constant.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp,
+ ConstantExpr::getIntToPtr(RHSC, SrcTy),
+ Q, MaxRecurse-1))
+ return V;
+ } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
+ if (RI->getOperand(0)->getType() == SrcTy)
+ // Compare without the cast.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+ Q, MaxRecurse-1))
+ return V;
+ }
+ }
+
+ if (isa<ZExtInst>(LHS)) {
+ // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the
+ // same type.
+ if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) {
+ if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+ // Compare X and Y. Note that signed predicates become unsigned.
+ if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, RI->getOperand(0), Q,
+ MaxRecurse-1))
+ return V;
+ }
+ // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended
+ // too. If not, then try to deduce the result of the comparison.
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy);
+
+ // If the re-extended constant didn't change then this is effectively
+ // also a case of comparing two zero-extended values.
+ if (RExt == CI && MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, Trunc, Q, MaxRecurse-1))
+ return V;
+
+ // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
+ // there. Use this to work out the result of the comparison.
+ if (RExt != CI) {
+ switch (Pred) {
+ default: llvm_unreachable("Unknown ICmp predicate!");
+ // LHS <u RHS.
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ return ConstantInt::getFalse(CI->getContext());
+
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ return ConstantInt::getTrue(CI->getContext());
+
+ // LHS is non-negative. If RHS is negative then LHS >s RHS. If RHS
+ // is non-negative then LHS <s RHS.
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getTrue(CI->getContext()) :
+ ConstantInt::getFalse(CI->getContext());
+
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getFalse(CI->getContext()) :
+ ConstantInt::getTrue(CI->getContext());
+ }
+ }
+ }
+ }
+
+ if (isa<SExtInst>(LHS)) {
+ // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
+ // same type.
+ if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) {
+ if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+ // Compare X and Y. Note that the predicate does not change.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+ Q, MaxRecurse-1))
+ return V;
+ }
+ // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
+ // too. If not, then try to deduce the result of the comparison.
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy);
+
+ // If the re-extended constant didn't change then this is effectively
+ // also a case of comparing two sign-extended values.
+ if (RExt == CI && MaxRecurse)
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1))
+ return V;
+
+ // Otherwise the upper bits of LHS are all equal, while RHS has varying
+ // bits there. Use this to work out the result of the comparison.
+ if (RExt != CI) {
+ switch (Pred) {
+ default: llvm_unreachable("Unknown ICmp predicate!");
+ case ICmpInst::ICMP_EQ:
+ return ConstantInt::getFalse(CI->getContext());
+ case ICmpInst::ICMP_NE:
+ return ConstantInt::getTrue(CI->getContext());
+
+ // If RHS is non-negative then LHS <s RHS. If RHS is negative then
+ // LHS >s RHS.
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getTrue(CI->getContext()) :
+ ConstantInt::getFalse(CI->getContext());
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getFalse(CI->getContext()) :
+ ConstantInt::getTrue(CI->getContext());
+
+ // If LHS is non-negative then LHS <u RHS. If LHS is negative then
+ // LHS >u RHS.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // Comparison is true iff the LHS <s 0.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
+ Constant::getNullValue(SrcTy),
+ Q, MaxRecurse-1))
+ return V;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ // Comparison is true iff the LHS >=s 0.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
+ Constant::getNullValue(SrcTy),
+ Q, MaxRecurse-1))
+ return V;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // icmp eq|ne X, Y -> false|true if X != Y
+ if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
+ isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) {
+ LLVMContext &Ctx = LHS->getType()->getContext();
+ return Pred == ICmpInst::ICMP_NE ?
+ ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
+ }
+
+ if (Value *V = simplifyICmpWithBinOp(Pred, LHS, RHS, Q, MaxRecurse))
+ return V;
+
+ if (Value *V = simplifyICmpWithMinMax(Pred, LHS, RHS, Q, MaxRecurse))
+ return V;
+
// Simplify comparisons of related pointers using a powerful, recursive
// GEP-walk when we have target data available..
if (LHS->getType()->isPointerTy())
if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.CxtI, LHS, RHS))
return C;
+ if (auto *CLHS = dyn_cast<PtrToIntOperator>(LHS))
+ if (auto *CRHS = dyn_cast<PtrToIntOperator>(RHS))
+ if (Q.DL.getTypeSizeInBits(CLHS->getPointerOperandType()) ==
+ Q.DL.getTypeSizeInBits(CLHS->getType()) &&
+ Q.DL.getTypeSizeInBits(CRHS->getPointerOperandType()) ==
+ Q.DL.getTypeSizeInBits(CRHS->getType()))
+ if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.CxtI,
+ CLHS->getPointerOperand(),
+ CRHS->getPointerOperand()))
+ return C;
if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) {
if (GEPOperator *GRHS = dyn_cast<GEPOperator>(RHS)) {
@@ -3119,17 +3280,16 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If a bit is known to be zero for A and known to be one for B,
// then A and B cannot be equal.
if (ICmpInst::isEquality(Pred)) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- uint32_t BitWidth = CI->getBitWidth();
+ const APInt *RHSVal;
+ if (match(RHS, m_APInt(RHSVal))) {
+ unsigned BitWidth = RHSVal->getBitWidth();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, Q.AC,
Q.CxtI, Q.DT);
- const APInt &RHSVal = CI->getValue();
- if (((LHSKnownZero & RHSVal) != 0) || ((LHSKnownOne & ~RHSVal) != 0))
- return Pred == ICmpInst::ICMP_EQ
- ? ConstantInt::getFalse(CI->getContext())
- : ConstantInt::getTrue(CI->getContext());
+ if (((LHSKnownZero & *RHSVal) != 0) || ((LHSKnownOne & ~(*RHSVal)) != 0))
+ return Pred == ICmpInst::ICMP_EQ ? ConstantInt::getFalse(ITy)
+ : ConstantInt::getTrue(ITy);
}
}
@@ -3175,17 +3335,18 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
// Fold trivial predicates.
+ Type *RetTy = GetCompareTy(LHS);
if (Pred == FCmpInst::FCMP_FALSE)
- return ConstantInt::get(GetCompareTy(LHS), 0);
+ return getFalse(RetTy);
if (Pred == FCmpInst::FCMP_TRUE)
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
// UNO/ORD predicates can be trivially folded if NaNs are ignored.
if (FMF.noNaNs()) {
if (Pred == FCmpInst::FCMP_UNO)
- return ConstantInt::get(GetCompareTy(LHS), 0);
+ return getFalse(RetTy);
if (Pred == FCmpInst::FCMP_ORD)
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
}
// fcmp pred x, undef and fcmp pred undef, x
@@ -3193,15 +3354,15 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) {
// Choosing NaN for the undef will always make unordered comparison succeed
// and ordered comparison fail.
- return ConstantInt::get(GetCompareTy(LHS), CmpInst::isUnordered(Pred));
+ return ConstantInt::get(RetTy, CmpInst::isUnordered(Pred));
}
// fcmp x,x -> true/false. Not all compares are foldable.
if (LHS == RHS) {
if (CmpInst::isTrueWhenEqual(Pred))
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
if (CmpInst::isFalseWhenEqual(Pred))
- return ConstantInt::get(GetCompareTy(LHS), 0);
+ return getFalse(RetTy);
}
// Handle fcmp with constant RHS
@@ -3216,11 +3377,11 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the constant is a nan, see if we can fold the comparison based on it.
if (CFP->getValueAPF().isNaN()) {
if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo"
- return ConstantInt::getFalse(CFP->getContext());
+ return getFalse(RetTy);
assert(FCmpInst::isUnordered(Pred) &&
"Comparison must be either ordered or unordered!");
// True if unordered.
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
}
// Check whether the constant is an infinity.
if (CFP->getValueAPF().isInfinity()) {
@@ -3228,10 +3389,10 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
switch (Pred) {
case FCmpInst::FCMP_OLT:
// No value is ordered and less than negative infinity.
- return ConstantInt::get(GetCompareTy(LHS), 0);
+ return getFalse(RetTy);
case FCmpInst::FCMP_UGE:
// All values are unordered with or at least negative infinity.
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
default:
break;
}
@@ -3239,10 +3400,10 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
switch (Pred) {
case FCmpInst::FCMP_OGT:
// No value is ordered and greater than infinity.
- return ConstantInt::get(GetCompareTy(LHS), 0);
+ return getFalse(RetTy);
case FCmpInst::FCMP_ULE:
// All values are unordered with and at most infinity.
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
default:
break;
}
@@ -3252,12 +3413,12 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
switch (Pred) {
case FCmpInst::FCMP_UGE:
if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
break;
case FCmpInst::FCMP_OLT:
// X < 0
if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
- return ConstantInt::get(GetCompareTy(LHS), 0);
+ return getFalse(RetTy);
break;
default:
break;
@@ -3371,6 +3532,150 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
return nullptr;
}
+/// Try to simplify a select instruction whose condition is a bit test, i.e.
+/// an integer equality comparison of a masked value against zero:
+/// (X & Y) ==/!= 0.
+static Value *simplifySelectBitTest(Value *TrueVal, Value *FalseVal, Value *X,
+ const APInt *Y, bool TrueWhenUnset) {
+ const APInt *C;
+
+ // (X & Y) == 0 ? X & ~Y : X --> X
+ // (X & Y) != 0 ? X & ~Y : X --> X & ~Y
+ if (FalseVal == X && match(TrueVal, m_And(m_Specific(X), m_APInt(C))) &&
+ *Y == ~*C)
+ return TrueWhenUnset ? FalseVal : TrueVal;
+
+ // (X & Y) == 0 ? X : X & ~Y --> X & ~Y
+ // (X & Y) != 0 ? X : X & ~Y --> X
+ if (TrueVal == X && match(FalseVal, m_And(m_Specific(X), m_APInt(C))) &&
+ *Y == ~*C)
+ return TrueWhenUnset ? FalseVal : TrueVal;
+
+ if (Y->isPowerOf2()) {
+ // (X & Y) == 0 ? X | Y : X --> X | Y
+ // (X & Y) != 0 ? X | Y : X --> X
+ if (FalseVal == X && match(TrueVal, m_Or(m_Specific(X), m_APInt(C))) &&
+ *Y == *C)
+ return TrueWhenUnset ? TrueVal : FalseVal;
+
+ // (X & Y) == 0 ? X : X | Y --> X
+ // (X & Y) != 0 ? X : X | Y --> X | Y
+ if (TrueVal == X && match(FalseVal, m_Or(m_Specific(X), m_APInt(C))) &&
+ *Y == *C)
+ return TrueWhenUnset ? TrueVal : FalseVal;
+ }
+
+ return nullptr;
+}
+
+/// An alternative way to test if a bit is set or not uses sgt/slt instead of
+/// eq/ne.
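+/// For example (an illustrative sketch, not part of the original comment):
+/// 'icmp slt i8 %x, 0' is true exactly when the sign bit 0x80 of %x is set,
+/// so it behaves like 'icmp ne (and i8 %x, 0x80), 0', and 'icmp sgt i8 %x,
+/// -1' behaves like 'icmp eq (and i8 %x, 0x80), 0'.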
+static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *TrueVal,
+ Value *FalseVal,
+ bool TrueWhenUnset) {
+ unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits();
+ if (!BitWidth)
+ return nullptr;
+
+ APInt MinSignedValue;
+ Value *X;
+ if (match(CmpLHS, m_Trunc(m_Value(X))) && (X == TrueVal || X == FalseVal)) {
+ // icmp slt (trunc X), 0 <--> icmp ne (and X, C), 0
+ // icmp sgt (trunc X), -1 <--> icmp eq (and X, C), 0
+ unsigned DestSize = CmpLHS->getType()->getScalarSizeInBits();
+ MinSignedValue = APInt::getSignedMinValue(DestSize).zext(BitWidth);
+ } else {
+ // icmp slt X, 0 <--> icmp ne (and X, C), 0
+ // icmp sgt X, -1 <--> icmp eq (and X, C), 0
+ X = CmpLHS;
+ MinSignedValue = APInt::getSignedMinValue(BitWidth);
+ }
+
+ if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, &MinSignedValue,
+ TrueWhenUnset))
+ return V;
+
+ return nullptr;
+}
+
+/// Try to simplify a select instruction when its condition operand is an
+/// integer comparison.
+static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
+ Value *FalseVal, const Query &Q,
+ unsigned MaxRecurse) {
+ ICmpInst::Predicate Pred;
+ Value *CmpLHS, *CmpRHS;
+ if (!match(CondVal, m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS))))
+ return nullptr;
+
+ // FIXME: This code is nearly duplicated in InstCombine. Using/refactoring
+ // decomposeBitTestICmp() might help.
+ if (ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero())) {
+ Value *X;
+ const APInt *Y;
+ if (match(CmpLHS, m_And(m_Value(X), m_APInt(Y))))
+ if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, Y,
+ Pred == ICmpInst::ICMP_EQ))
+ return V;
+ } else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) {
+ // Comparing signed-less-than 0 checks if the sign bit is set.
+ if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, TrueVal, FalseVal,
+ false))
+ return V;
+ } else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) {
+ // Comparing signed-greater-than -1 checks if the sign bit is not set.
+ if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, TrueVal, FalseVal,
+ true))
+ return V;
+ }
+
+ if (CondVal->hasOneUse()) {
+ const APInt *C;
+ if (match(CmpRHS, m_APInt(C))) {
+ // X < MIN ? T : F --> F
+ if (Pred == ICmpInst::ICMP_SLT && C->isMinSignedValue())
+ return FalseVal;
+ // X < MIN ? T : F --> F
+ if (Pred == ICmpInst::ICMP_ULT && C->isMinValue())
+ return FalseVal;
+ // X > MAX ? T : F --> F
+ if (Pred == ICmpInst::ICMP_SGT && C->isMaxSignedValue())
+ return FalseVal;
+ // X > MAX ? T : F --> F
+ if (Pred == ICmpInst::ICMP_UGT && C->isMaxValue())
+ return FalseVal;
+ }
+ }
+
+ // If we have an equality comparison, then we know the value in one of the
+ // arms of the select. See if substituting this value into the arm and
+ // simplifying the result yields the same value as the other arm.
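+  // Illustrative example (not from the original source): for
+  //   select (icmp eq i32 %x, 0), i32 0, i32 %x
+  // substituting 0 for %x in the false arm simplifies it to 0, which matches
+  // the true arm, so the whole select simplifies to the false arm %x.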
+ if (Pred == ICmpInst::ICMP_EQ) {
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ TrueVal)
+ return FalseVal;
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ FalseVal)
+ return FalseVal;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ FalseVal)
+ return TrueVal;
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ TrueVal)
+ return TrueVal;
+ }
+
+ return nullptr;
+}
+
/// Given operands for a SelectInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
@@ -3399,106 +3704,9 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
return TrueVal;
- if (const auto *ICI = dyn_cast<ICmpInst>(CondVal)) {
- // FIXME: This code is nearly duplicated in InstCombine. Using/refactoring
- // decomposeBitTestICmp() might help.
- unsigned BitWidth =
- Q.DL.getTypeSizeInBits(TrueVal->getType()->getScalarType());
- ICmpInst::Predicate Pred = ICI->getPredicate();
- Value *CmpLHS = ICI->getOperand(0);
- Value *CmpRHS = ICI->getOperand(1);
- APInt MinSignedValue = APInt::getSignBit(BitWidth);
- Value *X;
- const APInt *Y;
- bool TrueWhenUnset;
- bool IsBitTest = false;
- if (ICmpInst::isEquality(Pred) &&
- match(CmpLHS, m_And(m_Value(X), m_APInt(Y))) &&
- match(CmpRHS, m_Zero())) {
- IsBitTest = true;
- TrueWhenUnset = Pred == ICmpInst::ICMP_EQ;
- } else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) {
- X = CmpLHS;
- Y = &MinSignedValue;
- IsBitTest = true;
- TrueWhenUnset = false;
- } else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) {
- X = CmpLHS;
- Y = &MinSignedValue;
- IsBitTest = true;
- TrueWhenUnset = true;
- }
- if (IsBitTest) {
- const APInt *C;
- // (X & Y) == 0 ? X & ~Y : X --> X
- // (X & Y) != 0 ? X & ~Y : X --> X & ~Y
- if (FalseVal == X && match(TrueVal, m_And(m_Specific(X), m_APInt(C))) &&
- *Y == ~*C)
- return TrueWhenUnset ? FalseVal : TrueVal;
- // (X & Y) == 0 ? X : X & ~Y --> X & ~Y
- // (X & Y) != 0 ? X : X & ~Y --> X
- if (TrueVal == X && match(FalseVal, m_And(m_Specific(X), m_APInt(C))) &&
- *Y == ~*C)
- return TrueWhenUnset ? FalseVal : TrueVal;
-
- if (Y->isPowerOf2()) {
- // (X & Y) == 0 ? X | Y : X --> X | Y
- // (X & Y) != 0 ? X | Y : X --> X
- if (FalseVal == X && match(TrueVal, m_Or(m_Specific(X), m_APInt(C))) &&
- *Y == *C)
- return TrueWhenUnset ? TrueVal : FalseVal;
- // (X & Y) == 0 ? X : X | Y --> X
- // (X & Y) != 0 ? X : X | Y --> X | Y
- if (TrueVal == X && match(FalseVal, m_Or(m_Specific(X), m_APInt(C))) &&
- *Y == *C)
- return TrueWhenUnset ? TrueVal : FalseVal;
- }
- }
- if (ICI->hasOneUse()) {
- const APInt *C;
- if (match(CmpRHS, m_APInt(C))) {
- // X < MIN ? T : F --> F
- if (Pred == ICmpInst::ICMP_SLT && C->isMinSignedValue())
- return FalseVal;
- // X < MIN ? T : F --> F
- if (Pred == ICmpInst::ICMP_ULT && C->isMinValue())
- return FalseVal;
- // X > MAX ? T : F --> F
- if (Pred == ICmpInst::ICMP_SGT && C->isMaxSignedValue())
- return FalseVal;
- // X > MAX ? T : F --> F
- if (Pred == ICmpInst::ICMP_UGT && C->isMaxValue())
- return FalseVal;
- }
- }
-
- // If we have an equality comparison then we know the value in one of the
- // arms of the select. See if substituting this value into the arm and
- // simplifying the result yields the same value as the other arm.
- if (Pred == ICmpInst::ICMP_EQ) {
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
- TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
- TrueVal)
- return FalseVal;
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
- FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
- FalseVal)
- return FalseVal;
- } else if (Pred == ICmpInst::ICMP_NE) {
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
- FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
- FalseVal)
- return TrueVal;
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
- TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
- TrueVal)
- return TrueVal;
- }
- }
+ if (Value *V =
+ simplifySelectWithICmpCond(CondVal, TrueVal, FalseVal, Q, MaxRecurse))
+ return V;
return nullptr;
}
@@ -3587,6 +3795,32 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
}
}
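+  // The folds below only fire for i8-style GEPs (element size 1) where all
+  // interior indices are zero, so the last index is a raw byte offset.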
+ if (Q.DL.getTypeAllocSize(LastType) == 1 &&
+ all_of(Ops.slice(1).drop_back(1),
+ [](Value *Idx) { return match(Idx, m_Zero()); })) {
+ unsigned PtrWidth =
+ Q.DL.getPointerSizeInBits(Ops[0]->getType()->getPointerAddressSpace());
+ if (Q.DL.getTypeSizeInBits(Ops.back()->getType()) == PtrWidth) {
+ APInt BasePtrOffset(PtrWidth, 0);
+ Value *StrippedBasePtr =
+ Ops[0]->stripAndAccumulateInBoundsConstantOffsets(Q.DL,
+ BasePtrOffset);
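+      // As an illustrative sketch: if Ops[0] is 'gep inbounds i8, i8* %p,
+      // i64 4', StrippedBasePtr is %p and BasePtrOffset accumulates to 4,
+      // feeding the 'gep (gep V, C), ...' folds below.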
+
+ // gep (gep V, C), (sub 0, V) -> C
+ if (match(Ops.back(),
+ m_Sub(m_Zero(), m_PtrToInt(m_Specific(StrippedBasePtr))))) {
+ auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset);
+ return ConstantExpr::getIntToPtr(CI, GEPTy);
+ }
+ // gep (gep V, C), (xor V, -1) -> C-1
+ if (match(Ops.back(),
+ m_Xor(m_PtrToInt(m_Specific(StrippedBasePtr)), m_AllOnes()))) {
+ auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset - 1);
+ return ConstantExpr::getIntToPtr(CI, GEPTy);
+ }
+ }
+ }
+
// Check to see if this is constant foldable.
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (!isa<Constant>(Ops[i]))
@@ -3742,19 +3976,47 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
return CommonValue;
}
-static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) {
- if (Constant *C = dyn_cast<Constant>(Op))
- return ConstantFoldCastOperand(Instruction::Trunc, C, Ty, Q.DL);
+static Value *SimplifyCastInst(unsigned CastOpc, Value *Op,
+ Type *Ty, const Query &Q, unsigned MaxRecurse) {
+ if (auto *C = dyn_cast<Constant>(Op))
+ return ConstantFoldCastOperand(CastOpc, C, Ty, Q.DL);
+
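+  // Fold a pair of casts that together act as a no-op. An illustrative
+  // example (not in the original comments): 'trunc (zext i32 %x to i64) to
+  // i32' is eliminable as a bitcast between identical types, i.e. just %x.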
+ if (auto *CI = dyn_cast<CastInst>(Op)) {
+ auto *Src = CI->getOperand(0);
+ Type *SrcTy = Src->getType();
+ Type *MidTy = CI->getType();
+ Type *DstTy = Ty;
+ if (Src->getType() == Ty) {
+ auto FirstOp = static_cast<Instruction::CastOps>(CI->getOpcode());
+ auto SecondOp = static_cast<Instruction::CastOps>(CastOpc);
+ Type *SrcIntPtrTy =
+ SrcTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(SrcTy) : nullptr;
+ Type *MidIntPtrTy =
+ MidTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(MidTy) : nullptr;
+ Type *DstIntPtrTy =
+ DstTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(DstTy) : nullptr;
+ if (CastInst::isEliminableCastPair(FirstOp, SecondOp, SrcTy, MidTy, DstTy,
+ SrcIntPtrTy, MidIntPtrTy,
+ DstIntPtrTy) == Instruction::BitCast)
+ return Src;
+ }
+ }
+
+ // bitcast x -> x
+ if (CastOpc == Instruction::BitCast)
+ if (Op->getType() == Ty)
+ return Op;
return nullptr;
}
-Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyTruncInst(Op, Ty, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyCastInst(CastOpc, Op, Ty, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
//=== Helper functions for higher up the class hierarchy.
@@ -3837,6 +4099,8 @@ static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return SimplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse);
case Instruction::FMul:
return SimplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse);
+ case Instruction::FDiv:
+ return SimplifyFDivInst(LHS, RHS, FMF, Q, MaxRecurse);
default:
return SimplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse);
}
@@ -4223,21 +4487,23 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
TLI, DT, AC, I);
break;
}
- case Instruction::Trunc:
- Result =
- SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT, AC, I);
+#define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
+#include "llvm/IR/Instruction.def"
+#undef HANDLE_CAST_INST
+ Result = SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(),
+ DL, TLI, DT, AC, I);
break;
}
// In general, it is possible for computeKnownBits to determine all bits in a
// value even when the operands are not all constants.
- if (!Result && I->getType()->isIntegerTy()) {
+ if (!Result && I->getType()->isIntOrIntVectorTy()) {
unsigned BitWidth = I->getType()->getScalarSizeInBits();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT);
if ((KnownZero | KnownOne).isAllOnesValue())
- Result = ConstantInt::get(I->getContext(), KnownOne);
+ Result = ConstantInt::get(I->getType(), KnownOne);
}
/// If called on unreachable code, the above logic may report that the
diff --git a/lib/Analysis/IteratedDominanceFrontier.cpp b/lib/Analysis/IteratedDominanceFrontier.cpp
index 3ab6b5d60905..d1374acd963e 100644
--- a/lib/Analysis/IteratedDominanceFrontier.cpp
+++ b/lib/Analysis/IteratedDominanceFrontier.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-/// \brief Compute iterated dominance frontiers using a linear time algorithm.
+// Compute iterated dominance frontiers using a linear time algorithm.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt
index 08af5f37700d..15c757b48f76 100644
--- a/lib/Analysis/LLVMBuild.txt
+++ b/lib/Analysis/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = Analysis
parent = Libraries
-required_libraries = Core Support ProfileData
+required_libraries = Core Support ProfileData Object
diff --git a/lib/Analysis/LazyBlockFrequencyInfo.cpp b/lib/Analysis/LazyBlockFrequencyInfo.cpp
index 7debfde87d2a..596b6fc1afb5 100644
--- a/lib/Analysis/LazyBlockFrequencyInfo.cpp
+++ b/lib/Analysis/LazyBlockFrequencyInfo.cpp
@@ -15,7 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LazyBranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;
@@ -24,7 +24,7 @@ using namespace llvm;
INITIALIZE_PASS_BEGIN(LazyBlockFrequencyInfoPass, DEBUG_TYPE,
"Lazy Block Frequency Analysis", true, true)
-INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBPIPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LazyBlockFrequencyInfoPass, DEBUG_TYPE,
"Lazy Block Frequency Analysis", true, true)
@@ -40,7 +40,7 @@ void LazyBlockFrequencyInfoPass::print(raw_ostream &OS, const Module *) const {
}
void LazyBlockFrequencyInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AU);
AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
@@ -48,21 +48,20 @@ void LazyBlockFrequencyInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
void LazyBlockFrequencyInfoPass::releaseMemory() { LBFI.releaseMemory(); }
bool LazyBlockFrequencyInfoPass::runOnFunction(Function &F) {
- BranchProbabilityInfo &BPI =
- getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ auto &BPIPass = getAnalysis<LazyBranchProbabilityInfoPass>();
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- LBFI.setAnalysis(&F, &BPI, &LI);
+ LBFI.setAnalysis(&F, &BPIPass, &LI);
return false;
}
void LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AnalysisUsage &AU) {
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AU);
AU.addRequired<LazyBlockFrequencyInfoPass>();
AU.addRequired<LoopInfoWrapperPass>();
}
void llvm::initializeLazyBFIPassPass(PassRegistry &Registry) {
- INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass);
+ initializeLazyBPIPassPass(Registry);
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
}
diff --git a/lib/Analysis/LazyBranchProbabilityInfo.cpp b/lib/Analysis/LazyBranchProbabilityInfo.cpp
new file mode 100644
index 000000000000..b51c6beb7959
--- /dev/null
+++ b/lib/Analysis/LazyBranchProbabilityInfo.cpp
@@ -0,0 +1,63 @@
+//===- LazyBranchProbabilityInfo.cpp - Lazy Branch Probability Analysis ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an alternative analysis pass to BranchProbabilityInfoWrapperPass.
+// The difference is that with this pass the branch probabilities are not
+// computed when the analysis pass is executed but rather when the BPI result
+// is explicitly requested by the analysis client.
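+//
+// A client is expected to declare the dependency via
+// LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage() and to query the
+// result only on paths that actually need branch probabilities; the
+// LazyBlockFrequencyInfo changes in this commit give a usage sketch.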
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LazyBranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lazy-branch-prob"
+
+INITIALIZE_PASS_BEGIN(LazyBranchProbabilityInfoPass, DEBUG_TYPE,
+ "Lazy Branch Probability Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(LazyBranchProbabilityInfoPass, DEBUG_TYPE,
+ "Lazy Branch Probability Analysis", true, true)
+
+char LazyBranchProbabilityInfoPass::ID = 0;
+
+LazyBranchProbabilityInfoPass::LazyBranchProbabilityInfoPass()
+ : FunctionPass(ID) {
+ initializeLazyBranchProbabilityInfoPassPass(*PassRegistry::getPassRegistry());
+}
+
+void LazyBranchProbabilityInfoPass::print(raw_ostream &OS,
+ const Module *) const {
+ LBPI->getCalculated().print(OS);
+}
+
+void LazyBranchProbabilityInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.setPreservesAll();
+}
+
+void LazyBranchProbabilityInfoPass::releaseMemory() { LBPI.reset(); }
+
+bool LazyBranchProbabilityInfoPass::runOnFunction(Function &F) {
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ LBPI = llvm::make_unique<LazyBranchProbabilityInfo>(&F, &LI);
+ return false;
+}
+
+void LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AnalysisUsage &AU) {
+ AU.addRequired<LazyBranchProbabilityInfoPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+}
+
+void llvm::initializeLazyBPIPassPass(PassRegistry &Registry) {
+ INITIALIZE_PASS_DEPENDENCY(LazyBranchProbabilityInfoPass);
+ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
+}
diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp
index acff8529b151..f7cf8c6729f2 100644
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@@ -8,7 +8,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
@@ -23,39 +26,11 @@ using namespace llvm;
static void addEdge(SmallVectorImpl<LazyCallGraph::Edge> &Edges,
DenseMap<Function *, int> &EdgeIndexMap, Function &F,
LazyCallGraph::Edge::Kind EK) {
- // Note that we consider *any* function with a definition to be a viable
- // edge. Even if the function's definition is subject to replacement by
- // some other module (say, a weak definition) there may still be
- // optimizations which essentially speculate based on the definition and
- // a way to check that the specific definition is in fact the one being
- // used. For example, this could be done by moving the weak definition to
- // a strong (internal) definition and making the weak definition be an
- // alias. Then a test of the address of the weak function against the new
- // strong definition's address would be an effective way to determine the
- // safety of optimizing a direct call edge.
- if (!F.isDeclaration() &&
- EdgeIndexMap.insert({&F, Edges.size()}).second) {
- DEBUG(dbgs() << " Added callable function: " << F.getName() << "\n");
- Edges.emplace_back(LazyCallGraph::Edge(F, EK));
- }
-}
-
-static void findReferences(SmallVectorImpl<Constant *> &Worklist,
- SmallPtrSetImpl<Constant *> &Visited,
- SmallVectorImpl<LazyCallGraph::Edge> &Edges,
- DenseMap<Function *, int> &EdgeIndexMap) {
- while (!Worklist.empty()) {
- Constant *C = Worklist.pop_back_val();
-
- if (Function *F = dyn_cast<Function>(C)) {
- addEdge(Edges, EdgeIndexMap, *F, LazyCallGraph::Edge::Ref);
- continue;
- }
+ if (!EdgeIndexMap.insert({&F, Edges.size()}).second)
+ return;
- for (Value *Op : C->operand_values())
- if (Visited.insert(cast<Constant>(Op)).second)
- Worklist.push_back(cast<Constant>(Op));
- }
+ DEBUG(dbgs() << " Added callable function: " << F.getName() << "\n");
+ Edges.emplace_back(LazyCallGraph::Edge(F, EK));
}
LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
@@ -72,14 +47,26 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
// are trivially added, but to accumulate the latter we walk the instructions
// and add every operand which is a constant to the worklist to process
// afterward.
+ //
+ // Note that we consider *any* function with a definition to be a viable
+ // edge. Even if the function's definition is subject to replacement by
+ // some other module (say, a weak definition) there may still be
+ // optimizations which essentially speculate based on the definition and
+ // a way to check that the specific definition is in fact the one being
+ // used. For example, this could be done by moving the weak definition to
+ // a strong (internal) definition and making the weak definition be an
+ // alias. Then a test of the address of the weak function against the new
+ // strong definition's address would be an effective way to determine the
+ // safety of optimizing a direct call edge.
for (BasicBlock &BB : F)
for (Instruction &I : BB) {
if (auto CS = CallSite(&I))
if (Function *Callee = CS.getCalledFunction())
- if (Callees.insert(Callee).second) {
- Visited.insert(Callee);
- addEdge(Edges, EdgeIndexMap, *Callee, LazyCallGraph::Edge::Call);
- }
+ if (!Callee->isDeclaration())
+ if (Callees.insert(Callee).second) {
+ Visited.insert(Callee);
+ addEdge(Edges, EdgeIndexMap, *Callee, LazyCallGraph::Edge::Call);
+ }
for (Value *Op : I.operand_values())
if (Constant *C = dyn_cast<Constant>(Op))
@@ -90,7 +77,9 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
// We've collected all the constant (and thus potentially function or
// function containing) operands to all of the instructions in the function.
// Process them (recursively) collecting every function found.
- findReferences(Worklist, Visited, Edges, EdgeIndexMap);
+ visitReferences(Worklist, Visited, [&](Function &F) {
+ addEdge(Edges, EdgeIndexMap, F, LazyCallGraph::Edge::Ref);
+ });
}
void LazyCallGraph::Node::insertEdgeInternal(Function &Target, Edge::Kind EK) {
@@ -144,7 +133,9 @@ LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) {
DEBUG(dbgs() << " Adding functions referenced by global initializers to the "
"entry set.\n");
- findReferences(Worklist, Visited, EntryEdges, EntryIndexMap);
+ visitReferences(Worklist, Visited, [&](Function &F) {
+ addEdge(EntryEdges, EntryIndexMap, F, LazyCallGraph::Edge::Ref);
+ });
for (const Edge &E : EntryEdges)
RefSCCEntryNodes.push_back(&E.getFunction());
@@ -199,6 +190,57 @@ void LazyCallGraph::SCC::verify() {
}
#endif
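+
+// isParentOf checks for a direct call edge from this SCC into C, whereas
+// isAncestorOf (below) walks call edges transitively through the SCC DAG.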
+bool LazyCallGraph::SCC::isParentOf(const SCC &C) const {
+ if (this == &C)
+ return false;
+
+ for (Node &N : *this)
+ for (Edge &E : N.calls())
+ if (Node *CalleeN = E.getNode())
+ if (OuterRefSCC->G->lookupSCC(*CalleeN) == &C)
+ return true;
+
+ // No edges found.
+ return false;
+}
+
+bool LazyCallGraph::SCC::isAncestorOf(const SCC &TargetC) const {
+ if (this == &TargetC)
+ return false;
+
+ LazyCallGraph &G = *OuterRefSCC->G;
+
+ // Start with this SCC.
+ SmallPtrSet<const SCC *, 16> Visited = {this};
+ SmallVector<const SCC *, 16> Worklist = {this};
+
+ // Walk down the graph until we run out of edges or find a path to TargetC.
+ do {
+ const SCC &C = *Worklist.pop_back_val();
+ for (Node &N : C)
+ for (Edge &E : N.calls()) {
+ Node *CalleeN = E.getNode();
+ if (!CalleeN)
+ continue;
+ SCC *CalleeC = G.lookupSCC(*CalleeN);
+ if (!CalleeC)
+ continue;
+
+ // If the callee's SCC is the TargetC, we're done.
+ if (CalleeC == &TargetC)
+ return true;
+
+ // If this is the first time we've reached this SCC, put it on the
+ // worklist to recurse through.
+ if (Visited.insert(CalleeC).second)
+ Worklist.push_back(CalleeC);
+ }
+ } while (!Worklist.empty());
+
+ // No paths found.
+ return false;
+}
+
LazyCallGraph::RefSCC::RefSCC(LazyCallGraph &G) : G(&G) {}
void LazyCallGraph::RefSCC::dump() const {
@@ -211,11 +253,17 @@ void LazyCallGraph::RefSCC::verify() {
assert(!SCCs.empty() && "Can't have an empty SCC!");
// Verify basic properties of the SCCs.
+ SmallPtrSet<SCC *, 4> SCCSet;
for (SCC *C : SCCs) {
assert(C && "Can't have a null SCC!");
C->verify();
assert(&C->getOuterRefSCC() == this &&
"SCC doesn't think it is inside this RefSCC!");
+ bool Inserted = SCCSet.insert(C).second;
+ assert(Inserted && "Found a duplicate SCC!");
+ auto IndexIt = SCCIndices.find(C);
+ assert(IndexIt != SCCIndices.end() &&
+ "Found an SCC that doesn't have an index!");
}
// Check that our indices map correctly.
@@ -223,6 +271,7 @@ void LazyCallGraph::RefSCC::verify() {
SCC *C = SCCIndexPair.first;
int i = SCCIndexPair.second;
assert(C && "Can't have a null SCC in the indices!");
+ assert(SCCSet.count(C) && "Found an index for an SCC not in the RefSCC!");
assert(SCCs[i] == C && "Index doesn't point to SCC!");
}
@@ -243,6 +292,20 @@ void LazyCallGraph::RefSCC::verify() {
"Edge to a RefSCC missing us in its parent set.");
}
}
+
+ // Check that our parents are actually parents.
+ for (RefSCC *ParentRC : Parents) {
+ assert(ParentRC != this && "Cannot be our own parent!");
+ auto HasConnectingEdge = [&] {
+ for (SCC &C : *ParentRC)
+ for (Node &N : C)
+ for (Edge &E : N)
+ if (G->lookupRefSCC(*E.getNode()) == this)
+ return true;
+ return false;
+ };
+ assert(HasConnectingEdge() && "No edge connects the parent to us!");
+ }
}
#endif
@@ -261,12 +324,153 @@ bool LazyCallGraph::RefSCC::isDescendantOf(const RefSCC &C) const {
return false;
}
+/// Generic helper that updates a postorder sequence of SCCs for a potentially
+/// cycle-introducing edge insertion.
+///
+/// A postorder sequence of SCCs of a directed graph has one fundamental
+/// property: all edges in the DAG of SCCs point "up" the sequence. That is,
+/// all edges in the SCC DAG point to prior SCCs in the sequence.
+///
+/// This routine both updates a postorder sequence and uses that sequence to
+/// compute the set of SCCs connected into a cycle. It should only be called to
+/// insert a "downward" edge which will require changing the sequence to
+/// restore it to a postorder.
+///
+/// When inserting an edge from an earlier SCC to a later SCC in some postorder
+/// sequence, all of the SCCs which may be impacted are in the closed range of
+/// those two within the postorder sequence. The algorithm used here to restore
+/// the state is as follows:
+///
+/// 1) Starting from the source SCC, construct a set of SCCs which reach the
+/// source SCC consisting of just the source SCC. Then scan toward the
+/// target SCC in postorder and for each SCC, if it has an edge to an SCC
+///    in the set, add it to the set. Otherwise (the source SCC is not
+///    a successor), move the scanned SCC in the postorder sequence to
+///    immediately before the source SCC, shifting the source SCC and all
+///    SCCs in the set one position toward the target SCC. Stop scanning
+///    after processing the target SCC.
+/// 2) If the source SCC is now past the target SCC in the postorder sequence,
+/// and thus the new edge will flow toward the start, we are done.
+/// 3) Otherwise, starting from the target SCC, walk all edges which reach an
+/// SCC between the source and the target, and add them to the set of
+/// connected SCCs, then recurse through them. Once a complete set of the
+/// SCCs the target connects to is known, hoist the remaining SCCs between
+/// the source and the target to be above the target. Note that there is no
+/// need to process the source SCC, it is already known to connect.
+/// 4) At this point, all of the SCCs in the closed range between the source
+/// SCC and the target SCC in the postorder sequence are connected,
+/// including the target SCC and the source SCC. Inserting the edge from
+/// the source SCC to the target SCC will form a cycle out of precisely
+/// these SCCs. Thus we can merge all of the SCCs in this closed range into
+/// a single SCC.
+///
+/// This process has various important properties:
+/// - Only mutates the SCCs when adding the edge actually changes the SCC
+/// structure.
+/// - Never mutates SCCs which are unaffected by the change.
+/// - Updates the postorder sequence to correctly satisfy the postorder
+/// constraint after the edge is inserted.
+/// - Only reorders SCCs in the closed postorder sequence from the source to
+/// the target, so easy to bound how much has changed even in the ordering.
+/// - Big-O is the number of edges in the closed postorder range of SCCs from
+/// source to target.
+///
+/// This helper routine, in addition to updating the postorder sequence
+/// itself, will also update a map from SCCs to indices within that sequence.
+///
+/// The sequence and the map must operate on pointers to the SCC type.
+///
+/// Two callbacks must be provided. The first computes the subset of SCCs in
+/// the postorder closed range from the source to the target which connect to
+/// the source SCC via some (transitive) set of edges. The second computes the
+/// subset of the same range which the target SCC connects to via some
+/// (transitive) set of edges. Both callbacks should populate the set argument
+/// provided.
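+///
+/// A small worked example (illustrative, not part of the original comment):
+/// take the postorder sequence A B C D and insert an edge from A to D. The
+/// scan in step 1 keeps between A and D only those of B and C which reach
+/// A through some edge, moving the rest in front of A. If D itself reaches
+/// A, the closed range left between A and D forms a cycle and is merged
+/// into a single SCC in step 4.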
+template <typename SCCT, typename PostorderSequenceT, typename SCCIndexMapT,
+ typename ComputeSourceConnectedSetCallableT,
+ typename ComputeTargetConnectedSetCallableT>
+static iterator_range<typename PostorderSequenceT::iterator>
+updatePostorderSequenceForEdgeInsertion(
+ SCCT &SourceSCC, SCCT &TargetSCC, PostorderSequenceT &SCCs,
+ SCCIndexMapT &SCCIndices,
+ ComputeSourceConnectedSetCallableT ComputeSourceConnectedSet,
+ ComputeTargetConnectedSetCallableT ComputeTargetConnectedSet) {
+ int SourceIdx = SCCIndices[&SourceSCC];
+ int TargetIdx = SCCIndices[&TargetSCC];
+ assert(SourceIdx < TargetIdx && "Cannot have equal indices here!");
+
+ SmallPtrSet<SCCT *, 4> ConnectedSet;
+
+ // Compute the SCCs which (transitively) reach the source.
+ ComputeSourceConnectedSet(ConnectedSet);
+
+  // Partition the SCCs in this part of the post-order sequence so only SCCs
+ // connecting to the source remain between it and the target. This is
+ // a benign partition as it preserves postorder.
+ auto SourceI = std::stable_partition(
+ SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx + 1,
+ [&ConnectedSet](SCCT *C) { return !ConnectedSet.count(C); });
+ for (int i = SourceIdx, e = TargetIdx + 1; i < e; ++i)
+ SCCIndices.find(SCCs[i])->second = i;
+
+ // If the target doesn't connect to the source, then we've corrected the
+ // post-order and there are no cycles formed.
+ if (!ConnectedSet.count(&TargetSCC)) {
+ assert(SourceI > (SCCs.begin() + SourceIdx) &&
+ "Must have moved the source to fix the post-order.");
+ assert(*std::prev(SourceI) == &TargetSCC &&
+           "Last SCC to move should have been the target.");
+
+ // Return an empty range at the target SCC indicating there is nothing to
+ // merge.
+ return make_range(std::prev(SourceI), std::prev(SourceI));
+ }
+
+ assert(SCCs[TargetIdx] == &TargetSCC &&
+ "Should not have moved target if connected!");
+ SourceIdx = SourceI - SCCs.begin();
+ assert(SCCs[SourceIdx] == &SourceSCC &&
+ "Bad updated index computation for the source SCC!");
+
+ // See whether there are any remaining intervening SCCs between the source
+  // and target. If so, we need to make sure they all are reachable from the
+ // target.
+ if (SourceIdx + 1 < TargetIdx) {
+ ConnectedSet.clear();
+ ComputeTargetConnectedSet(ConnectedSet);
+
+ // Partition SCCs so that only SCCs reached from the target remain between
+ // the source and the target. This preserves postorder.
+ auto TargetI = std::stable_partition(
+ SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1,
+ [&ConnectedSet](SCCT *C) { return ConnectedSet.count(C); });
+ for (int i = SourceIdx + 1, e = TargetIdx + 1; i < e; ++i)
+ SCCIndices.find(SCCs[i])->second = i;
+ TargetIdx = std::prev(TargetI) - SCCs.begin();
+ assert(SCCs[TargetIdx] == &TargetSCC &&
+ "Should always end with the target!");
+ }
+
+ // At this point, we know that connecting source to target forms a cycle
+ // because target connects back to source, and we know that all of the SCCs
+ // between the source and target in the postorder sequence participate in that
+ // cycle.
+ return make_range(SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx);
+}
+
SmallVector<LazyCallGraph::SCC *, 1>
LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
assert(!SourceN[TargetN].isCall() && "Must start with a ref edge!");
-
SmallVector<SCC *, 1> DeletedSCCs;
+#ifndef NDEBUG
+ // In a debug build, verify the RefSCC is valid to start with and when this
+ // routine finishes.
+ verify();
+ auto VerifyOnExit = make_scope_exit([&]() { verify(); });
+#endif
+
SCC &SourceSCC = *G->lookupSCC(SourceN);
SCC &TargetSCC = *G->lookupSCC(TargetN);
@@ -274,10 +478,6 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
// we've just added more connectivity.
if (&SourceSCC == &TargetSCC) {
SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
-#ifndef NDEBUG
- // Check that the RefSCC is still valid.
- verify();
-#endif
return DeletedSCCs;
}
@@ -291,114 +491,44 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
int TargetIdx = SCCIndices[&TargetSCC];
if (TargetIdx < SourceIdx) {
SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
-#ifndef NDEBUG
- // Check that the RefSCC is still valid.
- verify();
-#endif
return DeletedSCCs;
}
- // When we do have an edge from an earlier SCC to a later SCC in the
- // postorder sequence, all of the SCCs which may be impacted are in the
- // closed range of those two within the postorder sequence. The algorithm to
- // restore the state is as follows:
- //
- // 1) Starting from the source SCC, construct a set of SCCs which reach the
- // source SCC consisting of just the source SCC. Then scan toward the
- // target SCC in postorder and for each SCC, if it has an edge to an SCC
- // in the set, add it to the set. Otherwise, the source SCC is not
- // a successor, move it in the postorder sequence to immediately before
- // the source SCC, shifting the source SCC and all SCCs in the set one
- // position toward the target SCC. Stop scanning after processing the
- // target SCC.
- // 2) If the source SCC is now past the target SCC in the postorder sequence,
- // and thus the new edge will flow toward the start, we are done.
- // 3) Otherwise, starting from the target SCC, walk all edges which reach an
- // SCC between the source and the target, and add them to the set of
- // connected SCCs, then recurse through them. Once a complete set of the
- // SCCs the target connects to is known, hoist the remaining SCCs between
- // the source and the target to be above the target. Note that there is no
- // need to process the source SCC, it is already known to connect.
- // 4) At this point, all of the SCCs in the closed range between the source
- // SCC and the target SCC in the postorder sequence are connected,
- // including the target SCC and the source SCC. Inserting the edge from
- // the source SCC to the target SCC will form a cycle out of precisely
- // these SCCs. Thus we can merge all of the SCCs in this closed range into
- // a single SCC.
- //
- // This process has various important properties:
- // - Only mutates the SCCs when adding the edge actually changes the SCC
- // structure.
- // - Never mutates SCCs which are unaffected by the change.
- // - Updates the postorder sequence to correctly satisfy the postorder
- // constraint after the edge is inserted.
- // - Only reorders SCCs in the closed postorder sequence from the source to
- // the target, so easy to bound how much has changed even in the ordering.
- // - Big-O is the number of edges in the closed postorder range of SCCs from
- // source to target.
-
- assert(SourceIdx < TargetIdx && "Cannot have equal indices here!");
- SmallPtrSet<SCC *, 4> ConnectedSet;
-
// Compute the SCCs which (transitively) reach the source.
- ConnectedSet.insert(&SourceSCC);
- auto IsConnected = [&](SCC &C) {
- for (Node &N : C)
- for (Edge &E : N.calls()) {
- assert(E.getNode() && "Must have formed a node within an SCC!");
- if (ConnectedSet.count(G->lookupSCC(*E.getNode())))
- return true;
- }
-
- return false;
- };
-
- for (SCC *C :
- make_range(SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1))
- if (IsConnected(*C))
- ConnectedSet.insert(C);
-
- // Partition the SCCs in this part of the port-order sequence so only SCCs
- // connecting to the source remain between it and the target. This is
- // a benign partition as it preserves postorder.
- auto SourceI = std::stable_partition(
- SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx + 1,
- [&ConnectedSet](SCC *C) { return !ConnectedSet.count(C); });
- for (int i = SourceIdx, e = TargetIdx + 1; i < e; ++i)
- SCCIndices.find(SCCs[i])->second = i;
-
- // If the target doesn't connect to the source, then we've corrected the
- // post-order and there are no cycles formed.
- if (!ConnectedSet.count(&TargetSCC)) {
- assert(SourceI > (SCCs.begin() + SourceIdx) &&
- "Must have moved the source to fix the post-order.");
- assert(*std::prev(SourceI) == &TargetSCC &&
- "Last SCC to move should have bene the target.");
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
+ auto ComputeSourceConnectedSet = [&](SmallPtrSetImpl<SCC *> &ConnectedSet) {
#ifndef NDEBUG
+ // Check that the RefSCC is still valid before computing this as the
+    // results will be nonsensical if we've broken its invariants.
verify();
#endif
- return DeletedSCCs;
- }
+ ConnectedSet.insert(&SourceSCC);
+ auto IsConnected = [&](SCC &C) {
+ for (Node &N : C)
+ for (Edge &E : N.calls()) {
+ assert(E.getNode() && "Must have formed a node within an SCC!");
+ if (ConnectedSet.count(G->lookupSCC(*E.getNode())))
+ return true;
+ }
- assert(SCCs[TargetIdx] == &TargetSCC &&
- "Should not have moved target if connected!");
- SourceIdx = SourceI - SCCs.begin();
+ return false;
+ };
+
+ for (SCC *C :
+ make_range(SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1))
+ if (IsConnected(*C))
+ ConnectedSet.insert(C);
+ };
+ // Use a normal worklist to find which SCCs the target connects to. We still
+ // bound the search based on the range in the postorder list we care about,
+ // but because this is forward connectivity we just "recurse" through the
+ // edges.
+ auto ComputeTargetConnectedSet = [&](SmallPtrSetImpl<SCC *> &ConnectedSet) {
#ifndef NDEBUG
- // Check that the RefSCC is still valid.
- verify();
+ // Check that the RefSCC is still valid before computing this as the
+    // results will be nonsensical if we've broken its invariants.
+ verify();
#endif
-
- // See whether there are any remaining intervening SCCs between the source
- // and target. If so we need to make sure they all are reachable form the
- // target.
- if (SourceIdx + 1 < TargetIdx) {
- // Use a normal worklist to find which SCCs the target connects to. We still
- // bound the search based on the range in the postorder list we care about,
- // but because this is forward connectivity we just "recurse" through the
- // edges.
- ConnectedSet.clear();
ConnectedSet.insert(&TargetSCC);
SmallVector<SCC *, 4> Worklist;
Worklist.push_back(&TargetSCC);
@@ -421,35 +551,36 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
Worklist.push_back(&EdgeC);
}
} while (!Worklist.empty());
+ };
- // Partition SCCs so that only SCCs reached from the target remain between
- // the source and the target. This preserves postorder.
- auto TargetI = std::stable_partition(
- SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1,
- [&ConnectedSet](SCC *C) { return ConnectedSet.count(C); });
- for (int i = SourceIdx + 1, e = TargetIdx + 1; i < e; ++i)
- SCCIndices.find(SCCs[i])->second = i;
- TargetIdx = std::prev(TargetI) - SCCs.begin();
- assert(SCCs[TargetIdx] == &TargetSCC &&
- "Should always end with the target!");
+ // Use a generic helper to update the postorder sequence of SCCs and return
+ // a range of any SCCs connected into a cycle by inserting this edge. This
+ // routine will also take care of updating the indices into the postorder
+ // sequence.
+ auto MergeRange = updatePostorderSequenceForEdgeInsertion(
+ SourceSCC, TargetSCC, SCCs, SCCIndices, ComputeSourceConnectedSet,
+ ComputeTargetConnectedSet);
+
+ // If the merge range is empty, then adding the edge didn't actually form any
+ // new cycles. We're done.
+ if (MergeRange.begin() == MergeRange.end()) {
+ // Now that the SCC structure is finalized, flip the kind to call.
+ SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
+ return DeletedSCCs;
+ }
#ifndef NDEBUG
- // Check that the RefSCC is still valid.
- verify();
+ // Before merging, check that the RefSCC remains valid after all the
+ // postorder updates.
+ verify();
#endif
- }
- // At this point, we know that connecting source to target forms a cycle
- // because target connects back to source, and we know that all of the SCCs
- // between the source and target in the postorder sequence participate in that
- // cycle. This means that we need to merge all of these SCCs into a single
+ // Otherwise we need to merge all of the SCCs in the cycle into a single
// result SCC.
//
// NB: We merge into the target because all of these functions were already
// reachable from the target, meaning any SCC-wide properties deduced about it
// other than the set of functions within it will not have changed.
- auto MergeRange =
- make_range(SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx);
for (SCC *C : MergeRange) {
assert(C != &TargetSCC &&
"We merge *into* the target and shouldn't process it here!");
@@ -471,37 +602,55 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
// Now that the SCC structure is finalized, flip the kind to call.
SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
-#ifndef NDEBUG
- // And we're done! Verify in debug builds that the RefSCC is coherent.
- verify();
-#endif
+ // And we're done!
return DeletedSCCs;
}
-void LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN,
- Node &TargetN) {
+void LazyCallGraph::RefSCC::switchTrivialInternalEdgeToRef(Node &SourceN,
+ Node &TargetN) {
assert(SourceN[TargetN].isCall() && "Must start with a call edge!");
- SCC &SourceSCC = *G->lookupSCC(SourceN);
- SCC &TargetSCC = *G->lookupSCC(TargetN);
+#ifndef NDEBUG
+ // In a debug build, verify the RefSCC is valid to start with and when this
+ // routine finishes.
+ verify();
+ auto VerifyOnExit = make_scope_exit([&]() { verify(); });
+#endif
- assert(&SourceSCC.getOuterRefSCC() == this &&
+ assert(G->lookupRefSCC(SourceN) == this &&
"Source must be in this RefSCC.");
- assert(&TargetSCC.getOuterRefSCC() == this &&
+ assert(G->lookupRefSCC(TargetN) == this &&
"Target must be in this RefSCC.");
+ assert(G->lookupSCC(SourceN) != G->lookupSCC(TargetN) &&
+ "Source and Target must be in separate SCCs for this to be trivial!");
// Set the edge kind.
SourceN.setEdgeKind(TargetN.getFunction(), Edge::Ref);
+}
+
+iterator_range<LazyCallGraph::RefSCC::iterator>
+LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
+ assert(SourceN[TargetN].isCall() && "Must start with a call edge!");
- // If this call edge is just connecting two separate SCCs within this RefSCC,
- // there is nothing to do.
- if (&SourceSCC != &TargetSCC) {
#ifndef NDEBUG
- // Check that the RefSCC is still valid.
- verify();
+ // In a debug build, verify the RefSCC is valid to start with and when this
+ // routine finishes.
+ verify();
+ auto VerifyOnExit = make_scope_exit([&]() { verify(); });
#endif
- return;
- }
+
+ assert(G->lookupRefSCC(SourceN) == this &&
+ "Source must be in this RefSCC.");
+ assert(G->lookupRefSCC(TargetN) == this &&
+ "Target must be in this RefSCC.");
+
+ SCC &TargetSCC = *G->lookupSCC(TargetN);
+ assert(G->lookupSCC(SourceN) == &TargetSCC && "Source and Target must be in "
+ "the same SCC to require the "
+ "full CG update.");
+
+ // Set the edge kind.
+ SourceN.setEdgeKind(TargetN.getFunction(), Edge::Ref);
// Otherwise we are removing a call edge from a single SCC. This may break
// the cycle. In order to compute the new set of SCCs, we need to do a small
@@ -635,10 +784,9 @@ void LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN,
// root DFS number.
auto SCCNodes = make_range(
PendingSCCStack.rbegin(),
- std::find_if(PendingSCCStack.rbegin(), PendingSCCStack.rend(),
- [RootDFSNumber](Node *N) {
- return N->DFSNumber < RootDFSNumber;
- }));
+ find_if(reverse(PendingSCCStack), [RootDFSNumber](const Node *N) {
+ return N->DFSNumber < RootDFSNumber;
+ }));
// Form a new SCC out of these nodes and then clear them off our pending
// stack.
@@ -663,10 +811,8 @@ void LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN,
for (int Idx = OldIdx, Size = SCCs.size(); Idx < Size; ++Idx)
SCCIndices[SCCs[Idx]] = Idx;
-#ifndef NDEBUG
- // We're done. Check the validity on our way out.
- verify();
-#endif
+ return make_range(SCCs.begin() + OldIdx,
+ SCCs.begin() + OldIdx + NewSCCs.size());
}
void LazyCallGraph::RefSCC::switchOutgoingEdgeToCall(Node &SourceN,
@@ -746,112 +892,113 @@ void LazyCallGraph::RefSCC::insertOutgoingEdge(Node &SourceN, Node &TargetN,
SmallVector<LazyCallGraph::RefSCC *, 1>
LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
- assert(G->lookupRefSCC(TargetN) == this && "Target must be in this SCC.");
-
- // We store the RefSCCs found to be connected in postorder so that we can use
- // that when merging. We also return this to the caller to allow them to
- // invalidate information pertaining to these RefSCCs.
- SmallVector<RefSCC *, 1> Connected;
-
+ assert(G->lookupRefSCC(TargetN) == this && "Target must be in this RefSCC.");
RefSCC &SourceC = *G->lookupRefSCC(SourceN);
- assert(&SourceC != this && "Source must not be in this SCC.");
+ assert(&SourceC != this && "Source must not be in this RefSCC.");
assert(SourceC.isDescendantOf(*this) &&
"Source must be a descendant of the Target.");
- // The algorithm we use for merging SCCs based on the cycle introduced here
- // is to walk the RefSCC inverted DAG formed by the parent sets. The inverse
- // graph has the same cycle properties as the actual DAG of the RefSCCs, and
- // when forming RefSCCs lazily by a DFS, the bottom of the graph won't exist
- // in many cases which should prune the search space.
- //
- // FIXME: We can get this pruning behavior even after the incremental RefSCC
- // formation by leaving behind (conservative) DFS numberings in the nodes,
- // and pruning the search with them. These would need to be cleverly updated
- // during the removal of intra-SCC edges, but could be preserved
- // conservatively.
- //
- // FIXME: This operation currently creates ordering stability problems
- // because we don't use stably ordered containers for the parent SCCs.
-
- // The set of RefSCCs that are connected to the parent, and thus will
- // participate in the merged connected component.
- SmallPtrSet<RefSCC *, 8> ConnectedSet;
- ConnectedSet.insert(this);
-
- // We build up a DFS stack of the parents chains.
- SmallVector<std::pair<RefSCC *, parent_iterator>, 8> DFSStack;
- SmallPtrSet<RefSCC *, 8> Visited;
- int ConnectedDepth = -1;
- DFSStack.push_back({&SourceC, SourceC.parent_begin()});
- do {
- auto DFSPair = DFSStack.pop_back_val();
- RefSCC *C = DFSPair.first;
- parent_iterator I = DFSPair.second;
- auto E = C->parent_end();
+ SmallVector<RefSCC *, 1> DeletedRefSCCs;
- while (I != E) {
- RefSCC &Parent = *I++;
-
- // If we have already processed this parent SCC, skip it, and remember
- // whether it was connected so we don't have to check the rest of the
- // stack. This also handles when we reach a child of the 'this' SCC (the
- // callee) which terminates the search.
- if (ConnectedSet.count(&Parent)) {
- assert(ConnectedDepth < (int)DFSStack.size() &&
- "Cannot have a connected depth greater than the DFS depth!");
- ConnectedDepth = DFSStack.size();
- continue;
+#ifndef NDEBUG
+ // In a debug build, verify the RefSCC is valid to start with and when this
+ // routine finishes.
+ verify();
+ auto VerifyOnExit = make_scope_exit([&]() { verify(); });
+#endif
+
+ int SourceIdx = G->RefSCCIndices[&SourceC];
+ int TargetIdx = G->RefSCCIndices[this];
+ assert(SourceIdx < TargetIdx &&
+ "Postorder list doesn't see edge as incoming!");
+
+ // Compute the RefSCCs which (transitively) reach the source. We do this by
+ // working backwards from the source using the parent set in each RefSCC,
+ // skipping any RefSCCs that don't fall in the postorder range. This has the
+  // advantage of walking the sparser parent edges (in high fan-out graphs)
+  // but, more importantly, it avoids examining all forward edges in all
+  // RefSCCs within the postorder range which aren't in fact connected. Only
+  // connected RefSCCs (and their edges) are visited here.
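+  // In effect: when many RefSCCs call into one shared utility RefSCC, the
+  // backward walk visits only the few RefSCCs that can actually reach the
+  // source instead of scanning every edge of every RefSCC in the range.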
+ auto ComputeSourceConnectedSet = [&](SmallPtrSetImpl<RefSCC *> &Set) {
+ Set.insert(&SourceC);
+ SmallVector<RefSCC *, 4> Worklist;
+ Worklist.push_back(&SourceC);
+ do {
+ RefSCC &RC = *Worklist.pop_back_val();
+ for (RefSCC &ParentRC : RC.parents()) {
+ // Skip any RefSCCs outside the range of source to target in the
+ // postorder sequence.
+ int ParentIdx = G->getRefSCCIndex(ParentRC);
+        assert(ParentIdx > SourceIdx &&
+               "Parent cannot precede source in postorder!");
+ if (ParentIdx > TargetIdx)
+ continue;
+ if (Set.insert(&ParentRC).second)
+ // First edge connecting to this parent, add it to our worklist.
+ Worklist.push_back(&ParentRC);
}
- if (Visited.count(&Parent))
- continue;
+ } while (!Worklist.empty());
+ };
- // We fully explore the depth-first space, adding nodes to the connected
- // set only as we pop them off, so "recurse" by rotating to the parent.
- DFSStack.push_back({C, I});
- C = &Parent;
- I = C->parent_begin();
- E = C->parent_end();
- }
+  // Use a normal worklist to find which RefSCCs the target connects to. We
+  // still bound the search based on the range in the postorder list we care
+  // about, but because this is forward connectivity we just "recurse" through
+  // the edges.
+ auto ComputeTargetConnectedSet = [&](SmallPtrSetImpl<RefSCC *> &Set) {
+ Set.insert(this);
+ SmallVector<RefSCC *, 4> Worklist;
+ Worklist.push_back(this);
+ do {
+ RefSCC &RC = *Worklist.pop_back_val();
+ for (SCC &C : RC)
+ for (Node &N : C)
+ for (Edge &E : N) {
+ assert(E.getNode() && "Must have formed a node!");
+ RefSCC &EdgeRC = *G->lookupRefSCC(*E.getNode());
+ if (G->getRefSCCIndex(EdgeRC) <= SourceIdx)
+ // Not in the postorder sequence between source and target.
+ continue;
+
+ if (Set.insert(&EdgeRC).second)
+ Worklist.push_back(&EdgeRC);
+ }
+ } while (!Worklist.empty());
+ };
- // If we've found a connection anywhere below this point on the stack (and
- // thus up the parent graph from the caller), the current node needs to be
- // added to the connected set now that we've processed all of its parents.
- if ((int)DFSStack.size() == ConnectedDepth) {
- --ConnectedDepth; // We're finished with this connection.
- bool Inserted = ConnectedSet.insert(C).second;
- (void)Inserted;
- assert(Inserted && "Cannot insert a refSCC multiple times!");
- Connected.push_back(C);
- } else {
- // Otherwise remember that its parents don't ever connect.
- assert(ConnectedDepth < (int)DFSStack.size() &&
- "Cannot have a connected depth greater than the DFS depth!");
- Visited.insert(C);
- }
- } while (!DFSStack.empty());
+ // Use a generic helper to update the postorder sequence of RefSCCs and return
+ // a range of any RefSCCs connected into a cycle by inserting this edge. This
+ // routine will also take care of updating the indices into the postorder
+ // sequence.
+ iterator_range<SmallVectorImpl<RefSCC *>::iterator> MergeRange =
+ updatePostorderSequenceForEdgeInsertion(
+ SourceC, *this, G->PostOrderRefSCCs, G->RefSCCIndices,
+ ComputeSourceConnectedSet, ComputeTargetConnectedSet);
+
+ // Build a set so we can do fast tests for whether a RefSCC will end up as
+ // part of the merged RefSCC.
+ SmallPtrSet<RefSCC *, 16> MergeSet(MergeRange.begin(), MergeRange.end());
+
+ // This RefSCC will always be part of that set, so just insert it here.
+ MergeSet.insert(this);
// Now that we have identified all of the SCCs which need to be merged into
// a connected set with the inserted edge, merge all of them into this SCC.
- // We walk the newly connected RefSCCs in the reverse postorder of the parent
- // DAG walk above and merge in each of their SCC postorder lists. This
- // ensures a merged postorder SCC list.
SmallVector<SCC *, 16> MergedSCCs;
int SCCIndex = 0;
- for (RefSCC *C : reverse(Connected)) {
- assert(C != this &&
- "This RefSCC should terminate the DFS without being reached.");
+ for (RefSCC *RC : MergeRange) {
+ assert(RC != this && "We're merging into the target RefSCC, so it "
+ "shouldn't be in the range.");
    // Merge the parents which aren't part of the merge into our parents.
- for (RefSCC *ParentC : C->Parents)
- if (!ConnectedSet.count(ParentC))
- Parents.insert(ParentC);
- C->Parents.clear();
+ for (RefSCC *ParentRC : RC->Parents)
+ if (!MergeSet.count(ParentRC))
+ Parents.insert(ParentRC);
+ RC->Parents.clear();
// Walk the inner SCCs to update their up-pointer and walk all the edges to
// update any parent sets.
// FIXME: We should try to find a way to avoid this (rather expensive) edge
// walk by updating the parent sets in some other manner.
- for (SCC &InnerC : *C) {
+ for (SCC &InnerC : *RC) {
InnerC.OuterRefSCC = this;
SCCIndices[&InnerC] = SCCIndex++;
for (Node &N : InnerC) {
@@ -860,9 +1007,9 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
assert(E.getNode() &&
"Cannot have a null node within a visited SCC!");
RefSCC &ChildRC = *G->lookupRefSCC(*E.getNode());
- if (ConnectedSet.count(&ChildRC))
+ if (MergeSet.count(&ChildRC))
continue;
- ChildRC.Parents.erase(C);
+ ChildRC.Parents.erase(RC);
ChildRC.Parents.insert(this);
}
}
@@ -871,33 +1018,37 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
// Now merge in the SCCs. We can actually move here so try to reuse storage
// the first time through.
if (MergedSCCs.empty())
- MergedSCCs = std::move(C->SCCs);
+ MergedSCCs = std::move(RC->SCCs);
else
- MergedSCCs.append(C->SCCs.begin(), C->SCCs.end());
- C->SCCs.clear();
+ MergedSCCs.append(RC->SCCs.begin(), RC->SCCs.end());
+ RC->SCCs.clear();
+ DeletedRefSCCs.push_back(RC);
}
- // Finally append our original SCCs to the merged list and move it into
- // place.
+ // Append our original SCCs to the merged list and move it into place.
for (SCC &InnerC : *this)
SCCIndices[&InnerC] = SCCIndex++;
MergedSCCs.append(SCCs.begin(), SCCs.end());
SCCs = std::move(MergedSCCs);
+ // Remove the merged away RefSCCs from the post order sequence.
+ for (RefSCC *RC : MergeRange)
+ G->RefSCCIndices.erase(RC);
+ int IndexOffset = MergeRange.end() - MergeRange.begin();
+ auto EraseEnd =
+ G->PostOrderRefSCCs.erase(MergeRange.begin(), MergeRange.end());
+ for (RefSCC *RC : make_range(EraseEnd, G->PostOrderRefSCCs.end()))
+ G->RefSCCIndices[RC] -= IndexOffset;
+
+  // At this point we have a merged RefSCC with a post-order list of SCCs;
+  // just connect the nodes to form the new edge.
SourceN.insertEdgeInternal(TargetN, Edge::Ref);
-#ifndef NDEBUG
- // Check that the RefSCC is still valid.
- verify();
-#endif
-
// We return the list of SCCs which were merged so that callers can
// invalidate any data they have associated with those SCCs. Note that these
// SCCs are no longer in an interesting state (they are totally empty) but
// the pointers will remain stable for the life of the graph itself.
- return Connected;
+ return DeletedRefSCCs;
}
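Both connected-set computations above are bounded worklist searches: reachability is only followed through RefSCCs whose postorder index lies between the source and the target, which is what keeps the update cheap. A minimal standalone sketch of that bounding idea, using hypothetical GraphNode/Parents names rather than the LazyCallGraph API:

#include <cassert>
#include <unordered_set>
#include <vector>

struct GraphNode {
  int PostOrderIdx;
  std::vector<GraphNode *> Parents;
};

// Collect everything that reaches Source through parent edges while staying
// inside the (SourceIdx, TargetIdx] postorder window; nodes past the target
// cannot be part of the newly formed cycle, so they prune the search.
std::unordered_set<GraphNode *>
reachableWithin(GraphNode &Source, int SourceIdx, int TargetIdx) {
  std::unordered_set<GraphNode *> Set{&Source};
  std::vector<GraphNode *> Worklist{&Source};
  while (!Worklist.empty()) {
    GraphNode *N = Worklist.back();
    Worklist.pop_back();
    for (GraphNode *P : N->Parents) {
      assert(P->PostOrderIdx > SourceIdx && "Parents follow the source!");
      if (P->PostOrderIdx > TargetIdx)
        continue;
      if (Set.insert(P).second)
        Worklist.push_back(P);
    }
  }
  return Set;
}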
void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) {
@@ -907,10 +1058,16 @@ void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) {
RefSCC &TargetRC = *G->lookupRefSCC(TargetN);
assert(&TargetRC != this && "The target must not be a member of this RefSCC");
- assert(std::find(G->LeafRefSCCs.begin(), G->LeafRefSCCs.end(), this) ==
- G->LeafRefSCCs.end() &&
+ assert(!is_contained(G->LeafRefSCCs, this) &&
"Cannot have a leaf RefSCC source.");
+#ifndef NDEBUG
+ // In a debug build, verify the RefSCC is valid to start with and when this
+ // routine finishes.
+ verify();
+ auto VerifyOnExit = make_scope_exit([&]() { verify(); });
+#endif
+
// First remove it from the node.
SourceN.removeEdgeInternal(TargetN.getFunction());
@@ -962,6 +1119,13 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
assert(!SourceN[TargetN].isCall() &&
"Cannot remove a call edge, it must first be made a ref edge");
+#ifndef NDEBUG
+ // In a debug build, verify the RefSCC is valid to start with and when this
+ // routine finishes.
+ verify();
+ auto VerifyOnExit = make_scope_exit([&]() { verify(); });
+#endif
+
// First remove the actual edge.
SourceN.removeEdgeInternal(TargetN.getFunction());
@@ -972,6 +1136,13 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
if (&SourceN == &TargetN)
return Result;
+ // If this ref edge is within an SCC then there are sufficient other edges to
+ // form a cycle without this edge so removing it is a no-op.
+ SCC &SourceC = *G->lookupSCC(SourceN);
+ SCC &TargetC = *G->lookupSCC(TargetN);
+ if (&SourceC == &TargetC)
+ return Result;
+
// We build somewhat synthetic new RefSCCs by providing a postorder mapping
// for each inner SCC. We also store these associated with *nodes* rather
// than SCCs because this saves a round-trip through the node->SCC map and in
@@ -994,7 +1165,6 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
// and handle participants in that cycle without walking all the edges that
// form the connections, and instead by relying on the fundamental guarantee
// coming into this operation.
- SCC &TargetC = *G->lookupSCC(TargetN);
for (Node &N : TargetC)
PostOrderMapping[&N] = RootPostOrderNumber;
@@ -1082,9 +1252,8 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
}
// If this child isn't currently in this RefSCC, no need to process
- // it.
- // However, we do need to remove this RefSCC from its RefSCC's parent
- // set.
+ // it. However, we do need to remove this RefSCC from its RefSCC's
+ // parent set.
RefSCC &ChildRC = *G->lookupRefSCC(ChildN);
ChildRC.Parents.erase(this);
++I;
@@ -1121,10 +1290,9 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
// root DFS number.
auto RefSCCNodes = make_range(
PendingRefSCCStack.rbegin(),
- std::find_if(PendingRefSCCStack.rbegin(), PendingRefSCCStack.rend(),
- [RootDFSNumber](Node *N) {
- return N->DFSNumber < RootDFSNumber;
- }));
+ find_if(reverse(PendingRefSCCStack), [RootDFSNumber](const Node *N) {
+ return N->DFSNumber < RootDFSNumber;
+ }));
// Mark the postorder number for these nodes and clear them off the
// stack. We'll use the postorder number to pull them into RefSCCs at the
@@ -1149,6 +1317,25 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
for (int i = 1; i < PostOrderNumber; ++i)
Result.push_back(G->createRefSCC(*G));
+ // Insert the resulting postorder sequence into the global graph postorder
+  // sequence before the current RefSCC in that sequence. The idea is that
+ // this RefSCC is the target of the reference edge removed, and thus has
+ // a direct or indirect edge to every other RefSCC formed and so must be at
+ // the end of any postorder traversal.
+ //
+ // FIXME: It'd be nice to change the APIs so that we returned an iterator
+ // range over the global postorder sequence and generally use that sequence
+ // rather than building a separate result vector here.
+ if (!Result.empty()) {
+ int Idx = G->getRefSCCIndex(*this);
+ G->PostOrderRefSCCs.insert(G->PostOrderRefSCCs.begin() + Idx,
+ Result.begin(), Result.end());
+ for (int i : seq<int>(Idx, G->PostOrderRefSCCs.size()))
+ G->RefSCCIndices[G->PostOrderRefSCCs[i]] = i;
+ assert(G->PostOrderRefSCCs[G->getRefSCCIndex(*this)] == this &&
+ "Failed to update this RefSCC's index after insertion!");
+ }
+
for (SCC *C : SCCs) {
auto PostOrderI = PostOrderMapping.find(&*C->begin());
assert(PostOrderI != PostOrderMapping.end() &&
@@ -1166,7 +1353,7 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
RefSCC &RC = *Result[SCCNumber - 1];
int SCCIndex = RC.SCCs.size();
RC.SCCs.push_back(C);
- SCCIndices[C] = SCCIndex;
+ RC.SCCIndices[C] = SCCIndex;
C->OuterRefSCC = &RC;
}
@@ -1178,12 +1365,15 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
G->connectRefSCC(*RC);
// Now erase all but the root's SCCs.
- SCCs.erase(std::remove_if(SCCs.begin(), SCCs.end(),
- [&](SCC *C) {
- return PostOrderMapping.lookup(&*C->begin()) !=
- RootPostOrderNumber;
- }),
+ SCCs.erase(remove_if(SCCs,
+ [&](SCC *C) {
+ return PostOrderMapping.lookup(&*C->begin()) !=
+ RootPostOrderNumber;
+ }),
SCCs.end());
+ SCCIndices.clear();
+ for (int i = 0, Size = SCCs.size(); i < Size; ++i)
+ SCCIndices[SCCs[i]] = i;
#ifndef NDEBUG
// Now we need to reconnect the current (root) SCC to the graph. We do this
@@ -1207,11 +1397,24 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
if (!Result.empty())
assert(!IsLeaf && "This SCC cannot be a leaf as we have split out new "
"SCCs by removing this edge.");
- if (!std::any_of(G->LeafRefSCCs.begin(), G->LeafRefSCCs.end(),
- [&](RefSCC *C) { return C == this; }))
+ if (none_of(G->LeafRefSCCs, [&](RefSCC *C) { return C == this; }))
assert(!IsLeaf && "This SCC cannot be a leaf as it already had child "
"SCCs before we removed this edge.");
#endif
+ // And connect both this RefSCC and all the new ones to the correct parents.
+ // The easiest way to do this is just to re-analyze the old parent set.
+ SmallVector<RefSCC *, 4> OldParents(Parents.begin(), Parents.end());
+ Parents.clear();
+ for (RefSCC *ParentRC : OldParents)
+ for (SCC &ParentC : *ParentRC)
+ for (Node &ParentN : ParentC)
+ for (Edge &E : ParentN) {
+ assert(E.getNode() && "Cannot have a missing node in a visited SCC!");
+ RefSCC &RC = *G->lookupRefSCC(*E.getNode());
+ if (&RC != ParentRC)
+ RC.Parents.insert(ParentRC);
+ }
+
// If this SCC stopped being a leaf through this edge removal, remove it from
// the leaf SCC list. Note that this DTRT in the case where this was never
// a leaf.
@@ -1222,10 +1425,93 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
std::remove(G->LeafRefSCCs.begin(), G->LeafRefSCCs.end(), this),
G->LeafRefSCCs.end());
+#ifndef NDEBUG
+ // Verify all of the new RefSCCs.
+ for (RefSCC *RC : Result)
+ RC->verify();
+#endif
+
// Return the new list of SCCs.
return Result;
}
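The verify-on-entry/verify-on-exit blocks that these mutation routines now open with rely on llvm::make_scope_exit from llvm/ADT/ScopeExit.h, which runs its callback on every exit path. A minimal sketch of the idiom:

#include "llvm/ADT/ScopeExit.h"
#include <cassert>

void mutateStructure(bool &Valid) {
  assert(Valid && "Invariant must hold on entry!");
  // The callback fires when VerifyOnExit is destroyed, so the invariant is
  // re-checked on every return path, including early returns.
  auto VerifyOnExit = llvm::make_scope_exit([&] { assert(Valid); });
  if (!Valid)
    return; // still verified on the way out
  // ... mutations that must preserve Valid ...
}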
+void LazyCallGraph::RefSCC::handleTrivialEdgeInsertion(Node &SourceN,
+ Node &TargetN) {
+  // The only trivial case that requires any graph updates is when we add a
+  // new ref edge that may connect different RefSCCs along that path. This is
+  // only because of the parents set. Every other part of the graph remains
+  // constant after this edge insertion.
+ assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC.");
+ RefSCC &TargetRC = *G->lookupRefSCC(TargetN);
+  if (&TargetRC == this)
+    return;
+
+ assert(TargetRC.isDescendantOf(*this) &&
+ "Target must be a descendant of the Source.");
+ // The only change required is to add this RefSCC to the parent set of the
+ // target. This is a set and so idempotent if the edge already existed.
+ TargetRC.Parents.insert(this);
+}
+
+void LazyCallGraph::RefSCC::insertTrivialCallEdge(Node &SourceN,
+ Node &TargetN) {
+#ifndef NDEBUG
+ // Check that the RefSCC is still valid when we finish.
+ auto ExitVerifier = make_scope_exit([this] { verify(); });
+
+ // Check that we aren't breaking some invariants of the SCC graph.
+ SCC &SourceC = *G->lookupSCC(SourceN);
+ SCC &TargetC = *G->lookupSCC(TargetN);
+ if (&SourceC != &TargetC)
+ assert(SourceC.isAncestorOf(TargetC) &&
+ "Call edge is not trivial in the SCC graph!");
+#endif
+ // First insert it into the source or find the existing edge.
+ auto InsertResult = SourceN.EdgeIndexMap.insert(
+ {&TargetN.getFunction(), SourceN.Edges.size()});
+ if (!InsertResult.second) {
+ // Already an edge, just update it.
+ Edge &E = SourceN.Edges[InsertResult.first->second];
+ if (E.isCall())
+ return; // Nothing to do!
+ E.setKind(Edge::Call);
+ } else {
+ // Create the new edge.
+ SourceN.Edges.emplace_back(TargetN, Edge::Call);
+ }
+
+ // Now that we have the edge, handle the graph fallout.
+ handleTrivialEdgeInsertion(SourceN, TargetN);
+}
+
+void LazyCallGraph::RefSCC::insertTrivialRefEdge(Node &SourceN, Node &TargetN) {
+#ifndef NDEBUG
+ // Check that the RefSCC is still valid when we finish.
+ auto ExitVerifier = make_scope_exit([this] { verify(); });
+
+ // Check that we aren't breaking some invariants of the RefSCC graph.
+ RefSCC &SourceRC = *G->lookupRefSCC(SourceN);
+ RefSCC &TargetRC = *G->lookupRefSCC(TargetN);
+ if (&SourceRC != &TargetRC)
+ assert(SourceRC.isAncestorOf(TargetRC) &&
+ "Ref edge is not trivial in the RefSCC graph!");
+#endif
+ // First insert it into the source or find the existing edge.
+ auto InsertResult = SourceN.EdgeIndexMap.insert(
+ {&TargetN.getFunction(), SourceN.Edges.size()});
+ if (!InsertResult.second)
+ // Already an edge, we're done.
+ return;
+
+ // Create the new edge.
+ SourceN.Edges.emplace_back(TargetN, Edge::Ref);
+
+ // Now that we have the edge, handle the graph fallout.
+ handleTrivialEdgeInsertion(SourceN, TargetN);
+}
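Both trivial-insertion routines above use the single-probe map insert idiom: insert() reports through the .second of its result whether the key was new, so one lookup decides between creating an edge and upgrading an existing one. A generic sketch with standard containers (the real code uses the node's EdgeIndexMap and edge vector):

#include <cstddef>
#include <map>
#include <vector>

struct SketchEdge {
  int Target;
  bool IsCall;
};

void addCallEdge(std::map<int, std::size_t> &Index,
                 std::vector<SketchEdge> &Edges, int Target) {
  // Try to claim the next slot; on failure, .first points at the old entry.
  auto InsertResult = Index.insert({Target, Edges.size()});
  if (!InsertResult.second) {
    Edges[InsertResult.first->second].IsCall = true; // upgrade in place
    return;
  }
  Edges.push_back({Target, /*IsCall=*/true}); // genuinely new edge
}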
+
void LazyCallGraph::insertEdge(Node &SourceN, Function &Target, Edge::Kind EK) {
assert(SCCMap.empty() && DFSStack.empty() &&
"This method cannot be called after SCCs have been formed!");
@@ -1240,6 +1526,93 @@ void LazyCallGraph::removeEdge(Node &SourceN, Function &Target) {
return SourceN.removeEdgeInternal(Target);
}
+void LazyCallGraph::removeDeadFunction(Function &F) {
+ // FIXME: This is unnecessarily restrictive. We should be able to remove
+ // functions which recursively call themselves.
+ assert(F.use_empty() &&
+ "This routine should only be called on trivially dead functions!");
+
+ auto EII = EntryIndexMap.find(&F);
+ if (EII != EntryIndexMap.end()) {
+ EntryEdges[EII->second] = Edge();
+ EntryIndexMap.erase(EII);
+ }
+
+ // It's safe to just remove un-visited functions from the RefSCC entry list.
+ // FIXME: This is a linear operation which could become hot and benefit from
+ // an index map.
+ auto RENI = find(RefSCCEntryNodes, &F);
+ if (RENI != RefSCCEntryNodes.end())
+ RefSCCEntryNodes.erase(RENI);
+
+ auto NI = NodeMap.find(&F);
+ if (NI == NodeMap.end())
+ // Not in the graph at all!
+ return;
+
+ Node &N = *NI->second;
+ NodeMap.erase(NI);
+
+ if (SCCMap.empty() && DFSStack.empty()) {
+ // No SCC walk has begun, so removing this is fine and there is nothing
+ // else necessary at this point but clearing out the node.
+ N.clear();
+ return;
+ }
+
+ // Check that we aren't going to break the DFS walk.
+ assert(all_of(DFSStack,
+ [&N](const std::pair<Node *, edge_iterator> &Element) {
+ return Element.first != &N;
+ }) &&
+ "Tried to remove a function currently in the DFS stack!");
+ assert(find(PendingRefSCCStack, &N) == PendingRefSCCStack.end() &&
+ "Tried to remove a function currently pending to add to a RefSCC!");
+
+ // Cannot remove a function which has yet to be visited in the DFS walk, so
+ // if we have a node at all then we must have an SCC and RefSCC.
+ auto CI = SCCMap.find(&N);
+ assert(CI != SCCMap.end() &&
+ "Tried to remove a node without an SCC after DFS walk started!");
+ SCC &C = *CI->second;
+ SCCMap.erase(CI);
+ RefSCC &RC = C.getOuterRefSCC();
+
+ // This node must be the only member of its SCC as it has no callers, and
+ // that SCC must be the only member of a RefSCC as it has no references.
+ // Validate these properties first.
+ assert(C.size() == 1 && "Dead functions must be in a singular SCC");
+ assert(RC.size() == 1 && "Dead functions must be in a singular RefSCC");
+ assert(RC.Parents.empty() && "Cannot have parents of a dead RefSCC!");
+
+ // Now remove this RefSCC from any parents sets and the leaf list.
+ for (Edge &E : N)
+ if (Node *TargetN = E.getNode())
+ if (RefSCC *TargetRC = lookupRefSCC(*TargetN))
+ TargetRC->Parents.erase(&RC);
+ // FIXME: This is a linear operation which could become hot and benefit from
+ // an index map.
+ auto LRI = find(LeafRefSCCs, &RC);
+ if (LRI != LeafRefSCCs.end())
+ LeafRefSCCs.erase(LRI);
+
+ auto RCIndexI = RefSCCIndices.find(&RC);
+ int RCIndex = RCIndexI->second;
+ PostOrderRefSCCs.erase(PostOrderRefSCCs.begin() + RCIndex);
+ RefSCCIndices.erase(RCIndexI);
+ for (int i = RCIndex, Size = PostOrderRefSCCs.size(); i < Size; ++i)
+ RefSCCIndices[PostOrderRefSCCs[i]] = i;
+
+ // Finally clear out all the data structures from the node down through the
+ // components.
+ N.clear();
+ C.clear();
+ RC.clear();
+
+ // Nothing to delete as all the objects are allocated in stable bump pointer
+ // allocators.
+}
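The tail of removeDeadFunction maintains the invariant RefSCCIndices[PostOrderRefSCCs[i]] == i by erasing one slot and renumbering the suffix that shifted left. A standalone sketch of that maintenance step, with standard containers standing in for LLVM's:

#include <unordered_map>
#include <vector>

// Erase V[Idx] and restore Indices[V[i]] == i for the shifted suffix.
template <typename T>
void eraseAndRenumber(std::vector<T *> &V,
                      std::unordered_map<T *, int> &Indices, int Idx) {
  Indices.erase(V[Idx]);
  V.erase(V.begin() + Idx);
  for (int i = Idx, Size = (int)V.size(); i < Size; ++i)
    Indices[V[i]] = i;
}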
+
LazyCallGraph::Node &LazyCallGraph::insertInto(Function &F, Node *&MappedN) {
return *new (MappedN = BPA.Allocate()) Node(*this, F);
}
@@ -1372,10 +1745,9 @@ void LazyCallGraph::buildSCCs(RefSCC &RC, node_stack_range Nodes) {
// root DFS number.
auto SCCNodes = make_range(
PendingSCCStack.rbegin(),
- std::find_if(PendingSCCStack.rbegin(), PendingSCCStack.rend(),
- [RootDFSNumber](Node *N) {
- return N->DFSNumber < RootDFSNumber;
- }));
+ find_if(reverse(PendingSCCStack), [RootDFSNumber](const Node *N) {
+ return N->DFSNumber < RootDFSNumber;
+ }));
// Form a new SCC out of these nodes and then clear them off our pending
// stack.
RC.SCCs.push_back(createSCC(RC, SCCNodes));
@@ -1411,19 +1783,19 @@ void LazyCallGraph::connectRefSCC(RefSCC &RC) {
IsLeaf = false;
}
- // For the SCCs where we fine no child SCCs, add them to the leaf list.
+ // For the SCCs where we find no child SCCs, add them to the leaf list.
if (IsLeaf)
LeafRefSCCs.push_back(&RC);
}
-LazyCallGraph::RefSCC *LazyCallGraph::getNextRefSCCInPostOrder() {
+bool LazyCallGraph::buildNextRefSCCInPostOrder() {
if (DFSStack.empty()) {
Node *N;
do {
// If we've handled all candidate entry nodes to the SCC forest, we're
// done.
if (RefSCCEntryNodes.empty())
- return nullptr;
+ return false;
N = &get(*RefSCCEntryNodes.pop_back_val());
} while (N->DFSNumber != 0);
@@ -1494,9 +1866,9 @@ LazyCallGraph::RefSCC *LazyCallGraph::getNextRefSCCInPostOrder() {
// root DFS number.
auto RefSCCNodes = node_stack_range(
PendingRefSCCStack.rbegin(),
- std::find_if(
- PendingRefSCCStack.rbegin(), PendingRefSCCStack.rend(),
- [RootDFSNumber](Node *N) { return N->DFSNumber < RootDFSNumber; }));
+ find_if(reverse(PendingRefSCCStack), [RootDFSNumber](const Node *N) {
+ return N->DFSNumber < RootDFSNumber;
+ }));
// Form a new RefSCC out of these nodes and then clear them off our pending
// stack.
RefSCC *NewRC = createRefSCC(*this);
@@ -1505,13 +1877,18 @@ LazyCallGraph::RefSCC *LazyCallGraph::getNextRefSCCInPostOrder() {
PendingRefSCCStack.erase(RefSCCNodes.end().base(),
PendingRefSCCStack.end());
- // We return the new node here. This essentially suspends the DFS walk
- // until another RefSCC is requested.
- return NewRC;
+ // Push the new node into the postorder list and return true indicating we
+ // successfully grew the postorder sequence by one.
+ bool Inserted =
+ RefSCCIndices.insert({NewRC, PostOrderRefSCCs.size()}).second;
+ (void)Inserted;
+ assert(Inserted && "Cannot already have this RefSCC in the index map!");
+ PostOrderRefSCCs.push_back(NewRC);
+ return true;
}
}
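Several hunks in this file replace explicit std::find_if(rbegin, rend, ...) calls with LLVM's range helpers; a minimal example of the same idiom, assuming llvm/ADT/STLExtras.h:

#include "llvm/ADT/STLExtras.h"
#include <vector>

// reverse() adapts the container for backward iteration and find_if takes
// the whole range, replacing an explicit rbegin()/rend() iterator pair.
int *findLastNegative(std::vector<int> &V) {
  auto R = llvm::reverse(V);
  auto It = llvm::find_if(R, [](int X) { return X < 0; });
  return It == R.end() ? nullptr : &*It;
}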
-char LazyCallGraphAnalysis::PassID;
+AnalysisKey LazyCallGraphAnalysis::Key;
LazyCallGraphPrinterPass::LazyCallGraphPrinterPass(raw_ostream &OS) : OS(OS) {}
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 4d09b7ca006b..4f6355236873 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
@@ -50,7 +51,7 @@ namespace llvm {
FunctionPass *createLazyValueInfoPass() { return new LazyValueInfoWrapperPass(); }
}
-char LazyValueAnalysis::PassID;
+AnalysisKey LazyValueAnalysis::Key;
//===----------------------------------------------------------------------===//
// LVILatticeVal
@@ -70,12 +71,14 @@ class LVILatticeVal {
/// "nothing known yet".
undefined,
- /// This Value has a specific constant value. (For integers, constantrange
- /// is used instead.)
+ /// This Value has a specific constant value. (For constant integers,
+ /// constantrange is used instead. Integer typed constantexprs can appear
+ /// as constant.)
constant,
- /// This Value is known to not have the specified value. (For integers,
- /// constantrange is used instead.)
+ /// This Value is known to not have the specified value. (For constant
+ /// integers, constantrange is used instead. As above, integer typed
+ /// constantexprs can appear here.)
notconstant,
/// The Value falls within this range. (Used only for integer typed values.)
@@ -139,37 +142,37 @@ public:
return Range;
}
- /// Return true if this is a change in status.
- bool markOverdefined() {
+private:
+ void markOverdefined() {
if (isOverdefined())
- return false;
+ return;
Tag = overdefined;
- return true;
}
- /// Return true if this is a change in status.
- bool markConstant(Constant *V) {
+ void markConstant(Constant *V) {
assert(V && "Marking constant with NULL");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
- return markConstantRange(ConstantRange(CI->getValue()));
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ markConstantRange(ConstantRange(CI->getValue()));
+ return;
+ }
if (isa<UndefValue>(V))
- return false;
+ return;
assert((!isConstant() || getConstant() == V) &&
"Marking constant with different value");
assert(isUndefined());
Tag = constant;
Val = V;
- return true;
}
- /// Return true if this is a change in status.
- bool markNotConstant(Constant *V) {
+ void markNotConstant(Constant *V) {
assert(V && "Marking constant with NULL");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
- return markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
+ return;
+ }
if (isa<UndefValue>(V))
- return false;
+ return;
assert((!isConstant() || getConstant() != V) &&
"Marking constant !constant with same value");
@@ -178,100 +181,70 @@ public:
assert(isUndefined() || isConstant());
Tag = notconstant;
Val = V;
- return true;
}
- /// Return true if this is a change in status.
- bool markConstantRange(ConstantRange NewR) {
+ void markConstantRange(ConstantRange NewR) {
if (isConstantRange()) {
if (NewR.isEmptySet())
- return markOverdefined();
-
- bool changed = Range != NewR;
- Range = std::move(NewR);
- return changed;
+ markOverdefined();
+ else {
+ Range = std::move(NewR);
+ }
+ return;
}
assert(isUndefined());
if (NewR.isEmptySet())
- return markOverdefined();
-
- Tag = constantrange;
- Range = std::move(NewR);
- return true;
+ markOverdefined();
+ else {
+ Tag = constantrange;
+ Range = std::move(NewR);
+ }
}
+public:
+
    /// Merge the specified lattice value into this one, updating this
    /// one in place.
- bool mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) {
- if (RHS.isUndefined() || isOverdefined()) return false;
- if (RHS.isOverdefined()) return markOverdefined();
+ void mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) {
+ if (RHS.isUndefined() || isOverdefined())
+ return;
+ if (RHS.isOverdefined()) {
+ markOverdefined();
+ return;
+ }
if (isUndefined()) {
- Tag = RHS.Tag;
- Val = RHS.Val;
- Range = RHS.Range;
- return true;
+ *this = RHS;
+ return;
}
if (isConstant()) {
- if (RHS.isConstant()) {
- if (Val == RHS.Val)
- return false;
- return markOverdefined();
- }
-
- if (RHS.isNotConstant()) {
- if (Val == RHS.Val)
- return markOverdefined();
-
- // Unless we can prove that the two Constants are different, we must
- // move to overdefined.
- if (ConstantInt *Res =
- dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands(
- CmpInst::ICMP_NE, getConstant(), RHS.getNotConstant(), DL)))
- if (Res->isOne())
- return markNotConstant(RHS.getNotConstant());
-
- return markOverdefined();
- }
-
- return markOverdefined();
+ if (RHS.isConstant() && Val == RHS.Val)
+ return;
+ markOverdefined();
+ return;
}
if (isNotConstant()) {
- if (RHS.isConstant()) {
- if (Val == RHS.Val)
- return markOverdefined();
-
- // Unless we can prove that the two Constants are different, we must
- // move to overdefined.
- if (ConstantInt *Res =
- dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands(
- CmpInst::ICMP_NE, getNotConstant(), RHS.getConstant(), DL)))
- if (Res->isOne())
- return false;
-
- return markOverdefined();
- }
-
- if (RHS.isNotConstant()) {
- if (Val == RHS.Val)
- return false;
- return markOverdefined();
- }
-
- return markOverdefined();
+ if (RHS.isNotConstant() && Val == RHS.Val)
+ return;
+ markOverdefined();
+ return;
}
assert(isConstantRange() && "New LVILattice type?");
- if (!RHS.isConstantRange())
- return markOverdefined();
-
+ if (!RHS.isConstantRange()) {
+ // We can get here if we've encountered a constantexpr of integer type
+ // and merge it with a constantrange.
+ markOverdefined();
+ return;
+ }
ConstantRange NewR = Range.unionWith(RHS.getConstantRange());
if (NewR.isFullSet())
- return markOverdefined();
- return markConstantRange(NewR);
+ markOverdefined();
+ else
+ markConstantRange(NewR);
}
};
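The join rules mergeIn now implements can be stated compactly: undefined joins to anything, identical facts are kept, and any conflict goes to overdefined. A toy sketch of the same semantics (ranges omitted; this is not the LVILatticeVal API):

struct ToyVal {
  enum Tag { Undefined, Constant, Overdefined } T = Undefined;
  int C = 0;

  void mergeIn(const ToyVal &RHS) {
    if (RHS.T == Undefined || T == Overdefined)
      return; // nothing to learn, or already at the top of the lattice
    if (RHS.T == Overdefined || (T == Constant && RHS.C != C)) {
      T = Overdefined; // conflicting facts jump straight to the top
      return;
    }
    if (T == Undefined)
      *this = RHS; // adopt the other side's fact wholesale
  }
};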
@@ -366,6 +339,9 @@ namespace {
/// A callback value handle updates the cache when values are erased.
class LazyValueInfoCache;
struct LVIValueHandle final : public CallbackVH {
+ // Needs to access getValPtr(), which is protected.
+ friend struct DenseMapInfo<LVIValueHandle>;
+
LazyValueInfoCache *Parent;
LVIValueHandle(Value *V, LazyValueInfoCache *P)
@@ -376,7 +352,7 @@ namespace {
deleted();
}
};
-}
+} // end anonymous namespace
namespace {
/// This is the cache kept by LazyValueInfo which
@@ -387,12 +363,15 @@ namespace {
/// entries, allowing us to do a lookup with a binary search.
/// Over-defined lattice values are recorded in OverDefinedCache to reduce
/// memory overhead.
- typedef SmallDenseMap<AssertingVH<BasicBlock>, LVILatticeVal, 4>
- ValueCacheEntryTy;
+ struct ValueCacheEntryTy {
+ ValueCacheEntryTy(Value *V, LazyValueInfoCache *P) : Handle(V, P) {}
+ LVIValueHandle Handle;
+ SmallDenseMap<AssertingVH<BasicBlock>, LVILatticeVal, 4> BlockVals;
+ };
/// This is all of the cached information for all values,
/// mapped from Value* to key information.
- std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache;
+ DenseMap<Value *, std::unique_ptr<ValueCacheEntryTy>> ValueCache;
/// This tracks, on a per-block basis, the set of values that are
/// over-defined at the end of that block.
@@ -404,6 +383,183 @@ namespace {
/// don't spend time removing unused blocks from our caches.
DenseSet<AssertingVH<BasicBlock> > SeenBlocks;
+ public:
+ void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) {
+ SeenBlocks.insert(BB);
+
+ // Insert over-defined values into their own cache to reduce memory
+ // overhead.
+ if (Result.isOverdefined())
+ OverDefinedCache[BB].insert(Val);
+ else {
+ auto It = ValueCache.find_as(Val);
+ if (It == ValueCache.end()) {
+ ValueCache[Val] = make_unique<ValueCacheEntryTy>(Val, this);
+ It = ValueCache.find_as(Val);
+ assert(It != ValueCache.end() && "Val was just added to the map!");
+ }
+ It->second->BlockVals[BB] = Result;
+ }
+ }
+
+ bool isOverdefined(Value *V, BasicBlock *BB) const {
+ auto ODI = OverDefinedCache.find(BB);
+
+ if (ODI == OverDefinedCache.end())
+ return false;
+
+ return ODI->second.count(V);
+ }
+
+ bool hasCachedValueInfo(Value *V, BasicBlock *BB) const {
+ if (isOverdefined(V, BB))
+ return true;
+
+ auto I = ValueCache.find_as(V);
+ if (I == ValueCache.end())
+ return false;
+
+ return I->second->BlockVals.count(BB);
+ }
+
+ LVILatticeVal getCachedValueInfo(Value *V, BasicBlock *BB) const {
+ if (isOverdefined(V, BB))
+ return LVILatticeVal::getOverdefined();
+
+ auto I = ValueCache.find_as(V);
+ if (I == ValueCache.end())
+ return LVILatticeVal();
+ auto BBI = I->second->BlockVals.find(BB);
+ if (BBI == I->second->BlockVals.end())
+ return LVILatticeVal();
+ return BBI->second;
+ }
+
+ /// clear - Empty the cache.
+ void clear() {
+ SeenBlocks.clear();
+ ValueCache.clear();
+ OverDefinedCache.clear();
+ }
+
+ /// Inform the cache that a given value has been deleted.
+ void eraseValue(Value *V);
+
+ /// This is part of the update interface to inform the cache
+ /// that a block has been deleted.
+ void eraseBlock(BasicBlock *BB);
+
+ /// Updates the cache to remove any influence an overdefined value in
+ /// OldSucc might have (unless also overdefined in NewSucc). This just
+ /// flushes elements from the cache and does not add any.
+  void threadEdgeImpl(BasicBlock *OldSucc, BasicBlock *NewSucc);
+
+ friend struct LVIValueHandle;
+ };
+}
+
+void LazyValueInfoCache::eraseValue(Value *V) {
+ SmallVector<AssertingVH<BasicBlock>, 4> ToErase;
+ for (auto &I : OverDefinedCache) {
+ SmallPtrSetImpl<Value *> &ValueSet = I.second;
+ ValueSet.erase(V);
+ if (ValueSet.empty())
+ ToErase.push_back(I.first);
+ }
+ for (auto &BB : ToErase)
+ OverDefinedCache.erase(BB);
+
+ ValueCache.erase(V);
+}
+
+void LVIValueHandle::deleted() {
+ // This erasure deallocates *this, so it MUST happen after we're done
+ // using any and all members of *this.
+ Parent->eraseValue(*this);
+}
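The eviction above is driven by LLVM's CallbackVH (llvm/IR/ValueHandle.h), whose deleted() hook fires when the underlying Value is destroyed. A minimal sketch of the pattern; MyCache here is a hypothetical owner, not the LazyValueInfoCache API:

#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include <set>

struct MyCache {
  std::set<llvm::Value *> Known;
  void eraseValue(llvm::Value *V) { Known.erase(V); }
};

struct CacheEvictingVH final : public llvm::CallbackVH {
  MyCache *Parent;
  CacheEvictingVH(llvm::Value *V, MyCache *P) : CallbackVH(V), Parent(P) {}

  // LLVM invokes this when the Value dies. As the comment above stresses,
  // an erasure that deallocates *this must be the method's last action.
  void deleted() override { Parent->eraseValue(getValPtr()); }
};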
+
+void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
+ // Shortcut if we have never seen this block.
+ DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB);
+ if (I == SeenBlocks.end())
+ return;
+ SeenBlocks.erase(I);
+
+ auto ODI = OverDefinedCache.find(BB);
+ if (ODI != OverDefinedCache.end())
+ OverDefinedCache.erase(ODI);
+
+ for (auto &I : ValueCache)
+ I.second->BlockVals.erase(BB);
+}
+
+void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc,
+ BasicBlock *NewSucc) {
+  // When an edge in the graph has been threaded, values that we could not
+  // previously determine (i.e. values marked overdefined) may become
+  // solvable. We do NOT try to proactively update these values.
+ // Instead, we clear their entries from the cache, and allow lazy updating to
+ // recompute them when needed.
+
+ // The updating process is fairly simple: we need to drop cached info
+ // for all values that were marked overdefined in OldSucc, and for those same
+ // values in any successor of OldSucc (except NewSucc) in which they were
+ // also marked overdefined.
+ std::vector<BasicBlock*> worklist;
+ worklist.push_back(OldSucc);
+
+ auto I = OverDefinedCache.find(OldSucc);
+ if (I == OverDefinedCache.end())
+ return; // Nothing to process here.
+ SmallVector<Value *, 4> ValsToClear(I->second.begin(), I->second.end());
+
+ // Use a worklist to perform a depth-first search of OldSucc's successors.
+ // NOTE: We do not need a visited list since any blocks we have already
+ // visited will have had their overdefined markers cleared already, and we
+ // thus won't loop to their successors.
+ while (!worklist.empty()) {
+ BasicBlock *ToUpdate = worklist.back();
+ worklist.pop_back();
+
+ // Skip blocks only accessible through NewSucc.
+ if (ToUpdate == NewSucc) continue;
+
+ // If a value was marked overdefined in OldSucc, and is here too...
+ auto OI = OverDefinedCache.find(ToUpdate);
+ if (OI == OverDefinedCache.end())
+ continue;
+ SmallPtrSetImpl<Value *> &ValueSet = OI->second;
+
+ bool changed = false;
+ for (Value *V : ValsToClear) {
+ if (!ValueSet.erase(V))
+ continue;
+
+ // If we removed anything, then we potentially need to update
+ // blocks successors too.
+ changed = true;
+
+ if (ValueSet.empty()) {
+ OverDefinedCache.erase(OI);
+ break;
+ }
+ }
+
+ if (!changed) continue;
+
+ worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
+ }
+}
+
+namespace {
+  // The actual implementation of the lazy analysis and update. The cached
+  // results from previous queries are held in the LazyValueInfoCache member
+  // below, a has-a relationship rather than inheritance.
+ class LazyValueInfoImpl {
+
+ /// Cached results from previous queries
+ LazyValueInfoCache TheCache;
+
/// This stack holds the state of the value solver during a query.
/// It basically emulates the callstack of the naive
/// recursive value lookup process.
@@ -428,19 +584,6 @@ namespace {
const DataLayout &DL; ///< A mandatory DataLayout
DominatorTree *DT; ///< An optional DT pointer.
- friend struct LVIValueHandle;
-
- void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) {
- SeenBlocks.insert(BB);
-
- // Insert over-defined values into their own cache to reduce memory
- // overhead.
- if (Result.isOverdefined())
- OverDefinedCache[BB].insert(Val);
- else
- lookup(Val)[BB] = Result;
- }
-
LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB);
bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T,
LVILatticeVal &Result, Instruction *CxtI = nullptr);
@@ -450,6 +593,7 @@ namespace {
// returned means that the work item was not completely processed and must
// be revisited after going through the new items.
bool solveBlockValue(Value *Val, BasicBlock *BB);
+ bool solveBlockValueImpl(LVILatticeVal &Res, Value *Val, BasicBlock *BB);
bool solveBlockValueNonLocal(LVILatticeVal &BBLV, Value *Val, BasicBlock *BB);
bool solveBlockValuePHINode(LVILatticeVal &BBLV, PHINode *PN, BasicBlock *BB);
bool solveBlockValueSelect(LVILatticeVal &BBLV, SelectInst *S,
@@ -458,43 +602,12 @@ namespace {
BasicBlock *BB);
bool solveBlockValueCast(LVILatticeVal &BBLV, Instruction *BBI,
BasicBlock *BB);
- void intersectAssumeBlockValueConstantRange(Value *Val, LVILatticeVal &BBLV,
+ void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
+ LVILatticeVal &BBLV,
Instruction *BBI);
void solve();
- ValueCacheEntryTy &lookup(Value *V) {
- return ValueCache[LVIValueHandle(V, this)];
- }
-
- bool isOverdefined(Value *V, BasicBlock *BB) const {
- auto ODI = OverDefinedCache.find(BB);
-
- if (ODI == OverDefinedCache.end())
- return false;
-
- return ODI->second.count(V);
- }
-
- bool hasCachedValueInfo(Value *V, BasicBlock *BB) {
- if (isOverdefined(V, BB))
- return true;
-
- LVIValueHandle ValHandle(V, this);
- auto I = ValueCache.find(ValHandle);
- if (I == ValueCache.end())
- return false;
-
- return I->second.count(BB);
- }
-
- LVILatticeVal getCachedValueInfo(Value *V, BasicBlock *BB) {
- if (isOverdefined(V, BB))
- return LVILatticeVal::getOverdefined();
-
- return lookup(V)[BB];
- }
-
public:
/// This is the query interface to determine the lattice
/// value for the specified Value* at the end of the specified block.
@@ -511,60 +624,28 @@ namespace {
LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB,
Instruction *CxtI = nullptr);
- /// This is the update interface to inform the cache that an edge from
- /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc.
- void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc);
+  /// Completely flush all previously computed values.
+ void clear() {
+ TheCache.clear();
+ }
/// This is part of the update interface to inform the cache
/// that a block has been deleted.
- void eraseBlock(BasicBlock *BB);
-
- /// clear - Empty the cache.
- void clear() {
- SeenBlocks.clear();
- ValueCache.clear();
- OverDefinedCache.clear();
+ void eraseBlock(BasicBlock *BB) {
+ TheCache.eraseBlock(BB);
}
- LazyValueInfoCache(AssumptionCache *AC, const DataLayout &DL,
+ /// This is the update interface to inform the cache that an edge from
+ /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc.
+  void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+                  BasicBlock *NewSucc);
+
+ LazyValueInfoImpl(AssumptionCache *AC, const DataLayout &DL,
DominatorTree *DT = nullptr)
: AC(AC), DL(DL), DT(DT) {}
};
} // end anonymous namespace
-void LVIValueHandle::deleted() {
- SmallVector<AssertingVH<BasicBlock>, 4> ToErase;
- for (auto &I : Parent->OverDefinedCache) {
- SmallPtrSetImpl<Value *> &ValueSet = I.second;
- if (ValueSet.count(getValPtr()))
- ValueSet.erase(getValPtr());
- if (ValueSet.empty())
- ToErase.push_back(I.first);
- }
- for (auto &BB : ToErase)
- Parent->OverDefinedCache.erase(BB);
-
- // This erasure deallocates *this, so it MUST happen after we're done
- // using any and all members of *this.
- Parent->ValueCache.erase(*this);
-}
-
-void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
- // Shortcut if we have never seen this block.
- DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB);
- if (I == SeenBlocks.end())
- return;
- SeenBlocks.erase(I);
-
- auto ODI = OverDefinedCache.find(BB);
- if (ODI != OverDefinedCache.end())
- OverDefinedCache.erase(ODI);
-
- for (auto &I : ValueCache)
- I.second.erase(BB);
-}
-
-void LazyValueInfoCache::solve() {
+void LazyValueInfoImpl::solve() {
while (!BlockValueStack.empty()) {
std::pair<BasicBlock*, Value*> &e = BlockValueStack.top();
assert(BlockValueSet.count(e) && "Stack value should be in BlockValueSet!");
@@ -572,11 +653,11 @@ void LazyValueInfoCache::solve() {
if (solveBlockValue(e.second, e.first)) {
// The work item was completely processed.
assert(BlockValueStack.top() == e && "Nothing should have been pushed!");
- assert(hasCachedValueInfo(e.second, e.first) &&
+ assert(TheCache.hasCachedValueInfo(e.second, e.first) &&
"Result should be in cache!");
DEBUG(dbgs() << "POP " << *e.second << " in " << e.first->getName()
- << " = " << getCachedValueInfo(e.second, e.first) << "\n");
+ << " = " << TheCache.getCachedValueInfo(e.second, e.first) << "\n");
BlockValueStack.pop();
BlockValueSet.erase(e);
@@ -587,21 +668,20 @@ void LazyValueInfoCache::solve() {
}
}
-bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) {
+bool LazyValueInfoImpl::hasBlockValue(Value *Val, BasicBlock *BB) {
// If already a constant, there is nothing to compute.
if (isa<Constant>(Val))
return true;
- return hasCachedValueInfo(Val, BB);
+ return TheCache.hasCachedValueInfo(Val, BB);
}
-LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
+LVILatticeVal LazyValueInfoImpl::getBlockValue(Value *Val, BasicBlock *BB) {
// If already a constant, there is nothing to compute.
if (Constant *VC = dyn_cast<Constant>(Val))
return LVILatticeVal::get(VC);
- SeenBlocks.insert(BB);
- return getCachedValueInfo(Val, BB);
+ return TheCache.getCachedValueInfo(Val, BB);
}
static LVILatticeVal getFromRangeMetadata(Instruction *BBI) {
@@ -610,7 +690,7 @@ static LVILatticeVal getFromRangeMetadata(Instruction *BBI) {
case Instruction::Load:
case Instruction::Call:
case Instruction::Invoke:
- if (MDNode *Ranges = BBI->getMetadata(LLVMContext::MD_range))
+ if (MDNode *Ranges = BBI->getMetadata(LLVMContext::MD_range))
if (isa<IntegerType>(BBI->getType())) {
return LVILatticeVal::getRange(getConstantRangeFromMetadata(*Ranges));
}
@@ -620,14 +700,14 @@ static LVILatticeVal getFromRangeMetadata(Instruction *BBI) {
return LVILatticeVal::getOverdefined();
}
-bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
+bool LazyValueInfoImpl::solveBlockValue(Value *Val, BasicBlock *BB) {
if (isa<Constant>(Val))
return true;
- if (hasCachedValueInfo(Val, BB)) {
+ if (TheCache.hasCachedValueInfo(Val, BB)) {
// If we have a cached value, use that.
DEBUG(dbgs() << " reuse BB '" << BB->getName()
- << "' val=" << getCachedValueInfo(Val, BB) << '\n');
+ << "' val=" << TheCache.getCachedValueInfo(Val, BB) << '\n');
// Since we're reusing a cached value, we don't need to update the
// OverDefinedCache. The cache will have been properly updated whenever the
@@ -638,28 +718,26 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
// Hold off inserting this value into the Cache in case we have to return
// false and come back later.
LVILatticeVal Res;
+ if (!solveBlockValueImpl(Res, Val, BB))
+ // Work pushed, will revisit
+ return false;
+
+ TheCache.insertResult(Val, BB, Res);
+ return true;
+}
+
+bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res,
+ Value *Val, BasicBlock *BB) {
Instruction *BBI = dyn_cast<Instruction>(Val);
- if (!BBI || BBI->getParent() != BB) {
- if (!solveBlockValueNonLocal(Res, Val, BB))
- return false;
- insertResult(Val, BB, Res);
- return true;
- }
+ if (!BBI || BBI->getParent() != BB)
+ return solveBlockValueNonLocal(Res, Val, BB);
- if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
- if (!solveBlockValuePHINode(Res, PN, BB))
- return false;
- insertResult(Val, BB, Res);
- return true;
- }
+ if (PHINode *PN = dyn_cast<PHINode>(BBI))
+ return solveBlockValuePHINode(Res, PN, BB);
- if (auto *SI = dyn_cast<SelectInst>(BBI)) {
- if (!solveBlockValueSelect(Res, SI, BB))
- return false;
- insertResult(Val, BB, Res);
- return true;
- }
+ if (auto *SI = dyn_cast<SelectInst>(BBI))
+ return solveBlockValueSelect(Res, SI, BB);
    // If this value is a nonnull pointer, record its range and bail out. Note
// that for all other pointer typed values, we terminate the search at the
@@ -673,29 +751,20 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
PointerType *PT = dyn_cast<PointerType>(BBI->getType());
if (PT && isKnownNonNull(BBI)) {
Res = LVILatticeVal::getNot(ConstantPointerNull::get(PT));
- insertResult(Val, BB, Res);
return true;
}
if (BBI->getType()->isIntegerTy()) {
- if (isa<CastInst>(BBI)) {
- if (!solveBlockValueCast(Res, BBI, BB))
- return false;
- insertResult(Val, BB, Res);
- return true;
- }
+ if (isa<CastInst>(BBI))
+ return solveBlockValueCast(Res, BBI, BB);
+
BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
- if (BO && isa<ConstantInt>(BO->getOperand(1))) {
- if (!solveBlockValueBinaryOp(Res, BBI, BB))
- return false;
- insertResult(Val, BB, Res);
- return true;
- }
+ if (BO && isa<ConstantInt>(BO->getOperand(1)))
+ return solveBlockValueBinaryOp(Res, BBI, BB);
}
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - unknown inst def found.\n");
Res = getFromRangeMetadata(BBI);
- insertResult(Val, BB, Res);
return true;
}
@@ -748,7 +817,7 @@ static bool isObjectDereferencedInBlock(Value *Val, BasicBlock *BB) {
return false;
}
-bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
+bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV,
Value *Val, BasicBlock *BB) {
LVILatticeVal Result; // Start Undefined.
@@ -763,7 +832,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
PointerType *PTy = cast<PointerType>(Val->getType());
Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
} else {
- Result.markOverdefined();
+ Result = LVILatticeVal::getOverdefined();
}
BBLV = Result;
return true;
@@ -785,7 +854,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
if (Result.isOverdefined()) {
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined because of pred (non local).\n");
- // Bofore giving up, see if we can prove the pointer non-null local to
+ // Before giving up, see if we can prove the pointer non-null local to
// this particular block.
if (Val->getType()->isPointerTy() &&
isObjectDereferencedInBlock(Val, BB)) {
@@ -806,7 +875,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
return true;
}
-bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
+bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV,
PHINode *PN, BasicBlock *BB) {
LVILatticeVal Result; // Start Undefined.
@@ -845,15 +914,13 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
return true;
}
-static bool getValueFromFromCondition(Value *Val, ICmpInst *ICI,
- LVILatticeVal &Result,
- bool isTrueDest = true);
+static LVILatticeVal getValueFromCondition(Value *Val, Value *Cond,
+ bool isTrueDest = true);
// If we can determine a constraint on the value given conditions assumed by
// the program, intersect those constraints with BBLV
-void LazyValueInfoCache::intersectAssumeBlockValueConstantRange(Value *Val,
- LVILatticeVal &BBLV,
- Instruction *BBI) {
+void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange(
+ Value *Val, LVILatticeVal &BBLV, Instruction *BBI) {
BBI = BBI ? BBI : dyn_cast<Instruction>(Val);
if (!BBI)
return;
@@ -865,44 +932,52 @@ void LazyValueInfoCache::intersectAssumeBlockValueConstantRange(Value *Val,
if (!isValidAssumeForContext(I, BBI, DT))
continue;
- Value *C = I->getArgOperand(0);
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(C)) {
- LVILatticeVal Result;
- if (getValueFromFromCondition(Val, ICI, Result))
- BBLV = intersect(BBLV, Result);
- }
+ BBLV = intersect(BBLV, getValueFromCondition(Val, I->getArgOperand(0)));
+ }
+
+ // If guards are not used in the module, don't spend time looking for them
+ auto *GuardDecl = BBI->getModule()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_guard));
+ if (!GuardDecl || GuardDecl->use_empty())
+ return;
+
+ for (Instruction &I : make_range(BBI->getIterator().getReverse(),
+ BBI->getParent()->rend())) {
+ Value *Cond = nullptr;
+ if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(Cond))))
+ BBLV = intersect(BBLV, getValueFromCondition(Val, Cond));
}
}
-bool LazyValueInfoCache::solveBlockValueSelect(LVILatticeVal &BBLV,
+bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV,
SelectInst *SI, BasicBlock *BB) {
// Recurse on our inputs if needed
if (!hasBlockValue(SI->getTrueValue(), BB)) {
if (pushBlockValue(std::make_pair(BB, SI->getTrueValue())))
return false;
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
}
LVILatticeVal TrueVal = getBlockValue(SI->getTrueValue(), BB);
// If we hit overdefined, don't ask more queries. We want to avoid poisoning
// extra slots in the table if we can.
if (TrueVal.isOverdefined()) {
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
}
if (!hasBlockValue(SI->getFalseValue(), BB)) {
if (pushBlockValue(std::make_pair(BB, SI->getFalseValue())))
return false;
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
}
LVILatticeVal FalseVal = getBlockValue(SI->getFalseValue(), BB);
// If we hit overdefined, don't ask more queries. We want to avoid poisoning
// extra slots in the table if we can.
if (FalseVal.isOverdefined()) {
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
}
@@ -916,22 +991,22 @@ bool LazyValueInfoCache::solveBlockValueSelect(LVILatticeVal &BBLV,
// ValueTracking getting smarter looking back past our immediate inputs.)
if (SelectPatternResult::isMinOrMax(SPR.Flavor) &&
LHS == SI->getTrueValue() && RHS == SI->getFalseValue()) {
- switch (SPR.Flavor) {
- default:
- llvm_unreachable("unexpected minmax type!");
- case SPF_SMIN: /// Signed minimum
- BBLV.markConstantRange(TrueCR.smin(FalseCR));
- return true;
- case SPF_UMIN: /// Unsigned minimum
- BBLV.markConstantRange(TrueCR.umin(FalseCR));
- return true;
- case SPF_SMAX: /// Signed maximum
- BBLV.markConstantRange(TrueCR.smax(FalseCR));
- return true;
- case SPF_UMAX: /// Unsigned maximum
- BBLV.markConstantRange(TrueCR.umax(FalseCR));
- return true;
- };
+ ConstantRange ResultCR = [&]() {
+ switch (SPR.Flavor) {
+ default:
+ llvm_unreachable("unexpected minmax type!");
+ case SPF_SMIN: /// Signed minimum
+ return TrueCR.smin(FalseCR);
+ case SPF_UMIN: /// Unsigned minimum
+ return TrueCR.umin(FalseCR);
+ case SPF_SMAX: /// Signed maximum
+ return TrueCR.smax(FalseCR);
+ case SPF_UMAX: /// Unsigned maximum
+ return TrueCR.umax(FalseCR);
+ };
+ }();
+ BBLV = LVILatticeVal::getRange(ResultCR);
+ return true;
}
// TODO: ABS, NABS from the SelectPatternResult
@@ -940,27 +1015,21 @@ bool LazyValueInfoCache::solveBlockValueSelect(LVILatticeVal &BBLV,
// Can we constrain the facts about the true and false values by using the
// condition itself? This shows up with idioms like e.g. select(a > 5, a, 5).
// TODO: We could potentially refine an overdefined true value above.
- if (auto *ICI = dyn_cast<ICmpInst>(SI->getCondition())) {
- LVILatticeVal TrueValTaken, FalseValTaken;
- if (!getValueFromFromCondition(SI->getTrueValue(), ICI,
- TrueValTaken, true))
- TrueValTaken.markOverdefined();
- if (!getValueFromFromCondition(SI->getFalseValue(), ICI,
- FalseValTaken, false))
- FalseValTaken.markOverdefined();
-
- TrueVal = intersect(TrueVal, TrueValTaken);
- FalseVal = intersect(FalseVal, FalseValTaken);
-
-
- // Handle clamp idioms such as:
- // %24 = constantrange<0, 17>
- // %39 = icmp eq i32 %24, 0
- // %40 = add i32 %24, -1
- // %siv.next = select i1 %39, i32 16, i32 %40
- // %siv.next = constantrange<0, 17> not <-1, 17>
- // In general, this can handle any clamp idiom which tests the edge
- // condition via an equality or inequality.
+ Value *Cond = SI->getCondition();
+ TrueVal = intersect(TrueVal,
+ getValueFromCondition(SI->getTrueValue(), Cond, true));
+ FalseVal = intersect(FalseVal,
+ getValueFromCondition(SI->getFalseValue(), Cond, false));
+
+ // Handle clamp idioms such as:
+ // %24 = constantrange<0, 17>
+ // %39 = icmp eq i32 %24, 0
+ // %40 = add i32 %24, -1
+ // %siv.next = select i1 %39, i32 16, i32 %40
+ // %siv.next = constantrange<0, 17> not <-1, 17>
+ // In general, this can handle any clamp idiom which tests the edge
+ // condition via an equality or inequality.
+ if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
ICmpInst::Predicate Pred = ICI->getPredicate();
Value *A = ICI->getOperand(0);
if (ConstantInt *CIBase = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
@@ -1001,13 +1070,13 @@ bool LazyValueInfoCache::solveBlockValueSelect(LVILatticeVal &BBLV,
return true;
}
-bool LazyValueInfoCache::solveBlockValueCast(LVILatticeVal &BBLV,
+bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
Instruction *BBI,
BasicBlock *BB) {
if (!BBI->getOperand(0)->getType()->isSized()) {
// Without knowing how wide the input is, we can't analyze it in any useful
// way.
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
}
@@ -1024,7 +1093,7 @@ bool LazyValueInfoCache::solveBlockValueCast(LVILatticeVal &BBLV,
// Unhandled instructions are overdefined.
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined (unknown cast).\n");
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
}
@@ -1041,7 +1110,8 @@ bool LazyValueInfoCache::solveBlockValueCast(LVILatticeVal &BBLV,
ConstantRange LHSRange = ConstantRange(OperandBitWidth);
if (hasBlockValue(BBI->getOperand(0), BB)) {
LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
- intersectAssumeBlockValueConstantRange(BBI->getOperand(0), LHSVal, BBI);
+ intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal,
+ BBI);
if (LHSVal.isConstantRange())
LHSRange = LHSVal.getConstantRange();
}
@@ -1052,31 +1122,12 @@ bool LazyValueInfoCache::solveBlockValueCast(LVILatticeVal &BBLV,
// NOTE: We're currently limited by the set of operations that ConstantRange
  // can evaluate symbolically. Enhancing that set will allow us to analyze
// more definitions.
- LVILatticeVal Result;
- switch (BBI->getOpcode()) {
- case Instruction::Trunc:
- Result.markConstantRange(LHSRange.truncate(ResultBitWidth));
- break;
- case Instruction::SExt:
- Result.markConstantRange(LHSRange.signExtend(ResultBitWidth));
- break;
- case Instruction::ZExt:
- Result.markConstantRange(LHSRange.zeroExtend(ResultBitWidth));
- break;
- case Instruction::BitCast:
- Result.markConstantRange(LHSRange);
- break;
- default:
- // Should be dead if the code above is correct
- llvm_unreachable("inconsistent with above");
- break;
- }
-
- BBLV = Result;
+ auto CastOp = (Instruction::CastOps) BBI->getOpcode();
+ BBLV = LVILatticeVal::getRange(LHSRange.castOp(CastOp, ResultBitWidth));
return true;
}
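ConstantRange::castOp, adopted above, dispatches on the cast opcode and subsumes the deleted per-opcode switch. A small worked example, assuming llvm/IR/ConstantRange.h:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

ConstantRange truncDemo() {
  ConstantRange Wide(APInt(32, 10), APInt(32, 20)); // i32 values in [10, 20)
  // Equivalent to Wide.truncate(8): every element fits in 8 bits, so the
  // result is the i8 range [10, 20).
  return Wide.castOp(Instruction::Trunc, /*BitWidth=*/8);
}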
-bool LazyValueInfoCache::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
+bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
Instruction *BBI,
BasicBlock *BB) {
@@ -1101,7 +1152,7 @@ bool LazyValueInfoCache::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
// Unhandled instructions are overdefined.
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined (unknown binary operator).\n");
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
};
@@ -1118,7 +1169,8 @@ bool LazyValueInfoCache::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
ConstantRange LHSRange = ConstantRange(OperandBitWidth);
if (hasBlockValue(BBI->getOperand(0), BB)) {
LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
- intersectAssumeBlockValueConstantRange(BBI->getOperand(0), LHSVal, BBI);
+ intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal,
+ BBI);
if (LHSVal.isConstantRange())
LHSRange = LHSVal.getConstantRange();
}
@@ -1129,82 +1181,114 @@ bool LazyValueInfoCache::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
// NOTE: We're currently limited by the set of operations that ConstantRange
  // can evaluate symbolically. Enhancing that set will allow us to analyze
// more definitions.
- LVILatticeVal Result;
- switch (BBI->getOpcode()) {
- case Instruction::Add:
- Result.markConstantRange(LHSRange.add(RHSRange));
- break;
- case Instruction::Sub:
- Result.markConstantRange(LHSRange.sub(RHSRange));
- break;
- case Instruction::Mul:
- Result.markConstantRange(LHSRange.multiply(RHSRange));
- break;
- case Instruction::UDiv:
- Result.markConstantRange(LHSRange.udiv(RHSRange));
- break;
- case Instruction::Shl:
- Result.markConstantRange(LHSRange.shl(RHSRange));
- break;
- case Instruction::LShr:
- Result.markConstantRange(LHSRange.lshr(RHSRange));
- break;
- case Instruction::And:
- Result.markConstantRange(LHSRange.binaryAnd(RHSRange));
- break;
- case Instruction::Or:
- Result.markConstantRange(LHSRange.binaryOr(RHSRange));
- break;
- default:
- // Should be dead if the code above is correct
- llvm_unreachable("inconsistent with above");
- break;
- }
-
- BBLV = Result;
+ auto BinOp = (Instruction::BinaryOps) BBI->getOpcode();
+ BBLV = LVILatticeVal::getRange(LHSRange.binaryOp(BinOp, RHSRange));
return true;
}
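ConstantRange::binaryOp performs the same collapse for the binary operators. A small worked example under the same assumptions:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

ConstantRange addDemo() {
  ConstantRange LHS(APInt(8, 1), APInt(8, 10)); // i8 values in [1, 10)
  ConstantRange RHS(APInt(8, 2));               // the single value 2
  // Equivalent to LHS.add(RHS): every element shifts by 2, giving [3, 12).
  return LHS.binaryOp(Instruction::Add, RHS);
}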
-bool getValueFromFromCondition(Value *Val, ICmpInst *ICI,
- LVILatticeVal &Result, bool isTrueDest) {
- assert(ICI && "precondition");
- if (isa<Constant>(ICI->getOperand(1))) {
- if (ICI->isEquality() && ICI->getOperand(0) == Val) {
+static LVILatticeVal getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
+ bool isTrueDest) {
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+ CmpInst::Predicate Predicate = ICI->getPredicate();
+
+ if (isa<Constant>(RHS)) {
+ if (ICI->isEquality() && LHS == Val) {
// We know that V has the RHS constant if this is a true SETEQ or
// false SETNE.
- if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
- Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
+ if (isTrueDest == (Predicate == ICmpInst::ICMP_EQ))
+ return LVILatticeVal::get(cast<Constant>(RHS));
else
- Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
- return true;
+ return LVILatticeVal::getNot(cast<Constant>(RHS));
}
+ }
- // Recognize the range checking idiom that InstCombine produces.
- // (X-C1) u< C2 --> [C1, C1+C2)
- ConstantInt *NegOffset = nullptr;
- if (ICI->getPredicate() == ICmpInst::ICMP_ULT)
- match(ICI->getOperand(0), m_Add(m_Specific(Val),
- m_ConstantInt(NegOffset)));
-
- ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1));
- if (CI && (ICI->getOperand(0) == Val || NegOffset)) {
- // Calculate the range of values that are allowed by the comparison
- ConstantRange CmpRange(CI->getValue());
- ConstantRange TrueValues =
- ConstantRange::makeAllowedICmpRegion(ICI->getPredicate(), CmpRange);
+ if (!Val->getType()->isIntegerTy())
+ return LVILatticeVal::getOverdefined();
+
+ // Use ConstantRange::makeAllowedICmpRegion in order to determine the possible
+  // range of Val guaranteed by the condition. Recognize comparisons in the form
+ // of:
+ // icmp <pred> Val, ...
+ // icmp <pred> (add Val, Offset), ...
+ // The latter is the range checking idiom that InstCombine produces. Subtract
+ // the offset from the allowed range for RHS in this case.
+
+  // Val or (add Val, Offset) can be on either side of the comparison.
+ if (LHS != Val && !match(LHS, m_Add(m_Specific(Val), m_ConstantInt()))) {
+ std::swap(LHS, RHS);
+ Predicate = CmpInst::getSwappedPredicate(Predicate);
+ }
- if (NegOffset) // Apply the offset from above.
- TrueValues = TrueValues.subtract(NegOffset->getValue());
+ ConstantInt *Offset = nullptr;
+ if (LHS != Val)
+ match(LHS, m_Add(m_Specific(Val), m_ConstantInt(Offset)));
+
+ if (LHS == Val || Offset) {
+ // Calculate the range of values that are allowed by the comparison
+ ConstantRange RHSRange(RHS->getType()->getIntegerBitWidth(),
+ /*isFullSet=*/true);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS))
+ RHSRange = ConstantRange(CI->getValue());
+ else if (Instruction *I = dyn_cast<Instruction>(RHS))
+ if (auto *Ranges = I->getMetadata(LLVMContext::MD_range))
+ RHSRange = getConstantRangeFromMetadata(*Ranges);
+
+ // If we're interested in the false dest, invert the condition
+ CmpInst::Predicate Pred =
+ isTrueDest ? Predicate : CmpInst::getInversePredicate(Predicate);
+ ConstantRange TrueValues =
+ ConstantRange::makeAllowedICmpRegion(Pred, RHSRange);
- // If we're interested in the false dest, invert the condition.
- if (!isTrueDest) TrueValues = TrueValues.inverse();
+ if (Offset) // Apply the offset from above.
+ TrueValues = TrueValues.subtract(Offset->getValue());
- Result = LVILatticeVal::getRange(std::move(TrueValues));
- return true;
- }
+ return LVILatticeVal::getRange(std::move(TrueValues));
}
- return false;
+ return LVILatticeVal::getOverdefined();
+}
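
An exhaustive 8-bit sanity check of the idiom documented above, in plain standalone C++ (no LLVM API involved): (X - C1) u< C2 holds exactly on the wrapped half-open interval [C1, C1+C2), including when C1+C2 wraps:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t C1 = 200, C2 = 100;             // C1 + C2 wraps mod 256
      const uint8_t Lo = C1, Hi = uint8_t(C1 + C2); // half-open [Lo, Hi)
      for (unsigned V = 0; V <= 0xFF; ++V) {
        uint8_t X = uint8_t(V);
        bool Idiom = uint8_t(X - C1) < C2;            // (X - C1) u< C2
        bool InSet = Lo <= Hi ? (X >= Lo && X < Hi)   // ordinary interval
                              : (X >= Lo || X < Hi);  // wrapped interval
        assert(Idiom == InSet);
      }
      return 0;
    }
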
+
+static LVILatticeVal
+getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest,
+ DenseMap<Value*, LVILatticeVal> &Visited);
+
+static LVILatticeVal
+getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest,
+ DenseMap<Value*, LVILatticeVal> &Visited) {
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cond))
+ return getValueFromICmpCondition(Val, ICI, isTrueDest);
+
+  // Handle conditions in the form of (cond1 && cond2); we know that on the
+ // true dest path both of the conditions hold.
+ if (!isTrueDest)
+ return LVILatticeVal::getOverdefined();
+
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond);
+ if (!BO || BO->getOpcode() != BinaryOperator::And)
+ return LVILatticeVal::getOverdefined();
+
+ auto RHS = getValueFromCondition(Val, BO->getOperand(0), isTrueDest, Visited);
+ auto LHS = getValueFromCondition(Val, BO->getOperand(1), isTrueDest, Visited);
+ return intersect(RHS, LHS);
+}
+
+static LVILatticeVal
+getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest,
+ DenseMap<Value*, LVILatticeVal> &Visited) {
+ auto I = Visited.find(Cond);
+ if (I != Visited.end())
+ return I->second;
+
+ auto Result = getValueFromConditionImpl(Val, Cond, isTrueDest, Visited);
+ Visited[Cond] = Result;
+ return Result;
+}
+
+LVILatticeVal getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest) {
+ assert(Cond && "precondition");
+ DenseMap<Value*, LVILatticeVal> Visited;
+ return getValueFromCondition(Val, Cond, isTrueDest, Visited);
}
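
The Visited map above memoizes one lattice value per condition node, so a condition DAG with shared subtrees is evaluated once per node rather than once per path. A minimal standalone sketch of the same pattern (Cond and Fact are illustrative stand-ins, not LLVM types, and boolean AND stands in for intersect):

    #include <cassert>
    #include <map>

    struct Cond {
      const Cond *L = nullptr, *R = nullptr; // both non-null: an 'and' node
      bool Fact = false;                     // leaf payload
    };

    static bool eval(const Cond *C, std::map<const Cond *, bool> &Visited) {
      auto It = Visited.find(C);
      if (It != Visited.end())
        return It->second;                   // shared subtree: reuse result
      bool R = C->L ? (eval(C->L, Visited) && eval(C->R, Visited)) : C->Fact;
      Visited[C] = R;
      return R;
    }

    int main() {
      Cond A{nullptr, nullptr, true}, B{nullptr, nullptr, true};
      Cond Shared{&A, &B}, Top{&Shared, &Shared};
      std::map<const Cond *, bool> Visited;
      assert(eval(&Top, Visited));
      assert(Visited.size() == 4); // A, B, Shared, Top: each evaluated once
      return 0;
    }
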
/// \brief Compute the value of Val on the edge BBFrom -> BBTo. Returns false if
@@ -1233,9 +1317,9 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
// If the condition of the branch is an equality comparison, we may be
// able to infer the value.
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
- if (getValueFromFromCondition(Val, ICI, Result, isTrueDest))
- return true;
+ Result = getValueFromCondition(Val, BI->getCondition(), isTrueDest);
+ if (!Result.isOverdefined())
+ return true;
}
}
@@ -1267,7 +1351,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
/// \brief Compute the value of Val on the edge BBFrom -> BBTo or the value at
/// the basic block if the edge does not constrain Val.
-bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
+bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom,
BasicBlock *BBTo, LVILatticeVal &Result,
Instruction *CxtI) {
// If already a constant, there is nothing to compute.
@@ -1280,7 +1364,7 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
if (!getEdgeValueLocal(Val, BBFrom, BBTo, LocalResult))
// If we couldn't constrain the value on the edge, LocalResult doesn't
// provide any information.
- LocalResult.markOverdefined();
+ LocalResult = LVILatticeVal::getOverdefined();
if (hasSingleValue(LocalResult)) {
// Can't get any more precise here
@@ -1298,39 +1382,40 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
// Try to intersect ranges of the BB and the constraint on the edge.
LVILatticeVal InBlock = getBlockValue(Val, BBFrom);
- intersectAssumeBlockValueConstantRange(Val, InBlock, BBFrom->getTerminator());
+ intersectAssumeOrGuardBlockValueConstantRange(Val, InBlock,
+ BBFrom->getTerminator());
// We can use the context instruction (generically the ultimate instruction
// the calling pass is trying to simplify) here, even though the result of
// this function is generally cached when called from the solve* functions
// (and that cached result might be used with queries using a different
// context instruction), because when this function is called from the solve*
// functions, the context instruction is not provided. When called from
- // LazyValueInfoCache::getValueOnEdge, the context instruction is provided,
+ // LazyValueInfoImpl::getValueOnEdge, the context instruction is provided,
// but then the result is not cached.
- intersectAssumeBlockValueConstantRange(Val, InBlock, CxtI);
+ intersectAssumeOrGuardBlockValueConstantRange(Val, InBlock, CxtI);
Result = intersect(LocalResult, InBlock);
return true;
}
-LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB,
+LVILatticeVal LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB,
Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
<< BB->getName() << "'\n");
assert(BlockValueStack.empty() && BlockValueSet.empty());
if (!hasBlockValue(V, BB)) {
- pushBlockValue(std::make_pair(BB, V));
+ pushBlockValue(std::make_pair(BB, V));
solve();
}
LVILatticeVal Result = getBlockValue(V, BB);
- intersectAssumeBlockValueConstantRange(V, Result, CxtI);
+ intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI);
DEBUG(dbgs() << " Result = " << Result << "\n");
return Result;
}
-LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) {
+LVILatticeVal LazyValueInfoImpl::getValueAt(Value *V, Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting value " << *V << " at '"
<< CxtI->getName() << "'\n");
@@ -1340,13 +1425,13 @@ LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) {
LVILatticeVal Result = LVILatticeVal::getOverdefined();
if (auto *I = dyn_cast<Instruction>(V))
Result = getFromRangeMetadata(I);
- intersectAssumeBlockValueConstantRange(V, Result, CxtI);
+ intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI);
DEBUG(dbgs() << " Result = " << Result << "\n");
return Result;
}
-LVILatticeVal LazyValueInfoCache::
+LVILatticeVal LazyValueInfoImpl::
getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
@@ -1364,75 +1449,24 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
return Result;
}
-void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
- BasicBlock *NewSucc) {
- // When an edge in the graph has been threaded, values that we could not
- // determine a value for before (i.e. were marked overdefined) may be
- // possible to solve now. We do NOT try to proactively update these values.
- // Instead, we clear their entries from the cache, and allow lazy updating to
- // recompute them when needed.
-
- // The updating process is fairly simple: we need to drop cached info
- // for all values that were marked overdefined in OldSucc, and for those same
- // values in any successor of OldSucc (except NewSucc) in which they were
- // also marked overdefined.
- std::vector<BasicBlock*> worklist;
- worklist.push_back(OldSucc);
-
- auto I = OverDefinedCache.find(OldSucc);
- if (I == OverDefinedCache.end())
- return; // Nothing to process here.
- SmallVector<Value *, 4> ValsToClear(I->second.begin(), I->second.end());
-
- // Use a worklist to perform a depth-first search of OldSucc's successors.
- // NOTE: We do not need a visited list since any blocks we have already
- // visited will have had their overdefined markers cleared already, and we
- // thus won't loop to their successors.
- while (!worklist.empty()) {
- BasicBlock *ToUpdate = worklist.back();
- worklist.pop_back();
-
- // Skip blocks only accessible through NewSucc.
- if (ToUpdate == NewSucc) continue;
-
- bool changed = false;
- for (Value *V : ValsToClear) {
- // If a value was marked overdefined in OldSucc, and is here too...
- auto OI = OverDefinedCache.find(ToUpdate);
- if (OI == OverDefinedCache.end())
- continue;
- SmallPtrSetImpl<Value *> &ValueSet = OI->second;
- if (!ValueSet.count(V))
- continue;
-
- ValueSet.erase(V);
- if (ValueSet.empty())
- OverDefinedCache.erase(OI);
-
- // If we removed anything, then we potentially need to update
- // blocks successors too.
- changed = true;
- }
-
- if (!changed) continue;
-
- worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
- }
+void LazyValueInfoImpl::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+ BasicBlock *NewSucc) {
+ TheCache.threadEdgeImpl(OldSucc, NewSucc);
}
//===----------------------------------------------------------------------===//
// LazyValueInfo Impl
//===----------------------------------------------------------------------===//
-/// This lazily constructs the LazyValueInfoCache.
-static LazyValueInfoCache &getCache(void *&PImpl, AssumptionCache *AC,
- const DataLayout *DL,
- DominatorTree *DT = nullptr) {
+/// This lazily constructs the LazyValueInfoImpl.
+static LazyValueInfoImpl &getImpl(void *&PImpl, AssumptionCache *AC,
+ const DataLayout *DL,
+ DominatorTree *DT = nullptr) {
if (!PImpl) {
assert(DL && "getCache() called with a null DataLayout");
- PImpl = new LazyValueInfoCache(AC, *DL, DT);
+ PImpl = new LazyValueInfoImpl(AC, *DL, DT);
}
- return *static_cast<LazyValueInfoCache*>(PImpl);
+ return *static_cast<LazyValueInfoImpl*>(PImpl);
}
bool LazyValueInfoWrapperPass::runOnFunction(Function &F) {
@@ -1445,7 +1479,7 @@ bool LazyValueInfoWrapperPass::runOnFunction(Function &F) {
Info.TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
if (Info.PImpl)
- getCache(Info.PImpl, Info.AC, &DL, Info.DT).clear();
+ getImpl(Info.PImpl, Info.AC, &DL, Info.DT).clear();
// Fully lazy.
return false;
@@ -1464,7 +1498,7 @@ LazyValueInfo::~LazyValueInfo() { releaseMemory(); }
void LazyValueInfo::releaseMemory() {
// If the cache was allocated, free it.
if (PImpl) {
- delete &getCache(PImpl, AC, nullptr);
+ delete &getImpl(PImpl, AC, nullptr);
PImpl = nullptr;
}
}
@@ -1479,11 +1513,29 @@ LazyValueInfo LazyValueAnalysis::run(Function &F, FunctionAnalysisManager &FAM)
return LazyValueInfo(&AC, &TLI, DT);
}
+/// Returns true if we can statically tell that this value will never be a
+/// "useful" constant. In practice, this means we've got something like an
+/// alloca or a malloc call for which a comparison against a constant can
+/// only be guarding dead code. Note that we are potentially giving up some
+/// precision in dead code (a constant result) in favour of avoiding an
+/// expensive search for an easily answered common query.
+static bool isKnownNonConstant(Value *V) {
+ V = V->stripPointerCasts();
+  // The return value of an alloca cannot be a Constant.
+ if (isa<AllocaInst>(V))
+ return true;
+ return false;
+}
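
A plain C++ illustration of the fast path above (nothing LLVM-specific): a pointer derived directly from an alloca can never compare equal to a meaningful constant, so the branch below only guards dead code and a full LVI search for it would be wasted effort:

    #include <cstdio>

    int main() {
      int Local = 0;    // becomes an alloca in IR
      int *P = &Local;
      if (P == nullptr) // provably false; only guards dead code
        puts("unreachable");
      return 0;
    }
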
+
Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
Instruction *CxtI) {
+ // Bail out early if V is known not to be a Constant.
+ if (isKnownNonConstant(V))
+ return nullptr;
+
const DataLayout &DL = BB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
+ getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
if (Result.isConstant())
return Result.getConstant();
@@ -1501,12 +1553,15 @@ ConstantRange LazyValueInfo::getConstantRange(Value *V, BasicBlock *BB,
unsigned Width = V->getType()->getIntegerBitWidth();
const DataLayout &DL = BB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
- assert(!Result.isConstant());
+ getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
if (Result.isUndefined())
return ConstantRange(Width, /*isFullSet=*/false);
if (Result.isConstantRange())
return Result.getConstantRange();
+  // We represent ConstantInt constants as constant ranges, but other kinds
+  // of integer constants, e.g. ConstantExpr, will be tagged as constants.
+ assert(!(Result.isConstant() && isa<ConstantInt>(Result.getConstant())) &&
+ "ConstantInt value must be represented as constantrange");
return ConstantRange(Width, /*isFullSet=*/true);
}
@@ -1517,7 +1572,7 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
Instruction *CxtI) {
const DataLayout &DL = FromBB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+ getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
if (Result.isConstant())
return Result.getConstant();
@@ -1564,8 +1619,8 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,
}
// Handle more complex predicates.
- ConstantRange TrueValues =
- ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue());
+ ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
+ (ICmpInst::Predicate)Pred, CI->getValue());
if (TrueValues.contains(CR))
return LazyValueInfo::True;
if (TrueValues.inverse().contains(CR))
@@ -1605,7 +1660,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
Instruction *CxtI) {
const DataLayout &DL = FromBB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+ getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
return getPredicateResult(Pred, C, Result, DL, TLI);
}
@@ -1613,8 +1668,19 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
LazyValueInfo::Tristate
LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
Instruction *CxtI) {
+  // Whether a pointer is or is not null is a commonly queried predicate. If
+  // isKnownNonNull can tell us the result of the predicate, we can
+  // return it quickly. But this is only a fast path, and falling
+ // through would still be correct.
+ if (V->getType()->isPointerTy() && C->isNullValue() &&
+ isKnownNonNull(V->stripPointerCasts())) {
+ if (Pred == ICmpInst::ICMP_EQ)
+ return LazyValueInfo::False;
+ else if (Pred == ICmpInst::ICMP_NE)
+ return LazyValueInfo::True;
+ }
const DataLayout &DL = CxtI->getModule()->getDataLayout();
- LVILatticeVal Result = getCache(PImpl, AC, &DL, DT).getValueAt(V, CxtI);
+ LVILatticeVal Result = getImpl(PImpl, AC, &DL, DT).getValueAt(V, CxtI);
Tristate Ret = getPredicateResult(Pred, C, Result, DL, TLI);
if (Ret != Unknown)
return Ret;
@@ -1673,7 +1739,7 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
}
if (Baseline != Unknown)
return Baseline;
- }
+ }
  // For a comparison where V is outside this block, it's possible
// that we've branched on it before. Look to see if the value is known
@@ -1704,13 +1770,13 @@ void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
BasicBlock *NewSucc) {
if (PImpl) {
const DataLayout &DL = PredBB->getModule()->getDataLayout();
- getCache(PImpl, AC, &DL, DT).threadEdge(PredBB, OldSucc, NewSucc);
+ getImpl(PImpl, AC, &DL, DT).threadEdge(PredBB, OldSucc, NewSucc);
}
}
void LazyValueInfo::eraseBlock(BasicBlock *BB) {
if (PImpl) {
const DataLayout &DL = BB->getModule()->getDataLayout();
- getCache(PImpl, AC, &DL, DT).eraseBlock(BB);
+ getImpl(PImpl, AC, &DL, DT).eraseBlock(BB);
}
}
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index fdf5f55dab9f..2ca46b1fe872 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -35,27 +35,48 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/Lint.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <string>
+
using namespace llvm;
namespace {
@@ -64,7 +85,7 @@ namespace {
static const unsigned Write = 2;
static const unsigned Callee = 4;
static const unsigned Branchee = 8;
- }
+ } // end namespace MemRef
class Lint : public FunctionPass, public InstVisitor<Lint> {
friend class InstVisitor<Lint>;
@@ -159,7 +180,7 @@ namespace {
WriteValues({V1, Vs...});
}
};
-}
+} // end anonymous namespace
char Lint::ID = 0;
INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
@@ -173,7 +194,7 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
// Assert - We know that cond should be true, if not print an error message.
#define Assert(C, ...) \
- do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0)
+ do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (false)
// Lint::run - This is the main Analysis entry point for a
// function.
@@ -680,9 +701,9 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
if (Instruction *Inst = dyn_cast<Instruction>(V)) {
if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT, AC))
return findValueImpl(W, OffsetOk, Visited);
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (Value *W = ConstantFoldConstantExpression(CE, *DL, TLI))
- if (W != V)
+ } else if (auto *C = dyn_cast<Constant>(V)) {
+ if (Value *W = ConstantFoldConstant(C, *DL, TLI))
+ if (W && W != V)
return findValueImpl(W, OffsetOk, Visited);
}
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 75426b54195a..e46541e6538d 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -55,6 +55,10 @@ static bool isDereferenceableAndAlignedPointer(
const Value *V, unsigned Align, const APInt &Size, const DataLayout &DL,
const Instruction *CtxI, const DominatorTree *DT,
SmallPtrSetImpl<const Value *> &Visited) {
+  // Already visited? Bail out; we've likely hit unreachable code.
+ if (!Visited.insert(V).second)
+ return false;
+
// Note that it is not safe to speculate into a malloc'd region because
// malloc may return null.
@@ -87,9 +91,11 @@ static bool isDereferenceableAndAlignedPointer(
// then the GEP (== Base + Offset == k_0 * Align + k_1 * Align) is also
// aligned to Align bytes.
- return Visited.insert(Base).second &&
- isDereferenceableAndAlignedPointer(Base, Align, Offset + Size, DL,
- CtxI, DT, Visited);
+ // Offset and Size may have different bit widths if we have visited an
+ // addrspacecast, so we can't do arithmetic directly on the APInt values.
+ return isDereferenceableAndAlignedPointer(
+ Base, Align, Offset + Size.sextOrTrunc(Offset.getBitWidth()),
+ DL, CtxI, DT, Visited);
}
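
A minimal sketch of the width hazard the comment above describes, assuming LLVM headers are available (the helper name is made up for illustration): APInt arithmetic asserts when the operand widths differ, which is why Size is normalized to Offset's width before the addition:

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    // Offset may be, say, 32 bits wide (an address space with narrow
    // pointers) while Size is 64 bits; Offset + Size would assert.
    static APInt neededBytes(const APInt &Offset, const APInt &Size) {
      return Offset + Size.sextOrTrunc(Offset.getBitWidth());
    }
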
// For gc.relocate, look through relocations
@@ -302,11 +308,11 @@ llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
"to scan backward from a given instruction, when searching for "
"available loaded value"));
-Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
+Value *llvm::FindAvailableLoadedValue(LoadInst *Load,
+ BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan,
- AliasAnalysis *AA, AAMDNodes *AATags,
- bool *IsLoadCSE) {
+ AliasAnalysis *AA, bool *IsLoadCSE) {
if (MaxInstsToScan == 0)
MaxInstsToScan = ~0U;
@@ -356,8 +362,6 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
if (LI->isAtomic() < Load->isAtomic())
return nullptr;
- if (AATags)
- LI->getAAMetadata(*AATags);
if (IsLoadCSE)
*IsLoadCSE = true;
return LI;
@@ -377,8 +381,8 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
if (SI->isAtomic() < Load->isAtomic())
return nullptr;
- if (AATags)
- SI->getAAMetadata(*AATags);
+ if (IsLoadCSE)
+ *IsLoadCSE = false;
return SI->getOperand(0);
}
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index 5214eb7c051c..2f3dca3d23fa 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -12,19 +12,61 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPassManager.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "loop-accesses"
@@ -94,14 +136,18 @@ bool VectorizerParams::isInterleaveForced() {
}
void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message,
- const Function *TheFunction,
- const Loop *TheLoop,
- const char *PassName) {
+ const Loop *TheLoop, const char *PassName,
+ OptimizationRemarkEmitter &ORE) {
DebugLoc DL = TheLoop->getStartLoc();
- if (const Instruction *I = Message.getInstr())
- DL = I->getDebugLoc();
- emitOptimizationRemarkAnalysis(TheFunction->getContext(), PassName,
- *TheFunction, DL, Message.str());
+ const Value *V = TheLoop->getHeader();
+ if (const Instruction *I = Message.getInstr()) {
+    // If there is no debug location attached to the instruction, fall back to
+    // the loop's.
+ if (I->getDebugLoc())
+ DL = I->getDebugLoc();
+ V = I->getParent();
+ }
+ ORE.emitOptimizationRemarkAnalysis(PassName, DL, V, Message.str());
}
Value *llvm::stripIntegerCast(Value *V) {
@@ -463,6 +509,7 @@ void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
}
namespace {
+
/// \brief Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
@@ -886,7 +933,7 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
/// \brief Check whether the access through \p Ptr has a constant stride.
int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
const Loop *Lp, const ValueToValueMap &StridesMap,
- bool Assume) {
+ bool Assume, bool ShouldCheckWrap) {
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
@@ -925,9 +972,9 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
// to access the pointer value "0" which is undefined behavior in address
// space 0, therefore we can also vectorize this case.
bool IsInBoundsGEP = isInBoundsGep(Ptr);
- bool IsNoWrapAddRec =
- PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
- isNoWrapAddRec(Ptr, AR, PSE, Lp);
+ bool IsNoWrapAddRec = !ShouldCheckWrap ||
+ PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
+ isNoWrapAddRec(Ptr, AR, PSE, Lp);
bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
if (Assume) {
@@ -1028,8 +1075,8 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
return false;
// Make sure that A and B have the same type if required.
- if(CheckType && PtrA->getType() != PtrB->getType())
- return false;
+ if (CheckType && PtrA->getType() != PtrB->getType())
+ return false;
unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
@@ -1451,7 +1498,7 @@ MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
auto &IndexVector = Accesses.find(Access)->second;
SmallVector<Instruction *, 4> Insts;
- std::transform(IndexVector.begin(), IndexVector.end(),
+ transform(IndexVector,
std::back_inserter(Insts),
[&](unsigned Idx) { return this->InstMap[Idx]; });
return Insts;
@@ -1478,25 +1525,23 @@ bool LoopAccessInfo::canAnalyzeLoop() {
// We can only analyze innermost loops.
if (!TheLoop->empty()) {
DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
- emitAnalysis(LoopAccessReport() << "loop is not the innermost loop");
+ recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
return false;
}
// We must have a single backedge.
if (TheLoop->getNumBackEdges() != 1) {
DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
- emitAnalysis(
- LoopAccessReport() <<
- "loop control flow is not understood by analyzer");
+ recordAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by analyzer";
return false;
}
// We must have a single exiting block.
if (!TheLoop->getExitingBlock()) {
DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
- emitAnalysis(
- LoopAccessReport() <<
- "loop control flow is not understood by analyzer");
+ recordAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by analyzer";
return false;
}
@@ -1505,17 +1550,16 @@ bool LoopAccessInfo::canAnalyzeLoop() {
// instructions in the loop are executed the same number of times.
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
- emitAnalysis(
- LoopAccessReport() <<
- "loop control flow is not understood by analyzer");
+ recordAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by analyzer";
return false;
}
// ScalarEvolution needs to be able to find the exit count.
const SCEV *ExitCount = PSE->getBackedgeTakenCount();
if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
- emitAnalysis(LoopAccessReport()
- << "could not determine number of loop iterations");
+ recordAnalysis("CantComputeNumberOfIterations")
+ << "could not determine number of loop iterations";
DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
return false;
}
@@ -1564,8 +1608,8 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
auto *Ld = dyn_cast<LoadInst>(&I);
if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
- emitAnalysis(LoopAccessReport(Ld)
- << "read with atomic ordering or volatile read");
+ recordAnalysis("NonSimpleLoad", Ld)
+ << "read with atomic ordering or volatile read";
DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
CanVecMem = false;
return;
@@ -1582,14 +1626,14 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
if (I.mayWriteToMemory()) {
auto *St = dyn_cast<StoreInst>(&I);
if (!St) {
- emitAnalysis(LoopAccessReport(St)
- << "instruction cannot be vectorized");
+ recordAnalysis("CantVectorizeInstruction", St)
+ << "instruction cannot be vectorized";
CanVecMem = false;
return;
}
if (!St->isSimple() && !IsAnnotatedParallel) {
- emitAnalysis(LoopAccessReport(St)
- << "write with atomic ordering or volatile write");
+ recordAnalysis("NonSimpleStore", St)
+ << "write with atomic ordering or volatile write";
DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
CanVecMem = false;
return;
@@ -1697,7 +1741,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
TheLoop, SymbolicStrides);
if (!CanDoRTIfNeeded) {
- emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
+ recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
<< "the array bounds.\n");
CanVecMem = false;
@@ -1728,8 +1772,8 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
// Check that we found the bounds for the pointer.
if (!CanDoRTIfNeeded) {
- emitAnalysis(LoopAccessReport()
- << "cannot check memory dependencies at runtime");
+ recordAnalysis("CantCheckMemDepsAtRunTime")
+ << "cannot check memory dependencies at runtime";
DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
CanVecMem = false;
return;
@@ -1744,12 +1788,11 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
<< (PtrRtChecking->Need ? "" : " don't")
<< " need runtime memory checks.\n");
else {
- emitAnalysis(
- LoopAccessReport()
+ recordAnalysis("UnsafeMemDep")
<< "unsafe dependent memory operations in loop. Use "
"#pragma loop distribute(enable) to allow loop distribution "
"to attempt to isolate the offending operations into a separate "
- "loop");
+ "loop";
DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
}
}
@@ -1763,13 +1806,35 @@ bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
return !DT->dominates(BB, Latch);
}
-void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) {
+OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
+ Instruction *I) {
assert(!Report && "Multiple reports generated");
- Report = Message;
+
+ Value *CodeRegion = TheLoop->getHeader();
+ DebugLoc DL = TheLoop->getStartLoc();
+
+ if (I) {
+ CodeRegion = I->getParent();
+    // If there is no debug location attached to the instruction, fall back to
+    // the loop's.
+ if (I->getDebugLoc())
+ DL = I->getDebugLoc();
+ }
+
+ Report = make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
+ CodeRegion);
+ return *Report;
}
bool LoopAccessInfo::isUniform(Value *V) const {
- return (PSE->getSE()->isLoopInvariant(PSE->getSE()->getSCEV(V), TheLoop));
+ auto *SE = PSE->getSE();
+ // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
+ // never considered uniform.
+ // TODO: Is this really what we want? Even without FP SCEV, we may want some
+ // trivially loop-invariant FP values to be considered uniform.
+ if (!SE->isSCEVable(V->getType()))
+ return false;
+ return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
}
// FIXME: this function is currently a duplicate of the one in
@@ -1784,6 +1849,7 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
}
namespace {
+
/// \brief IR Values for the lower and upper bounds of a pointer evolution. We
/// need to use value-handles because SCEV expansion can invalidate previously
/// expanded values. Thus expansion of a pointer can invalidate the bounds for
@@ -1792,6 +1858,7 @@ struct PointerBounds {
TrackingVH<Value> Start;
TrackingVH<Value> End;
};
+
} // end anonymous namespace
/// \brief Expand code for the lower and upper bound of the pointer group \p CG
@@ -1803,18 +1870,24 @@ expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
const SCEV *Sc = SE->getSCEV(Ptr);
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+ LLVMContext &Ctx = Loc->getContext();
+
+ // Use this type for pointer arithmetic.
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+
if (SE->isLoopInvariant(Sc, TheLoop)) {
DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
<< "\n");
- return {Ptr, Ptr};
+ // Ptr could be in the loop body. If so, expand a new one at the correct
+ // location.
+ Instruction *Inst = dyn_cast<Instruction>(Ptr);
+ Value *NewPtr = (Inst && TheLoop->contains(Inst))
+ ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
+ : Ptr;
+ return {NewPtr, NewPtr};
} else {
- unsigned AS = Ptr->getType()->getPointerAddressSpace();
- LLVMContext &Ctx = Loc->getContext();
-
- // Use this type for pointer arithmetic.
- Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
Value *Start = nullptr, *End = nullptr;
-
DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
@@ -1833,9 +1906,8 @@ static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
// Here we're relying on the SCEV Expander's cache to only emit code for the
// same bounds once.
- std::transform(
- PointerChecks.begin(), PointerChecks.end(),
- std::back_inserter(ChecksWithBounds),
+ transform(
+ PointerChecks, std::back_inserter(ChecksWithBounds),
[&](const RuntimePointerChecking::PointerCheck &Check) {
PointerBounds
First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
@@ -1967,7 +2039,7 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
}
if (Report)
- OS.indent(Depth) << "Report: " << Report->str() << "\n";
+ OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
if (auto *Dependences = DepChecker->getDependences()) {
OS.indent(Depth) << "Dependences:\n";
@@ -2046,10 +2118,10 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
-char LoopAccessAnalysis::PassID;
+AnalysisKey LoopAccessAnalysis::Key;
-LoopAccessInfo LoopAccessAnalysis::run(Loop &L, AnalysisManager<Loop> &AM) {
- const AnalysisManager<Function> &FAM =
+LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM) {
+ const FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
Function &F = *L.getHeader()->getParent();
auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(F);
@@ -2070,7 +2142,7 @@ LoopAccessInfo LoopAccessAnalysis::run(Loop &L, AnalysisManager<Loop> &AM) {
}
PreservedAnalyses LoopAccessInfoPrinterPass::run(Loop &L,
- AnalysisManager<Loop> &AM) {
+ LoopAnalysisManager &AM) {
Function &F = *L.getHeader()->getParent();
auto &LAI = AM.getResult<LoopAccessAnalysis>(L);
OS << "Loop access info in function '" << F.getName() << "':\n";
@@ -2080,7 +2152,9 @@ PreservedAnalyses LoopAccessInfoPrinterPass::run(Loop &L,
}
namespace llvm {
+
Pass *createLAAPass() {
return new LoopAccessLegacyAnalysis();
}
-}
+
+} // end namespace llvm
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 30f7ef392422..19c0171740c9 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -143,42 +143,47 @@ PHINode *Loop::getCanonicalInductionVariable() const {
return nullptr;
}
-bool Loop::isLCSSAForm(DominatorTree &DT) const {
- for (BasicBlock *BB : this->blocks()) {
- for (Instruction &I : *BB) {
- // Tokens can't be used in PHI nodes and live-out tokens prevent loop
- // optimizations, so for the purposes of considered LCSSA form, we
- // can ignore them.
- if (I.getType()->isTokenTy())
- continue;
+// Check that the instructions in 'BB' don't have any uses outside of 'L'.
+static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
+ DominatorTree &DT) {
+ for (const Instruction &I : BB) {
+ // Tokens can't be used in PHI nodes and live-out tokens prevent loop
+  // optimizations, so for the purposes of considering LCSSA form, we
+ // can ignore them.
+ if (I.getType()->isTokenTy())
+ continue;
- for (Use &U : I.uses()) {
- Instruction *UI = cast<Instruction>(U.getUser());
- BasicBlock *UserBB = UI->getParent();
- if (PHINode *P = dyn_cast<PHINode>(UI))
- UserBB = P->getIncomingBlock(U);
-
- // Check the current block, as a fast-path, before checking whether
- // the use is anywhere in the loop. Most values are used in the same
- // block they are defined in. Also, blocks not reachable from the
- // entry are special; uses in them don't need to go through PHIs.
- if (UserBB != BB &&
- !contains(UserBB) &&
- DT.isReachableFromEntry(UserBB))
- return false;
- }
+ for (const Use &U : I.uses()) {
+ const Instruction *UI = cast<Instruction>(U.getUser());
+ const BasicBlock *UserBB = UI->getParent();
+ if (const PHINode *P = dyn_cast<PHINode>(UI))
+ UserBB = P->getIncomingBlock(U);
+
+ // Check the current block, as a fast-path, before checking whether
+ // the use is anywhere in the loop. Most values are used in the same
+ // block they are defined in. Also, blocks not reachable from the
+ // entry are special; uses in them don't need to go through PHIs.
+ if (UserBB != &BB && !L.contains(UserBB) &&
+ DT.isReachableFromEntry(UserBB))
+ return false;
}
}
-
return true;
}
-bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT) const {
- if (!isLCSSAForm(DT))
- return false;
+bool Loop::isLCSSAForm(DominatorTree &DT) const {
+  // For each block, check that its instructions have no uses outside this loop.
+ return all_of(this->blocks(), [&](const BasicBlock *BB) {
+ return isBlockInLCSSAForm(*this, *BB, DT);
+ });
+}
- return std::all_of(begin(), end(), [&](const Loop *L) {
- return L->isRecursivelyLCSSAForm(DT);
+bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const {
+  // For each block we check that its instructions don't have any uses outside
+  // of its innermost loop. This process transitively guarantees that the
+  // current loop and all of the nested loops are in LCSSA form.
+ return all_of(this->blocks(), [&](const BasicBlock *BB) {
+ return isBlockInLCSSAForm(*LI.getLoopFor(BB), *BB, DT);
});
}
@@ -300,23 +305,40 @@ bool Loop::isAnnotatedParallel() const {
}
DebugLoc Loop::getStartLoc() const {
+ return getLocRange().getStart();
+}
+
+Loop::LocRange Loop::getLocRange() const {
// If we have a debug location in the loop ID, then use it.
- if (MDNode *LoopID = getLoopID())
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i)
- if (DILocation *L = dyn_cast<DILocation>(LoopID->getOperand(i)))
- return DebugLoc(L);
+ if (MDNode *LoopID = getLoopID()) {
+ DebugLoc Start;
+ // We use the first DebugLoc in the header as the start location of the loop
+    // and, if there is a second DebugLoc in the header, we use it as the end
+    // location of the loop.
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ if (DILocation *L = dyn_cast<DILocation>(LoopID->getOperand(i))) {
+ if (!Start)
+ Start = DebugLoc(L);
+ else
+ return LocRange(Start, DebugLoc(L));
+ }
+ }
+
+ if (Start)
+ return LocRange(Start);
+ }
// Try the pre-header first.
if (BasicBlock *PHeadBB = getLoopPreheader())
if (DebugLoc DL = PHeadBB->getTerminator()->getDebugLoc())
- return DL;
+ return LocRange(DL);
// If we have no pre-header or there are no instructions with debug
// info in it, try the header.
if (BasicBlock *HeadBB = getHeader())
- return HeadBB->getTerminator()->getDebugLoc();
+ return LocRange(HeadBB->getTerminator()->getDebugLoc());
- return DebugLoc();
+ return LocRange();
}
bool Loop::hasDedicatedExits() const {
@@ -366,8 +388,7 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
// In case of multiple edges from current block to exit block, collect
// only one edge in ExitBlocks. Use switchExitBlocks to keep track of
// duplicate edges.
- if (std::find(SwitchExitBlocks.begin(), SwitchExitBlocks.end(), Successor)
- == SwitchExitBlocks.end()) {
+ if (!is_contained(SwitchExitBlocks, Successor)) {
SwitchExitBlocks.push_back(Successor);
ExitBlocks.push_back(Successor);
}
@@ -387,6 +408,10 @@ BasicBlock *Loop::getUniqueExitBlock() const {
LLVM_DUMP_METHOD void Loop::dump() const {
print(dbgs());
}
+
+LLVM_DUMP_METHOD void Loop::dumpVerbose() const {
+ print(dbgs(), /*Depth=*/ 0, /*Verbose=*/ true);
+}
#endif
//===----------------------------------------------------------------------===//
@@ -532,8 +557,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
assert(Subloop && "subloop is not an ancestor of the original loop");
}
// Get the current nearest parent of the Subloop exits, initially Unloop.
- NearLoop =
- SubloopParents.insert(std::make_pair(Subloop, &Unloop)).first->second;
+ NearLoop = SubloopParents.insert({Subloop, &Unloop}).first->second;
}
succ_iterator I = succ_begin(BB), E = succ_end(BB);
@@ -645,9 +669,9 @@ void LoopInfo::markAsRemoved(Loop *Unloop) {
}
}
-char LoopAnalysis::PassID;
+AnalysisKey LoopAnalysis::Key;
-LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
+LoopInfo LoopAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
// FIXME: Currently we create a LoopInfo from scratch for every function.
// This may prove to be too wasteful due to deallocating and re-allocating
// memory each time for the underlying map and vector datastructures. At some
@@ -660,7 +684,7 @@ LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
}
PreservedAnalyses LoopPrinterPass::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
AM.getResult<LoopAnalysis>(F).print(OS);
return PreservedAnalyses::all();
}
@@ -702,8 +726,10 @@ void LoopInfoWrapperPass::verifyAnalysis() const {
// -verify-loop-info option can enable this. In order to perform some
// checking by default, LoopPass has been taught to call verifyLoop manually
// during loop pass sequences.
- if (VerifyLoopInfo)
- LI.verify();
+ if (VerifyLoopInfo) {
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI.verify(DT);
+ }
}
void LoopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -715,6 +741,14 @@ void LoopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
LI.print(OS);
}
+PreservedAnalyses LoopVerifierPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ LI.verify(DT);
+ return PreservedAnalyses::all();
+}
+
//===----------------------------------------------------------------------===//
// LoopBlocksDFS implementation
//
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 222345c9a980..b5b8040984d7 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/LoopPassManager.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/OptBisect.h"
@@ -48,7 +49,7 @@ public:
[](BasicBlock *BB) { return BB; });
if (BBI != L->blocks().end() &&
isFunctionInPrintList((*BBI)->getParent()->getName())) {
- AnalysisManager<Loop> DummyLAM;
+ LoopAnalysisManager DummyLAM;
P.run(*L, DummyLAM);
}
return false;
@@ -131,8 +132,8 @@ void LPPassManager::deleteSimpleAnalysisLoop(Loop *L) {
// Recurse through all subloops and all loops into LQ.
static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
LQ.push_back(L);
- for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I)
- addLoopIntoQueue(*I, LQ);
+ for (Loop *I : reverse(*L))
+ addLoopIntoQueue(I, LQ);
}
/// Pass Manager itself does not invalidate any analysis info.
@@ -140,6 +141,7 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
// LPPassManager needs LoopInfo. In the long term LoopInfo class will
// become part of LPPassManager.
Info.addRequired<LoopInfoWrapperPass>();
+ Info.addRequired<DominatorTreeWrapperPass>();
Info.setPreservesAll();
}
@@ -148,6 +150,7 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
bool LPPassManager::runOnFunction(Function &F) {
auto &LIWP = getAnalysis<LoopInfoWrapperPass>();
LI = &LIWP.getLoopInfo();
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
bool Changed = false;
// Collect inherited analysis from Module level pass manager.
@@ -162,16 +165,14 @@ bool LPPassManager::runOnFunction(Function &F) {
// Note that LoopInfo::iterator visits loops in reverse program
// order. Here, reverse_iterator gives us a forward order, and the LoopQueue
// reverses the order a third time by popping from the back.
- for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
- addLoopIntoQueue(*I, LQ);
+ for (Loop *L : reverse(*LI))
+ addLoopIntoQueue(L, LQ);
if (LQ.empty()) // No loops, skip calling finalizers
return false;
// Initialization
- for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end();
- I != E; ++I) {
- Loop *L = *I;
+ for (Loop *L : LQ) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
LoopPass *P = getContainedPass(Index);
Changed |= P->doInitialization(L, *this);
@@ -220,6 +221,12 @@ bool LPPassManager::runOnFunction(Function &F) {
TimeRegion PassTimer(getPassTimer(&LIWP));
CurrentLoop->verifyLoop();
}
+      // Here we apply the same reasoning as in the above case. The only
+      // difference is that LPPassManager might run passes which do not
+      // require LCSSA form (LoopPassPrinter for example). We should skip
+      // verification for such passes.
+ if (mustPreserveAnalysisID(LCSSAVerificationPass::ID))
+ CurrentLoop->isRecursivelyLCSSAForm(*DT, *LI);
// Then call the regular verifyAnalysis functions.
verifyPreservedAnalysis(P);
@@ -355,3 +362,8 @@ bool LoopPass::skipLoop(const Loop *L) const {
}
return false;
}
+
+char LCSSAVerificationPass::ID = 0;
+INITIALIZE_PASS(LCSSAVerificationPass, "lcssa-verification", "LCSSA Verifier",
+ false, false)
+
diff --git a/lib/Analysis/LoopPassManager.cpp b/lib/Analysis/LoopPassManager.cpp
index 8bac19a58217..044e5d55dafd 100644
--- a/lib/Analysis/LoopPassManager.cpp
+++ b/lib/Analysis/LoopPassManager.cpp
@@ -17,12 +17,31 @@
using namespace llvm;
-// Explicit instantiations for core typedef'ed templates.
+// Explicit template instantiations and specialization definitions for core
+// template typedefs.
namespace llvm {
template class PassManager<Loop>;
template class AnalysisManager<Loop>;
template class InnerAnalysisManagerProxy<LoopAnalysisManager, Function>;
template class OuterAnalysisManagerProxy<FunctionAnalysisManager, Loop>;
+
+template <>
+bool LoopAnalysisManagerFunctionProxy::Result::invalidate(
+ Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // If this proxy isn't marked as preserved, the set of Function objects in
+ // the module may have changed. We therefore can't call
+ // InnerAM->invalidate(), because any pointers to Functions it has may be
+ // stale.
+ auto PAC = PA.getChecker<LoopAnalysisManagerFunctionProxy>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Loop>>())
+ InnerAM->clear();
+
+  // FIXME: Proper support for invalidation isn't yet implemented for the LPM.
+
+ // Return false to indicate that this result is still a valid proxy.
+ return false;
+}
}
PreservedAnalyses llvm::getLoopPassPreservedAnalyses() {
@@ -32,6 +51,7 @@ PreservedAnalyses llvm::getLoopPassPreservedAnalyses() {
PA.preserve<ScalarEvolutionAnalysis>();
// TODO: What we really want to do here is preserve an AA category, but that
// concept doesn't exist yet.
+ PA.preserve<AAManager>();
PA.preserve<BasicAA>();
PA.preserve<GlobalsAA>();
PA.preserve<SCEVAA>();
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index f23477622bec..2d8274040d39 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -77,8 +77,12 @@ static const std::pair<LibFunc::Func, AllocFnsTy> AllocationFnData[] = {
// TODO: Handle "int posix_memalign(void **, size_t, size_t)"
};
+static Function *getCalledFunction(const Value *V, bool LookThroughBitCast,
+ bool &IsNoBuiltin) {
+ // Don't care about intrinsics in this case.
+ if (isa<IntrinsicInst>(V))
+ return nullptr;
-static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) {
if (LookThroughBitCast)
V = V->stripPointerCasts();
@@ -86,8 +90,7 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) {
if (!CS.getInstruction())
return nullptr;
- if (CS.isNoBuiltin())
- return nullptr;
+ IsNoBuiltin = CS.isNoBuiltin();
Function *Callee = CS.getCalledFunction();
if (!Callee || !Callee->isDeclaration())
@@ -98,47 +101,19 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) {
/// Returns the allocation data for the given value if it's either a call to a
/// known allocation function, or a call to a function with the allocsize
/// attribute.
-static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy,
- const TargetLibraryInfo *TLI,
- bool LookThroughBitCast = false) {
- // Skip intrinsics
- if (isa<IntrinsicInst>(V))
- return None;
-
- const Function *Callee = getCalledFunction(V, LookThroughBitCast);
- if (!Callee)
- return None;
-
- // If it has allocsize, we can skip checking if it's a known function.
- //
- // MallocLike is chosen here because allocsize makes no guarantees about the
- // nullness of the result of the function, nor does it deal with strings, nor
- // does it require that the memory returned is zeroed out.
- LLVM_CONSTEXPR auto AllocSizeAllocTy = MallocLike;
- if ((AllocTy & AllocSizeAllocTy) == AllocSizeAllocTy &&
- Callee->hasFnAttribute(Attribute::AllocSize)) {
- Attribute Attr = Callee->getFnAttribute(Attribute::AllocSize);
- std::pair<unsigned, Optional<unsigned>> Args = Attr.getAllocSizeArgs();
-
- AllocFnsTy Result;
- Result.AllocTy = AllocSizeAllocTy;
- Result.NumParams = Callee->getNumOperands();
- Result.FstParam = Args.first;
- Result.SndParam = Args.second.getValueOr(-1);
- return Result;
- }
-
+static Optional<AllocFnsTy>
+getAllocationDataForFunction(const Function *Callee, AllocType AllocTy,
+ const TargetLibraryInfo *TLI) {
// Make sure that the function is available.
StringRef FnName = Callee->getName();
LibFunc::Func TLIFn;
if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
return None;
- const auto *Iter =
- std::find_if(std::begin(AllocationFnData), std::end(AllocationFnData),
- [TLIFn](const std::pair<LibFunc::Func, AllocFnsTy> &P) {
- return P.first == TLIFn;
- });
+ const auto *Iter = find_if(
+ AllocationFnData, [TLIFn](const std::pair<LibFunc::Func, AllocFnsTy> &P) {
+ return P.first == TLIFn;
+ });
if (Iter == std::end(AllocationFnData))
return None;
@@ -164,6 +139,48 @@ static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy,
return None;
}
+static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy,
+ const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast = false) {
+ bool IsNoBuiltinCall;
+ if (const Function *Callee =
+ getCalledFunction(V, LookThroughBitCast, IsNoBuiltinCall))
+ if (!IsNoBuiltinCall)
+ return getAllocationDataForFunction(Callee, AllocTy, TLI);
+ return None;
+}
+
+static Optional<AllocFnsTy> getAllocationSize(const Value *V,
+ const TargetLibraryInfo *TLI) {
+ bool IsNoBuiltinCall;
+ const Function *Callee =
+ getCalledFunction(V, /*LookThroughBitCast=*/false, IsNoBuiltinCall);
+ if (!Callee)
+ return None;
+
+ // Prefer to use existing information over allocsize. This will give us an
+ // accurate AllocTy.
+ if (!IsNoBuiltinCall)
+ if (Optional<AllocFnsTy> Data =
+ getAllocationDataForFunction(Callee, AnyAlloc, TLI))
+ return Data;
+
+ Attribute Attr = Callee->getFnAttribute(Attribute::AllocSize);
+ if (Attr == Attribute())
+ return None;
+
+ std::pair<unsigned, Optional<unsigned>> Args = Attr.getAllocSizeArgs();
+
+ AllocFnsTy Result;
+ // Because allocsize only tells us how many bytes are allocated, we're not
+ // really allowed to assume anything, so we use MallocLike.
+ Result.AllocTy = MallocLike;
+ Result.NumParams = Callee->getNumOperands();
+ Result.FstParam = Args.first;
+ Result.SndParam = Args.second.getValueOr(-1);
+ return Result;
+}
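
For context on the allocsize fallback above, a hedged source-level example (the allocator name is hypothetical; requires Clang's attribute syntax): the C-level alloc_size attribute uses 1-based parameter indices and is lowered to the 0-based IR allocsize attribute that this code consults:

    #include <cstddef>
    #include <cstdio>
    #include <cstdlib>

    // alloc_size(1): parameter 1 is the byte count; Clang emits the IR
    // attribute allocsize(0) on this function.
    __attribute__((alloc_size(1))) void *my_alloc(size_t NBytes) {
      return std::malloc(NBytes);
    }

    int main() {
      void *P = my_alloc(42);
      // With optimization enabled this folds to 42 via allocsize, even
      // though my_alloc is not a known TLI allocation function.
      printf("%zu\n", __builtin_object_size(P, 0));
      std::free(P);
      return 0;
    }
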
+
static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) {
ImmutableCallSite CS(LookThroughBitCast ? V->stripPointerCasts() : V);
return CS && CS.paramHasAttr(AttributeSet::ReturnIndex, Attribute::NoAlias);
@@ -389,6 +406,36 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
return true;
}
+ConstantInt *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ bool MustSucceed) {
+ assert(ObjectSize->getIntrinsicID() == Intrinsic::objectsize &&
+ "ObjectSize must be a call to llvm.objectsize!");
+
+ bool MaxVal = cast<ConstantInt>(ObjectSize->getArgOperand(1))->isZero();
+ ObjSizeMode Mode;
+ // Unless we have to fold this to something, try to be as accurate as
+ // possible.
+ if (MustSucceed)
+ Mode = MaxVal ? ObjSizeMode::Max : ObjSizeMode::Min;
+ else
+ Mode = ObjSizeMode::Exact;
+
+ // FIXME: Does it make sense to just return a failure value if the size won't
+ // fit in the output and `!MustSucceed`?
+ uint64_t Size;
+ auto *ResultType = cast<IntegerType>(ObjectSize->getType());
+ if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, false, Mode) &&
+ isUIntN(ResultType->getBitWidth(), Size))
+ return ConstantInt::get(ResultType, Size);
+
+ if (!MustSucceed)
+ return nullptr;
+
+ return ConstantInt::get(ResultType, MaxVal ? -1ULL : 0);
+}
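
The MaxVal/Min fallback constants above are observable through the Clang builtin that lowers to llvm.objectsize; a small hedged demo of the documented __builtin_object_size behavior:

    #include <cstddef>
    #include <cstdio>

    char Buf[64];

    // Inside these functions the pointee is unknown, so folding must use
    // the mode's failure constant, mirroring `MaxVal ? -1ULL : 0` above:
    // max mode (type 0) yields (size_t)-1, min mode (type 2) yields 0.
    __attribute__((noinline)) size_t unknownMax(char *P) {
      return __builtin_object_size(P, 0);
    }
    __attribute__((noinline)) size_t unknownMin(char *P) {
      return __builtin_object_size(P, 2);
    }

    int main() {
      printf("%zu\n", __builtin_object_size(Buf, 0));        // known object: 64
      printf("%zu %zu\n", unknownMax(Buf), unknownMin(Buf)); // (size_t)-1 and 0
      return 0;
    }
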
+
STATISTIC(ObjectVisitorArgument,
"Number of arguments with unsolved size and offset");
STATISTIC(ObjectVisitorLoad,
@@ -476,8 +523,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
}
SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) {
- Optional<AllocFnsTy> FnData =
- getAllocationData(CS.getInstruction(), AnyAlloc, TLI);
+ Optional<AllocFnsTy> FnData = getAllocationSize(CS.getInstruction(), TLI);
if (!FnData)
return unknown();
@@ -736,8 +782,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) {
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallSite(CallSite CS) {
- Optional<AllocFnsTy> FnData =
- getAllocationData(CS.getInstruction(), AnyAlloc, TLI);
+ Optional<AllocFnsTy> FnData = getAllocationSize(CS.getInstruction(), TLI);
if (!FnData)
return unknown();
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 33499334fefa..2746361ab4b5 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -15,24 +15,38 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PredIteratorCache.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+
using namespace llvm;
#define DEBUG_TYPE "memdep"
@@ -166,7 +180,7 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
BasicBlock *BB) {
unsigned Limit = BlockScanLimit;
- // Walk backwards through the block, looking for dependencies
+ // Walk backwards through the block, looking for dependencies.
while (ScanIt != BB->begin()) {
// Limit the amount of scanning we do so we don't end up with quadratic
// running time on extreme testcases.
@@ -220,26 +234,6 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
return MemDepResult::getNonFuncLocal();
}
-/// Return true if LI is a load that would fully overlap MemLoc if done as
-/// a wider legal integer load.
-///
-/// MemLocBase, MemLocOffset are lazily computed here the first time the
-/// base/offs of memloc is needed.
-static bool isLoadLoadClobberIfExtendedToFullWidth(const MemoryLocation &MemLoc,
- const Value *&MemLocBase,
- int64_t &MemLocOffs,
- const LoadInst *LI) {
- const DataLayout &DL = LI->getModule()->getDataLayout();
-
- // If we haven't already computed the base/offset of MemLoc, do so now.
- if (!MemLocBase)
- MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);
-
- unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
- MemLocBase, MemLocOffs, MemLoc.Size, LI);
- return Size != 0;
-}
-
unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
const LoadInst *LI) {
@@ -292,7 +286,7 @@ unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
- while (1) {
+ while (true) {
// If this load size is bigger than our known alignment or would not fit
// into a native integer register, then we fail.
if (NewLoadByteSize > LoadAlign ||
@@ -327,7 +321,7 @@ static bool isVolatile(Instruction *Inst) {
MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
- BasicBlock *BB, Instruction *QueryInst) {
+ BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
if (QueryInst != nullptr) {
if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
@@ -338,49 +332,69 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
return invariantGroupDependency;
}
}
- return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst);
+ return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
+ Limit);
}
MemDepResult
MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
- BasicBlock *BB) {
+ BasicBlock *BB) {
+
+ auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
+ if (!InvariantGroupMD)
+ return MemDepResult::getUnknown();
+
Value *LoadOperand = LI->getPointerOperand();
  // It is not safe to walk the use list of a global value, because function
// passes aren't allowed to look outside their functions.
if (isa<GlobalValue>(LoadOperand))
return MemDepResult::getUnknown();
- auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
- if (!InvariantGroupMD)
- return MemDepResult::getUnknown();
-
- MemDepResult Result = MemDepResult::getUnknown();
- llvm::SmallSet<Value *, 14> Seen;
// Queue to process all pointers that are equivalent to load operand.
- llvm::SmallVector<Value *, 8> LoadOperandsQueue;
- LoadOperandsQueue.push_back(LoadOperand);
+ SmallVector<const Value *, 8> LoadOperandsQueue;
+ SmallSet<const Value *, 14> SeenValues;
+ auto TryInsertToQueue = [&](Value *V) {
+ if (SeenValues.insert(V).second)
+ LoadOperandsQueue.push_back(V);
+ };
+
+ TryInsertToQueue(LoadOperand);
while (!LoadOperandsQueue.empty()) {
- Value *Ptr = LoadOperandsQueue.pop_back_val();
+ const Value *Ptr = LoadOperandsQueue.pop_back_val();
+ assert(Ptr);
if (isa<GlobalValue>(Ptr))
continue;
- if (auto *BCI = dyn_cast<BitCastInst>(Ptr)) {
- if (Seen.insert(BCI->getOperand(0)).second) {
- LoadOperandsQueue.push_back(BCI->getOperand(0));
- }
- }
-
- for (Use &Us : Ptr->uses()) {
+ // Value comes from bitcast: Ptr = bitcast x. Insert x.
+ if (auto *BCI = dyn_cast<BitCastInst>(Ptr))
+ TryInsertToQueue(BCI->getOperand(0));
+    // A GEP with all-zero indices is equivalent to a bitcast.
+    // FIXME: we are not sure whether some bitcasts should be canonicalized to
+    // gep 0 or gep 0 to bitcast because of SROA, so there are 2 forms. Once
+    // typeless pointers land upstream, both cases will go away (and this BFS
+    // also won't be needed).
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+ if (GEP->hasAllZeroIndices())
+ TryInsertToQueue(GEP->getOperand(0));
+
+ for (const Use &Us : Ptr->uses()) {
auto *U = dyn_cast<Instruction>(Us.getUser());
if (!U || U == LI || !DT.dominates(U, LI))
continue;
- if (auto *BCI = dyn_cast<BitCastInst>(U)) {
- if (Seen.insert(BCI).second) {
- LoadOperandsQueue.push_back(BCI);
- }
+      // A bitcast or an all-zero GEP is using Ptr. Add it to the queue to
+      // check its users. U = bitcast Ptr
+ if (isa<BitCastInst>(U)) {
+ TryInsertToQueue(U);
continue;
}
+ // U = getelementptr Ptr, 0, 0...
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
+ if (GEP->hasAllZeroIndices()) {
+ TryInsertToQueue(U);
+ continue;
+ }
+
      // If we hit a load/store with the same invariant.group metadata (and the
      // same pointer operand), we can assume that the value pointed to by the
      // pointer operand didn't change.
@@ -389,18 +403,20 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
return MemDepResult::getDef(U);
}
}
- return Result;
+ return MemDepResult::getUnknown();
}
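The pointer-equivalence rule the BFS above relies on can be restated as a small predicate; this is a hedged paraphrase (the helper name is hypothetical, not code from the patch):

    // A value is "the same pointer" if it merely re-types another one:
    // either a bitcast, or a getelementptr whose indices are all zero.
    static bool isPointerCastLike(const Value *V) {
      if (isa<BitCastInst>(V))
        return true;
      if (auto *GEP = dyn_cast<GetElementPtrInst>(V))
        return GEP->hasAllZeroIndices();
      return false;
    }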
MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
- BasicBlock *BB, Instruction *QueryInst) {
-
- const Value *MemLocBase = nullptr;
- int64_t MemLocOffset = 0;
- unsigned Limit = BlockScanLimit;
+ BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
bool isInvariantLoad = false;
+ if (!Limit) {
+ unsigned DefaultLimit = BlockScanLimit;
+ return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
+ &DefaultLimit);
+ }
+
// We must be careful with atomic accesses, as they may allow another thread
// to touch this location, clobbering it. We are conservative: if the
// QueryInst is not a simple (non-atomic) memory access, we automatically
@@ -474,8 +490,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// Limit the amount of scanning we do so we don't end up with quadratic
// running time on extreme testcases.
- --Limit;
- if (!Limit)
+ --*Limit;
+ if (!*Limit)
return MemDepResult::getUnknown();
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -530,21 +546,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
AliasResult R = AA.alias(LoadLoc, MemLoc);
if (isLoad) {
- if (R == NoAlias) {
- // If this is an over-aligned integer load (for example,
- // "load i8* %P, align 4") see if it would obviously overlap with the
- // queried location if widened to a larger load (e.g. if the queried
- // location is 1 byte at P+1). If so, return it as a load/load
- // clobber result, allowing the client to decide to widen the load if
- // it wants to.
- if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
- if (LI->getAlignment() * 8 > ITy->getPrimitiveSizeInBits() &&
- isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
- MemLocOffset, LI))
- return MemDepResult::getClobber(Inst);
- }
+ if (R == NoAlias)
continue;
- }
// Must aliased loads are defs of each other.
if (R == MustAlias)
@@ -697,7 +700,7 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
// Do the scan.
if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
- // No dependence found. If this is the entry block of the function, it is
+ // No dependence found. If this is the entry block of the function, it is
// unknown, otherwise it is non-local.
if (QueryParent != &QueryParent->getParent()->getEntryBlock())
LocalCache = MemDepResult::getNonLocal();
@@ -709,7 +712,7 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
if (MemLoc.Ptr) {
// If we can do a pointer scan, make it happen.
bool isLoad = !(MR & MRI_Mod);
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
+ if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
LocalCache = getPointerDependencyFrom(
@@ -1010,7 +1013,7 @@ SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
Cache.insert(Entry, Val);
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
}
case 1:
// One new entry, Just insert the new value at the appropriate position.
@@ -1659,10 +1662,10 @@ void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
#endif
}
-char MemoryDependenceAnalysis::PassID;
+AnalysisKey MemoryDependenceAnalysis::Key;
MemoryDependenceResults
-MemoryDependenceAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
+MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
auto &AA = AM.getResult<AAManager>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
@@ -1684,6 +1687,7 @@ INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
}
+
MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() {}
void MemoryDependenceWrapperPass::releaseMemory() {
@@ -1698,6 +1702,28 @@ void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}
+bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // Check whether our analysis is preserved.
+ auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
+ // If not, give up now.
+ return true;
+
+ // Check whether the analyses we depend on became invalid for any reason.
+ if (Inv.invalidate<AAManager>(F, PA) ||
+ Inv.invalidate<AssumptionAnalysis>(F, PA) ||
+ Inv.invalidate<DominatorTreeAnalysis>(F, PA))
+ return true;
+
+ // Otherwise this analysis result remains valid.
+ return false;
+}
+
+unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
+ return BlockScanLimit;
+}
+
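A hedged sketch of the new out-parameter form (MD, Loc, ScanIt, BB and QueryInst are assumed to be in scope): the budget is decremented in place, so a chain of queries shares a single scan limit.

    unsigned Limit = MD.getDefaultBlockScanLimit();
    MemDepResult Dep = MD.getPointerDependencyFrom(
        Loc, /*isLoad=*/true, ScanIt, BB, QueryInst, &Limit);
    // On return, Limit holds the budget remaining for any follow-up scan.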
bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 36c47141a45f..f675830aa67d 100644
--- a/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -74,7 +74,8 @@ void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
// filenames), so just print a few useful things.
for (DICompileUnit *CU : Finder.compile_units()) {
O << "Compile unit: ";
- if (const char *Lang = dwarf::LanguageString(CU->getSourceLanguage()))
+ auto Lang = dwarf::LanguageString(CU->getSourceLanguage());
+ if (!Lang.empty())
O << Lang;
else
O << "unknown-language(" << CU->getSourceLanguage() << ")";
@@ -90,7 +91,8 @@ void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
O << '\n';
}
- for (const DIGlobalVariable *GV : Finder.global_variables()) {
+ for (auto GVU : Finder.global_variables()) {
+ const auto *GV = GVU->getVariable();
O << "Global variable: " << GV->getName();
printFile(O, GV->getFilename(), GV->getDirectory(), GV->getLine());
if (!GV->getLinkageName().empty())
@@ -105,14 +107,15 @@ void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
printFile(O, T->getFilename(), T->getDirectory(), T->getLine());
if (auto *BT = dyn_cast<DIBasicType>(T)) {
O << " ";
- if (const char *Encoding =
- dwarf::AttributeEncodingString(BT->getEncoding()))
+ auto Encoding = dwarf::AttributeEncodingString(BT->getEncoding());
+ if (!Encoding.empty())
O << Encoding;
else
O << "unknown-encoding(" << BT->getEncoding() << ')';
} else {
O << ' ';
- if (const char *Tag = dwarf::TagString(T->getTag()))
+ auto Tag = dwarf::TagString(T->getTag());
+ if (!Tag.empty())
O << Tag;
else
O << "unknown-tag(" << T->getTag() << ")";
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index c9ac2bdb7942..1d2ffc1abe1f 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -13,16 +13,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Object/IRObjectFile.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -31,7 +37,7 @@ using namespace llvm;
// Walk through the operands of a given User via worklist iteration and populate
// the set of GlobalValue references encountered. Invoked either on an
// Instruction or a GlobalVariable (which walks its initializer).
-static void findRefEdges(const User *CurUser, DenseSet<const Value *> &RefEdges,
+static void findRefEdges(const User *CurUser, SetVector<ValueInfo> &RefEdges,
SmallPtrSet<const User *, 8> &Visited) {
SmallVector<const User *, 32> Worklist;
Worklist.push_back(CurUser);
@@ -50,12 +56,12 @@ static void findRefEdges(const User *CurUser, DenseSet<const Value *> &RefEdges,
continue;
if (isa<BlockAddress>(Operand))
continue;
- if (isa<GlobalValue>(Operand)) {
+ if (auto *GV = dyn_cast<GlobalValue>(Operand)) {
// We have a reference to a global value. This should be added to
// the reference set unless it is a callee. Callees are handled
// specially by WriteFunction and are added to a separate list.
if (!(CS && CS.isCallee(&OI)))
- RefEdges.insert(Operand);
+ RefEdges.insert(GV);
continue;
}
Worklist.push_back(Operand);
@@ -63,98 +69,178 @@ static void findRefEdges(const User *CurUser, DenseSet<const Value *> &RefEdges,
}
}
-void ModuleSummaryIndexBuilder::computeFunctionSummary(
- const Function &F, BlockFrequencyInfo *BFI) {
- // Summary not currently supported for anonymous functions, they must
- // be renamed.
- if (!F.hasName())
- return;
+static CalleeInfo::HotnessType getHotness(uint64_t ProfileCount,
+ ProfileSummaryInfo *PSI) {
+ if (!PSI)
+ return CalleeInfo::HotnessType::Unknown;
+ if (PSI->isHotCount(ProfileCount))
+ return CalleeInfo::HotnessType::Hot;
+ if (PSI->isColdCount(ProfileCount))
+ return CalleeInfo::HotnessType::Cold;
+ return CalleeInfo::HotnessType::None;
+}
+
+static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
+ const Function &F, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI,
+ bool HasLocalsInUsed) {
+  // Summaries are not currently supported for anonymous functions; they
+  // should have been named by this point.
+ assert(F.hasName());
unsigned NumInsts = 0;
// Map from callee ValueId to profile count. Used to accumulate profile
// counts for all static calls to a given callee.
- DenseMap<const Value *, CalleeInfo> CallGraphEdges;
- DenseMap<GlobalValue::GUID, CalleeInfo> IndirectCallEdges;
- DenseSet<const Value *> RefEdges;
+ MapVector<ValueInfo, CalleeInfo> CallGraphEdges;
+ SetVector<ValueInfo> RefEdges;
+ SetVector<GlobalValue::GUID> TypeTests;
ICallPromotionAnalysis ICallAnalysis;
+ bool HasInlineAsmMaybeReferencingInternal = false;
SmallPtrSet<const User *, 8> Visited;
for (const BasicBlock &BB : F)
for (const Instruction &I : BB) {
- if (!isa<DbgInfoIntrinsic>(I))
- ++NumInsts;
-
- if (auto CS = ImmutableCallSite(&I)) {
- auto *CalledFunction = CS.getCalledFunction();
- // Check if this is a direct call to a known function.
- if (CalledFunction) {
- if (CalledFunction->hasName() && !CalledFunction->isIntrinsic()) {
- auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None;
- auto *CalleeId =
- M->getValueSymbolTable().lookup(CalledFunction->getName());
- CallGraphEdges[CalleeId] +=
- (ScaledCount ? ScaledCount.getValue() : 0);
- }
- } else {
- // Otherwise, check for an indirect call (call to a non-const value
- // that isn't an inline assembly call).
- const CallInst *CI = dyn_cast<CallInst>(&I);
- if (CS.getCalledValue() && !isa<Constant>(CS.getCalledValue()) &&
- !(CI && CI->isInlineAsm())) {
- uint32_t NumVals, NumCandidates;
- uint64_t TotalCount;
- auto CandidateProfileData =
- ICallAnalysis.getPromotionCandidatesForInstruction(
- &I, NumVals, TotalCount, NumCandidates);
- for (auto &Candidate : CandidateProfileData)
- IndirectCallEdges[Candidate.Value] += Candidate.Count;
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ ++NumInsts;
+ findRefEdges(&I, RefEdges, Visited);
+ auto CS = ImmutableCallSite(&I);
+ if (!CS)
+ continue;
+
+ const auto *CI = dyn_cast<CallInst>(&I);
+ // Since we don't know exactly which local values are referenced in inline
+ // assembly, conservatively mark the function as possibly referencing
+ // a local value from inline assembly to ensure we don't export a
+ // reference (which would require renaming and promotion of the
+ // referenced value).
+ if (HasLocalsInUsed && CI && CI->isInlineAsm())
+ HasInlineAsmMaybeReferencingInternal = true;
+
+ auto *CalledValue = CS.getCalledValue();
+ auto *CalledFunction = CS.getCalledFunction();
+ // Check if this is an alias to a function. If so, get the
+ // called aliasee for the checks below.
+ if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
+        assert(!CalledFunction &&
+               "Expected null called function in callsite for alias");
+ CalledFunction = dyn_cast<Function>(GA->getBaseObject());
+ }
+ // Check if this is a direct call to a known function or a known
+ // intrinsic, or an indirect call with profile data.
+ if (CalledFunction) {
+ if (CalledFunction->isIntrinsic()) {
+ if (CalledFunction->getIntrinsicID() != Intrinsic::type_test)
+ continue;
+        // Produce a summary from type.test intrinsics. We only summarize
+        // type.test intrinsics that have uses other than an llvm.assume
+        // intrinsic. Type tests that are only assumed are relevant only to
+        // the devirtualization pass, not the type test lowering pass.
+ bool HasNonAssumeUses = llvm::any_of(CI->uses(), [](const Use &CIU) {
+ auto *AssumeCI = dyn_cast<CallInst>(CIU.getUser());
+ if (!AssumeCI)
+ return true;
+ Function *F = AssumeCI->getCalledFunction();
+ return !F || F->getIntrinsicID() != Intrinsic::assume;
+ });
+ if (HasNonAssumeUses) {
+ auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
+ if (auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata()))
+ TypeTests.insert(GlobalValue::getGUID(TypeId->getString()));
}
}
+      // We should have named any anonymous globals by now.
+ assert(CalledFunction->hasName());
+ auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None;
+ auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI)
+ : CalleeInfo::HotnessType::Unknown;
+
+ // Use the original CalledValue, in case it was an alias. We want
+ // to record the call edge to the alias in that case. Eventually
+ // an alias summary will be created to associate the alias and
+ // aliasee.
+ CallGraphEdges[cast<GlobalValue>(CalledValue)].updateHotness(Hotness);
+ } else {
+ // Skip inline assembly calls.
+ if (CI && CI->isInlineAsm())
+ continue;
+ // Skip direct calls.
+ if (!CS.getCalledValue() || isa<Constant>(CS.getCalledValue()))
+ continue;
+
+ uint32_t NumVals, NumCandidates;
+ uint64_t TotalCount;
+ auto CandidateProfileData =
+ ICallAnalysis.getPromotionCandidatesForInstruction(
+ &I, NumVals, TotalCount, NumCandidates);
+ for (auto &Candidate : CandidateProfileData)
+ CallGraphEdges[Candidate.Value].updateHotness(
+ getHotness(Candidate.Count, PSI));
}
- findRefEdges(&I, RefEdges, Visited);
}
GlobalValueSummary::GVFlags Flags(F);
- std::unique_ptr<FunctionSummary> FuncSummary =
- llvm::make_unique<FunctionSummary>(Flags, NumInsts);
- FuncSummary->addCallGraphEdges(CallGraphEdges);
- FuncSummary->addCallGraphEdges(IndirectCallEdges);
- FuncSummary->addRefEdges(RefEdges);
- Index->addGlobalValueSummary(F.getName(), std::move(FuncSummary));
+ auto FuncSummary = llvm::make_unique<FunctionSummary>(
+ Flags, NumInsts, RefEdges.takeVector(), CallGraphEdges.takeVector(),
+ TypeTests.takeVector());
+ if (HasInlineAsmMaybeReferencingInternal)
+ FuncSummary->setHasInlineAsmMaybeReferencingInternal();
+ Index.addGlobalValueSummary(F.getName(), std::move(FuncSummary));
}
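One note on the hotness bookkeeping above: updateHotness is invoked once per call site, so multiple edges to the same callee merge into a single entry. A hedged illustration, assuming updateHotness keeps the hottest classification seen (which the accumulation here relies on; Callee is a placeholder ValueInfo):

    MapVector<ValueInfo, CalleeInfo> Edges;
    Edges[Callee].updateHotness(CalleeInfo::HotnessType::Cold);
    Edges[Callee].updateHotness(CalleeInfo::HotnessType::Hot);
    // A single edge to Callee remains, now classified as Hot.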
-void ModuleSummaryIndexBuilder::computeVariableSummary(
- const GlobalVariable &V) {
- DenseSet<const Value *> RefEdges;
+static void computeVariableSummary(ModuleSummaryIndex &Index,
+ const GlobalVariable &V) {
+ SetVector<ValueInfo> RefEdges;
SmallPtrSet<const User *, 8> Visited;
findRefEdges(&V, RefEdges, Visited);
GlobalValueSummary::GVFlags Flags(V);
- std::unique_ptr<GlobalVarSummary> GVarSummary =
- llvm::make_unique<GlobalVarSummary>(Flags);
- GVarSummary->addRefEdges(RefEdges);
- Index->addGlobalValueSummary(V.getName(), std::move(GVarSummary));
+ auto GVarSummary =
+ llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector());
+ Index.addGlobalValueSummary(V.getName(), std::move(GVarSummary));
+}
+
+static void computeAliasSummary(ModuleSummaryIndex &Index,
+ const GlobalAlias &A) {
+ GlobalValueSummary::GVFlags Flags(A);
+ auto AS = llvm::make_unique<AliasSummary>(Flags, ArrayRef<ValueInfo>{});
+ auto *Aliasee = A.getBaseObject();
+ auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee);
+ assert(AliaseeSummary && "Alias expects aliasee summary to be parsed");
+ AS->setAliasee(AliaseeSummary);
+ Index.addGlobalValueSummary(A.getName(), std::move(AS));
}
-ModuleSummaryIndexBuilder::ModuleSummaryIndexBuilder(
- const Module *M,
- std::function<BlockFrequencyInfo *(const Function &F)> Ftor)
- : Index(llvm::make_unique<ModuleSummaryIndex>()), M(M) {
- // Check if the module can be promoted, otherwise just disable importing from
- // it by not emitting any summary.
- // FIXME: we could still import *into* it most of the time.
- if (!moduleCanBeRenamedForThinLTO(*M))
- return;
+ModuleSummaryIndex llvm::buildModuleSummaryIndex(
+ const Module &M,
+ std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
+ ProfileSummaryInfo *PSI) {
+ ModuleSummaryIndex Index;
+
+ // Identify the local values in the llvm.used and llvm.compiler.used sets,
+ // which should not be exported as they would then require renaming and
+ // promotion, but we may have opaque uses e.g. in inline asm. We collect them
+ // here because we use this information to mark functions containing inline
+ // assembly calls as not importable.
+ SmallPtrSet<GlobalValue *, 8> LocalsUsed;
+ SmallPtrSet<GlobalValue *, 8> Used;
+ // First collect those in the llvm.used set.
+ collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
+ // Next collect those in the llvm.compiler.used set.
+ collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ true);
+ for (auto *V : Used) {
+ if (V->hasLocalLinkage())
+ LocalsUsed.insert(V);
+ }
// Compute summaries for all functions defined in module, and save in the
// index.
- for (auto &F : *M) {
+ for (auto &F : M) {
if (F.isDeclaration())
continue;
BlockFrequencyInfo *BFI = nullptr;
std::unique_ptr<BlockFrequencyInfo> BFIPtr;
- if (Ftor)
- BFI = Ftor(F);
+ if (GetBFICallback)
+ BFI = GetBFICallback(F);
else if (F.getEntryCount().hasValue()) {
LoopInfo LI{DominatorTree(const_cast<Function &>(F))};
BranchProbabilityInfo BPI{F, LI};
@@ -162,16 +248,89 @@ ModuleSummaryIndexBuilder::ModuleSummaryIndexBuilder(
BFI = BFIPtr.get();
}
- computeFunctionSummary(F, BFI);
+ computeFunctionSummary(Index, M, F, BFI, PSI, !LocalsUsed.empty());
}
// Compute summaries for all variables defined in module, and save in the
// index.
- for (const GlobalVariable &G : M->globals()) {
+ for (const GlobalVariable &G : M.globals()) {
if (G.isDeclaration())
continue;
- computeVariableSummary(G);
+ computeVariableSummary(Index, G);
+ }
+
+ // Compute summaries for all aliases defined in module, and save in the
+ // index.
+ for (const GlobalAlias &A : M.aliases())
+ computeAliasSummary(Index, A);
+
+ for (auto *V : LocalsUsed) {
+ auto *Summary = Index.getGlobalValueSummary(*V);
+ assert(Summary && "Missing summary for global value");
+ Summary->setNoRename();
+ }
+
+ if (!M.getModuleInlineAsm().empty()) {
+ // Collect the local values defined by module level asm, and set up
+ // summaries for these symbols so that they can be marked as NoRename,
+ // to prevent export of any use of them in regular IR that would require
+ // renaming within the module level asm. Note we don't need to create a
+ // summary for weak or global defs, as they don't need to be flagged as
+ // NoRename, and defs in module level asm can't be imported anyway.
+ // Also, any values used but not defined within module level asm should
+ // be listed on the llvm.used or llvm.compiler.used global and marked as
+ // referenced from there.
+ ModuleSymbolTable::CollectAsmSymbols(
+ Triple(M.getTargetTriple()), M.getModuleInlineAsm(),
+ [&M, &Index](StringRef Name, object::BasicSymbolRef::Flags Flags) {
+ // Symbols not marked as Weak or Global are local definitions.
+ if (Flags & (object::BasicSymbolRef::SF_Weak |
+ object::BasicSymbolRef::SF_Global))
+ return;
+ GlobalValue *GV = M.getNamedValue(Name);
+ if (!GV)
+ return;
+          assert(GV->isDeclaration() &&
+                 "Def in module asm already has definition");
+ GlobalValueSummary::GVFlags GVFlags(
+ GlobalValue::InternalLinkage,
+ /* NoRename */ true,
+ /* HasInlineAsmMaybeReferencingInternal */ false,
+ /* IsNotViableToInline */ true);
+ // Create the appropriate summary type.
+ if (isa<Function>(GV)) {
+ std::unique_ptr<FunctionSummary> Summary =
+ llvm::make_unique<FunctionSummary>(
+ GVFlags, 0, ArrayRef<ValueInfo>{},
+ ArrayRef<FunctionSummary::EdgeTy>{},
+ ArrayRef<GlobalValue::GUID>{});
+ Summary->setNoRename();
+ Index.addGlobalValueSummary(Name, std::move(Summary));
+ } else {
+ std::unique_ptr<GlobalVarSummary> Summary =
+ llvm::make_unique<GlobalVarSummary>(GVFlags,
+ ArrayRef<ValueInfo>{});
+ Summary->setNoRename();
+ Index.addGlobalValueSummary(Name, std::move(Summary));
+ }
+ });
}
+
+ return Index;
+}
+
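A hedged usage sketch of the new free function, standalone and without per-function BFI (the PSI construction mirrors what the wrapper pass does in doInitialization):

    ProfileSummaryInfo PSI(M);
    ModuleSummaryIndex Index =
        llvm::buildModuleSummaryIndex(M, /*GetBFICallback=*/nullptr, &PSI);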
+AnalysisKey ModuleSummaryIndexAnalysis::Key;
+
+ModuleSummaryIndex
+ModuleSummaryIndexAnalysis::run(Module &M, ModuleAnalysisManager &AM) {
+ ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ return buildModuleSummaryIndex(
+ M,
+ [&FAM](const Function &F) {
+ return &FAM.getResult<BlockFrequencyAnalysis>(
+ *const_cast<Function *>(&F));
+ },
+ &PSI);
}
char ModuleSummaryIndexWrapperPass::ID = 0;
@@ -191,59 +350,25 @@ ModuleSummaryIndexWrapperPass::ModuleSummaryIndexWrapperPass()
}
bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) {
- IndexBuilder = llvm::make_unique<ModuleSummaryIndexBuilder>(
- &M, [this](const Function &F) {
+ auto &PSI = *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ Index = buildModuleSummaryIndex(
+ M,
+ [this](const Function &F) {
return &(this->getAnalysis<BlockFrequencyInfoWrapperPass>(
*const_cast<Function *>(&F))
.getBFI());
- });
+ },
+ &PSI);
return false;
}
bool ModuleSummaryIndexWrapperPass::doFinalization(Module &M) {
- IndexBuilder.reset();
+ Index.reset();
return false;
}
void ModuleSummaryIndexWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<BlockFrequencyInfoWrapperPass>();
-}
-
-bool llvm::moduleCanBeRenamedForThinLTO(const Module &M) {
- // We cannot currently promote or rename anything used in inline assembly,
- // which are not visible to the compiler. Detect a possible case by looking
- // for a llvm.used local value, in conjunction with an inline assembly call
- // in the module. Prevent importing of any modules containing these uses by
- // suppressing generation of the index. This also prevents importing
- // into this module, which is also necessary to avoid needing to rename
- // in case of a name clash between a local in this module and an imported
- // global.
- // FIXME: If we find we need a finer-grained approach of preventing promotion
- // and renaming of just the functions using inline assembly we will need to:
- // - Add flag in the function summaries to identify those with inline asm.
- // - Prevent importing of any functions with flag set.
- // - Prevent importing of any global function with the same name as a
- // function in current module that has the flag set.
- // - For any llvm.used value that is exported and promoted, add a private
- // alias to the original name in the current module (even if we don't
- // export the function using those values in inline asm, another function
- // with a reference could be exported).
- SmallPtrSet<GlobalValue *, 8> Used;
- collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
- bool LocalIsUsed =
- llvm::any_of(Used, [](GlobalValue *V) { return V->hasLocalLinkage(); });
- if (!LocalIsUsed)
- return true;
-
- // Walk all the instructions in the module and find if one is inline ASM
- auto HasInlineAsm = llvm::any_of(M, [](const Function &F) {
- return llvm::any_of(instructions(F), [](const Instruction &I) {
- const CallInst *CallI = dyn_cast<CallInst>(&I);
- if (!CallI)
- return false;
- return CallI->isInlineAsm();
- });
- });
- return !HasInlineAsm;
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
}
diff --git a/lib/Analysis/ObjCARCAliasAnalysis.cpp b/lib/Analysis/ObjCARCAliasAnalysis.cpp
index 9bb1048ea8ba..ed03406ca8c6 100644
--- a/lib/Analysis/ObjCARCAliasAnalysis.cpp
+++ b/lib/Analysis/ObjCARCAliasAnalysis.cpp
@@ -131,7 +131,7 @@ ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS,
return AAResultBase::getModRefInfo(CS, Loc);
}
-ObjCARCAAResult ObjCARCAA::run(Function &F, AnalysisManager<Function> &AM) {
+ObjCARCAAResult ObjCARCAA::run(Function &F, FunctionAnalysisManager &AM) {
return ObjCARCAAResult(F.getParent()->getDataLayout());
}
diff --git a/lib/Analysis/ObjCARCInstKind.cpp b/lib/Analysis/ObjCARCInstKind.cpp
index 3dc1463b8d8b..1e75c0824d03 100644
--- a/lib/Analysis/ObjCARCInstKind.cpp
+++ b/lib/Analysis/ObjCARCInstKind.cpp
@@ -96,43 +96,47 @@ ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) {
// One argument.
const Argument *A0 = &*AI++;
- if (AI == AE)
+ if (AI == AE) {
// Argument is a pointer.
- if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
- Type *ETy = PTy->getElementType();
- // Argument is i8*.
- if (ETy->isIntegerTy(8))
+ PointerType *PTy = dyn_cast<PointerType>(A0->getType());
+ if (!PTy)
+ return ARCInstKind::CallOrUser;
+
+ Type *ETy = PTy->getElementType();
+ // Argument is i8*.
+ if (ETy->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_retain", ARCInstKind::Retain)
+ .Case("objc_retainAutoreleasedReturnValue", ARCInstKind::RetainRV)
+ .Case("objc_unsafeClaimAutoreleasedReturnValue", ARCInstKind::ClaimRV)
+ .Case("objc_retainBlock", ARCInstKind::RetainBlock)
+ .Case("objc_release", ARCInstKind::Release)
+ .Case("objc_autorelease", ARCInstKind::Autorelease)
+ .Case("objc_autoreleaseReturnValue", ARCInstKind::AutoreleaseRV)
+ .Case("objc_autoreleasePoolPop", ARCInstKind::AutoreleasepoolPop)
+ .Case("objc_retainedObject", ARCInstKind::NoopCast)
+ .Case("objc_unretainedObject", ARCInstKind::NoopCast)
+ .Case("objc_unretainedPointer", ARCInstKind::NoopCast)
+ .Case("objc_retain_autorelease", ARCInstKind::FusedRetainAutorelease)
+ .Case("objc_retainAutorelease", ARCInstKind::FusedRetainAutorelease)
+ .Case("objc_retainAutoreleaseReturnValue",
+ ARCInstKind::FusedRetainAutoreleaseRV)
+ .Case("objc_sync_enter", ARCInstKind::User)
+ .Case("objc_sync_exit", ARCInstKind::User)
+ .Default(ARCInstKind::CallOrUser);
+
+ // Argument is i8**
+ if (PointerType *Pte = dyn_cast<PointerType>(ETy))
+ if (Pte->getElementType()->isIntegerTy(8))
return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_retain", ARCInstKind::Retain)
- .Case("objc_retainAutoreleasedReturnValue", ARCInstKind::RetainRV)
- .Case("objc_unsafeClaimAutoreleasedReturnValue",
- ARCInstKind::ClaimRV)
- .Case("objc_retainBlock", ARCInstKind::RetainBlock)
- .Case("objc_release", ARCInstKind::Release)
- .Case("objc_autorelease", ARCInstKind::Autorelease)
- .Case("objc_autoreleaseReturnValue", ARCInstKind::AutoreleaseRV)
- .Case("objc_autoreleasePoolPop", ARCInstKind::AutoreleasepoolPop)
- .Case("objc_retainedObject", ARCInstKind::NoopCast)
- .Case("objc_unretainedObject", ARCInstKind::NoopCast)
- .Case("objc_unretainedPointer", ARCInstKind::NoopCast)
- .Case("objc_retain_autorelease",
- ARCInstKind::FusedRetainAutorelease)
- .Case("objc_retainAutorelease", ARCInstKind::FusedRetainAutorelease)
- .Case("objc_retainAutoreleaseReturnValue",
- ARCInstKind::FusedRetainAutoreleaseRV)
- .Case("objc_sync_enter", ARCInstKind::User)
- .Case("objc_sync_exit", ARCInstKind::User)
+ .Case("objc_loadWeakRetained", ARCInstKind::LoadWeakRetained)
+ .Case("objc_loadWeak", ARCInstKind::LoadWeak)
+ .Case("objc_destroyWeak", ARCInstKind::DestroyWeak)
.Default(ARCInstKind::CallOrUser);
- // Argument is i8**
- if (PointerType *Pte = dyn_cast<PointerType>(ETy))
- if (Pte->getElementType()->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_loadWeakRetained", ARCInstKind::LoadWeakRetained)
- .Case("objc_loadWeak", ARCInstKind::LoadWeak)
- .Case("objc_destroyWeak", ARCInstKind::DestroyWeak)
- .Default(ARCInstKind::CallOrUser);
- }
+ // Anything else with one argument.
+ return ARCInstKind::CallOrUser;
+ }
// Two arguments, first is i8**.
const Argument *A1 = &*AI++;
@@ -180,6 +184,7 @@ static bool isInertIntrinsic(unsigned ID) {
// TODO: Make this into a covered switch.
switch (ID) {
case Intrinsic::returnaddress:
+ case Intrinsic::addressofreturnaddress:
case Intrinsic::frameaddress:
case Intrinsic::stacksave:
case Intrinsic::stackrestore:
diff --git a/lib/Analysis/OptimizationDiagnosticInfo.cpp b/lib/Analysis/OptimizationDiagnosticInfo.cpp
index e979ba2531e4..fa8b07d61b01 100644
--- a/lib/Analysis/OptimizationDiagnosticInfo.cpp
+++ b/lib/Analysis/OptimizationDiagnosticInfo.cpp
@@ -13,30 +13,204 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;
-Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(Value *V) {
+OptimizationRemarkEmitter::OptimizationRemarkEmitter(Function *F)
+ : F(F), BFI(nullptr) {
+ if (!F->getContext().getDiagnosticHotnessRequested())
+ return;
+
+ // First create a dominator tree.
+ DominatorTree DT;
+ DT.recalculate(*F);
+
+ // Generate LoopInfo from it.
+ LoopInfo LI;
+ LI.analyze(DT);
+
+ // Then compute BranchProbabilityInfo.
+ BranchProbabilityInfo BPI;
+ BPI.calculate(*F, LI);
+
+ // Finally compute BFI.
+ OwnedBFI = llvm::make_unique<BlockFrequencyInfo>(*F, BPI, LI);
+ BFI = OwnedBFI.get();
+}
+
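A hedged usage sketch of the emitter defined by the constructor above (pass name and message are placeholders): BFI, and with it hotness, is only computed when the context requested diagnostic hotness.

    OptimizationRemarkEmitter ORE(&F);
    ORE.emitOptimizationRemark("my-pass", I.getDebugLoc(), I.getParent(),
                               "transformed");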
+Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) {
if (!BFI)
return None;
return BFI->getBlockProfileCount(cast<BasicBlock>(V));
}
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<DiagnosticInfoOptimizationBase *> {
+ static void mapping(IO &io, DiagnosticInfoOptimizationBase *&OptDiag) {
+ assert(io.outputting() && "input not yet implemented");
+
+ if (io.mapTag("!Passed", OptDiag->getKind() == DK_OptimizationRemark))
+ ;
+ else if (io.mapTag("!Missed",
+ OptDiag->getKind() == DK_OptimizationRemarkMissed))
+ ;
+ else if (io.mapTag("!Analysis",
+ OptDiag->getKind() == DK_OptimizationRemarkAnalysis))
+ ;
+ else if (io.mapTag("!AnalysisFPCommute",
+ OptDiag->getKind() ==
+ DK_OptimizationRemarkAnalysisFPCommute))
+ ;
+ else if (io.mapTag("!AnalysisAliasing",
+ OptDiag->getKind() ==
+ DK_OptimizationRemarkAnalysisAliasing))
+ ;
+ else
+ llvm_unreachable("todo");
+
+ // These are read-only for now.
+ DebugLoc DL = OptDiag->getDebugLoc();
+ StringRef FN = GlobalValue::getRealLinkageName(
+ OptDiag->getFunction().getName());
+
+ StringRef PassName(OptDiag->PassName);
+ io.mapRequired("Pass", PassName);
+ io.mapRequired("Name", OptDiag->RemarkName);
+ if (!io.outputting() || DL)
+ io.mapOptional("DebugLoc", DL);
+ io.mapRequired("Function", FN);
+ io.mapOptional("Hotness", OptDiag->Hotness);
+ io.mapOptional("Args", OptDiag->Args);
+ }
+};
+
+template <> struct MappingTraits<DebugLoc> {
+ static void mapping(IO &io, DebugLoc &DL) {
+ assert(io.outputting() && "input not yet implemented");
+
+ auto *Scope = cast<DIScope>(DL.getScope());
+ StringRef File = Scope->getFilename();
+ unsigned Line = DL.getLine();
+ unsigned Col = DL.getCol();
+
+ io.mapRequired("File", File);
+ io.mapRequired("Line", Line);
+ io.mapRequired("Column", Col);
+ }
+
+ static const bool flow = true;
+};
+
+// Implement this as a mapping for now to get proper quotation for the value.
+template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> {
+ static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) {
+ assert(io.outputting() && "input not yet implemented");
+ io.mapRequired(A.Key.data(), A.Val);
+ if (A.DLoc)
+ io.mapOptional("DebugLoc", A.DLoc);
+ }
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(DiagnosticInfoOptimizationBase::Argument)
+
+void OptimizationRemarkEmitter::computeHotness(
+ DiagnosticInfoOptimizationBase &OptDiag) {
+ Value *V = OptDiag.getCodeRegion();
+ if (V)
+ OptDiag.setHotness(computeHotness(V));
+}
+
+void OptimizationRemarkEmitter::emit(DiagnosticInfoOptimizationBase &OptDiag) {
+ computeHotness(OptDiag);
+
+ yaml::Output *Out = F->getContext().getDiagnosticsOutputFile();
+ if (Out) {
+ auto *P = &const_cast<DiagnosticInfoOptimizationBase &>(OptDiag);
+ *Out << P;
+ }
+ // FIXME: now that IsVerbose is part of DI, filtering for this will be moved
+ // from here to clang.
+ if (!OptDiag.isVerbose() || shouldEmitVerbose())
+ F->getContext().diagnose(OptDiag);
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemark(const char *PassName,
+ const DebugLoc &DLoc,
+ const Value *V,
+ const Twine &Msg) {
+ LLVMContext &Ctx = F->getContext();
+ Ctx.diagnose(OptimizationRemark(PassName, *F, DLoc, Msg, computeHotness(V)));
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemark(const char *PassName,
+ Loop *L,
+ const Twine &Msg) {
+ emitOptimizationRemark(PassName, L->getStartLoc(), L->getHeader(), Msg);
+}
+
void OptimizationRemarkEmitter::emitOptimizationRemarkMissed(
- const char *PassName, const DebugLoc &DLoc, Value *V, const Twine &Msg) {
+ const char *PassName, const DebugLoc &DLoc, const Value *V,
+ const Twine &Msg, bool IsVerbose) {
LLVMContext &Ctx = F->getContext();
- Ctx.diagnose(DiagnosticInfoOptimizationRemarkMissed(PassName, *F, DLoc, Msg,
- computeHotness(V)));
+ if (!IsVerbose || shouldEmitVerbose())
+ Ctx.diagnose(
+ OptimizationRemarkMissed(PassName, *F, DLoc, Msg, computeHotness(V)));
}
void OptimizationRemarkEmitter::emitOptimizationRemarkMissed(
+ const char *PassName, Loop *L, const Twine &Msg, bool IsVerbose) {
+ emitOptimizationRemarkMissed(PassName, L->getStartLoc(), L->getHeader(), Msg,
+ IsVerbose);
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysis(
+ const char *PassName, const DebugLoc &DLoc, const Value *V,
+ const Twine &Msg, bool IsVerbose) {
+ LLVMContext &Ctx = F->getContext();
+ if (!IsVerbose || shouldEmitVerbose())
+ Ctx.diagnose(
+ OptimizationRemarkAnalysis(PassName, *F, DLoc, Msg, computeHotness(V)));
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysis(
+ const char *PassName, Loop *L, const Twine &Msg, bool IsVerbose) {
+ emitOptimizationRemarkAnalysis(PassName, L->getStartLoc(), L->getHeader(),
+ Msg, IsVerbose);
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisFPCommute(
+ const char *PassName, const DebugLoc &DLoc, const Value *V,
+ const Twine &Msg) {
+ LLVMContext &Ctx = F->getContext();
+ Ctx.diagnose(OptimizationRemarkAnalysisFPCommute(PassName, *F, DLoc, Msg,
+ computeHotness(V)));
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisAliasing(
+ const char *PassName, const DebugLoc &DLoc, const Value *V,
+ const Twine &Msg) {
+ LLVMContext &Ctx = F->getContext();
+ Ctx.diagnose(OptimizationRemarkAnalysisAliasing(PassName, *F, DLoc, Msg,
+ computeHotness(V)));
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisAliasing(
const char *PassName, Loop *L, const Twine &Msg) {
- emitOptimizationRemarkMissed(PassName, L->getStartLoc(), L->getHeader(), Msg);
+ emitOptimizationRemarkAnalysisAliasing(PassName, L->getStartLoc(),
+ L->getHeader(), Msg);
}
OptimizationRemarkEmitterWrapperPass::OptimizationRemarkEmitterWrapperPass()
@@ -63,10 +237,11 @@ void OptimizationRemarkEmitterWrapperPass::getAnalysisUsage(
AU.setPreservesAll();
}
-char OptimizationRemarkEmitterAnalysis::PassID;
+AnalysisKey OptimizationRemarkEmitterAnalysis::Key;
OptimizationRemarkEmitter
-OptimizationRemarkEmitterAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
+OptimizationRemarkEmitterAnalysis::run(Function &F,
+ FunctionAnalysisManager &AM) {
BlockFrequencyInfo *BFI;
if (F.getContext().getDiagnosticHotnessRequested())
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index b4aad74d50dc..84ecd4ab9809 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -62,8 +62,7 @@ static bool VerifySubExpr(Value *Expr,
// If it's an instruction, it is either in Tmp or its operands recursively
// are.
- SmallVectorImpl<Instruction*>::iterator Entry =
- std::find(InstInputs.begin(), InstInputs.end(), I);
+ SmallVectorImpl<Instruction *>::iterator Entry = find(InstInputs, I);
if (Entry != InstInputs.end()) {
InstInputs.erase(Entry);
return true;
@@ -126,8 +125,7 @@ static void RemoveInstInputs(Value *V,
if (!I) return;
// If the instruction is in the InstInputs list, remove it.
- SmallVectorImpl<Instruction*>::iterator Entry =
- std::find(InstInputs.begin(), InstInputs.end(), I);
+ SmallVectorImpl<Instruction *>::iterator Entry = find(InstInputs, I);
if (Entry != InstInputs.end()) {
InstInputs.erase(Entry);
return;
@@ -150,8 +148,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
if (!Inst) return V;
// Determine whether 'Inst' is an input to our PHI translatable expression.
- bool isInput =
- std::find(InstInputs.begin(), InstInputs.end(), Inst) != InstInputs.end();
+ bool isInput = is_contained(InstInputs, Inst);
// Handle inputs instructions if needed.
if (isInput) {
@@ -165,7 +162,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// translated, we need to incorporate the value into the expression or fail.
// In either case, the instruction itself isn't an input any longer.
- InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst));
+ InstInputs.erase(find(InstInputs, Inst));
// If this is a PHI, go ahead and translate it.
if (PHINode *PN = dyn_cast<PHINode>(Inst))
@@ -272,8 +269,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
isNSW = isNUW = false;
// If the old 'LHS' was an input, add the new 'LHS' as an input.
- if (std::find(InstInputs.begin(), InstInputs.end(), BOp) !=
- InstInputs.end()) {
+ if (is_contained(InstInputs, BOp)) {
RemoveInstInputs(BOp, InstInputs);
AddAsInput(LHS);
}
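The hunks above swap std::find for the range helpers from llvm/ADT/STLExtras.h; a hedged equivalence sketch:

    auto It = llvm::find(InstInputs, I);         // std::find(begin, end, I)
    bool In = llvm::is_contained(InstInputs, I); // find(...) != end()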
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index 73550805d5ba..cb9438a2f928 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -44,7 +44,7 @@ FunctionPass* llvm::createPostDomTree() {
return new PostDominatorTreeWrapperPass();
}
-char PostDominatorTreeAnalysis::PassID;
+AnalysisKey PostDominatorTreeAnalysis::Key;
PostDominatorTree PostDominatorTreeAnalysis::run(Function &F,
FunctionAnalysisManager &) {
diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp
index 9cf99af49581..16d3614c14c6 100644
--- a/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/lib/Analysis/ProfileSummaryInfo.cpp
@@ -12,7 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ProfileSummary.h"
@@ -63,10 +65,10 @@ void ProfileSummaryInfo::computeSummary() {
Summary.reset(ProfileSummary::getFromMD(SummaryMD));
}
-// Returns true if the function is a hot function. If it returns false, it
-// either means it is not hot or it is unknown whether F is hot or not (for
-// example, no profile data is available).
-bool ProfileSummaryInfo::isHotFunction(const Function *F) {
+/// Returns true if the function's entry is hot. If it returns false, it
+/// either means it is not hot or it is unknown whether it is hot or not (for
+/// example, no profile data is available).
+bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) {
computeSummary();
if (!F || !Summary)
return false;
@@ -74,15 +76,13 @@ bool ProfileSummaryInfo::isHotFunction(const Function *F) {
// FIXME: The heuristic used below for determining hotness is based on
// preliminary SPEC tuning for inliner. This will eventually be a
// convenience method that calls isHotCount.
- return (FunctionCount &&
- FunctionCount.getValue() >=
- (uint64_t)(0.3 * (double)Summary->getMaxFunctionCount()));
+ return FunctionCount && isHotCount(FunctionCount.getValue());
}
-// Returns true if the function is a cold function. If it returns false, it
-// either means it is not cold or it is unknown whether F is cold or not (for
-// example, no profile data is available).
-bool ProfileSummaryInfo::isColdFunction(const Function *F) {
+/// Returns true if the function's entry is cold. If it returns false, it
+/// either means it is not cold or it is unknown whether it is cold or not (for
+/// example, no profile data is available).
+bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) {
computeSummary();
if (!F)
return false;
@@ -95,12 +95,10 @@ bool ProfileSummaryInfo::isColdFunction(const Function *F) {
// FIXME: The heuristic used below for determining coldness is based on
// preliminary SPEC tuning for inliner. This will eventually be a
// convenience method that calls isHotCount.
- return (FunctionCount &&
- FunctionCount.getValue() <=
- (uint64_t)(0.01 * (double)Summary->getMaxFunctionCount()));
+ return FunctionCount && isColdCount(FunctionCount.getValue());
}
-// Compute the hot and cold thresholds.
+/// Compute the hot and cold thresholds.
void ProfileSummaryInfo::computeThresholds() {
if (!Summary)
computeSummary();
@@ -125,10 +123,22 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) {
return ColdCountThreshold && C <= ColdCountThreshold.getValue();
}
-ProfileSummaryInfo *ProfileSummaryInfoWrapperPass::getPSI(Module &M) {
- if (!PSI)
- PSI.reset(new ProfileSummaryInfo(M));
- return PSI.get();
+bool ProfileSummaryInfo::isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI) {
+ auto Count = BFI->getBlockProfileCount(B);
+ if (Count && isHotCount(*Count))
+ return true;
+  // Use extractProfTotalWeight to get the BB count.
+  // For Sample PGO, BFI may not provide an accurate BB count due to errors
+  // magnified during sample count propagation. This serves as a backup plan
+  // to ensure that hot BBs are not missed.
+  // The query currently has false positives because branch instruction cloning
+  // does not update/scale branch weights. Unlike false negatives, this will
+  // not cause a performance problem.
+ uint64_t TotalCount;
+ if (B->getTerminator()->extractProfTotalWeight(TotalCount) &&
+ isHotCount(TotalCount))
+ return true;
+ return false;
}
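A hedged usage sketch of the new block-level query (assumes the caller already has BlockFrequencyInfo for F; NumHotBlocks is a hypothetical counter):

    unsigned NumHotBlocks = 0;
    for (const BasicBlock &BB : F)
      if (PSI.isHotBB(&BB, &BFI))
        ++NumHotBlocks;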
INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",
@@ -139,25 +149,33 @@ ProfileSummaryInfoWrapperPass::ProfileSummaryInfoWrapperPass()
initializeProfileSummaryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
}
-char ProfileSummaryAnalysis::PassID;
+bool ProfileSummaryInfoWrapperPass::doInitialization(Module &M) {
+ PSI.reset(new ProfileSummaryInfo(M));
+ return false;
+}
+
+bool ProfileSummaryInfoWrapperPass::doFinalization(Module &M) {
+ PSI.reset();
+ return false;
+}
+
+AnalysisKey ProfileSummaryAnalysis::Key;
ProfileSummaryInfo ProfileSummaryAnalysis::run(Module &M,
ModuleAnalysisManager &) {
return ProfileSummaryInfo(M);
}
-// FIXME: This only tests isHotFunction and isColdFunction and not the
-// isHotCount and isColdCount calls.
PreservedAnalyses ProfileSummaryPrinterPass::run(Module &M,
- AnalysisManager<Module> &AM) {
+ ModuleAnalysisManager &AM) {
ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
OS << "Functions in " << M.getName() << " with hot/cold annotations: \n";
for (auto &F : M) {
OS << F.getName();
- if (PSI.isHotFunction(&F))
- OS << " :hot ";
- else if (PSI.isColdFunction(&F))
- OS << " :cold ";
+ if (PSI.isFunctionEntryHot(&F))
+ OS << " :hot entry ";
+ else if (PSI.isFunctionEntryCold(&F))
+ OS << " :cold entry ";
OS << "\n";
}
return PreservedAnalyses::all();
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 6860a3e63953..8c084ddd2266 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -54,8 +54,7 @@ static cl::opt<Region::PrintStyle, true> printStyleX("print-region-style",
clEnumValN(Region::PrintBB, "bb",
"print regions in detail with block_iterator"),
clEnumValN(Region::PrintRN, "rn",
- "print regions in detail with element_iterator"),
- clEnumValEnd));
+ "print regions in detail with element_iterator")));
//===----------------------------------------------------------------------===//
@@ -182,9 +181,9 @@ namespace llvm {
// RegionInfoAnalysis implementation
//
-char RegionInfoAnalysis::PassID;
+AnalysisKey RegionInfoAnalysis::Key;
-RegionInfo RegionInfoAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
+RegionInfo RegionInfoAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
RegionInfo RI;
auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
@@ -206,7 +205,7 @@ PreservedAnalyses RegionInfoPrinterPass::run(Function &F,
}
PreservedAnalyses RegionInfoVerifierPass::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
AM.getResult<RegionInfoAnalysis>(F).verifyAnalysis();
return PreservedAnalyses::all();
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index 5e1cdd48a78e..7358aa6810a1 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -10,7 +10,7 @@
// This file implements RegionPass and RGPassManager. All region optimization
// and transformation passes are derived from RegionPass. RGPassManager is
// responsible for managing RegionPasses.
-// most of these codes are COPY from LoopPass.cpp
+// Most of this code has been COPIED from LoopPass.cpp
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionPass.h"
@@ -64,9 +64,7 @@ bool RGPassManager::runOnFunction(Function &F) {
return false;
// Initialization
- for (std::deque<Region *>::const_iterator I = RQ.begin(), E = RQ.end();
- I != E; ++I) {
- Region *R = *I;
+ for (Region *R : RQ) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
RegionPass *RP = (RegionPass *)getContainedPass(Index);
Changed |= RP->doInitialization(R, *this);
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index e42a4b574d90..5e566bcdaff4 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -61,6 +61,8 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -120,6 +122,16 @@ static cl::opt<bool>
cl::desc("Verify no dangling value in ScalarEvolution's "
"ExprValueMap (slow)"));
+static cl::opt<unsigned> MulOpsInlineThreshold(
+ "scev-mulops-inline-threshold", cl::Hidden,
+ cl::desc("Threshold for inlining multiplication operands into a SCEV"),
+ cl::init(1000));
+
+static cl::opt<unsigned>
+ MaxCompareDepth("scalar-evolution-max-compare-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive compare complexity"),
+ cl::init(32));
+
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
@@ -447,180 +459,233 @@ bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
// SCEV Utilities
//===----------------------------------------------------------------------===//
-namespace {
-/// SCEVComplexityCompare - Return true if the complexity of the LHS is less
-/// than the complexity of the RHS. This comparator is used to canonicalize
-/// expressions.
-class SCEVComplexityCompare {
- const LoopInfo *const LI;
-public:
- explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
+/// Compare the two values \p LV and \p RV in terms of their "complexity" where
+/// "complexity" is a partial (and somewhat ad-hoc) relation used to order
+/// operands in SCEV expressions. \p EqCache is a set of pairs of values that
+/// have been previously deemed to be "equally complex" by this routine. It is
+/// intended to avoid exponential time complexity in cases like:
+///
+/// %a = f(%x, %y)
+/// %b = f(%a, %a)
+/// %c = f(%b, %b)
+///
+/// %d = f(%x, %y)
+/// %e = f(%d, %d)
+/// %f = f(%e, %e)
+///
+/// CompareValueComplexity(%f, %c)
+///
+/// Since we do not continue running this routine on expression trees once we
+/// have seen unequal values, there is no need to track them in the cache.
+static int
+CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache,
+ const LoopInfo *const LI, Value *LV, Value *RV,
+ unsigned Depth) {
+ if (Depth > MaxCompareDepth || EqCache.count({LV, RV}))
+ return 0;
+
+ // Order pointer values after integer values. This helps SCEVExpander form
+ // GEPs.
+ bool LIsPointer = LV->getType()->isPointerTy(),
+ RIsPointer = RV->getType()->isPointerTy();
+ if (LIsPointer != RIsPointer)
+ return (int)LIsPointer - (int)RIsPointer;
- // Return true or false if LHS is less than, or at least RHS, respectively.
- bool operator()(const SCEV *LHS, const SCEV *RHS) const {
- return compare(LHS, RHS) < 0;
+ // Compare getValueID values.
+ unsigned LID = LV->getValueID(), RID = RV->getValueID();
+ if (LID != RID)
+ return (int)LID - (int)RID;
+
+ // Sort arguments by their position.
+ if (const auto *LA = dyn_cast<Argument>(LV)) {
+ const auto *RA = cast<Argument>(RV);
+ unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
+ return (int)LArgNo - (int)RArgNo;
}
- // Return negative, zero, or positive, if LHS is less than, equal to, or
- // greater than RHS, respectively. A three-way result allows recursive
- // comparisons to be more efficient.
- int compare(const SCEV *LHS, const SCEV *RHS) const {
- // Fast-path: SCEVs are uniqued so we can do a quick equality check.
- if (LHS == RHS)
- return 0;
-
- // Primarily, sort the SCEVs by their getSCEVType().
- unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
- if (LType != RType)
- return (int)LType - (int)RType;
-
- // Aside from the getSCEVType() ordering, the particular ordering
- // isn't very important except that it's beneficial to be consistent,
- // so that (a + b) and (b + a) don't end up as different expressions.
- switch (static_cast<SCEVTypes>(LType)) {
- case scUnknown: {
- const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
- const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
-
- // Sort SCEVUnknown values with some loose heuristics. TODO: This is
- // not as complete as it could be.
- const Value *LV = LU->getValue(), *RV = RU->getValue();
-
- // Order pointer values after integer values. This helps SCEVExpander
- // form GEPs.
- bool LIsPointer = LV->getType()->isPointerTy(),
- RIsPointer = RV->getType()->isPointerTy();
- if (LIsPointer != RIsPointer)
- return (int)LIsPointer - (int)RIsPointer;
-
- // Compare getValueID values.
- unsigned LID = LV->getValueID(),
- RID = RV->getValueID();
- if (LID != RID)
- return (int)LID - (int)RID;
-
- // Sort arguments by their position.
- if (const Argument *LA = dyn_cast<Argument>(LV)) {
- const Argument *RA = cast<Argument>(RV);
- unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
- return (int)LArgNo - (int)RArgNo;
- }
+ if (const auto *LGV = dyn_cast<GlobalValue>(LV)) {
+ const auto *RGV = cast<GlobalValue>(RV);
- // For instructions, compare their loop depth, and their operand
- // count. This is pretty loose.
- if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
- const Instruction *RInst = cast<Instruction>(RV);
-
- // Compare loop depths.
- const BasicBlock *LParent = LInst->getParent(),
- *RParent = RInst->getParent();
- if (LParent != RParent) {
- unsigned LDepth = LI->getLoopDepth(LParent),
- RDepth = LI->getLoopDepth(RParent);
- if (LDepth != RDepth)
- return (int)LDepth - (int)RDepth;
- }
+ const auto IsGVNameSemantic = [&](const GlobalValue *GV) {
+ auto LT = GV->getLinkage();
+ return !(GlobalValue::isPrivateLinkage(LT) ||
+ GlobalValue::isInternalLinkage(LT));
+ };
- // Compare the number of operands.
- unsigned LNumOps = LInst->getNumOperands(),
- RNumOps = RInst->getNumOperands();
- return (int)LNumOps - (int)RNumOps;
- }
+ // Use the names to distinguish the two values, but only if the
+ // names are semantically important.
+ if (IsGVNameSemantic(LGV) && IsGVNameSemantic(RGV))
+ return LGV->getName().compare(RGV->getName());
+ }
+
+ // For instructions, compare their loop depth, and their operand count. This
+ // is pretty loose.
+ if (const auto *LInst = dyn_cast<Instruction>(LV)) {
+ const auto *RInst = cast<Instruction>(RV);
- return 0;
+ // Compare loop depths.
+ const BasicBlock *LParent = LInst->getParent(),
+ *RParent = RInst->getParent();
+ if (LParent != RParent) {
+ unsigned LDepth = LI->getLoopDepth(LParent),
+ RDepth = LI->getLoopDepth(RParent);
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
}
- case scConstant: {
- const SCEVConstant *LC = cast<SCEVConstant>(LHS);
- const SCEVConstant *RC = cast<SCEVConstant>(RHS);
+ // Compare the number of operands.
+ unsigned LNumOps = LInst->getNumOperands(),
+ RNumOps = RInst->getNumOperands();
+ if (LNumOps != RNumOps)
+ return (int)LNumOps - (int)RNumOps;
- // Compare constant values.
- const APInt &LA = LC->getAPInt();
- const APInt &RA = RC->getAPInt();
- unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
- if (LBitWidth != RBitWidth)
- return (int)LBitWidth - (int)RBitWidth;
- return LA.ult(RA) ? -1 : 1;
+ for (unsigned Idx : seq(0u, LNumOps)) {
+ int Result =
+ CompareValueComplexity(EqCache, LI, LInst->getOperand(Idx),
+ RInst->getOperand(Idx), Depth + 1);
+ if (Result != 0)
+ return Result;
}
+ }
- case scAddRecExpr: {
- const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
- const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
+ EqCache.insert({LV, RV});
+ return 0;
+}
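To make the comment's cost claim concrete: without the cache, comparing the two depth-n chains %c and %f satisfies T(n) = 2*T(n-1) + O(1), i.e. exponential blow-up, because both children of every f-node are the same value and get re-compared independently. With EqCache, each distinct (LV, RV) pair is fully compared at most once before being recorded as equal, so the same comparison does O(n) pair visits. The Depth cutoff is an independent safety net for deep but non-repeating trees; returning 0 there merely declares the two values equally complex, which loosens canonicalization without affecting correctness.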
- // Compare addrec loop depths.
- const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
- if (LLoop != RLoop) {
- unsigned LDepth = LLoop->getLoopDepth(),
- RDepth = RLoop->getLoopDepth();
- if (LDepth != RDepth)
- return (int)LDepth - (int)RDepth;
- }
+// Return negative, zero, or positive, if LHS is less than, equal to, or greater
+// than RHS, respectively. A three-way result allows recursive comparisons to be
+// more efficient.
+static int CompareSCEVComplexity(
+ SmallSet<std::pair<const SCEV *, const SCEV *>, 8> &EqCacheSCEV,
+ const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS,
+ unsigned Depth = 0) {
+ // Fast-path: SCEVs are uniqued so we can do a quick equality check.
+ if (LHS == RHS)
+ return 0;
- // Addrec complexity grows with operand count.
- unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
- if (LNumOps != RNumOps)
- return (int)LNumOps - (int)RNumOps;
+ // Primarily, sort the SCEVs by their getSCEVType().
+ unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
+ if (LType != RType)
+ return (int)LType - (int)RType;
- // Lexicographically compare.
- for (unsigned i = 0; i != LNumOps; ++i) {
- long X = compare(LA->getOperand(i), RA->getOperand(i));
- if (X != 0)
- return X;
- }
+ if (Depth > MaxCompareDepth || EqCacheSCEV.count({LHS, RHS}))
+ return 0;
+ // Aside from the getSCEVType() ordering, the particular ordering
+ // isn't very important except that it's beneficial to be consistent,
+ // so that (a + b) and (b + a) don't end up as different expressions.
+ switch (static_cast<SCEVTypes>(LType)) {
+ case scUnknown: {
+ const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
+ const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
+
+ SmallSet<std::pair<Value *, Value *>, 8> EqCache;
+ int X = CompareValueComplexity(EqCache, LI, LU->getValue(), RU->getValue(),
+ Depth + 1);
+ if (X == 0)
+ EqCacheSCEV.insert({LHS, RHS});
+ return X;
+ }
- return 0;
+ case scConstant: {
+ const SCEVConstant *LC = cast<SCEVConstant>(LHS);
+ const SCEVConstant *RC = cast<SCEVConstant>(RHS);
+
+ // Compare constant values.
+ const APInt &LA = LC->getAPInt();
+ const APInt &RA = RC->getAPInt();
+ unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
+ if (LBitWidth != RBitWidth)
+ return (int)LBitWidth - (int)RBitWidth;
+ return LA.ult(RA) ? -1 : 1;
+ }
+
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
+ const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
+
+ // Compare addrec loop depths.
+ const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
+ if (LLoop != RLoop) {
+ unsigned LDepth = LLoop->getLoopDepth(), RDepth = RLoop->getLoopDepth();
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
}
- case scAddExpr:
- case scMulExpr:
- case scSMaxExpr:
- case scUMaxExpr: {
- const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
- const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
-
- // Lexicographically compare n-ary expressions.
- unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
- if (LNumOps != RNumOps)
- return (int)LNumOps - (int)RNumOps;
-
- for (unsigned i = 0; i != LNumOps; ++i) {
- if (i >= RNumOps)
- return 1;
- long X = compare(LC->getOperand(i), RC->getOperand(i));
- if (X != 0)
- return X;
- }
+ // Addrec complexity grows with operand count.
+ unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
+ if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
+
+ // Lexicographically compare.
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ int X = CompareSCEVComplexity(EqCacheSCEV, LI, LA->getOperand(i),
+ RA->getOperand(i), Depth + 1);
+ if (X != 0)
+ return X;
}
+ EqCacheSCEV.insert({LHS, RHS});
+ return 0;
+ }
- case scUDivExpr: {
- const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
- const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
+ case scAddExpr:
+ case scMulExpr:
+ case scSMaxExpr:
+ case scUMaxExpr: {
+ const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
+ const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
+
+ // Lexicographically compare n-ary expressions.
+ unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
+ if (LNumOps != RNumOps)
+ return (int)LNumOps - (int)RNumOps;
- // Lexicographically compare udiv expressions.
- long X = compare(LC->getLHS(), RC->getLHS());
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ if (i >= RNumOps)
+ return 1;
+ int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(i),
+ RC->getOperand(i), Depth + 1);
if (X != 0)
return X;
- return compare(LC->getRHS(), RC->getRHS());
}
+ EqCacheSCEV.insert({LHS, RHS});
+ return 0;
+ }
- case scTruncate:
- case scZeroExtend:
- case scSignExtend: {
- const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
- const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
+ case scUDivExpr: {
+ const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
+ const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
- // Compare cast expressions by operand.
- return compare(LC->getOperand(), RC->getOperand());
- }
+ // Lexicographically compare udiv expressions.
+ int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getLHS(), RC->getLHS(),
+ Depth + 1);
+ if (X != 0)
+ return X;
+ X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(),
+ Depth + 1);
+ if (X == 0)
+ EqCacheSCEV.insert({LHS, RHS});
+ return X;
+ }
- case scCouldNotCompute:
- llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- }
- llvm_unreachable("Unknown SCEV kind!");
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend: {
+ const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
+ const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
+
+ // Compare cast expressions by operand.
+ int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(),
+ RC->getOperand(), Depth + 1);
+ if (X == 0)
+ EqCacheSCEV.insert({LHS, RHS});
+ return X;
}
-};
-} // end anonymous namespace
+
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+}
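One consequence of the primary sort key is worth spelling out: getSCEVType() values come from the SCEVTypes enum, and as of this revision scConstant is its first enumerator, so constants always compare as least complex and migrate to the front of an operand list, which is what getAddExpr/getMulExpr assume when they peek at Ops[0] for folding. A minimal illustration (file-local names; assumes a ScalarEvolution SE, a LoopInfo LI, an LLVMContext Ctx, and any non-constant SCEV X in scope):

  SmallSet<std::pair<const SCEV *, const SCEV *>, 8> Cache;
  const SCEV *Two = SE.getConstant(Type::getInt32Ty(Ctx), 2);
  // scConstant orders before every other SCEV kind.
  assert(CompareSCEVComplexity(Cache, &LI, Two, X) < 0 && "constants sort first");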
/// Given a list of SCEV objects, order them by their complexity, and group
/// objects of the same complexity together by value. When this routine is
@@ -635,17 +700,22 @@ public:
static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
LoopInfo *LI) {
if (Ops.size() < 2) return; // Noop
+
+ SmallSet<std::pair<const SCEV *, const SCEV *>, 8> EqCache;
if (Ops.size() == 2) {
// This is the common case, which also happens to be trivially simple.
// Special case it.
const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
- if (SCEVComplexityCompare(LI)(RHS, LHS))
+ if (CompareSCEVComplexity(EqCache, LI, RHS, LHS) < 0)
std::swap(LHS, RHS);
return;
}
// Do the rough sort by complexity.
- std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI));
+ std::stable_sort(Ops.begin(), Ops.end(),
+ [&EqCache, LI](const SCEV *LHS, const SCEV *RHS) {
+ return CompareSCEVComplexity(EqCache, LI, LHS, RHS) < 0;
+ });
// Now that we are sorted by complexity, group elements of the same
// complexity. Note that this is, at worst, N^2, but the vector is likely to
@@ -2518,6 +2588,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
if (Idx < Ops.size()) {
bool DeletedMul = false;
while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
+ if (Ops.size() > MulOpsInlineThreshold)
+ break;
// If we have an mul, expand the mul operands onto the end of the operands
// list.
Ops.erase(Ops.begin()+Idx);
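For context on what the threshold caps: this loop flattens nested multiplies into one n-ary node, e.g. ((a * b) * (c * d)) becomes (a * b * c * d), erasing each nested mul and appending its operands onto Ops. Pathological inputs can make Ops balloon into the many thousands, after which the rest of getMulExpr, which rescans Ops repeatedly, dominates compile time. Past the 1000-operand default the loop simply stops inlining and keeps the remaining mul as a single opaque operand; the result is still a correct SCEV, just less aggressively canonicalized.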
@@ -2970,9 +3042,9 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
}
const SCEV *
-ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr,
- const SmallVectorImpl<const SCEV *> &IndexExprs,
- bool InBounds) {
+ScalarEvolution::getGEPExpr(GEPOperator *GEP,
+ const SmallVectorImpl<const SCEV *> &IndexExprs) {
+ const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand());
// getSCEV(Base)->getType() has the same address space as Base->getType()
// because SCEV::getType() preserves the address space.
Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType());
@@ -2981,12 +3053,13 @@ ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr,
// flow and the no-overflow bits may not be valid for the expression in any
// context. This can be fixed similarly to how these flags are handled for
// adds.
- SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+ SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW
+ : SCEV::FlagAnyWrap;
const SCEV *TotalOffset = getZero(IntPtrTy);
- // The address space is unimportant. The first thing we do on CurTy is getting
+ // The array size is unimportant. The first thing we do on CurTy is getting
// its element type.
- Type *CurTy = PointerType::getUnqual(PointeeType);
+ Type *CurTy = ArrayType::get(GEP->getSourceElementType(), 0);
for (const SCEV *IndexExpr : IndexExprs) {
// Compute the (potentially symbolic) offset in bytes for this index.
if (StructType *STy = dyn_cast<StructType>(CurTy)) {
@@ -3311,75 +3384,47 @@ const SCEV *ScalarEvolution::getCouldNotCompute() {
return CouldNotCompute.get();
}
-
bool ScalarEvolution::checkValidity(const SCEV *S) const {
- // Helper class working with SCEVTraversal to figure out if a SCEV contains
- // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne
- // is set iff if find such SCEVUnknown.
- //
- struct FindInvalidSCEVUnknown {
- bool FindOne;
- FindInvalidSCEVUnknown() { FindOne = false; }
- bool follow(const SCEV *S) {
- switch (static_cast<SCEVTypes>(S->getSCEVType())) {
- case scConstant:
- return false;
- case scUnknown:
- if (!cast<SCEVUnknown>(S)->getValue())
- FindOne = true;
- return false;
- default:
- return true;
- }
- }
- bool isDone() const { return FindOne; }
- };
-
- FindInvalidSCEVUnknown F;
- SCEVTraversal<FindInvalidSCEVUnknown> ST(F);
- ST.visitAll(S);
+ bool ContainsNulls = SCEVExprContains(S, [](const SCEV *S) {
+ auto *SU = dyn_cast<SCEVUnknown>(S);
+ return SU && SU->getValue() == nullptr;
+ });
- return !F.FindOne;
-}
-
-namespace {
-// Helper class working with SCEVTraversal to figure out if a SCEV contains
-// a sub SCEV of scAddRecExpr type. FindInvalidSCEVUnknown::FoundOne is set
-// iff if such sub scAddRecExpr type SCEV is found.
-struct FindAddRecurrence {
- bool FoundOne;
- FindAddRecurrence() : FoundOne(false) {}
-
- bool follow(const SCEV *S) {
- switch (static_cast<SCEVTypes>(S->getSCEVType())) {
- case scAddRecExpr:
- FoundOne = true;
- case scConstant:
- case scUnknown:
- case scCouldNotCompute:
- return false;
- default:
- return true;
- }
- }
- bool isDone() const { return FoundOne; }
-};
+ return !ContainsNulls;
}
bool ScalarEvolution::containsAddRecurrence(const SCEV *S) {
- HasRecMapType::iterator I = HasRecMap.find_as(S);
+ HasRecMapType::iterator I = HasRecMap.find(S);
if (I != HasRecMap.end())
return I->second;
- FindAddRecurrence F;
- SCEVTraversal<FindAddRecurrence> ST(F);
- ST.visitAll(S);
- HasRecMap.insert({S, F.FoundOne});
- return F.FoundOne;
+ bool FoundAddRec = SCEVExprContains(S, isa<SCEVAddRecExpr, const SCEV *>);
+ HasRecMap.insert({S, FoundAddRec});
+ return FoundAddRec;
+}
+
+/// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}.
+/// If \p S is a two-operand SCEVAddExpr of the form (I + S') where I is a
+/// constant offset, return {S', I}; otherwise return {\p S, nullptr}.
+static std::pair<const SCEV *, ConstantInt *> splitAddExpr(const SCEV *S) {
+ const auto *Add = dyn_cast<SCEVAddExpr>(S);
+ if (!Add)
+ return {S, nullptr};
+
+ if (Add->getNumOperands() != 2)
+ return {S, nullptr};
+
+ auto *ConstOp = dyn_cast<SCEVConstant>(Add->getOperand(0));
+ if (!ConstOp)
+ return {S, nullptr};
+
+ return {Add->getOperand(1), ConstOp->getValue()};
}
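A quick usage sketch (hypothetical call site; a ScalarEvolution SE and an i32 Value *X are assumed in scope). Because complexity ordering puts constants first, the constant of a two-operand add sits at operand 0, which is the only shape this helper matches:

  // S is the canonical add expression (5 + %x).
  const SCEV *S = SE.getAddExpr(SE.getConstant(X->getType(), 5),
                                SE.getUnknown(X));
  const SCEV *Stripped;
  ConstantInt *Offset;
  std::tie(Stripped, Offset) = splitAddExpr(S);
  // Now Stripped is the SCEV for %x and Offset is i32 5, whereas
  // splitAddExpr(SE.getUnknown(X)) would return {%x, nullptr}.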
-/// Return the Value set from S.
-SetVector<Value *> *ScalarEvolution::getSCEVValues(const SCEV *S) {
+/// Return the ValueOffsetPair set for \p S. \p S can be represented
+/// by the value and offset from any ValueOffsetPair in the set.
+SetVector<ScalarEvolution::ValueOffsetPair> *
+ScalarEvolution::getSCEVValues(const SCEV *S) {
ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
if (SI == ExprValueMap.end())
return nullptr;
@@ -3387,24 +3432,31 @@ SetVector<Value *> *ScalarEvolution::getSCEVValues(const SCEV *S) {
if (VerifySCEVMap) {
// Check there is no dangling Value in the set returned.
for (const auto &VE : SI->second)
- assert(ValueExprMap.count(VE));
+ assert(ValueExprMap.count(VE.first));
}
#endif
return &SI->second;
}
-/// Erase Value from ValueExprMap and ExprValueMap. If ValueExprMap.erase(V) is
-/// not used together with forgetMemoizedResults(S), eraseValueFromMap should be
-/// used instead to ensure whenever V->S is removed from ValueExprMap, V is also
-/// removed from the set of ExprValueMap[S].
+/// Erase Value from ValueExprMap and ExprValueMap. ValueExprMap.erase(V)
+/// must not be called on its own; use eraseValueFromMap instead, so that V
+/// is removed from ValueExprMap and ExprValueMap at the same time.
void ScalarEvolution::eraseValueFromMap(Value *V) {
ValueExprMapType::iterator I = ValueExprMap.find_as(V);
if (I != ValueExprMap.end()) {
const SCEV *S = I->second;
- SetVector<Value *> *SV = getSCEVValues(S);
- // Remove V from the set of ExprValueMap[S]
- if (SV)
- SV->remove(V);
+ // Remove {V, 0} from the set of ExprValueMap[S]
+ if (SetVector<ValueOffsetPair> *SV = getSCEVValues(S))
+ SV->remove({V, nullptr});
+
+ // Remove {V, Offset} from the set of ExprValueMap[Stripped]
+ const SCEV *Stripped;
+ ConstantInt *Offset;
+ std::tie(Stripped, Offset) = splitAddExpr(S);
+ if (Offset != nullptr) {
+ if (SetVector<ValueOffsetPair> *SV = getSCEVValues(Stripped))
+ SV->remove({V, Offset});
+ }
ValueExprMap.erase(V);
}
}
@@ -3419,11 +3471,26 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) {
S = createSCEV(V);
// During PHI resolution, it is possible to create two SCEVs for the same
// V, so it is necessary to double-check whether V->S is inserted into
- // ValueExprMap before insert S->V into ExprValueMap.
+ // ValueExprMap before inserting S->{V, 0} into ExprValueMap.
std::pair<ValueExprMapType::iterator, bool> Pair =
ValueExprMap.insert({SCEVCallbackVH(V, this), S});
- if (Pair.second)
- ExprValueMap[S].insert(V);
+ if (Pair.second) {
+ ExprValueMap[S].insert({V, nullptr});
+
+ // If S == Stripped + Offset, add Stripped -> {V, Offset} into
+ // ExprValueMap.
+ const SCEV *Stripped = S;
+ ConstantInt *Offset = nullptr;
+ std::tie(Stripped, Offset) = splitAddExpr(S);
+ // If Stripped is a SCEVUnknown, don't bother to save
+ // Stripped -> {V, Offset}: it doesn't simplify anything and can even
+ // increase the complexity of the expansion code.
+ // If V is a GetElementPtrInst, don't save Stripped -> {V, Offset}
+ // because the expansion may then generate add/sub instead of a GEP.
+ if (Offset != nullptr && !isa<SCEVUnknown>(Stripped) &&
+ !isa<GetElementPtrInst>(V))
+ ExprValueMap[Stripped].insert({V, Offset});
+ }
}
return S;
}
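The payoff of the extra entries comes at expansion time. Reading the ValueOffsetPair convention off this code (an inference, not spelled out in the patch): an entry {V, Offset} under key Stripped means Stripped's value can be recovered as V minus Offset, so if %v already computes (%x + 4), the bare expression %x can be materialized by reusing %v instead of re-expanding %x from scratch. The two carve-outs keep this profitable: offsets against a bare SCEVUnknown buy nothing, and recording GEP-produced values can make the expander emit integer add/sub where it would otherwise emit a GEP.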
@@ -3436,8 +3503,8 @@ const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
const SCEV *S = I->second;
if (checkValidity(S))
return S;
+ eraseValueFromMap(V);
forgetMemoizedResults(S);
- ValueExprMap.erase(I);
}
return nullptr;
}
@@ -3675,8 +3742,8 @@ void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
if (!isa<PHINode>(I) ||
!isa<SCEVUnknown>(Old) ||
(I != PN && Old == SymName)) {
+ eraseValueFromMap(It->first);
forgetMemoizedResults(Old);
- ValueExprMap.erase(It);
}
}
@@ -4055,7 +4122,7 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
// to create an AddRecExpr for this PHI node. We can not keep this temporary
// as it will prevent later (possibly simpler) SCEV expressions to be added
// to the ValueExprMap.
- ValueExprMap.erase(PN);
+ eraseValueFromMap(PN);
}
return nullptr;
@@ -4168,7 +4235,9 @@ static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge,
}
const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) {
- if (PN->getNumIncomingValues() == 2) {
+ auto IsReachable =
+ [&](BasicBlock *BB) { return DT.isReachableFromEntry(BB); };
+ if (PN->getNumIncomingValues() == 2 && all_of(PN->blocks(), IsReachable)) {
const Loop *L = LI.getLoopFor(PN->getParent());
// We don't want to break LCSSA, even in a SCEV expression tree.
@@ -4244,7 +4313,7 @@ const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
std::swap(LHS, RHS);
- // fall through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
// a >s b ? a+x : b+x -> smax(a, b)+x
@@ -4267,7 +4336,7 @@ const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
std::swap(LHS, RHS);
- // fall through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
// a >u b ? a+x : b+x -> umax(a, b)+x
@@ -4332,9 +4401,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
SmallVector<const SCEV *, 4> IndexExprs;
for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
IndexExprs.push_back(getSCEV(*Index));
- return getGEPExpr(GEP->getSourceElementType(),
- getSCEV(GEP->getPointerOperand()),
- IndexExprs, GEP->isInBounds());
+ return getGEPExpr(GEP, IndexExprs);
}
uint32_t
@@ -4612,19 +4679,18 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType());
ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
- ConstantRange ZExtMaxBECountRange =
- MaxBECountRange.zextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange ZExtMaxBECountRange = MaxBECountRange.zextOrTrunc(BitWidth * 2);
ConstantRange StepSRange = getSignedRange(Step);
- ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2);
ConstantRange StartURange = getUnsignedRange(Start);
ConstantRange EndURange =
StartURange.add(MaxBECountRange.multiply(StepSRange));
// Check for unsigned overflow.
- ConstantRange ZExtStartURange = StartURange.zextOrTrunc(BitWidth * 2 + 1);
- ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange ZExtStartURange = StartURange.zextOrTrunc(BitWidth * 2);
+ ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2);
if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
ZExtEndURange) {
APInt Min = APIntOps::umin(StartURange.getUnsignedMin(),
@@ -4644,8 +4710,8 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
// Check for signed overflow. This must be done with ConstantRange
// arithmetic because we could be called from within the ScalarEvolution
// overflow checking code.
- ConstantRange SExtStartSRange = StartSRange.sextOrTrunc(BitWidth * 2 + 1);
- ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange SExtStartSRange = StartSRange.sextOrTrunc(BitWidth * 2);
+ ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2);
if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
SExtEndSRange) {
APInt Min =
@@ -4909,17 +4975,33 @@ bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) {
return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L);
}
-bool ScalarEvolution::loopHasNoAbnormalExits(const Loop *L) {
- auto Itr = LoopHasNoAbnormalExits.find(L);
- if (Itr == LoopHasNoAbnormalExits.end()) {
- auto NoAbnormalExitInBB = [&](BasicBlock *BB) {
- return all_of(*BB, [](Instruction &I) {
- return isGuaranteedToTransferExecutionToSuccessor(&I);
- });
+ScalarEvolution::LoopProperties
+ScalarEvolution::getLoopProperties(const Loop *L) {
+ typedef ScalarEvolution::LoopProperties LoopProperties;
+
+ auto Itr = LoopPropertiesCache.find(L);
+ if (Itr == LoopPropertiesCache.end()) {
+ auto HasSideEffects = [](Instruction *I) {
+ if (auto *SI = dyn_cast<StoreInst>(I))
+ return !SI->isSimple();
+
+ return I->mayHaveSideEffects();
};
- auto InsertPair = LoopHasNoAbnormalExits.insert(
- {L, all_of(L->getBlocks(), NoAbnormalExitInBB)});
+ LoopProperties LP = {/* HasNoAbnormalExits */ true,
+ /*HasNoSideEffects*/ true};
+
+ for (auto *BB : L->getBlocks())
+ for (auto &I : *BB) {
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+ LP.HasNoAbnormalExits = false;
+ if (HasSideEffects(&I))
+ LP.HasNoSideEffects = false;
+ if (!LP.HasNoAbnormalExits && !LP.HasNoSideEffects)
+ break; // We're already as pessimistic as we can get.
+ }
+
+ auto InsertPair = LoopPropertiesCache.insert({L, LP});
assert(InsertPair.second && "We just checked!");
Itr = InsertPair.first;
}
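Both properties are now computed in a single block walk and memoized per loop, with the early break firing once both answers are pessimistic. Presumably the public queries stay as thin wrappers over this cache, along these lines (a sketch of the accessor shape, not necessarily verbatim from the header):

  bool ScalarEvolution::loopHasNoAbnormalExits(const Loop *L) {
    return getLoopProperties(L).HasNoAbnormalExits;
  }
  bool ScalarEvolution::loopHasNoSideEffects(const Loop *L) {
    return getLoopProperties(L).HasNoSideEffects;
  }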
@@ -5247,6 +5329,20 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// Iteration Count Computation Code
//
+static unsigned getConstantTripCount(const SCEVConstant *ExitCount) {
+ if (!ExitCount)
+ return 0;
+
+ ConstantInt *ExitConst = ExitCount->getValue();
+
+ // Guard against huge trip counts.
+ if (ExitConst->getValue().getActiveBits() > 32)
+ return 0;
+
+ // In case of integer overflow, this returns 0, which is correct.
+ return ((unsigned)ExitConst->getZExtValue()) + 1;
+}
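Two concrete data points for this helper: an exit count of 7 (the backedge is taken seven times) yields a trip count of 8, the number of header executions. The overflow remark is load-bearing: an exit count of 2^32 - 1 passes the 32-active-bit guard, but the trailing +1 wraps the unsigned result to 0, which callers already read as "no constant trip count known", so the wrap is benign by construction.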
+
unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) {
if (BasicBlock *ExitingBB = L->getExitingBlock())
return getSmallConstantTripCount(L, ExitingBB);
@@ -5262,17 +5358,13 @@ unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
"Exiting block must actually branch out of the loop!");
const SCEVConstant *ExitCount =
dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
- if (!ExitCount)
- return 0;
-
- ConstantInt *ExitConst = ExitCount->getValue();
-
- // Guard against huge trip counts.
- if (ExitConst->getValue().getActiveBits() > 32)
- return 0;
+ return getConstantTripCount(ExitCount);
+}
- // In case of integer overflow, this returns 0, which is correct.
- return ((unsigned)ExitConst->getZExtValue()) + 1;
+unsigned ScalarEvolution::getSmallConstantMaxTripCount(Loop *L) {
+ const auto *MaxExitCount =
+ dyn_cast<SCEVConstant>(getMaxBackedgeTakenCount(L));
+ return getConstantTripCount(MaxExitCount);
}
unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) {
@@ -5351,6 +5443,10 @@ const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
return getBackedgeTakenInfo(L).getMax(this);
}
+bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) {
+ return getBackedgeTakenInfo(L).isMaxOrZero(this);
+}
+
/// Push PHI nodes in the header of the given loop onto the given Worklist.
static void
PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
@@ -5376,7 +5472,7 @@ ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) {
BackedgeTakenInfo Result =
computeBackedgeTakenCount(L, /*AllowPredicates=*/true);
- return PredicatedBackedgeTakenCounts.find(L)->second = Result;
+ return PredicatedBackedgeTakenCounts.find(L)->second = std::move(Result);
}
const ScalarEvolution::BackedgeTakenInfo &
@@ -5435,8 +5531,8 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// case, createNodeForPHI will perform the necessary updates on its
// own when it gets to that point.
if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
+ eraseValueFromMap(It->first);
forgetMemoizedResults(Old);
- ValueExprMap.erase(It);
}
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
@@ -5451,7 +5547,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// recusive call to getBackedgeTakenInfo (on a different
// loop), which would invalidate the iterator computed
// earlier.
- return BackedgeTakenCounts.find(L)->second = Result;
+ return BackedgeTakenCounts.find(L)->second = std::move(Result);
}
void ScalarEvolution::forgetLoop(const Loop *L) {
@@ -5481,8 +5577,8 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
+ eraseValueFromMap(It->first);
forgetMemoizedResults(It->second);
- ValueExprMap.erase(It);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
@@ -5495,7 +5591,7 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
for (Loop *I : *L)
forgetLoop(I);
- LoopHasNoAbnormalExits.erase(L);
+ LoopPropertiesCache.erase(L);
}
void ScalarEvolution::forgetValue(Value *V) {
@@ -5515,8 +5611,8 @@ void ScalarEvolution::forgetValue(Value *V) {
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
+ eraseValueFromMap(It->first);
forgetMemoizedResults(It->second);
- ValueExprMap.erase(It);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
@@ -5534,14 +5630,11 @@ void ScalarEvolution::forgetValue(Value *V) {
/// caller's responsibility to specify the relevant loop exit using
/// getExact(ExitingBlock, SE).
const SCEV *
-ScalarEvolution::BackedgeTakenInfo::getExact(
- ScalarEvolution *SE, SCEVUnionPredicate *Preds) const {
+ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE,
+ SCEVUnionPredicate *Preds) const {
// If any exits were not computable, the loop is not computable.
- if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
-
- // We need exactly one computable exit.
- if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
- assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
+ if (!isComplete() || ExitNotTaken.empty())
+ return SE->getCouldNotCompute();
const SCEV *BECount = nullptr;
for (auto &ENT : ExitNotTaken) {
@@ -5551,10 +5644,10 @@ ScalarEvolution::BackedgeTakenInfo::getExact(
BECount = ENT.ExactNotTaken;
else if (BECount != ENT.ExactNotTaken)
return SE->getCouldNotCompute();
- if (Preds && ENT.getPred())
- Preds->add(ENT.getPred());
+ if (Preds && !ENT.hasAlwaysTruePredicate())
+ Preds->add(ENT.Predicate.get());
- assert((Preds || ENT.hasAlwaysTruePred()) &&
+ assert((Preds || ENT.hasAlwaysTruePredicate()) &&
"Predicate should be always true!");
}
@@ -5567,7 +5660,7 @@ const SCEV *
ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
ScalarEvolution *SE) const {
for (auto &ENT : ExitNotTaken)
- if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePred())
+ if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
return ENT.ExactNotTaken;
return SE->getCouldNotCompute();
@@ -5576,21 +5669,29 @@ ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
/// getMax - Get the max backedge taken count for the loop.
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
- for (auto &ENT : ExitNotTaken)
- if (!ENT.hasAlwaysTruePred())
- return SE->getCouldNotCompute();
+ auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
+ return !ENT.hasAlwaysTruePredicate();
+ };
- return Max ? Max : SE->getCouldNotCompute();
+ if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getMax())
+ return SE->getCouldNotCompute();
+
+ return getMax();
+}
+
+bool ScalarEvolution::BackedgeTakenInfo::isMaxOrZero(ScalarEvolution *SE) const {
+ auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
+ return !ENT.hasAlwaysTruePredicate();
+ };
+ return MaxOrZero && !any_of(ExitNotTaken, PredicateNotAlwaysTrue);
}
bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
ScalarEvolution *SE) const {
- if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S))
+ if (getMax() && getMax() != SE->getCouldNotCompute() &&
+ SE->hasOperand(getMax(), S))
return true;
- if (!ExitNotTaken.ExitingBlock)
- return false;
-
for (auto &ENT : ExitNotTaken)
if (ENT.ExactNotTaken != SE->getCouldNotCompute() &&
SE->hasOperand(ENT.ExactNotTaken, S))
@@ -5602,62 +5703,31 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
- SmallVectorImpl<EdgeInfo> &ExitCounts, bool Complete, const SCEV *MaxCount)
- : Max(MaxCount) {
-
- if (!Complete)
- ExitNotTaken.setIncomplete();
-
- unsigned NumExits = ExitCounts.size();
- if (NumExits == 0) return;
-
- ExitNotTaken.ExitingBlock = ExitCounts[0].ExitBlock;
- ExitNotTaken.ExactNotTaken = ExitCounts[0].Taken;
-
- // Determine the number of ExitNotTakenExtras structures that we need.
- unsigned ExtraInfoSize = 0;
- if (NumExits > 1)
- ExtraInfoSize = 1 + std::count_if(std::next(ExitCounts.begin()),
- ExitCounts.end(), [](EdgeInfo &Entry) {
- return !Entry.Pred.isAlwaysTrue();
- });
- else if (!ExitCounts[0].Pred.isAlwaysTrue())
- ExtraInfoSize = 1;
-
- ExitNotTakenExtras *ENT = nullptr;
-
- // Allocate the ExitNotTakenExtras structures and initialize the first
- // element (ExitNotTaken).
- if (ExtraInfoSize > 0) {
- ENT = new ExitNotTakenExtras[ExtraInfoSize];
- ExitNotTaken.ExtraInfo = &ENT[0];
- *ExitNotTaken.getPred() = std::move(ExitCounts[0].Pred);
- }
-
- if (NumExits == 1)
- return;
-
- assert(ENT && "ExitNotTakenExtras is NULL while having more than one exit");
-
- auto &Exits = ExitNotTaken.ExtraInfo->Exits;
-
- // Handle the rare case of multiple computable exits.
- for (unsigned i = 1, PredPos = 1; i < NumExits; ++i) {
- ExitNotTakenExtras *Ptr = nullptr;
- if (!ExitCounts[i].Pred.isAlwaysTrue()) {
- Ptr = &ENT[PredPos++];
- Ptr->Pred = std::move(ExitCounts[i].Pred);
- }
-
- Exits.emplace_back(ExitCounts[i].ExitBlock, ExitCounts[i].Taken, Ptr);
- }
+ SmallVectorImpl<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo>
+ &&ExitCounts,
+ bool Complete, const SCEV *MaxCount, bool MaxOrZero)
+ : MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) {
+ typedef ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo EdgeExitInfo;
+ ExitNotTaken.reserve(ExitCounts.size());
+ std::transform(
+ ExitCounts.begin(), ExitCounts.end(), std::back_inserter(ExitNotTaken),
+ [&](const EdgeExitInfo &EEI) {
+ BasicBlock *ExitBB = EEI.first;
+ const ExitLimit &EL = EEI.second;
+ if (EL.Predicates.empty())
+ return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, nullptr);
+
+ std::unique_ptr<SCEVUnionPredicate> Predicate(new SCEVUnionPredicate);
+ for (auto *Pred : EL.Predicates)
+ Predicate->add(Pred);
+
+ return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, std::move(Predicate));
+ });
}
/// Invalidate this result and free the ExitNotTakenInfo array.
void ScalarEvolution::BackedgeTakenInfo::clear() {
- ExitNotTaken.ExitingBlock = nullptr;
- ExitNotTaken.ExactNotTaken = nullptr;
- delete[] ExitNotTaken.ExtraInfo;
+ ExitNotTaken.clear();
}
/// Compute the number of times the backedge of the specified loop will execute.
@@ -5667,11 +5737,14 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- SmallVector<EdgeInfo, 4> ExitCounts;
+ typedef ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo EdgeExitInfo;
+
+ SmallVector<EdgeExitInfo, 4> ExitCounts;
bool CouldComputeBECount = true;
BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
const SCEV *MustExitMaxBECount = nullptr;
const SCEV *MayExitMaxBECount = nullptr;
+ bool MustExitMaxOrZero = false;
// Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
// and compute maxBECount.
@@ -5680,17 +5753,17 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
BasicBlock *ExitBB = ExitingBlocks[i];
ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates);
- assert((AllowPredicates || EL.Pred.isAlwaysTrue()) &&
+ assert((AllowPredicates || EL.Predicates.empty()) &&
"Predicated exit limit when predicates are not allowed!");
// 1. For each exit that can be computed, add an entry to ExitCounts.
// CouldComputeBECount is true only if all exits can be computed.
- if (EL.Exact == getCouldNotCompute())
+ if (EL.ExactNotTaken == getCouldNotCompute())
// We couldn't compute an exact value for this exit, so
// we won't be able to compute an exact value for the loop.
CouldComputeBECount = false;
else
- ExitCounts.emplace_back(EdgeInfo(ExitBB, EL.Exact, EL.Pred));
+ ExitCounts.emplace_back(ExitBB, EL);
// 2. Derive the loop's MaxBECount from each exit's max number of
// non-exiting iterations. Partition the loop exits into two kinds:
@@ -5698,29 +5771,35 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
//
// If the exit dominates the loop latch, it is a LoopMustExit otherwise it
// is a LoopMayExit. If any computable LoopMustExit is found, then
- // MaxBECount is the minimum EL.Max of computable LoopMustExits. Otherwise,
- // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is
- // considered greater than any computable EL.Max.
- if (EL.Max != getCouldNotCompute() && Latch &&
+ // MaxBECount is the minimum EL.MaxNotTaken of computable
+ // LoopMustExits. Otherwise, MaxBECount is conservatively the maximum
+ // EL.MaxNotTaken, where CouldNotCompute is considered greater than any
+ // computable EL.MaxNotTaken.
+ if (EL.MaxNotTaken != getCouldNotCompute() && Latch &&
DT.dominates(ExitBB, Latch)) {
- if (!MustExitMaxBECount)
- MustExitMaxBECount = EL.Max;
- else {
+ if (!MustExitMaxBECount) {
+ MustExitMaxBECount = EL.MaxNotTaken;
+ MustExitMaxOrZero = EL.MaxOrZero;
+ } else {
MustExitMaxBECount =
- getUMinFromMismatchedTypes(MustExitMaxBECount, EL.Max);
+ getUMinFromMismatchedTypes(MustExitMaxBECount, EL.MaxNotTaken);
}
} else if (MayExitMaxBECount != getCouldNotCompute()) {
- if (!MayExitMaxBECount || EL.Max == getCouldNotCompute())
- MayExitMaxBECount = EL.Max;
+ if (!MayExitMaxBECount || EL.MaxNotTaken == getCouldNotCompute())
+ MayExitMaxBECount = EL.MaxNotTaken;
else {
MayExitMaxBECount =
- getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.Max);
+ getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.MaxNotTaken);
}
}
}
const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
(MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
- return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
+ // The loop backedge is taken either exactly MaxBECount times or zero
+ // times only when there is a single exiting block and that exit itself
+ // is taken either its maximum or zero times.
+ bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1);
+ return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount,
+ MaxBECount, MaxOrZero);
}
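A worked instance of the must/may split above: suppose a loop has exiting blocks A with MaxNotTaken = 10 and B with MaxNotTaken = 20. If A dominates the latch, every completed iteration had to survive A's test, so the backedge runs at most 10 times; must-exits therefore combine with umin. If neither block dominates the latch, an iteration may bypass either test and only umax(10, 20) = 20 is safe, with CouldNotCompute acting as plus infinity. The new MaxOrZero bit is forwarded only when there is exactly one exiting block, since "max or zero" for one of several exits says nothing about the loop as a whole.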
ScalarEvolution::ExitLimit
@@ -5825,39 +5904,40 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L,
if (EitherMayExit) {
// Both conditions must be true for the loop to continue executing.
// Choose the less conservative count.
- if (EL0.Exact == getCouldNotCompute() ||
- EL1.Exact == getCouldNotCompute())
+ if (EL0.ExactNotTaken == getCouldNotCompute() ||
+ EL1.ExactNotTaken == getCouldNotCompute())
BECount = getCouldNotCompute();
else
- BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
- if (EL0.Max == getCouldNotCompute())
- MaxBECount = EL1.Max;
- else if (EL1.Max == getCouldNotCompute())
- MaxBECount = EL0.Max;
+ BECount =
+ getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken);
+ if (EL0.MaxNotTaken == getCouldNotCompute())
+ MaxBECount = EL1.MaxNotTaken;
+ else if (EL1.MaxNotTaken == getCouldNotCompute())
+ MaxBECount = EL0.MaxNotTaken;
else
- MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
+ MaxBECount =
+ getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken);
} else {
// Both conditions must be true at the same time for the loop to exit.
// For now, be conservative.
assert(L->contains(FBB) && "Loop block has no successor in loop!");
- if (EL0.Max == EL1.Max)
- MaxBECount = EL0.Max;
- if (EL0.Exact == EL1.Exact)
- BECount = EL0.Exact;
+ if (EL0.MaxNotTaken == EL1.MaxNotTaken)
+ MaxBECount = EL0.MaxNotTaken;
+ if (EL0.ExactNotTaken == EL1.ExactNotTaken)
+ BECount = EL0.ExactNotTaken;
}
- SCEVUnionPredicate NP;
- NP.add(&EL0.Pred);
- NP.add(&EL1.Pred);
// There are cases (e.g. PR26207) where computeExitLimitFromCond is able
// to be more aggressive when computing BECount than when computing
- // MaxBECount. In these cases it is possible for EL0.Exact and EL1.Exact
- // to match, but for EL0.Max and EL1.Max to not.
+ // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and
+ // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken
+ // to not.
if (isa<SCEVCouldNotCompute>(MaxBECount) &&
!isa<SCEVCouldNotCompute>(BECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount, NP);
+ return ExitLimit(BECount, MaxBECount, false,
+ {&EL0.Predicates, &EL1.Predicates});
}
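For intuition on the EitherMayExit arm: with a condition like (a && b) the loop runs only while both hold, so it stops at the first failure; if a alone would permit 5 backedges and b alone 7, together they permit umin(5, 7) = 5. The final clamp reflects the PR26207 asymmetry called out in the comment: the exact computation can succeed where the max computation gives up, and a known exact count is itself a valid upper bound, so MaxBECount is set to BECount in that case.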
if (BO->getOpcode() == Instruction::Or) {
// Recurse on the operands of the or.
@@ -5873,31 +5953,31 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L,
if (EitherMayExit) {
// Both conditions must be false for the loop to continue executing.
// Choose the less conservative count.
- if (EL0.Exact == getCouldNotCompute() ||
- EL1.Exact == getCouldNotCompute())
+ if (EL0.ExactNotTaken == getCouldNotCompute() ||
+ EL1.ExactNotTaken == getCouldNotCompute())
BECount = getCouldNotCompute();
else
- BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
- if (EL0.Max == getCouldNotCompute())
- MaxBECount = EL1.Max;
- else if (EL1.Max == getCouldNotCompute())
- MaxBECount = EL0.Max;
+ BECount =
+ getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken);
+ if (EL0.MaxNotTaken == getCouldNotCompute())
+ MaxBECount = EL1.MaxNotTaken;
+ else if (EL1.MaxNotTaken == getCouldNotCompute())
+ MaxBECount = EL0.MaxNotTaken;
else
- MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
+ MaxBECount =
+ getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken);
} else {
// Both conditions must be false at the same time for the loop to exit.
// For now, be conservative.
assert(L->contains(TBB) && "Loop block has no successor in loop!");
- if (EL0.Max == EL1.Max)
- MaxBECount = EL0.Max;
- if (EL0.Exact == EL1.Exact)
- BECount = EL0.Exact;
+ if (EL0.MaxNotTaken == EL1.MaxNotTaken)
+ MaxBECount = EL0.MaxNotTaken;
+ if (EL0.ExactNotTaken == EL1.ExactNotTaken)
+ BECount = EL0.ExactNotTaken;
}
- SCEVUnionPredicate NP;
- NP.add(&EL0.Pred);
- NP.add(&EL1.Pred);
- return ExitLimit(BECount, MaxBECount, NP);
+ return ExitLimit(BECount, MaxBECount, false,
+ {&EL0.Predicates, &EL1.Predicates});
}
}
@@ -5979,8 +6059,8 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
if (AddRec->getLoop() == L) {
// Form the constant range.
- ConstantRange CompRange(
- ICmpInst::makeConstantRange(Cond, RHSC->getAPInt()));
+ ConstantRange CompRange =
+ ConstantRange::makeExactICmpRegion(Cond, RHSC->getAPInt());
const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
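makeExactICmpRegion(Pred, C) produces exactly the set of values x satisfying "x Pred C", which is the contract getNumIterationsInRange wants. Two examples on i32 (illustrative, not from this patch):

  // {x : x u< 8} is the half-open range [0, 8)
  ConstantRange R1 = ConstantRange::makeExactICmpRegion(
      ICmpInst::ICMP_ULT, APInt(32, 8));
  // {x : x s> -1} is [0, 2^31), i.e. all non-negative i32 values
  ConstantRange R2 = ConstantRange::makeExactICmpRegion(
      ICmpInst::ICMP_SGT, APInt(32, -1, /*isSigned=*/true));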
@@ -6184,7 +6264,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
// %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ]
// %iv.shifted = lshr i32 %iv, <positive constant>
//
- // Return true on a succesful match. Return the corresponding PHI node (%iv
+ // Return true on a successful match. Return the corresponding PHI node (%iv
// above) in PNOut and the opcode of the shift operation in OpCodeOut.
auto MatchShiftRecurrence =
[&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) {
@@ -6282,8 +6362,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
const SCEV *UpperBound =
getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
- SCEVUnionPredicate P;
- return ExitLimit(getCouldNotCompute(), UpperBound, P);
+ return ExitLimit(getCouldNotCompute(), UpperBound, false);
}
return getCouldNotCompute();
@@ -7044,7 +7123,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// effectively V != 0. We know and take advantage of the fact that this
// expression is only ever used in a comparison-against-zero context.
- SCEVUnionPredicate P;
+ SmallPtrSet<const SCEVPredicate *, 4> Predicates;
// If the value is a constant
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
// If the value is already zero, the branch will execute zero times.
@@ -7057,7 +7136,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
// algorithm below.
- AddRec = convertSCEVToAddRecWithPredicates(V, L, P);
+ AddRec = convertSCEVToAddRecWithPredicates(V, L, Predicates);
if (!AddRec || AddRec->getLoop() != L)
return getCouldNotCompute();
@@ -7079,7 +7158,8 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// should not accept a root of 2.
const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
if (Val->isZero())
- return ExitLimit(R1, R1, P); // We found a quadratic root!
+ // We found a quadratic root!
+ return ExitLimit(R1, R1, false, Predicates);
}
}
return getCouldNotCompute();
@@ -7136,7 +7216,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
else
MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
: -CR.getUnsignedMin());
- return ExitLimit(Distance, MaxBECount, P);
+ return ExitLimit(Distance, MaxBECount, false, Predicates);
}
// As a special case, handle the instance where Step is a positive power of
@@ -7191,7 +7271,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
const SCEV *Limit =
getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy);
- return ExitLimit(Limit, Limit, P);
+ return ExitLimit(Limit, Limit, false, Predicates);
}
}
@@ -7204,14 +7284,14 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
loopHasNoAbnormalExits(AddRec->getLoop())) {
const SCEV *Exact =
getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
- return ExitLimit(Exact, Exact, P);
+ return ExitLimit(Exact, Exact, false, Predicates);
}
// Then, try to solve the above equation provided that Start is constant.
if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) {
const SCEV *E = SolveLinEquationWithOverflow(
StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this);
- return ExitLimit(E, E, P);
+ return ExitLimit(E, E, false, Predicates);
}
return getCouldNotCompute();
}
@@ -7323,149 +7403,77 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
// cases, and canonicalize *-or-equal comparisons to regular comparisons.
if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
const APInt &RA = RC->getAPInt();
- switch (Pred) {
- default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
- case ICmpInst::ICMP_EQ:
- case ICmpInst::ICMP_NE:
- // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
- if (!RA)
- if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
- if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
- if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
- ME->getOperand(0)->isAllOnesValue()) {
- RHS = AE->getOperand(1);
- LHS = ME->getOperand(1);
- Changed = true;
- }
- break;
- case ICmpInst::ICMP_UGE:
- if ((RA - 1).isMinValue()) {
- Pred = ICmpInst::ICMP_NE;
- RHS = getConstant(RA - 1);
- Changed = true;
- break;
- }
- if (RA.isMaxValue()) {
- Pred = ICmpInst::ICMP_EQ;
- Changed = true;
- break;
- }
- if (RA.isMinValue()) goto trivially_true;
- Pred = ICmpInst::ICMP_UGT;
- RHS = getConstant(RA - 1);
- Changed = true;
- break;
- case ICmpInst::ICMP_ULE:
- if ((RA + 1).isMaxValue()) {
- Pred = ICmpInst::ICMP_NE;
- RHS = getConstant(RA + 1);
- Changed = true;
- break;
- }
- if (RA.isMinValue()) {
- Pred = ICmpInst::ICMP_EQ;
- Changed = true;
- break;
- }
- if (RA.isMaxValue()) goto trivially_true;
+ bool SimplifiedByConstantRange = false;
- Pred = ICmpInst::ICMP_ULT;
- RHS = getConstant(RA + 1);
- Changed = true;
- break;
- case ICmpInst::ICMP_SGE:
- if ((RA - 1).isMinSignedValue()) {
- Pred = ICmpInst::ICMP_NE;
- RHS = getConstant(RA - 1);
- Changed = true;
- break;
- }
- if (RA.isMaxSignedValue()) {
- Pred = ICmpInst::ICMP_EQ;
- Changed = true;
- break;
+ if (!ICmpInst::isEquality(Pred)) {
+ ConstantRange ExactCR = ConstantRange::makeExactICmpRegion(Pred, RA);
+ if (ExactCR.isFullSet())
+ goto trivially_true;
+ else if (ExactCR.isEmptySet())
+ goto trivially_false;
+
+ APInt NewRHS;
+ CmpInst::Predicate NewPred;
+ if (ExactCR.getEquivalentICmp(NewPred, NewRHS) &&
+ ICmpInst::isEquality(NewPred)) {
+ // We were able to convert an inequality to an equality.
+ Pred = NewPred;
+ RHS = getConstant(NewRHS);
+ Changed = SimplifiedByConstantRange = true;
}
- if (RA.isMinSignedValue()) goto trivially_true;
+ }
- Pred = ICmpInst::ICMP_SGT;
- RHS = getConstant(RA - 1);
- Changed = true;
- break;
- case ICmpInst::ICMP_SLE:
- if ((RA + 1).isMaxSignedValue()) {
- Pred = ICmpInst::ICMP_NE;
- RHS = getConstant(RA + 1);
- Changed = true;
+ if (!SimplifiedByConstantRange) {
+ switch (Pred) {
+ default:
break;
- }
- if (RA.isMinSignedValue()) {
- Pred = ICmpInst::ICMP_EQ;
- Changed = true;
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
+ if (!RA)
+ if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
+ if (const SCEVMulExpr *ME =
+ dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
+ if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
+ ME->getOperand(0)->isAllOnesValue()) {
+ RHS = AE->getOperand(1);
+ LHS = ME->getOperand(1);
+ Changed = true;
+ }
break;
- }
- if (RA.isMaxSignedValue()) goto trivially_true;
- Pred = ICmpInst::ICMP_SLT;
- RHS = getConstant(RA + 1);
- Changed = true;
- break;
- case ICmpInst::ICMP_UGT:
- if (RA.isMinValue()) {
- Pred = ICmpInst::ICMP_NE;
+
+ // The "Should have been caught earlier!" messages refer to the fact
+ // that the ExactCR.isFullSet() or ExactCR.isEmptySet() check above
+ // should have fired on the corresponding cases, and canonicalized the
+ // check to trivially_true or trivially_false.
+
+ case ICmpInst::ICMP_UGE:
+ assert(!RA.isMinValue() && "Should have been caught earlier!");
+ Pred = ICmpInst::ICMP_UGT;
+ RHS = getConstant(RA - 1);
Changed = true;
break;
- }
- if ((RA + 1).isMaxValue()) {
- Pred = ICmpInst::ICMP_EQ;
+ case ICmpInst::ICMP_ULE:
+ assert(!RA.isMaxValue() && "Should have been caught earlier!");
+ Pred = ICmpInst::ICMP_ULT;
RHS = getConstant(RA + 1);
Changed = true;
break;
- }
- if (RA.isMaxValue()) goto trivially_false;
- break;
- case ICmpInst::ICMP_ULT:
- if (RA.isMaxValue()) {
- Pred = ICmpInst::ICMP_NE;
- Changed = true;
- break;
- }
- if ((RA - 1).isMinValue()) {
- Pred = ICmpInst::ICMP_EQ;
+ case ICmpInst::ICMP_SGE:
+ assert(!RA.isMinSignedValue() && "Should have been caught earlier!");
+ Pred = ICmpInst::ICMP_SGT;
RHS = getConstant(RA - 1);
Changed = true;
break;
- }
- if (RA.isMinValue()) goto trivially_false;
- break;
- case ICmpInst::ICMP_SGT:
- if (RA.isMinSignedValue()) {
- Pred = ICmpInst::ICMP_NE;
- Changed = true;
- break;
- }
- if ((RA + 1).isMaxSignedValue()) {
- Pred = ICmpInst::ICMP_EQ;
+ case ICmpInst::ICMP_SLE:
+ assert(!RA.isMaxSignedValue() && "Should have been caught earlier!");
+ Pred = ICmpInst::ICMP_SLT;
RHS = getConstant(RA + 1);
Changed = true;
break;
}
- if (RA.isMaxSignedValue()) goto trivially_false;
- break;
- case ICmpInst::ICMP_SLT:
- if (RA.isMaxSignedValue()) {
- Pred = ICmpInst::ICMP_NE;
- Changed = true;
- break;
- }
- if ((RA - 1).isMinSignedValue()) {
- Pred = ICmpInst::ICMP_EQ;
- RHS = getConstant(RA - 1);
- Changed = true;
- break;
- }
- if (RA.isMinSignedValue()) goto trivially_false;
- break;
}
}
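The constant-range rewrite above subsumes the boundary cases that used to be hand-rolled per predicate. For instance, ICMP_ULT with RA = 1 has the exact region {0}, which getEquivalentICmp reports as ICMP_EQ 0; symmetrically, ICMP_UGT with RA = UINT_MAX - 1 becomes ICMP_EQ UINT_MAX, and ICMP_UGE with RA = 1 becomes ICMP_NE 0. Full and empty regions never reach that call because they already branched to trivially_true / trivially_false.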
@@ -8025,34 +8033,16 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
return false;
}
-namespace {
-/// RAII wrapper to prevent recursive application of isImpliedCond.
-/// ScalarEvolution's PendingLoopPredicates set must be empty unless we are
-/// currently evaluating isImpliedCond.
-struct MarkPendingLoopPredicate {
- Value *Cond;
- DenseSet<Value*> &LoopPreds;
- bool Pending;
-
- MarkPendingLoopPredicate(Value *C, DenseSet<Value*> &LP)
- : Cond(C), LoopPreds(LP) {
- Pending = !LoopPreds.insert(Cond).second;
- }
- ~MarkPendingLoopPredicate() {
- if (!Pending)
- LoopPreds.erase(Cond);
- }
-};
-} // end anonymous namespace
-
bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
Value *FoundCondValue,
bool Inverse) {
- MarkPendingLoopPredicate Mark(FoundCondValue, PendingLoopPredicates);
- if (Mark.Pending)
+ if (!PendingLoopPredicates.insert(FoundCondValue).second)
return false;
+ auto ClearOnExit =
+ make_scope_exit([&]() { PendingLoopPredicates.erase(FoundCondValue); });
+
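llvm::make_scope_exit (from llvm/ADT/ScopeExit.h) replaces the hand-written RAII struct: it runs a callable when the enclosing scope unwinds, on every return path alike. The general shape of the idiom, as a standalone sketch rather than code from this file:

  #include "llvm/ADT/DenseSet.h"
  #include "llvm/ADT/ScopeExit.h"

  static bool visit(llvm::DenseSet<llvm::Value *> &Pending, llvm::Value *Cond) {
    if (!Pending.insert(Cond).second)
      return false;                      // already in flight; don't recurse
    auto Cleanup = llvm::make_scope_exit([&] { Pending.erase(Cond); });
    // ... any early return below still erases Cond ...
    return true;
  }

Note the early return before creating the guard preserves the old Pending semantics: a re-entrant call must not erase an entry it did not insert.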
// Recursively handle And and Or conditions.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
if (BO->getOpcode() == Instruction::And) {
@@ -8237,9 +8227,8 @@ bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr,
return true;
}
-bool ScalarEvolution::computeConstantDifference(const SCEV *Less,
- const SCEV *More,
- APInt &C) {
+Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More,
+ const SCEV *Less) {
// We avoid subtracting expressions here because this function is usually
// fairly deep in the call stack (i.e. is called many times).
@@ -8248,15 +8237,15 @@ bool ScalarEvolution::computeConstantDifference(const SCEV *Less,
const auto *MAR = cast<SCEVAddRecExpr>(More);
if (LAR->getLoop() != MAR->getLoop())
- return false;
+ return None;
// We look at affine expressions only; not for correctness but to keep
// getStepRecurrence cheap.
if (!LAR->isAffine() || !MAR->isAffine())
- return false;
+ return None;
if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this))
- return false;
+ return None;
Less = LAR->getStart();
More = MAR->getStart();
@@ -8267,27 +8256,22 @@ bool ScalarEvolution::computeConstantDifference(const SCEV *Less,
if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) {
const auto &M = cast<SCEVConstant>(More)->getAPInt();
const auto &L = cast<SCEVConstant>(Less)->getAPInt();
- C = M - L;
- return true;
+ return M - L;
}
const SCEV *L, *R;
SCEV::NoWrapFlags Flags;
if (splitBinaryAdd(Less, L, R, Flags))
if (const auto *LC = dyn_cast<SCEVConstant>(L))
- if (R == More) {
- C = -(LC->getAPInt());
- return true;
- }
+ if (R == More)
+ return -(LC->getAPInt());
if (splitBinaryAdd(More, L, R, Flags))
if (const auto *LC = dyn_cast<SCEVConstant>(L))
- if (R == Less) {
- C = LC->getAPInt();
- return true;
- }
+ if (R == Less)
+ return LC->getAPInt();
- return false;
+ return None;
}
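Semantics after the signature change: computeConstantDifference(More, Less) yields More - Less when the difference is syntactically visible, and None otherwise. Illustrative expectations, assuming canonical two-operand adds:

  // computeConstantDifference((5 + %x), %x)  ==> 5
  // computeConstantDifference(%x, (5 + %x))  ==> -5
  // computeConstantDifference(%x, %y)        ==> None

As the leading comment says, it deliberately avoids getMinusSCEV: it only pattern-matches adds and affine addrecs instead of building new expressions, because it sits deep in hot call paths.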
bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
@@ -8344,22 +8328,21 @@ bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
// neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS +
// C)".
- APInt LDiff, RDiff;
- if (!computeConstantDifference(FoundLHS, LHS, LDiff) ||
- !computeConstantDifference(FoundRHS, RHS, RDiff) ||
- LDiff != RDiff)
+ Optional<APInt> LDiff = computeConstantDifference(LHS, FoundLHS);
+ Optional<APInt> RDiff = computeConstantDifference(RHS, FoundRHS);
+ if (!LDiff || !RDiff || *LDiff != *RDiff)
return false;
- if (LDiff == 0)
+ if (LDiff->isMinValue())
return true;
APInt FoundRHSLimit;
if (Pred == CmpInst::ICMP_ULT) {
- FoundRHSLimit = -RDiff;
+ FoundRHSLimit = -(*RDiff);
} else {
assert(Pred == CmpInst::ICMP_SLT && "Checked above!");
- FoundRHSLimit = APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - RDiff;
+ FoundRHSLimit = APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - *RDiff;
}
// Try to prove (1) or (2), as needed.
@@ -8469,7 +8452,7 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
case ICmpInst::ICMP_SGE:
std::swap(LHS, RHS);
- // fall through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLE:
return
// min(A, ...) <= A
@@ -8479,7 +8462,7 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
case ICmpInst::ICMP_UGE:
std::swap(LHS, RHS);
- // fall through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_ULE:
return
// min(A, ...) <= A
@@ -8550,9 +8533,8 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
// reduce the compile time impact of this optimization.
return false;
- const SCEVAddExpr *AddLHS = dyn_cast<SCEVAddExpr>(LHS);
- if (!AddLHS || AddLHS->getOperand(1) != FoundLHS ||
- !isa<SCEVConstant>(AddLHS->getOperand(0)))
+ Optional<APInt> Addend = computeConstantDifference(LHS, FoundLHS);
+ if (!Addend)
return false;
APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();
@@ -8562,10 +8544,8 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
ConstantRange FoundLHSRange =
ConstantRange::makeAllowedICmpRegion(Pred, ConstFoundRHS);
- // Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range
- // for `LHS`:
- APInt Addend = cast<SCEVConstant>(AddLHS->getOperand(0))->getAPInt();
- ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend));
+ // Since `LHS` is `FoundLHS` + `Addend`, we can compute a range for `LHS`:
+ ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(*Addend));
// We can also compute the range of values for `LHS` that satisfy the
// consequent, "`LHS` `Pred` `RHS`":
@@ -8580,6 +8560,8 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
bool IsSigned, bool NoWrap) {
+ assert(isKnownPositive(Stride) && "Positive stride expected!");
+
if (NoWrap) return false;
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
@@ -8642,17 +8624,21 @@ ScalarEvolution::ExitLimit
ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
bool ControlsExit, bool AllowPredicates) {
- SCEVUnionPredicate P;
+ SmallPtrSet<const SCEVPredicate *, 4> Predicates;
// We handle only IV < Invariant
if (!isLoopInvariant(RHS, L))
return getCouldNotCompute();
const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
- if (!IV && AllowPredicates)
+ bool PredicatedIV = false;
+
+ if (!IV && AllowPredicates) {
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
// algorithm below.
- IV = convertSCEVToAddRecWithPredicates(LHS, L, P);
+ IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates);
+ PredicatedIV = true;
+ }
// Avoid weird loops
if (!IV || IV->getLoop() != L || !IV->isAffine())
@@ -8663,61 +8649,144 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
const SCEV *Stride = IV->getStepRecurrence(*this);
- // Avoid negative or zero stride values
- if (!isKnownPositive(Stride))
- return getCouldNotCompute();
+ bool PositiveStride = isKnownPositive(Stride);
- // Avoid proven overflow cases: this will ensure that the backedge taken count
- // will not generate any unsigned overflow. Relaxed no-overflow conditions
- // exploit NoWrapFlags, allowing to optimize in presence of undefined
- // behaviors like the case of C language.
- if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+ // Avoid negative or zero stride values.
+ if (!PositiveStride) {
+ // We can compute the correct backedge taken count for loops with unknown
+ // strides if we can prove that the loop is not an infinite loop with side
+ // effects. Here's the loop structure we are trying to handle -
+ //
+ // i = start
+ // do {
+ // A[i] = i;
+ // i += s;
+ // } while (i < end);
+ //
+ // The backedge taken count for such loops is evaluated as -
+ // (max(end, start + stride) - start - 1) /u stride
+ //
+ // The additional preconditions that we need to check to prove correctness
+  // of the above formula are as follows -
+ //
+ // a) IV is either nuw or nsw depending upon signedness (indicated by the
+ // NoWrap flag).
+  // b) the loop is single-exit and has no side effects.
+  //
+ // Precondition a) implies that if the stride is negative, this is a single
+ // trip loop. The backedge taken count formula reduces to zero in this case.
+ //
+  // Precondition b) implies that the unknown stride cannot be zero; otherwise
+  // we would have UB.
+ //
+ // The positive stride case is the same as isKnownPositive(Stride) returning
+ // true (original behavior of the function).
+ //
+ // We want to make sure that the stride is truly unknown as there are edge
+ // cases where ScalarEvolution propagates no wrap flags to the
+ // post-increment/decrement IV even though the increment/decrement operation
+ // itself is wrapping. The computed backedge taken count may be wrong in
+ // such cases. This is prevented by checking that the stride is not known to
+ // be either positive or non-positive. For example, no wrap flags are
+ // propagated to the post-increment IV of this loop with a trip count of 2 -
+ //
+ // unsigned char i;
+ // for(i=127; i<128; i+=129)
+ // A[i] = i;
+ //
+ if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) ||
+ !loopHasNoSideEffects(L))
+ return getCouldNotCompute();
+
+ } else if (!Stride->isOne() &&
+ doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+ // Avoid proven overflow cases: this will ensure that the backedge taken
+ // count will not generate any unsigned overflow. Relaxed no-overflow
+    // conditions exploit NoWrapFlags, allowing us to optimize in the presence
+    // of undefined behavior, as in C.
return getCouldNotCompute();
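To see the unknown-stride formula in action (a worked sketch with made-up values): with start = 0, end = 10 and a runtime stride s = 3, the loop in the comment above stores to i = 0, 3, 6, 9 and takes the backedge three times, and the formula agrees:

      (max(end, start + stride) - start - 1) /u stride
    = (max(10, 3) - 0 - 1) /u 3
    = 9 /u 3
    = 3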
ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
: ICmpInst::ICMP_ULT;
const SCEV *Start = IV->getStart();
const SCEV *End = RHS;
- if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
+ // If the backedge is taken at least once, then it will be taken
+ // (End-Start)/Stride times (rounded up to a multiple of Stride), where Start
+ // is the LHS value of the less-than comparison the first time it is evaluated
+ // and End is the RHS.
+ const SCEV *BECountIfBackedgeTaken =
+ computeBECount(getMinusSCEV(End, Start), Stride, false);
+ // If the loop entry is guarded by the result of the backedge test of the
+ // first loop iteration, then we know the backedge will be taken at least
+ // once and so the backedge taken count is as above. If not then we use the
+  // expression (max(End,Start)-Start)/Stride to describe the backedge count:
+  // if the backedge is taken at least once, max(End,Start) is End and the
+  // result is as above; if not, max(End,Start) is Start, giving a backedge
+  // count of zero.
+ const SCEV *BECount;
+ if (isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
+ BECount = BECountIfBackedgeTaken;
+ else {
End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start);
+ BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
+ }
- const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
+ const SCEV *MaxBECount;
+ bool MaxOrZero = false;
+ if (isa<SCEVConstant>(BECount))
+ MaxBECount = BECount;
+ else if (isa<SCEVConstant>(BECountIfBackedgeTaken)) {
+ // If we know exactly how many times the backedge will be taken if it's
+ // taken at least once, then the backedge count will either be that or
+ // zero.
+ MaxBECount = BECountIfBackedgeTaken;
+ MaxOrZero = true;
+ } else {
+ // Calculate the maximum backedge count based on the range of values
+ // permitted by Start, End, and Stride.
+ APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
+ : getUnsignedRange(Start).getUnsignedMin();
- APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
- : getUnsignedRange(Start).getUnsignedMin();
+ unsigned BitWidth = getTypeSizeInBits(LHS->getType());
- APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
- : getUnsignedRange(Stride).getUnsignedMin();
+ APInt StrideForMaxBECount;
- unsigned BitWidth = getTypeSizeInBits(LHS->getType());
- APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1)
- : APInt::getMaxValue(BitWidth) - (MinStride - 1);
+ if (PositiveStride)
+ StrideForMaxBECount =
+ IsSigned ? getSignedRange(Stride).getSignedMin()
+ : getUnsignedRange(Stride).getUnsignedMin();
+ else
+ // Using a stride of 1 is safe when computing max backedge taken count for
+ // a loop with unknown stride.
+ StrideForMaxBECount = APInt(BitWidth, 1, IsSigned);
- // Although End can be a MAX expression we estimate MaxEnd considering only
- // the case End = RHS. This is safe because in the other case (End - Start)
- // is zero, leading to a zero maximum backedge taken count.
- APInt MaxEnd =
- IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
- : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
+ APInt Limit =
+ IsSigned ? APInt::getSignedMaxValue(BitWidth) - (StrideForMaxBECount - 1)
+ : APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1);
+
+ // Although End can be a MAX expression we estimate MaxEnd considering only
+ // the case End = RHS. This is safe because in the other case (End - Start)
+ // is zero, leading to a zero maximum backedge taken count.
+ APInt MaxEnd =
+ IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
+ : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
- const SCEV *MaxBECount;
- if (isa<SCEVConstant>(BECount))
- MaxBECount = BECount;
- else
MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
- getConstant(MinStride), false);
+ getConstant(StrideForMaxBECount), false);
+ }
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount, P);
+ return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates);
}
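One shape the new MaxOrZero flag is designed for (a sketch with made-up values): if the compared IV starts at some unknown n, steps by 4 and exits once it reaches n + 12, then BECountIfBackedgeTaken is the constant (12 + 4 - 1) /u 4 = 3, while BECount itself is not constant because n + 12 may wrap below n. The exit limit then reports MaxBECount = 3 with MaxOrZero set: the backedge is taken exactly three times or not at all, which the PrintLoopInfo change further down surfaces as "actual taken count either this or zero".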
ScalarEvolution::ExitLimit
ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
bool ControlsExit, bool AllowPredicates) {
- SCEVUnionPredicate P;
+ SmallPtrSet<const SCEVPredicate *, 4> Predicates;
// We handle only IV > Invariant
if (!isLoopInvariant(RHS, L))
return getCouldNotCompute();
@@ -8727,7 +8796,7 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
// algorithm below.
- IV = convertSCEVToAddRecWithPredicates(LHS, L, P);
+ IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates);
// Avoid weird loops
if (!IV || IV->getLoop() != L || !IV->isAffine())
@@ -8787,7 +8856,7 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount, P);
+ return ExitLimit(BECount, MaxBECount, false, Predicates);
}
const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
@@ -8859,9 +8928,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
// Range.getUpper() is crossed.
SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
- const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(),
- // getNoWrapFlags(FlagNW)
- FlagAnyWrap);
+ const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), FlagAnyWrap);
// Next, solve the constructed addrec
if (auto Roots =
@@ -8905,38 +8972,15 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
return SE.getCouldNotCompute();
}
-namespace {
-struct FindUndefs {
- bool Found;
- FindUndefs() : Found(false) {}
-
- bool follow(const SCEV *S) {
- if (const SCEVUnknown *C = dyn_cast<SCEVUnknown>(S)) {
- if (isa<UndefValue>(C->getValue()))
- Found = true;
- } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
- if (isa<UndefValue>(C->getValue()))
- Found = true;
- }
-
- // Keep looking if we haven't found it yet.
- return !Found;
- }
- bool isDone() const {
- // Stop recursion if we have found an undef.
- return Found;
- }
-};
-}
-
// Return true when S contains at least an undef value.
-static inline bool
-containsUndefs(const SCEV *S) {
- FindUndefs F;
- SCEVTraversal<FindUndefs> ST(F);
- ST.visitAll(S);
-
- return F.Found;
+static inline bool containsUndefs(const SCEV *S) {
+ return SCEVExprContains(S, [](const SCEV *S) {
+ if (const auto *SU = dyn_cast<SCEVUnknown>(S))
+ return isa<UndefValue>(SU->getValue());
+ else if (const auto *SC = dyn_cast<SCEVConstant>(S))
+ return isa<UndefValue>(SC->getValue());
+ return false;
+ });
}
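The same SCEVExprContains(Root, Pred) helper replaces the remaining hand-rolled visitors later in this file; a minimal extra illustration in the same style (hypothetical query, same signature as used above):

    // Does S mention an add recurrence anywhere in its expression tree?
    bool HasAddRec =
        SCEVExprContains(S, [](const SCEV *X) { return isa<SCEVAddRecExpr>(X); });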
namespace {
@@ -8964,7 +9008,8 @@ struct SCEVCollectTerms {
: Terms(T) {}
bool follow(const SCEV *S) {
- if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S)) {
+ if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S) ||
+ isa<SCEVSignExtendExpr>(S)) {
if (!containsUndefs(S))
Terms.push_back(S);
@@ -9116,10 +9161,9 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE,
}
// Remove all SCEVConstants.
- Terms.erase(std::remove_if(Terms.begin(), Terms.end(), [](const SCEV *E) {
- return isa<SCEVConstant>(E);
- }),
- Terms.end());
+ Terms.erase(
+ remove_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); }),
+ Terms.end());
if (Terms.size() > 0)
if (!findArrayDimensionsRec(SE, Terms, Sizes))
@@ -9129,40 +9173,11 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE,
return true;
}
-// Returns true when S contains at least a SCEVUnknown parameter.
-static inline bool
-containsParameters(const SCEV *S) {
- struct FindParameter {
- bool FoundParameter;
- FindParameter() : FoundParameter(false) {}
-
- bool follow(const SCEV *S) {
- if (isa<SCEVUnknown>(S)) {
- FoundParameter = true;
- // Stop recursion: we found a parameter.
- return false;
- }
- // Keep looking.
- return true;
- }
- bool isDone() const {
- // Stop recursion if we have found a parameter.
- return FoundParameter;
- }
- };
-
- FindParameter F;
- SCEVTraversal<FindParameter> ST(F);
- ST.visitAll(S);
-
- return F.FoundParameter;
-}
// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
-static inline bool
-containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
+static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
for (const SCEV *T : Terms)
- if (containsParameters(T))
+ if (SCEVExprContains(T, isa<SCEVUnknown, const SCEV *>))
return true;
return false;
}
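Note that `isa<SCEVUnknown, const SCEV *>` above passes an explicit instantiation of `isa` as the predicate; it is equivalent to the spelled-out lambda form:

    SCEVExprContains(T, [](const SCEV *S) { return isa<SCEVUnknown>(S); });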
@@ -9493,6 +9508,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
: F(Arg.F), HasGuards(Arg.HasGuards), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT),
LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)),
ValueExprMap(std::move(Arg.ValueExprMap)),
+ PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)),
WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
PredicatedBackedgeTakenCounts(
@@ -9501,6 +9517,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
std::move(Arg.ConstantEvolutionLoopExitValue)),
ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
LoopDispositions(std::move(Arg.LoopDispositions)),
+ LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)),
BlockDispositions(std::move(Arg.BlockDispositions)),
UnsignedRanges(std::move(Arg.UnsignedRanges)),
SignedRanges(std::move(Arg.SignedRanges)),
@@ -9569,6 +9586,8 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
+ if (SE->isBackedgeTakenCountMaxOrZero(L))
+ OS << ", actual taken count either this or zero.";
} else {
OS << "Unpredictable max backedge-taken count. ";
}
@@ -9829,8 +9848,10 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
if (!DT.dominates(AR->getLoop()->getHeader(), BB))
return DoesNotDominateBlock;
+
+ // Fall through into SCEVNAryExpr handling.
+ LLVM_FALLTHROUGH;
}
- // FALL THROUGH into SCEVNAryExpr handling.
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
@@ -9883,24 +9904,7 @@ bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
}
bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
- // Search for a SCEV expression node within an expression tree.
- // Implements SCEVTraversal::Visitor.
- struct SCEVSearch {
- const SCEV *Node;
- bool IsFound;
-
- SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
-
- bool follow(const SCEV *S) {
- IsFound |= (S == Node);
- return !IsFound;
- }
- bool isDone() const { return IsFound; }
- };
-
- SCEVSearch Search(Op);
- visitAll(S, Search);
- return Search.IsFound;
+ return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; });
}
void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
@@ -10008,10 +10012,10 @@ void ScalarEvolution::verify() const {
// TODO: Verify more things.
}
-char ScalarEvolutionAnalysis::PassID;
+AnalysisKey ScalarEvolutionAnalysis::Key;
ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
return ScalarEvolution(F, AM.getResult<TargetLibraryAnalysis>(F),
AM.getResult<AssumptionAnalysis>(F),
AM.getResult<DominatorTreeAnalysis>(F),
@@ -10019,7 +10023,7 @@ ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
}
PreservedAnalyses
-ScalarEvolutionPrinterPass::run(Function &F, AnalysisManager<Function> &AM) {
+ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
AM.getResult<ScalarEvolutionAnalysis>(F).print(OS);
return PreservedAnalyses::all();
}
@@ -10106,25 +10110,34 @@ namespace {
class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
public:
- // Rewrites \p S in the context of a loop L and the predicate A.
- // If Assume is true, rewrite is free to add further predicates to A
- // such that the result will be an AddRecExpr.
+ /// Rewrites \p S in the context of a loop L and the SCEV predication
+ /// infrastructure.
+ ///
+ /// If \p Pred is non-null, the SCEV expression is rewritten to respect the
+ /// equivalences present in \p Pred.
+ ///
+ /// If \p NewPreds is non-null, rewrite is free to add further predicates to
+ /// \p NewPreds such that the result will be an AddRecExpr.
static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
- SCEVUnionPredicate &A, bool Assume) {
- SCEVPredicateRewriter Rewriter(L, SE, A, Assume);
+ SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
+ SCEVUnionPredicate *Pred) {
+ SCEVPredicateRewriter Rewriter(L, SE, NewPreds, Pred);
return Rewriter.visit(S);
}
SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE,
- SCEVUnionPredicate &P, bool Assume)
- : SCEVRewriteVisitor(SE), P(P), L(L), Assume(Assume) {}
+ SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
+ SCEVUnionPredicate *Pred)
+ : SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
- auto ExprPreds = P.getPredicatesForExpr(Expr);
- for (auto *Pred : ExprPreds)
- if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred))
- if (IPred->getLHS() == Expr)
- return IPred->getRHS();
+ if (Pred) {
+ auto ExprPreds = Pred->getPredicatesForExpr(Expr);
+ for (auto *Pred : ExprPreds)
+ if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred))
+ if (IPred->getLHS() == Expr)
+ return IPred->getRHS();
+ }
return Expr;
}
@@ -10165,32 +10178,31 @@ private:
bool addOverflowAssumption(const SCEVAddRecExpr *AR,
SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
auto *A = SE.getWrapPredicate(AR, AddedFlags);
- if (!Assume) {
+ if (!NewPreds) {
// Check if we've already made this assumption.
- if (P.implies(A))
- return true;
- return false;
+ return Pred && Pred->implies(A);
}
- P.add(A);
+ NewPreds->insert(A);
return true;
}
- SCEVUnionPredicate &P;
+ SmallPtrSetImpl<const SCEVPredicate *> *NewPreds;
+ SCEVUnionPredicate *Pred;
const Loop *L;
- bool Assume;
};
} // end anonymous namespace
const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L,
SCEVUnionPredicate &Preds) {
- return SCEVPredicateRewriter::rewrite(S, L, *this, Preds, false);
+ return SCEVPredicateRewriter::rewrite(S, L, *this, nullptr, &Preds);
}
-const SCEVAddRecExpr *
-ScalarEvolution::convertSCEVToAddRecWithPredicates(const SCEV *S, const Loop *L,
- SCEVUnionPredicate &Preds) {
- SCEVUnionPredicate TransformPreds;
- S = SCEVPredicateRewriter::rewrite(S, L, *this, TransformPreds, true);
+const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates(
+ const SCEV *S, const Loop *L,
+ SmallPtrSetImpl<const SCEVPredicate *> &Preds) {
+
+ SmallPtrSet<const SCEVPredicate *, 4> TransformPreds;
+ S = SCEVPredicateRewriter::rewrite(S, L, *this, &TransformPreds, nullptr);
auto *AddRec = dyn_cast<SCEVAddRecExpr>(S);
if (!AddRec)
@@ -10198,7 +10210,9 @@ ScalarEvolution::convertSCEVToAddRecWithPredicates(const SCEV *S, const Loop *L,
// Since the transformation was successful, we can now transfer the SCEV
// predicates.
- Preds.add(&TransformPreds);
+ for (auto *P : TransformPreds)
+ Preds.insert(P);
+
return AddRec;
}
@@ -10351,7 +10365,7 @@ const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
return Entry.second;
// We found an entry but it's stale. Rewrite the stale entry
- // acording to the current predicate.
+ // according to the current predicate.
if (Entry.second)
Expr = Entry.second;
@@ -10425,11 +10439,15 @@ bool PredicatedScalarEvolution::hasNoOverflow(
const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) {
const SCEV *Expr = this->getSCEV(V);
- auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, Preds);
+ SmallPtrSet<const SCEVPredicate *, 4> NewPreds;
+ auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, NewPreds);
if (!New)
return nullptr;
+ for (auto *P : NewPreds)
+ Preds.add(P);
+
updateGeneration();
RewriteMap[SE.getSCEV(V)] = {Generation, New};
return New;
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 61fb411d3150..7bea994121c8 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -110,9 +110,9 @@ Value *SCEVAAResult::GetBaseValue(const SCEV *S) {
return nullptr;
}
-char SCEVAA::PassID;
+AnalysisKey SCEVAA::Key;
-SCEVAAResult SCEVAA::run(Function &F, AnalysisManager<Function> &AM) {
+SCEVAAResult SCEVAA::run(Function &F, FunctionAnalysisManager &AM) {
return SCEVAAResult(AM.getResult<ScalarEvolutionAnalysis>(F));
}
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 2e45bb840946..d15a7dbd20e6 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -549,9 +549,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
if (!L->isLoopInvariant(V)) break;
- bool AnyIndexNotLoopInvariant =
- std::any_of(GepIndices.begin(), GepIndices.end(),
- [L](Value *Op) { return !L->isLoopInvariant(Op); });
+ bool AnyIndexNotLoopInvariant = any_of(
+ GepIndices, [L](Value *Op) { return !L->isLoopInvariant(Op); });
if (AnyIndexNotLoopInvariant)
break;
@@ -1183,11 +1182,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
PostIncLoopSet SavedPostIncLoops = PostIncLoops;
PostIncLoops.clear();
- // Expand code for the start value.
- Value *StartV =
- expandCodeFor(Normalized->getStart(), ExpandTy, &L->getHeader()->front());
+ // Expand code for the start value into the loop preheader.
+ assert(L->getLoopPreheader() &&
+ "Can't expand add recurrences without a loop preheader!");
+ Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
+ L->getLoopPreheader()->getTerminator());
- // StartV must be hoisted into L's preheader to dominate the new phi.
+  // StartV must have been inserted into L's preheader to dominate the new
+ // phi.
assert(!isa<Instruction>(StartV) ||
SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
L->getHeader()));
@@ -1625,9 +1627,10 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
return V;
}
-Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
- const Instruction *InsertPt) {
- SetVector<Value *> *Set = SE.getSCEVValues(S);
+ScalarEvolution::ValueOffsetPair
+SCEVExpander::FindValueInExprValueMap(const SCEV *S,
+ const Instruction *InsertPt) {
+ SetVector<ScalarEvolution::ValueOffsetPair> *Set = SE.getSCEVValues(S);
// If the expansion is not in CanonicalMode, and the SCEV contains any
// sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
if (CanonicalMode || !SE.containsAddRecurrence(S)) {
@@ -1636,21 +1639,21 @@ Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
// Choose a Value from the set which dominates the insertPt.
// insertPt should be inside the Value's parent loop so as not to break
// the LCSSA form.
- for (auto const &Ent : *Set) {
+ for (auto const &VOPair : *Set) {
+ Value *V = VOPair.first;
+ ConstantInt *Offset = VOPair.second;
Instruction *EntInst = nullptr;
- if (Ent && isa<Instruction>(Ent) &&
- (EntInst = cast<Instruction>(Ent)) &&
- S->getType() == Ent->getType() &&
+ if (V && isa<Instruction>(V) && (EntInst = cast<Instruction>(V)) &&
+ S->getType() == V->getType() &&
EntInst->getFunction() == InsertPt->getFunction() &&
SE.DT.dominates(EntInst, InsertPt) &&
(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
- SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) {
- return Ent;
- }
+ SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
+ return {V, Offset};
}
}
}
- return nullptr;
+ return {nullptr, nullptr};
}
// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
@@ -1698,11 +1701,33 @@ Value *SCEVExpander::expand(const SCEV *S) {
Builder.SetInsertPoint(InsertPt);
// Expand the expression into instructions.
- Value *V = FindValueInExprValueMap(S, InsertPt);
+ ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, InsertPt);
+ Value *V = VO.first;
if (!V)
V = visit(S);
-
+ else if (VO.second) {
+ if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) {
+ Type *Ety = Vty->getPointerElementType();
+ int64_t Offset = VO.second->getSExtValue();
+ int64_t ESize = SE.getTypeSizeInBits(Ety);
+ if ((Offset * 8) % ESize == 0) {
+ ConstantInt *Idx =
+ ConstantInt::getSigned(VO.second->getType(), -(Offset * 8) / ESize);
+ V = Builder.CreateGEP(Ety, V, Idx, "scevgep");
+ } else {
+ ConstantInt *Idx =
+ ConstantInt::getSigned(VO.second->getType(), -Offset);
+ unsigned AS = Vty->getAddressSpace();
+ V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
+ V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
+ "uglygep");
+ V = Builder.CreateBitCast(V, Vty);
+ }
+ } else {
+ V = Builder.CreateSub(V, VO.second);
+ }
+ }
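Concretely (illustrative values, not from the patch): if the cached pair is (V, Offset) with V of type i64* and Offset = 16 bytes, then ESize = 64, (16 * 8) % 64 == 0, and Idx = -(16 * 8) / 64 = -2, so the reuse path emits

    %scevgep = getelementptr i64, i64* %V, i64 -2

recovering S = V - 16 bytes from the cached expansion. An Offset of, say, 3 fails the divisibility test and takes the i8 "uglygep" path, which performs the same subtraction bytewise.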
// Remember the expanded value for this SCEV at this location.
//
// This is independent of PostIncLoops. The mapped value simply materializes
@@ -1887,8 +1912,18 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
return NumElim;
}
-Value *SCEVExpander::findExistingExpansion(const SCEV *S,
- const Instruction *At, Loop *L) {
+Value *SCEVExpander::getExactExistingExpansion(const SCEV *S,
+ const Instruction *At, Loop *L) {
+ Optional<ScalarEvolution::ValueOffsetPair> VO =
+ getRelatedExistingExpansion(S, At, L);
+ if (VO && VO.getValue().second == nullptr)
+ return VO.getValue().first;
+ return nullptr;
+}
+
+Optional<ScalarEvolution::ValueOffsetPair>
+SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
+ Loop *L) {
using namespace llvm::PatternMatch;
SmallVector<BasicBlock *, 4> ExitingBlocks;
@@ -1906,31 +1941,32 @@ Value *SCEVExpander::findExistingExpansion(const SCEV *S,
continue;
if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
- return LHS;
+ return ScalarEvolution::ValueOffsetPair(LHS, nullptr);
if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
- return RHS;
+ return ScalarEvolution::ValueOffsetPair(RHS, nullptr);
}
// Use expand's logic which is used for reusing a previous Value in
// ExprValueMap.
- if (Value *Val = FindValueInExprValueMap(S, At))
- return Val;
+ ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At);
+ if (VO.first)
+ return VO;
// There is potential to make this significantly smarter, but this simple
// heuristic already gets some interesting cases.
// Can not find suitable value.
- return nullptr;
+ return None;
}
bool SCEVExpander::isHighCostExpansionHelper(
const SCEV *S, Loop *L, const Instruction *At,
SmallPtrSetImpl<const SCEV *> &Processed) {
- // If we can find an existing value for this scev avaliable at the point "At"
+ // If we can find an existing value for this scev available at the point "At"
// then consider the expression cheap.
- if (At && findExistingExpansion(S, At, L) != nullptr)
+ if (At && getRelatedExistingExpansion(S, At, L))
return false;
// Zero/One operand expressions
@@ -1978,7 +2014,7 @@ bool SCEVExpander::isHighCostExpansionHelper(
// involving division. This is just a simple search heuristic.
if (!At)
At = &ExitingBB->back();
- if (!findExistingExpansion(
+ if (!getRelatedExistingExpansion(
SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L))
return true;
}
diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp
index 82e65a1f2088..833c6e09f6fd 100644
--- a/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/lib/Analysis/ScopedNoAliasAA.cpp
@@ -127,9 +127,8 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS1,
return AAResultBase::getModRefInfo(CS1, CS2);
}
-void ScopedNoAliasAAResult::collectMDInDomain(
- const MDNode *List, const MDNode *Domain,
- SmallPtrSetImpl<const MDNode *> &Nodes) const {
+static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
+ SmallPtrSetImpl<const MDNode *> &Nodes) {
for (const MDOperand &MDOp : List->operands())
if (const MDNode *MD = dyn_cast<MDNode>(MDOp))
if (AliasScopeNode(MD).getDomain() == Domain)
@@ -151,12 +150,14 @@ bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
// We alias unless, for some domain, the set of noalias scopes in that domain
// is a superset of the set of alias scopes in that domain.
for (const MDNode *Domain : Domains) {
- SmallPtrSet<const MDNode *, 16> NANodes, ScopeNodes;
- collectMDInDomain(NoAlias, Domain, NANodes);
+ SmallPtrSet<const MDNode *, 16> ScopeNodes;
collectMDInDomain(Scopes, Domain, ScopeNodes);
- if (!ScopeNodes.size())
+ if (ScopeNodes.empty())
continue;
+ SmallPtrSet<const MDNode *, 16> NANodes;
+ collectMDInDomain(NoAlias, Domain, NANodes);
+
// To not alias, all of the nodes in ScopeNodes must be in NANodes.
bool FoundAll = true;
for (const MDNode *SMD : ScopeNodes)
@@ -172,10 +173,10 @@ bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
return true;
}
-char ScopedNoAliasAA::PassID;
+AnalysisKey ScopedNoAliasAA::Key;
ScopedNoAliasAAResult ScopedNoAliasAA::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
return ScopedNoAliasAAResult();
}
diff --git a/lib/Analysis/StratifiedSets.h b/lib/Analysis/StratifiedSets.h
index fd3a241d79c1..772df175b384 100644
--- a/lib/Analysis/StratifiedSets.h
+++ b/lib/Analysis/StratifiedSets.h
@@ -85,17 +85,8 @@ struct StratifiedLink {
template <typename T> class StratifiedSets {
public:
StratifiedSets() = default;
-
- // TODO: Figure out how to make MSVC not call the copy ctor here, and delete
- // it.
-
- // Can't default these due to compile errors in MSVC2013
- StratifiedSets(StratifiedSets &&Other) { *this = std::move(Other); }
- StratifiedSets &operator=(StratifiedSets &&Other) {
- Values = std::move(Other.Values);
- Links = std::move(Other.Links);
- return *this;
- }
+ StratifiedSets(StratifiedSets &&) = default;
+ StratifiedSets &operator=(StratifiedSets &&) = default;
StratifiedSets(DenseMap<T, StratifiedInfo> Map,
std::vector<StratifiedLink> Links)
diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp
index 93d537ad3abb..112118ab77eb 100644
--- a/lib/Analysis/TargetLibraryInfo.cpp
+++ b/lib/Analysis/TargetLibraryInfo.cpp
@@ -23,9 +23,10 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
"No vector functions library"),
clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
"Accelerate framework"),
- clEnumValEnd));
+ clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
+ "Intel SVML library")));
-const char *const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = {
+StringRef const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = {
#define TLI_DEFINE_STRING
#include "llvm/Analysis/TargetLibraryInfo.def"
};
@@ -52,14 +53,33 @@ static bool hasSinCosPiStret(const Triple &T) {
/// specified target triple. This should be carefully written so that a missing
/// target triple gets a sane set of defaults.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
- ArrayRef<const char *> StandardNames) {
+ ArrayRef<StringRef> StandardNames) {
// Verify that the StandardNames array is in alphabetical order.
assert(std::is_sorted(StandardNames.begin(), StandardNames.end(),
- [](const char *LHS, const char *RHS) {
- return strcmp(LHS, RHS) < 0;
+ [](StringRef LHS, StringRef RHS) {
+ return LHS < RHS;
}) &&
"TargetLibraryInfoImpl function names must be sorted");
+ bool ShouldExtI32Param = false, ShouldExtI32Return = false,
+ ShouldSignExtI32Param = false;
+ // PowerPC64, Sparc64, SystemZ need signext/zeroext on i32 parameters and
+ // returns corresponding to C-level ints and unsigned ints.
+ if (T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le ||
+ T.getArch() == Triple::sparcv9 || T.getArch() == Triple::systemz) {
+ ShouldExtI32Param = true;
+ ShouldExtI32Return = true;
+ }
+ // Mips, on the other hand, needs signext on i32 parameters corresponding
+ // to both signed and unsigned ints.
+ if (T.getArch() == Triple::mips || T.getArch() == Triple::mipsel ||
+ T.getArch() == Triple::mips64 || T.getArch() == Triple::mips64el) {
+ ShouldSignExtI32Param = true;
+ }
+ TLI.setShouldExtI32Param(ShouldExtI32Param);
+ TLI.setShouldExtI32Return(ShouldExtI32Return);
+ TLI.setShouldSignExtI32Param(ShouldSignExtI32Param);
+
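A sketch of what these knobs change in emitted declarations (assumed IR, not part of this patch): with ShouldExtI32Param and ShouldExtI32Return set, as on powerpc64le, a library-call prototype built for "int ffs(int)" would carry sign-extension attributes on the C int parameter and return, roughly:

    declare signext i32 @ffs(i32 signext)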
if (T.getArch() == Triple::r600 ||
T.getArch() == Triple::amdgcn) {
TLI.setUnavailable(LibFunc::ldexp);
@@ -322,6 +342,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// on Linux.
//
// Fall through to disable all of them.
+ LLVM_FALLTHROUGH;
default:
TLI.setUnavailable(LibFunc::exp10);
TLI.setUnavailable(LibFunc::exp10f);
@@ -429,14 +450,19 @@ TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) {
}
TargetLibraryInfoImpl::TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI)
- : CustomNames(TLI.CustomNames) {
+ : CustomNames(TLI.CustomNames), ShouldExtI32Param(TLI.ShouldExtI32Param),
+ ShouldExtI32Return(TLI.ShouldExtI32Return),
+ ShouldSignExtI32Param(TLI.ShouldSignExtI32Param) {
memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
VectorDescs = TLI.VectorDescs;
ScalarDescs = TLI.ScalarDescs;
}
TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI)
- : CustomNames(std::move(TLI.CustomNames)) {
+ : CustomNames(std::move(TLI.CustomNames)),
+ ShouldExtI32Param(TLI.ShouldExtI32Param),
+ ShouldExtI32Return(TLI.ShouldExtI32Return),
+ ShouldSignExtI32Param(TLI.ShouldSignExtI32Param) {
std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray),
AvailableArray);
VectorDescs = TLI.VectorDescs;
@@ -445,12 +471,18 @@ TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI)
TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoImpl &TLI) {
CustomNames = TLI.CustomNames;
+ ShouldExtI32Param = TLI.ShouldExtI32Param;
+ ShouldExtI32Return = TLI.ShouldExtI32Return;
+ ShouldSignExtI32Param = TLI.ShouldSignExtI32Param;
memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
return *this;
}
TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl &&TLI) {
CustomNames = std::move(TLI.CustomNames);
+ ShouldExtI32Param = TLI.ShouldExtI32Param;
+ ShouldExtI32Return = TLI.ShouldExtI32Return;
+ ShouldSignExtI32Param = TLI.ShouldSignExtI32Param;
std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray),
AvailableArray);
return *this;
@@ -469,16 +501,16 @@ static StringRef sanitizeFunctionName(StringRef funcName) {
bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName,
LibFunc::Func &F) const {
- const char *const *Start = &StandardNames[0];
- const char *const *End = &StandardNames[LibFunc::NumLibFuncs];
+ StringRef const *Start = &StandardNames[0];
+ StringRef const *End = &StandardNames[LibFunc::NumLibFuncs];
funcName = sanitizeFunctionName(funcName);
if (funcName.empty())
return false;
- const char *const *I = std::lower_bound(
- Start, End, funcName, [](const char *LHS, StringRef RHS) {
- return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
+ StringRef const *I = std::lower_bound(
+ Start, End, funcName, [](StringRef LHS, StringRef RHS) {
+ return LHS < RHS;
});
if (I != End && *I == funcName) {
F = (LibFunc::Func)(I - Start);
@@ -535,7 +567,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
- // fallthrough
+ LLVM_FALLTHROUGH;
case LibFunc::strcpy:
case LibFunc::stpcpy:
return (NumParams == 2 && FTy.getReturnType() == FTy.getParamType(0) &&
@@ -547,7 +579,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
- // fallthrough
+ LLVM_FALLTHROUGH;
case LibFunc::strncpy:
case LibFunc::stpncpy:
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
@@ -640,8 +672,9 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
- // fallthrough
+ LLVM_FALLTHROUGH;
case LibFunc::memcpy:
+ case LibFunc::mempcpy:
case LibFunc::memmove:
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0)->isPointerTy() &&
@@ -652,7 +685,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
- // fallthrough
+ LLVM_FALLTHROUGH;
case LibFunc::memset:
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0)->isPointerTy() &&
@@ -843,10 +876,10 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc::stat64:
case LibFunc::lstat64:
case LibFunc::statvfs64:
- return (NumParams >= 1 && FTy.getParamType(0)->isPointerTy() &&
+ return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
case LibFunc::dunder_isoc99_sscanf:
- return (NumParams >= 1 && FTy.getParamType(0)->isPointerTy() &&
+ return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
case LibFunc::fopen64:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
@@ -953,15 +986,18 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc::ffs:
case LibFunc::ffsl:
case LibFunc::ffsll:
+ case LibFunc::fls:
+ case LibFunc::flsl:
+ case LibFunc::flsll:
+ return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) &&
+ FTy.getParamType(0)->isIntegerTy());
+
case LibFunc::isdigit:
case LibFunc::isascii:
case LibFunc::toascii:
return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) &&
- FTy.getParamType(0)->isIntegerTy());
+ FTy.getReturnType() == FTy.getParamType(0));
- case LibFunc::fls:
- case LibFunc::flsl:
- case LibFunc::flsll:
case LibFunc::abs:
case LibFunc::labs:
case LibFunc::llabs:
@@ -1004,21 +1040,19 @@ void TargetLibraryInfoImpl::disableAllFunctions() {
}
static bool compareByScalarFnName(const VecDesc &LHS, const VecDesc &RHS) {
- return std::strncmp(LHS.ScalarFnName, RHS.ScalarFnName,
- std::strlen(RHS.ScalarFnName)) < 0;
+ return LHS.ScalarFnName < RHS.ScalarFnName;
}
static bool compareByVectorFnName(const VecDesc &LHS, const VecDesc &RHS) {
- return std::strncmp(LHS.VectorFnName, RHS.VectorFnName,
- std::strlen(RHS.VectorFnName)) < 0;
+ return LHS.VectorFnName < RHS.VectorFnName;
}
static bool compareWithScalarFnName(const VecDesc &LHS, StringRef S) {
- return std::strncmp(LHS.ScalarFnName, S.data(), S.size()) < 0;
+ return LHS.ScalarFnName < S;
}
static bool compareWithVectorFnName(const VecDesc &LHS, StringRef S) {
- return std::strncmp(LHS.VectorFnName, S.data(), S.size()) < 0;
+ return LHS.VectorFnName < S;
}
void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) {
@@ -1074,6 +1108,75 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
addVectorizableFunctions(VecFuncs);
break;
}
+ case SVML: {
+ const VecDesc VecFuncs[] = {
+ {"sin", "__svml_sin2", 2},
+ {"sin", "__svml_sin4", 4},
+ {"sin", "__svml_sin8", 8},
+
+ {"sinf", "__svml_sinf4", 4},
+ {"sinf", "__svml_sinf8", 8},
+ {"sinf", "__svml_sinf16", 16},
+
+ {"cos", "__svml_cos2", 2},
+ {"cos", "__svml_cos4", 4},
+ {"cos", "__svml_cos8", 8},
+
+ {"cosf", "__svml_cosf4", 4},
+ {"cosf", "__svml_cosf8", 8},
+ {"cosf", "__svml_cosf16", 16},
+
+ {"pow", "__svml_pow2", 2},
+ {"pow", "__svml_pow4", 4},
+ {"pow", "__svml_pow8", 8},
+
+ {"powf", "__svml_powf4", 4},
+ {"powf", "__svml_powf8", 8},
+ {"powf", "__svml_powf16", 16},
+
+ {"llvm.pow.f64", "__svml_pow2", 2},
+ {"llvm.pow.f64", "__svml_pow4", 4},
+ {"llvm.pow.f64", "__svml_pow8", 8},
+
+ {"llvm.pow.f32", "__svml_powf4", 4},
+ {"llvm.pow.f32", "__svml_powf8", 8},
+ {"llvm.pow.f32", "__svml_powf16", 16},
+
+ {"exp", "__svml_exp2", 2},
+ {"exp", "__svml_exp4", 4},
+ {"exp", "__svml_exp8", 8},
+
+ {"expf", "__svml_expf4", 4},
+ {"expf", "__svml_expf8", 8},
+ {"expf", "__svml_expf16", 16},
+
+ {"llvm.exp.f64", "__svml_exp2", 2},
+ {"llvm.exp.f64", "__svml_exp4", 4},
+ {"llvm.exp.f64", "__svml_exp8", 8},
+
+ {"llvm.exp.f32", "__svml_expf4", 4},
+ {"llvm.exp.f32", "__svml_expf8", 8},
+ {"llvm.exp.f32", "__svml_expf16", 16},
+
+ {"log", "__svml_log2", 2},
+ {"log", "__svml_log4", 4},
+ {"log", "__svml_log8", 8},
+
+ {"logf", "__svml_logf4", 4},
+ {"logf", "__svml_logf8", 8},
+ {"logf", "__svml_logf16", 16},
+
+ {"llvm.log.f64", "__svml_log2", 2},
+ {"llvm.log.f64", "__svml_log4", 4},
+ {"llvm.log.f64", "__svml_log8", 8},
+
+ {"llvm.log.f32", "__svml_logf4", 4},
+ {"llvm.log.f32", "__svml_logf8", 8},
+ {"llvm.log.f32", "__svml_logf16", 16},
+ };
+ addVectorizableFunctions(VecFuncs);
+ break;
+ }
case NoLibrary:
break;
}
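As a usage sketch against the table above (hypothetical call site; the lookup helpers already exist on TargetLibraryInfo):

    // With the SVML vector library selected, a vectorizer can ask:
    if (TLI.isFunctionVectorizable("sin", /*VF=*/4)) {
      // Resolves to "__svml_sin4" per the table above.
      StringRef VecName = TLI.getVectorizedFunction("sin", /*VF=*/4);
      (void)VecName;
    }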
@@ -1162,7 +1265,7 @@ TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass(
initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
}
-char TargetLibraryAnalysis::PassID;
+AnalysisKey TargetLibraryAnalysis::Key;
// Register the basic pass.
INITIALIZE_PASS(TargetLibraryInfoWrapperPass, "targetlibinfo",
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 52013f796c56..2a15b9b264e3 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -150,6 +150,11 @@ int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
return Cost;
}
+bool TargetTransformInfo::isFoldableMemAccessOffset(Instruction *I,
+ int64_t Offset) const {
+ return TTIImpl->isFoldableMemAccessOffset(I, Offset);
+}
+
bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
return TTIImpl->isTruncateFree(Ty1, Ty2);
}
@@ -173,6 +178,9 @@ unsigned TargetTransformInfo::getJumpBufSize() const {
bool TargetTransformInfo::shouldBuildLookupTables() const {
return TTIImpl->shouldBuildLookupTables();
}
+bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const {
+ return TTIImpl->shouldBuildLookupTablesForConstant(C);
+}
bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
@@ -186,11 +194,12 @@ bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}
-bool TargetTransformInfo::allowsMisalignedMemoryAccesses(unsigned BitWidth,
+bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
+ unsigned BitWidth,
unsigned AddressSpace,
unsigned Alignment,
bool *Fast) const {
- return TTIImpl->allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
+ return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
Alignment, Fast);
}
@@ -245,10 +254,6 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
return TTIImpl->getRegisterBitWidth(Vector);
}
-unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
- return TTIImpl->getLoadStoreVecRegBitWidth(AS);
-}
-
unsigned TargetTransformInfo::getCacheLineSize() const {
return TTIImpl->getCacheLineSize();
}
@@ -417,6 +422,44 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
return TTIImpl->areInlineCompatible(Caller, Callee);
}
+unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
+ return TTIImpl->getLoadStoreVecRegBitWidth(AS);
+}
+
+bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
+ return TTIImpl->isLegalToVectorizeLoad(LI);
+}
+
+bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
+ return TTIImpl->isLegalToVectorizeStore(SI);
+}
+
+bool TargetTransformInfo::isLegalToVectorizeLoadChain(
+ unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
+ return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
+ AddrSpace);
+}
+
+bool TargetTransformInfo::isLegalToVectorizeStoreChain(
+ unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
+ return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
+ AddrSpace);
+}
+
+unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
+ unsigned LoadSize,
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const {
+ return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
+}
+
+unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
+ unsigned StoreSize,
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const {
+ return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
+}
+
TargetTransformInfo::Concept::~Concept() {}
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
@@ -426,11 +469,11 @@ TargetIRAnalysis::TargetIRAnalysis(
: TTICallback(std::move(TTICallback)) {}
TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
- AnalysisManager<Function> &) {
+ FunctionAnalysisManager &) {
return TTICallback(F);
}
-char TargetIRAnalysis::PassID;
+AnalysisKey TargetIRAnalysis::Key;
TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
return Result(F.getParent()->getDataLayout());
@@ -457,7 +500,7 @@ TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
}
TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
- AnalysisManager<Function> DummyFAM;
+ FunctionAnalysisManager DummyFAM;
TTI = TIRA.run(F, DummyFAM);
return *TTI;
}
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 20d162a03c30..e920c4c4e6b2 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -135,34 +135,35 @@ using namespace llvm;
static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
namespace {
-/// TBAANode - This is a simple wrapper around an MDNode which provides a
-/// higher-level interface by hiding the details of how alias analysis
-/// information is encoded in its operands.
-class TBAANode {
- const MDNode *Node;
+/// This is a simple wrapper around an MDNode which provides a higher-level
+/// interface by hiding the details of how alias analysis information is encoded
+/// in its operands.
+template<typename MDNodeTy>
+class TBAANodeImpl {
+ MDNodeTy *Node;
public:
- TBAANode() : Node(nullptr) {}
- explicit TBAANode(const MDNode *N) : Node(N) {}
+ TBAANodeImpl() : Node(nullptr) {}
+ explicit TBAANodeImpl(MDNodeTy *N) : Node(N) {}
/// getNode - Get the MDNode for this TBAANode.
- const MDNode *getNode() const { return Node; }
+ MDNodeTy *getNode() const { return Node; }
/// getParent - Get this TBAANode's Alias tree parent.
- TBAANode getParent() const {
+ TBAANodeImpl<MDNodeTy> getParent() const {
if (Node->getNumOperands() < 2)
- return TBAANode();
- MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ return TBAANodeImpl<MDNodeTy>();
+ MDNodeTy *P = dyn_cast_or_null<MDNodeTy>(Node->getOperand(1));
if (!P)
- return TBAANode();
+ return TBAANodeImpl<MDNodeTy>();
// Ok, this node has a valid parent. Return it.
- return TBAANode(P);
+ return TBAANodeImpl<MDNodeTy>(P);
}
- /// TypeIsImmutable - Test if this TBAANode represents a type for objects
- /// which are not modified (by any means) in the context where this
+ /// Test if this TBAANode represents a type for objects which are
+ /// not modified (by any means) in the context where this
/// AliasAnalysis is relevant.
- bool TypeIsImmutable() const {
+ bool isTypeImmutable() const {
if (Node->getNumOperands() < 3)
return false;
ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2));
@@ -172,32 +173,40 @@ public:
}
};
+/// \name Specializations of \c TBAANodeImpl for const and non const qualified
+/// \c MDNode.
+/// @{
+typedef TBAANodeImpl<const MDNode> TBAANode;
+typedef TBAANodeImpl<MDNode> MutableTBAANode;
+/// @}
+
/// This is a simple wrapper around an MDNode which provides a
/// higher-level interface by hiding the details of how alias analysis
/// information is encoded in its operands.
-class TBAAStructTagNode {
+template<typename MDNodeTy>
+class TBAAStructTagNodeImpl {
/// This node should be created with createTBAAStructTagNode.
- const MDNode *Node;
+ MDNodeTy *Node;
public:
- explicit TBAAStructTagNode(const MDNode *N) : Node(N) {}
+ explicit TBAAStructTagNodeImpl(MDNodeTy *N) : Node(N) {}
/// Get the MDNode for this TBAAStructTagNode.
- const MDNode *getNode() const { return Node; }
+ MDNodeTy *getNode() const { return Node; }
- const MDNode *getBaseType() const {
+ MDNodeTy *getBaseType() const {
return dyn_cast_or_null<MDNode>(Node->getOperand(0));
}
- const MDNode *getAccessType() const {
+ MDNodeTy *getAccessType() const {
return dyn_cast_or_null<MDNode>(Node->getOperand(1));
}
uint64_t getOffset() const {
return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
}
- /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for
- /// objects which are not modified (by any means) in the context where this
+ /// Test if this TBAAStructTagNode represents a type for objects
+ /// which are not modified (by any means) in the context where this
/// AliasAnalysis is relevant.
- bool TypeIsImmutable() const {
+ bool isTypeImmutable() const {
if (Node->getNumOperands() < 4)
return false;
ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3));
@@ -207,6 +216,13 @@ public:
}
};
+/// \name Specializations of \c TBAAStructTagNodeImpl for const and non const
+/// qualified \c MDNodes.
+/// @{
+typedef TBAAStructTagNodeImpl<const MDNode> TBAAStructTagNode;
+typedef TBAAStructTagNodeImpl<MDNode> MutableTBAAStructTagNode;
+/// @}
+
/// This is a simple wrapper around an MDNode which provides a
/// higher-level interface by hiding the details of how alias analysis
/// information is encoded in its operands.
@@ -311,8 +327,8 @@ bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
// If this is an "immutable" type, we can assume the pointer is pointing
// to constant memory.
- if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
- (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
+ if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) ||
+ (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
return true;
return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
@@ -328,8 +344,8 @@ TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) {
// If this is an "immutable" type, we can assume the call doesn't write
// to memory.
if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
- if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
- (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
+ if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) ||
+ (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
Min = FMRB_OnlyReadsMemory;
return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
@@ -401,34 +417,31 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
return A;
// For struct-path aware TBAA, we use the access type of the tag.
- bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B);
- if (StructPath) {
- A = cast_or_null<MDNode>(A->getOperand(1));
- if (!A)
- return nullptr;
- B = cast_or_null<MDNode>(B->getOperand(1));
- if (!B)
- return nullptr;
- }
+ assert(isStructPathTBAA(A) && isStructPathTBAA(B) &&
+ "Auto upgrade should have taken care of this!");
+ A = cast_or_null<MDNode>(MutableTBAAStructTagNode(A).getAccessType());
+ if (!A)
+ return nullptr;
+ B = cast_or_null<MDNode>(MutableTBAAStructTagNode(B).getAccessType());
+ if (!B)
+ return nullptr;
SmallSetVector<MDNode *, 4> PathA;
- MDNode *T = A;
- while (T) {
- if (PathA.count(T))
+ MutableTBAANode TA(A);
+ while (TA.getNode()) {
+ if (PathA.count(TA.getNode()))
report_fatal_error("Cycle found in TBAA metadata.");
- PathA.insert(T);
- T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
- : nullptr;
+ PathA.insert(TA.getNode());
+ TA = TA.getParent();
}
SmallSetVector<MDNode *, 4> PathB;
- T = B;
- while (T) {
- if (PathB.count(T))
+ MutableTBAANode TB(B);
+ while (TB.getNode()) {
+ if (PathB.count(TB.getNode()))
report_fatal_error("Cycle found in TBAA metadata.");
- PathB.insert(T);
- T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
- : nullptr;
+ PathB.insert(TB.getNode());
+ TB = TB.getParent();
}
int IA = PathA.size() - 1;
@@ -443,11 +456,13 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
--IA;
--IB;
}
- if (!StructPath)
- return Ret;
- if (!Ret)
+ // We either did not find a match, or the only common base "type" is
+ // the root node. In either case, we don't have any useful TBAA
+ // metadata to attach.
+ if (!Ret || Ret->getNumOperands() < 2)
return nullptr;
+
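For a concrete feel (typical clang-emitted metadata, not from this patch): access tags for int and long both descend from the "omnipotent char" type node under the "Simple C/C++ TBAA" root, so the two walks meet at "omnipotent char", which has a parent operand and thus passes the check above, and a tag for it is built below. Two types that meet only at the root node fail the operand-count check, and the merged access gets no TBAA metadata at all.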
// We need to convert from a type node to a tag node.
Type *Int64 = IntegerType::get(A->getContext(), 64);
Metadata *Ops[3] = {Ret, Ret,
@@ -478,52 +493,8 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
/// Aliases - Test whether the type represented by A may alias the
/// type represented by B.
bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const {
- // Make sure that both MDNodes are struct-path aware.
- if (isStructPathTBAA(A) && isStructPathTBAA(B))
- return PathAliases(A, B);
-
- // Keep track of the root node for A and B.
- TBAANode RootA, RootB;
-
- // Climb the tree from A to see if we reach B.
- for (TBAANode T(A);;) {
- if (T.getNode() == B)
- // B is an ancestor of A.
- return true;
-
- RootA = T;
- T = T.getParent();
- if (!T.getNode())
- break;
- }
-
- // Climb the tree from B to see if we reach A.
- for (TBAANode T(B);;) {
- if (T.getNode() == A)
- // A is an ancestor of B.
- return true;
-
- RootB = T;
- T = T.getParent();
- if (!T.getNode())
- break;
- }
-
- // Neither node is an ancestor of the other.
-
- // If they have different roots, they're part of different potentially
- // unrelated type systems, so we must be conservative.
- if (RootA.getNode() != RootB.getNode())
- return true;
-
- // If they have the same root, then we've proved there's no alias.
- return false;
-}
-
-/// Test whether the struct-path tag represented by A may alias the
-/// struct-path tag represented by B.
-bool TypeBasedAAResult::PathAliases(const MDNode *A, const MDNode *B) const {
- // Verify that both input nodes are struct-path aware.
+ // Verify that both input nodes are struct-path aware. Auto-upgrade should
+ // have taken care of this.
assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware.");
assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware.");
@@ -583,9 +554,9 @@ bool TypeBasedAAResult::PathAliases(const MDNode *A, const MDNode *B) const {
return false;
}
-char TypeBasedAA::PassID;
+AnalysisKey TypeBasedAA::Key;
-TypeBasedAAResult TypeBasedAA::run(Function &F, AnalysisManager<Function> &AM) {
+TypeBasedAAResult TypeBasedAA::run(Function &F, FunctionAnalysisManager &AM) {
return TypeBasedAAResult();
}
diff --git a/lib/Analysis/TypeMetadataUtils.cpp b/lib/Analysis/TypeMetadataUtils.cpp
index 31e2b42075d6..f56754167360 100644
--- a/lib/Analysis/TypeMetadataUtils.cpp
+++ b/lib/Analysis/TypeMetadataUtils.cpp
@@ -69,8 +69,7 @@ void llvm::findDevirtualizableCallsForTypeTest(
// Find llvm.assume intrinsics for this llvm.type.test call.
for (const Use &CIU : CI->uses()) {
- auto AssumeCI = dyn_cast<CallInst>(CIU.getUser());
- if (AssumeCI) {
+ if (auto *AssumeCI = dyn_cast<CallInst>(CIU.getUser())) {
Function *F = AssumeCI->getCalledFunction();
if (F && F->getIntrinsicID() == Intrinsic::assume)
Assumes.push_back(AssumeCI);
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index f2b40787443a..2a77baec6c36 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -51,6 +51,12 @@ const unsigned MaxDepth = 6;
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
cl::Hidden, cl::init(20));
+// This optimization is known to cause performance regressions in some cases,
+// so keep it under a temporary flag for now.
+static cl::opt<bool>
+DontImproveNonNegativePhiBits("dont-improve-non-negative-phi-bits",
+ cl::Hidden, cl::init(true));
+
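Since the flag defaults to true (the improvement stays off), a hypothetical invocation that exercises it might look like:

    opt -dont-improve-non-negative-phi-bits=false -instcombine -S input.ll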
/// Returns the bitwidth of the given scalar or pointer type (if unknown returns
/// 0). For vector types, returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
@@ -80,7 +86,7 @@ struct Query {
/// isKnownNonZero, which calls computeKnownBits and ComputeSignBit and
/// isKnownToBeAPowerOfTwo (all of which can call computeKnownBits), and so
/// on.
- std::array<const Value*, MaxDepth> Excluded;
+ std::array<const Value *, MaxDepth> Excluded;
unsigned NumExcluded;
Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
@@ -119,10 +125,10 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
return nullptr;
}
-static void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+static void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
unsigned Depth, const Query &Q);
-void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+void llvm::computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
@@ -130,7 +136,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
Query(DL, AC, safeCxtI(V, CxtI), DT));
}
-bool llvm::haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL,
+bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
+ const DataLayout &DL,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
assert(LHS->getType() == RHS->getType() &&
@@ -145,10 +152,10 @@ bool llvm::haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL,
return (LHSKnownZero | RHSKnownZero).isAllOnesValue();
}
-static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+static void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
unsigned Depth, const Query &Q);
-void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+void llvm::ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
@@ -156,10 +163,11 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
Query(DL, AC, safeCxtI(V, CxtI), DT));
}
-static bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
+static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
const Query &Q);
-bool llvm::isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL, bool OrZero,
+bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
+ bool OrZero,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
@@ -167,15 +175,16 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL, bool OrZero,
Query(DL, AC, safeCxtI(V, CxtI), DT));
}
-static bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q);
+static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q);
-bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
+bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
return ::isKnownNonZero(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT));
}
-bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth,
+bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL,
+ unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
bool NonNegative, Negative;
@@ -183,7 +192,7 @@ bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth,
return NonNegative;
}
-bool llvm::isKnownPositive(Value *V, const DataLayout &DL, unsigned Depth,
+bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
if (auto *CI = dyn_cast<ConstantInt>(V))
@@ -195,7 +204,7 @@ bool llvm::isKnownPositive(Value *V, const DataLayout &DL, unsigned Depth,
isKnownNonZero(V, DL, Depth, AC, CxtI, DT);
}
-bool llvm::isKnownNegative(Value *V, const DataLayout &DL, unsigned Depth,
+bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
bool NonNegative, Negative;
@@ -203,41 +212,45 @@ bool llvm::isKnownNegative(Value *V, const DataLayout &DL, unsigned Depth,
return Negative;
}
-static bool isKnownNonEqual(Value *V1, Value *V2, const Query &Q);
+static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q);
-bool llvm::isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
- AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
+bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
+ const DataLayout &DL,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
return ::isKnownNonEqual(V1, V2, Query(DL, AC,
safeCxtI(V1, safeCxtI(V2, CxtI)),
DT));
}
-static bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth,
+static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
const Query &Q);
-bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
+bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
+ const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI, const DominatorTree *DT) {
return ::MaskedValueIsZero(V, Mask, Depth,
Query(DL, AC, safeCxtI(V, CxtI), DT));
}
-static unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q);
+static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
+ const Query &Q);
-unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout &DL,
+unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
return ::ComputeNumSignBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT));
}
-static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
+static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
+ bool NSW,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2,
unsigned Depth, const Query &Q) {
if (!Add) {
- if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
+ if (const ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
      // We know that the top bits of C-X are clear if X contains fewer bits
// than C (i.e. no wrap-around can happen). For example, 20-X is
// positive if we can prove that X is >= 0 and < 16.
@@ -311,7 +324,7 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
}
}
-static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
+static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2,
unsigned Depth, const Query &Q) {
@@ -398,7 +411,7 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
}
}
-static bool isEphemeralValueOf(Instruction *I, const Value *E) {
+static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
SmallVector<const Value *, 16> WorkSet(1, I);
SmallPtrSet<const Value *, 32> Visited;
SmallPtrSet<const Value *, 16> EphValues;
@@ -406,7 +419,7 @@ static bool isEphemeralValueOf(Instruction *I, const Value *E) {
// The instruction defining an assumption's condition itself is always
// considered ephemeral to that assumption (even if it has other
// non-ephemeral users). See r246696's test case for an example.
- if (std::find(I->op_begin(), I->op_end(), E) != I->op_end())
+ if (is_contained(I->operands(), E))
return true;
while (!WorkSet.empty()) {
@@ -415,8 +428,7 @@ static bool isEphemeralValueOf(Instruction *I, const Value *E) {
continue;
// If all uses of this value are ephemeral, then so is this value.
- if (std::all_of(V->user_begin(), V->user_end(),
- [&](const User *U) { return EphValues.count(U); })) {
+ if (all_of(V->users(), [&](const User *U) { return EphValues.count(U); })) {
if (V == E)
return true;
@@ -456,9 +468,9 @@ static bool isAssumeLikeIntrinsic(const Instruction *I) {
return false;
}
-static bool isValidAssumeForContext(Value *V, const Instruction *CxtI,
- const DominatorTree *DT) {
- Instruction *Inv = cast<Instruction>(V);
+bool llvm::isValidAssumeForContext(const Instruction *Inv,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
// There are two restrictions on the use of an assume:
// 1. The assume must dominate the context (or the control flow must
@@ -469,54 +481,42 @@ static bool isValidAssumeForContext(Value *V, const Instruction *CxtI,
// the assume).
if (DT) {
- if (DT->dominates(Inv, CxtI)) {
+ if (DT->dominates(Inv, CxtI))
return true;
- } else if (Inv->getParent() == CxtI->getParent()) {
- // The context comes first, but they're both in the same block. Make sure
- // there is nothing in between that might interrupt the control flow.
- for (BasicBlock::const_iterator I =
- std::next(BasicBlock::const_iterator(CxtI)),
- IE(Inv); I != IE; ++I)
- if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
- return false;
-
- return !isEphemeralValueOf(Inv, CxtI);
- }
+ } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
+ // We don't have a DT, but this trivially dominates.
+ return true;
+ }
+ // With or without a DT, the only remaining case we will check is if the
+ // instructions are in the same BB. Give up if that is not the case.
+ if (Inv->getParent() != CxtI->getParent())
return false;
- }
- // When we don't have a DT, we do a limited search...
- if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
- return true;
- } else if (Inv->getParent() == CxtI->getParent()) {
+  // If we have a dom tree, then we now know that the assume doesn't dominate
+ // the other instruction. If we don't have a dom tree then we can check if
+ // the assume is first in the BB.
+ if (!DT) {
// Search forward from the assume until we reach the context (or the end
// of the block); the common case is that the assume will come first.
- for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)),
+ for (auto I = std::next(BasicBlock::const_iterator(Inv)),
IE = Inv->getParent()->end(); I != IE; ++I)
if (&*I == CxtI)
return true;
-
- // The context must come first...
- for (BasicBlock::const_iterator I =
- std::next(BasicBlock::const_iterator(CxtI)),
- IE(Inv); I != IE; ++I)
- if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
- return false;
-
- return !isEphemeralValueOf(Inv, CxtI);
}
- return false;
-}
+ // The context comes first, but they're both in the same block. Make sure
+ // there is nothing in between that might interrupt the control flow.
+ for (BasicBlock::const_iterator I =
+ std::next(BasicBlock::const_iterator(CxtI)), IE(Inv);
+ I != IE; ++I)
+ if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
+ return false;
-bool llvm::isValidAssumeForContext(const Instruction *I,
- const Instruction *CxtI,
- const DominatorTree *DT) {
- return ::isValidAssumeForContext(const_cast<Instruction *>(I), CxtI, DT);
+ return !isEphemeralValueOf(Inv, CxtI);
}
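
The rewritten check above splits into three cases: dominance (directly, or trivially via a single-predecessor block), a forward scan when no dominator tree is available, and a gap scan when the context precedes the assume in the same block. A rough standalone model of the same-block half, with a hypothetical Inst type standing in for LLVM instructions (the ephemeral-value check is omitted):

#include <cassert>
#include <vector>

// Stand-in for an instruction: only the one property the scan cares about.
struct Inst {
  bool SafeToSpeculate; // Models isSafeToSpeculativelyExecute(&I).
};

// Same-block half of the check: if the assume comes first, it is valid; if
// the context comes first, nothing between them may interrupt control flow.
static bool validInSameBlock(const std::vector<Inst> &BB, unsigned AssumeIdx,
                             unsigned CtxIdx) {
  if (AssumeIdx <= CtxIdx)
    return true;
  for (unsigned I = CtxIdx + 1; I < AssumeIdx; ++I)
    if (!BB[I].SafeToSpeculate)
      return false;
  return true;
}

int main() {
  std::vector<Inst> BB = {{true}, {false}, {true}}; // BB[1] may throw, say.
  assert(validInSameBlock(BB, 0, 2));  // Assume before context: fine.
  assert(!validInSameBlock(BB, 2, 0)); // Unsafe instruction in between.
  return 0;
}
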
-static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
+static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
APInt &KnownOne, unsigned Depth,
const Query &Q) {
// Use of assumptions is context-sensitive. If we don't have a context, we
@@ -788,11 +788,11 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// shift amount, compute the implied known-zero or known-one bits of the shift
// operator's result respectively for that shift amount. The results from calling
// KZF and KOF are conservatively combined for all permitted shift amounts.
-template <typename KZFunctor, typename KOFunctor>
-static void computeKnownBitsFromShiftOperator(Operator *I,
- APInt &KnownZero, APInt &KnownOne,
- APInt &KnownZero2, APInt &KnownOne2,
- unsigned Depth, const Query &Q, KZFunctor KZF, KOFunctor KOF) {
+static void computeKnownBitsFromShiftOperator(
+ const Operator *I, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2,
+ APInt &KnownOne2, unsigned Depth, const Query &Q,
+ function_ref<APInt(const APInt &, unsigned)> KZF,
+ function_ref<APInt(const APInt &, unsigned)> KOF) {
unsigned BitWidth = KnownZero.getBitWidth();
if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
@@ -801,6 +801,14 @@ static void computeKnownBitsFromShiftOperator(Operator *I,
computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
KnownZero = KZF(KnownZero, ShiftAmt);
KnownOne = KOF(KnownOne, ShiftAmt);
+    // If there is a conflict between KnownZero and KnownOne, this must be an
+    // overflowing left shift, so the shift result is undefined. Clear the
+    // KnownZero and KnownOne bits so that other code can propagate this undef.
+ if ((KnownZero & KnownOne) != 0) {
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
+ }
+
return;
}
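
The conflict can only appear once the callbacks inject extra facts, such as the nsw sign-bit handling added further down for shl. A self-checking sketch with plain 8-bit integers in place of APInt (the known-bit masks are made-up values):

#include <cassert>
#include <cstdint>

int main() {
  // 8-bit first operand: sign bit known zero, bit 6 known one.
  uint8_t KnownZero = 0x80, KnownOne = 0x40;
  const unsigned ShiftAmt = 1;
  const bool NSW = true;

  // Shift the known bits the way the shl callbacks do.
  uint8_t KZ = (uint8_t)((KnownZero << ShiftAmt) | ((1u << ShiftAmt) - 1));
  uint8_t KO = (uint8_t)(KnownOne << ShiftAmt);
  if (NSW && (KnownZero & 0x80)) // nsw: the result keeps the operand's sign.
    KZ |= 0x80;

  // Bit 7 is now claimed both zero and one, so this nsw shl must overflow;
  // treat the result as undefined and clear both sets.
  assert((KZ & KO) != 0);
  KZ = 0;
  KO = 0;
  return 0;
}
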
@@ -866,7 +874,7 @@ static void computeKnownBitsFromShiftOperator(Operator *I,
}
}
-static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
+static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
APInt &KnownOne, unsigned Depth,
const Query &Q) {
unsigned BitWidth = KnownZero.getBitWidth();
@@ -950,14 +958,64 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
break;
}
- case Instruction::Select:
+ case Instruction::Select: {
computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q);
computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q);
+ const Value *LHS;
+ const Value *RHS;
+ SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
+ if (SelectPatternResult::isMinOrMax(SPF)) {
+ computeKnownBits(RHS, KnownZero, KnownOne, Depth + 1, Q);
+ computeKnownBits(LHS, KnownZero2, KnownOne2, Depth + 1, Q);
+ } else {
+ computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q);
+ }
+
+ unsigned MaxHighOnes = 0;
+ unsigned MaxHighZeros = 0;
+ if (SPF == SPF_SMAX) {
+ // If both sides are negative, the result is negative.
+ if (KnownOne[BitWidth - 1] && KnownOne2[BitWidth - 1])
+ // We can derive a lower bound on the result by taking the max of the
+ // leading one bits.
+ MaxHighOnes =
+ std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes());
+ // If either side is non-negative, the result is non-negative.
+ else if (KnownZero[BitWidth - 1] || KnownZero2[BitWidth - 1])
+ MaxHighZeros = 1;
+ } else if (SPF == SPF_SMIN) {
+ // If both sides are non-negative, the result is non-negative.
+ if (KnownZero[BitWidth - 1] && KnownZero2[BitWidth - 1])
+ // We can derive an upper bound on the result by taking the max of the
+ // leading zero bits.
+ MaxHighZeros = std::max(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ // If either side is negative, the result is negative.
+ else if (KnownOne[BitWidth - 1] || KnownOne2[BitWidth - 1])
+ MaxHighOnes = 1;
+ } else if (SPF == SPF_UMAX) {
+ // We can derive a lower bound on the result by taking the max of the
+ // leading one bits.
+ MaxHighOnes =
+ std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes());
+ } else if (SPF == SPF_UMIN) {
+ // We can derive an upper bound on the result by taking the max of the
+ // leading zero bits.
+ MaxHighZeros =
+ std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes());
+ }
+
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
+ if (MaxHighOnes > 0)
+ KnownOne |= APInt::getHighBitsSet(BitWidth, MaxHighOnes);
+ if (MaxHighZeros > 0)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, MaxHighZeros);
break;
+ }
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::FPToUI:
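
The four bounds above follow from min/max keeping the "better" operand. A brute-force confirmation of the SMAX rule for i8, using plain integers rather than APInt (the known-one masks are made-up values; two's complement assumed):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>

static unsigned leadingOnes(uint8_t V) {
  unsigned N = 0;
  for (int B = 7; B >= 0 && ((V >> B) & 1); --B)
    ++N;
  return N;
}

int main() {
  // Two smax operands, each with some leading bits known one (so both are
  // known negative).
  const uint8_t KnownOneA = 0xE0; // Top 3 bits known one.
  const uint8_t KnownOneB = 0xC0; // Top 2 bits known one.
  unsigned MaxHighOnes =
      std::max(leadingOnes(KnownOneA), leadingOnes(KnownOneB)); // == 3

  // Exhaustively confirm: every value pair consistent with those known bits
  // yields an smax with at least MaxHighOnes leading ones.
  for (unsigned A = 0; A < 256; ++A) {
    if ((A & KnownOneA) != KnownOneA)
      continue;
    for (unsigned B = 0; B < 256; ++B) {
      if ((B & KnownOneB) != KnownOneB)
        continue;
      uint8_t M = (uint8_t)std::max((int8_t)A, (int8_t)B);
      assert(leadingOnes(M) >= MaxHighOnes);
    }
  }
  printf("smax keeps at least %u leading ones\n", MaxHighOnes);
  return 0;
}
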
@@ -967,8 +1025,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
break; // Can't work with floating point.
case Instruction::PtrToInt:
case Instruction::IntToPtr:
- case Instruction::AddrSpaceCast: // Pointers could be different sizes.
- // FALL THROUGH and handle them the same as zext/trunc.
+ // Fall through and handle them the same as zext/trunc.
+ LLVM_FALLTHROUGH;
case Instruction::ZExt:
case Instruction::Trunc: {
Type *SrcTy = I->getOperand(0)->getType();
@@ -1020,13 +1078,23 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
}
case Instruction::Shl: {
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
- auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
- return (KnownZero << ShiftAmt) |
- APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0.
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ auto KZF = [BitWidth, NSW](const APInt &KnownZero, unsigned ShiftAmt) {
+ APInt KZResult =
+ (KnownZero << ShiftAmt) |
+ APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0.
+      // If this shift has the "nsw" keyword, then the result is either a
+      // poison value or has the same sign bit as the first operand.
+ if (NSW && KnownZero.isNegative())
+ KZResult.setBit(BitWidth - 1);
+ return KZResult;
};
- auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
- return KnownOne << ShiftAmt;
+ auto KOF = [BitWidth, NSW](const APInt &KnownOne, unsigned ShiftAmt) {
+ APInt KOResult = KnownOne << ShiftAmt;
+ if (NSW && KnownOne.isNegative())
+ KOResult.setBit(BitWidth - 1);
+ return KOResult;
};
computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
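
The nsw handling in both callbacks rests on one fact: a left shift that does not signed-wrap preserves the sign bit of its first operand (otherwise the result is poison). An exhaustive i8 check of that fact, as a standalone sketch:

#include <cassert>
#include <cstdint>

int main() {
  for (int X = -128; X <= 127; ++X) {
    for (unsigned C = 0; C < 8; ++C) {
      int Wide = X * (1 << C);      // Exact value of X << C, no i8 wrapping.
      if (Wide < -128 || Wide > 127)
        continue;                   // Would signed-wrap: nsw makes it poison.
      int8_t Narrow = (int8_t)Wide; // In range, so this is the i8 result.
      assert((Narrow < 0) == (X < 0)); // The sign bit is preserved.
    }
  }
  return 0;
}
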
@@ -1143,7 +1211,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
}
case Instruction::Alloca: {
- AllocaInst *AI = cast<AllocaInst>(I);
+ const AllocaInst *AI = cast<AllocaInst>(I);
unsigned Align = AI->getAlignment();
if (Align == 0)
Align = Q.DL.getABITypeAlignment(AI->getAllocatedType());
@@ -1163,7 +1231,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
gep_type_iterator GTI = gep_type_begin(I);
for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
Value *Index = I->getOperand(i);
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
// Handle struct member offset arithmetic.
// Handle case when index is vector zeroinitializer
@@ -1200,7 +1268,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
break;
}
case Instruction::PHI: {
- PHINode *P = cast<PHINode>(I);
+ const PHINode *P = cast<PHINode>(I);
// Handle the case of a simple two-predecessor recurrence PHI.
// There's a lot more that could theoretically be done here, but
// this is sufficient to catch some interesting cases.
@@ -1237,9 +1305,46 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
computeKnownBits(L, KnownZero3, KnownOne3, Depth + 1, Q);
- KnownZero = APInt::getLowBitsSet(BitWidth,
- std::min(KnownZero2.countTrailingOnes(),
- KnownZero3.countTrailingOnes()));
+ KnownZero = APInt::getLowBitsSet(
+ BitWidth, std::min(KnownZero2.countTrailingOnes(),
+ KnownZero3.countTrailingOnes()));
+
+ if (DontImproveNonNegativePhiBits)
+ break;
+
+ auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(LU);
+ if (OverflowOp && OverflowOp->hasNoSignedWrap()) {
+      // If the initial value of the recurrence is nonnegative and we are
+      // adding a nonnegative number with nsw, the result can only be
+      // nonnegative or a poison value regardless of how many times the add
+      // in the phi recurrence executes. If the initial value is negative and
+      // we are adding a negative number with nsw, the result can only be
+      // negative or a poison value. Similar arguments apply to sub and mul.
+ //
+ // (add non-negative, non-negative) --> non-negative
+ // (add negative, negative) --> negative
+ if (Opcode == Instruction::Add) {
+ if (KnownZero2.isNegative() && KnownZero3.isNegative())
+ KnownZero.setBit(BitWidth - 1);
+ else if (KnownOne2.isNegative() && KnownOne3.isNegative())
+ KnownOne.setBit(BitWidth - 1);
+ }
+
+ // (sub nsw non-negative, negative) --> non-negative
+ // (sub nsw negative, non-negative) --> negative
+ else if (Opcode == Instruction::Sub && LL == I) {
+ if (KnownZero2.isNegative() && KnownOne3.isNegative())
+ KnownZero.setBit(BitWidth - 1);
+ else if (KnownOne2.isNegative() && KnownZero3.isNegative())
+ KnownOne.setBit(BitWidth - 1);
+ }
+
+ // (mul nsw non-negative, non-negative) --> non-negative
+ else if (Opcode == Instruction::Mul && KnownZero2.isNegative() &&
+ KnownZero3.isNegative())
+ KnownZero.setBit(BitWidth - 1);
+ }
+
break;
}
}
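
Each sign rule above is the nsw analogue of ordinary integer arithmetic keeping signs. An exhaustive i8 spot-check of the add rules (sub and mul follow the same shape), standalone rather than LLVM code:

#include <cassert>

int main() {
  for (int A = -128; A <= 127; ++A) {
    for (int B = -128; B <= 127; ++B) {
      int Sum = A + B;
      if (Sum < -128 || Sum > 127)
        continue; // Signed wrap: an nsw add would yield poison here instead.
      if (A >= 0 && B >= 0)
        assert(Sum >= 0); // (add non-negative, non-negative) --> non-negative
      if (A < 0 && B < 0)
        assert(Sum < 0);  // (add negative, negative) --> negative
    }
  }
  return 0;
}
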
@@ -1284,12 +1389,12 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
// function.
if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range))
computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne);
- if (Value *RV = CallSite(I).getReturnedArgOperand()) {
+ if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) {
computeKnownBits(RV, KnownZero2, KnownOne2, Depth + 1, Q);
KnownZero |= KnownZero2;
KnownOne |= KnownOne2;
}
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::bswap:
@@ -1326,9 +1431,16 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
}
}
break;
+ case Instruction::ExtractElement:
+ // Look through extract element. At the moment we keep this simple and skip
+ // tracking the specific element. But at least we might find information
+    // valid for all elements of the vector (for example, if the vector is
+    // sign-extended, shifted, etc.).
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
+ break;
case Instruction::ExtractValue:
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
- ExtractValueInst *EVI = cast<ExtractValueInst>(I);
+ const ExtractValueInst *EVI = cast<ExtractValueInst>(I);
if (EVI->getNumIndices() != 1) break;
if (EVI->getIndices()[0] == 0) {
switch (II->getIntrinsicID()) {
@@ -1372,7 +1484,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
/// where V is a vector, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
-void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
unsigned Depth, const Query &Q) {
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
@@ -1388,9 +1500,10 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownOne.getBitWidth() == BitWidth &&
"V, KnownOne and KnownZero should have same BitWidth");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- // We know all of the bits for a constant!
- KnownOne = CI->getValue();
+ const APInt *C;
+ if (match(V, m_APInt(C))) {
+ // We know all of the bits for a scalar constant or a splat vector constant!
+ KnownOne = *C;
KnownZero = ~KnownOne;
return;
}
@@ -1402,7 +1515,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
}
// Handle a constant vector by taking the intersection of the known bits of
// each element.
- if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
// We know that CDS must be a vector of integers. Take the intersection of
// each element.
KnownZero.setAllBits(); KnownOne.setAllBits();
@@ -1415,7 +1528,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
return;
}
- if (auto *CV = dyn_cast<ConstantVector>(V)) {
+ if (const auto *CV = dyn_cast<ConstantVector>(V)) {
// We know that CV must be a vector of integers. Take the intersection of
// each element.
KnownZero.setAllBits(); KnownOne.setAllBits();
@@ -1438,6 +1551,14 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Start out not knowing anything.
KnownZero.clearAllBits(); KnownOne.clearAllBits();
+ // We can't imply anything about undefs.
+ if (isa<UndefValue>(V))
+ return;
+
+ // There's no point in looking through other users of ConstantData for
+ // assumptions. Confirm that we've handled them all.
+ assert(!isa<ConstantData>(V) && "Unhandled constant data!");
+
// Limit search depth.
// All recursive calls that increase depth must come after this.
if (Depth == MaxDepth)
@@ -1445,13 +1566,13 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
// the bits of its aliasee.
- if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
if (!GA->isInterposable())
computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, Depth + 1, Q);
return;
}
- if (Operator *I = dyn_cast<Operator>(V))
+ if (const Operator *I = dyn_cast<Operator>(V))
computeKnownBitsFromOperator(I, KnownZero, KnownOne, Depth, Q);
// Aligned pointers have trailing zeros - refine KnownZero set
@@ -1472,7 +1593,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
/// Determine whether the sign bit is known to be zero or one.
/// Convenience wrapper around computeKnownBits.
-void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
unsigned Depth, const Query &Q) {
unsigned BitWidth = getBitWidth(V->getType(), Q.DL);
if (!BitWidth) {
@@ -1491,9 +1612,9 @@ void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
/// bit set when defined. For vectors return true if every element is known to
/// be a power of two when defined. Supports values with integer or pointer
/// types and vectors of integers.
-bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
+bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
const Query &Q) {
- if (Constant *C = dyn_cast<Constant>(V)) {
+ if (const Constant *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
return OrZero;
@@ -1523,10 +1644,10 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
match(V, m_LShr(m_Value(X), m_Value()))))
return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q);
- if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
+ if (const ZExtInst *ZI = dyn_cast<ZExtInst>(V))
return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q);
- if (SelectInst *SI = dyn_cast<SelectInst>(V))
+ if (const SelectInst *SI = dyn_cast<SelectInst>(V))
return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) &&
isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q);
@@ -1544,7 +1665,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
// Adding a power-of-two or zero to the same power-of-two or zero yields
// either the original power-of-two, a larger power-of-two or zero.
if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
- OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
+ const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) {
if (match(X, m_And(m_Specific(Y), m_Value())) ||
match(X, m_And(m_Value(), m_Specific(Y))))
@@ -1590,7 +1711,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
/// to be non-null.
///
/// Currently this routine does not support vector GEPs.
-static bool isGEPKnownNonNull(GEPOperator *GEP, unsigned Depth,
+static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
const Query &Q) {
if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0)
return false;
@@ -1609,7 +1730,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, unsigned Depth,
for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
GTI != GTE; ++GTI) {
// Struct types are easy -- they must always be indexed by a constant.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
unsigned ElementIdx = OpC->getZExtValue();
const StructLayout *SL = Q.DL.getStructLayout(STy);
@@ -1649,7 +1770,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, unsigned Depth,
/// Does the 'Range' metadata (which must be a valid MD_range operand list)
/// ensure that the value it's attached to is never Value? 'RangeType' is
/// the type of the value described by the range.
-static bool rangeMetadataExcludesValue(MDNode* Ranges, const APInt& Value) {
+static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
const unsigned NumRanges = Ranges->getNumOperands() / 2;
assert(NumRanges >= 1);
for (unsigned i = 0; i < NumRanges; ++i) {
@@ -1668,7 +1789,7 @@ static bool rangeMetadataExcludesValue(MDNode* Ranges, const APInt& Value) {
/// For vectors return true if every element is known to be non-zero when
/// defined. Supports values with integer or pointer type and vectors of
/// integers.
-bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
+bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (auto *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
return false;
@@ -1712,7 +1833,7 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
if (V->getType()->isPointerTy()) {
if (isKnownNonNull(V))
return true;
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
if (isGEPKnownNonNull(GEP, Depth, Q))
return true;
}
@@ -1732,7 +1853,7 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
// if the lowest bit is shifted off the end.
if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) {
// shl nuw can't remove any non-zero bits.
- OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
+ const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
if (BO->hasNoUnsignedWrap())
return isKnownNonZero(X, Depth, Q);
@@ -1746,7 +1867,7 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
// defined if the sign bit is shifted off the end.
else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
// shr exact can only shift out zero bits.
- PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
+ const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
if (BO->isExact())
return isKnownNonZero(X, Depth, Q);
@@ -1817,7 +1938,7 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
}
// X * Y.
else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) {
- OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
+ const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
// If X and Y are non-zero then so is X * Y as long as the multiplication
// does not overflow.
if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) &&
@@ -1825,13 +1946,13 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
return true;
}
// (C ? X : Y) != 0 if X != 0 and Y != 0.
- else if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ else if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
if (isKnownNonZero(SI->getTrueValue(), Depth, Q) &&
isKnownNonZero(SI->getFalseValue(), Depth, Q))
return true;
}
// PHI
- else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
// Try and detect a recurrence that monotonically increases from a
// starting value, as these are common as induction variables.
if (PN->getNumIncomingValues() == 2) {
@@ -1865,8 +1986,8 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
}
/// Return true if V2 == V1 + X, where X is known non-zero.
-static bool isAddOfNonZero(Value *V1, Value *V2, const Query &Q) {
- BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
+static bool isAddOfNonZero(const Value *V1, const Value *V2, const Query &Q) {
+ const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
if (!BO || BO->getOpcode() != Instruction::Add)
return false;
Value *Op = nullptr;
@@ -1880,7 +2001,7 @@ static bool isAddOfNonZero(Value *V1, Value *V2, const Query &Q) {
}
/// Return true if it is known that V1 != V2.
-static bool isKnownNonEqual(Value *V1, Value *V2, const Query &Q) {
+static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) {
if (V1->getType()->isVectorTy() || V1 == V2)
return false;
if (V1->getType() != V2->getType())
@@ -1916,7 +2037,7 @@ static bool isKnownNonEqual(Value *V1, Value *V2, const Query &Q) {
/// where V is a vector, the mask, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
-bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth,
+bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
const Query &Q) {
APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
computeKnownBits(V, KnownZero, KnownOne, Depth, Q);
@@ -1927,8 +2048,9 @@ bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth,
/// minimum number of sign bits. Return 0 if the value is not a vector constant
/// or if any element was not analyzed; otherwise, return the count for the
/// element with the minimum number of sign bits.
-static unsigned computeNumSignBitsVectorConstant(Value *V, unsigned TyBits) {
- auto *CV = dyn_cast<Constant>(V);
+static unsigned computeNumSignBitsVectorConstant(const Value *V,
+ unsigned TyBits) {
+ const auto *CV = dyn_cast<Constant>(V);
if (!CV || !CV->getType()->isVectorTy())
return 0;
@@ -1956,7 +2078,7 @@ static unsigned computeNumSignBitsVectorConstant(Value *V, unsigned TyBits) {
/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
/// other, so we return 3. For vectors, return the number of sign bits for the
/// vector element with the minimum number of known sign bits.
-unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q) {
+unsigned ComputeNumSignBits(const Value *V, unsigned Depth, const Query &Q) {
unsigned TyBits = Q.DL.getTypeSizeInBits(V->getType()->getScalarType());
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
@@ -1964,10 +2086,10 @@ unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q) {
// Note that ConstantInt is handled by the general computeKnownBits case
// below.
- if (Depth == 6)
+ if (Depth == MaxDepth)
return 1; // Limit search depth.
- Operator *U = dyn_cast<Operator>(V);
+ const Operator *U = dyn_cast<Operator>(V);
switch (Operator::getOpcode(V)) {
default: break;
case Instruction::SExt:
@@ -2125,7 +2247,7 @@ unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q) {
return std::min(Tmp, Tmp2)-1;
case Instruction::PHI: {
- PHINode *PN = cast<PHINode>(U);
+ const PHINode *PN = cast<PHINode>(U);
unsigned NumIncomingValues = PN->getNumIncomingValues();
// Don't analyze large in-degree PHIs.
if (NumIncomingValues > 4) break;
@@ -2147,6 +2269,13 @@ unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q) {
// FIXME: it's tricky to do anything useful for this, but it is an important
// case for targets like X86.
break;
+
+ case Instruction::ExtractElement:
+ // Look through extract element. At the moment we keep this simple and skip
+ // tracking the specific element. But at least we might find information
+    // valid for all elements of the vector (for example, if the vector is
+    // sign-extended, shifted, etc.).
+ return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
}
// Finally, if we can prove that the top bits of the result are 0's or 1's,
@@ -2416,7 +2545,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
// FIXME: Magic number! At the least, this should be given a name because it's
// used similarly in CannotBeOrderedLessThanZero(). A better fix may be to
// expose it as a parameter, so it can be used for testing / experimenting.
- if (Depth == 6)
+ if (Depth == MaxDepth)
return false; // Limit search depth.
const Operator *I = dyn_cast<Operator>(V);
@@ -2463,7 +2592,7 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V,
// FIXME: Magic number! At the least, this should be given a name because it's
// used similarly in CannotBeNegativeZero(). A better fix may be to
// expose it as a parameter, so it can be used for testing / experimenting.
- if (Depth == 6)
+ if (Depth == MaxDepth)
return false; // Limit search depth.
const Operator *I = dyn_cast<Operator>(V);
@@ -2478,7 +2607,7 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V,
// x*x is always non-negative or a NaN.
if (I->getOperand(0) == I->getOperand(1))
return true;
- // Fall through
+ LLVM_FALLTHROUGH;
case Instruction::FAdd:
case Instruction::FDiv:
case Instruction::FRem:
@@ -2768,11 +2897,17 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
break;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
- APInt GEPOffset(BitWidth, 0);
+ // If one of the values we have visited is an addrspacecast, then
+ // the pointer type of this GEP may be different from the type
+ // of the Ptr parameter which was passed to this function. This
+ // means when we construct GEPOffset, we need to use the size
+ // of GEP's pointer type rather than the size of the original
+ // pointer type.
+ APInt GEPOffset(DL.getPointerTypeSizeInBits(Ptr->getType()), 0);
if (!GEP->accumulateConstantOffset(DL, GEPOffset))
break;
- ByteOffset += GEPOffset;
+ ByteOffset += GEPOffset.getSExtValue();
Ptr = GEP->getPointerOperand();
} else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
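
The switch to getSExtValue() matters once pointer widths can differ: the narrower per-GEP offset must be sign-extended into the accumulated offset, or a small negative offset turns into a huge positive one. A plain-integer illustration with made-up widths and values:

#include <cassert>
#include <cstdint>

int main() {
  int64_t ByteOffset = 100; // Accumulated so far, 64-bit pointer width.
  int32_t GEPOffset = -8;   // This GEP indexes a 32-bit address space.

  // Zero-extending the narrower offset produces a nonsense result:
  int64_t Bad = ByteOffset + (int64_t)(uint32_t)GEPOffset;
  assert(Bad == 100 + 4294967288LL);

  // Sign-extending, as getSExtValue() does, keeps the intended -8:
  int64_t Good = ByteOffset + (int64_t)GEPOffset;
  assert(Good == 92);
  return 0;
}
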
@@ -2886,13 +3021,14 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
-static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl<PHINode*> &PHIs) {
+static uint64_t GetStringLengthH(const Value *V,
+ SmallPtrSetImpl<const PHINode*> &PHIs) {
// Look through noop bitcast instructions.
V = V->stripPointerCasts();
// If this is a PHI node, there are two cases: either we have already seen it
// or we haven't.
- if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (const PHINode *PN = dyn_cast<PHINode>(V)) {
if (!PHIs.insert(PN).second)
return ~0ULL; // already in the set.
@@ -2914,7 +3050,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl<PHINode*> &PHIs) {
}
// strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
- if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
if (Len1 == 0) return 0;
uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
@@ -2935,10 +3071,10 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl<PHINode*> &PHIs) {
/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
-uint64_t llvm::GetStringLength(Value *V) {
+uint64_t llvm::GetStringLength(const Value *V) {
if (!V->getType()->isPointerTy()) return 0;
- SmallPtrSet<PHINode*, 32> PHIs;
+ SmallPtrSet<const PHINode*, 32> PHIs;
uint64_t Len = GetStringLengthH(V, PHIs);
// If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
// an empty string as a length.
@@ -2947,7 +3083,8 @@ uint64_t llvm::GetStringLength(Value *V) {
/// \brief \p PN defines a loop-variant pointer to an object. Check if the
/// previous iteration of the loop was referring to the same object as \p PN.
-static bool isSameUnderlyingObjectInLoop(PHINode *PN, LoopInfo *LI) {
+static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
+ const LoopInfo *LI) {
// Find the loop-defined value.
Loop *L = LI->getLoopFor(PN->getParent());
if (PN->getNumIncomingValues() != 2)
@@ -3208,11 +3345,11 @@ bool llvm::isKnownNonNull(const Value *V) {
if (const Argument *A = dyn_cast<Argument>(V))
return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr();
- // A global variable in address space 0 is non null unless extern weak.
- // Other address spaces may have null as a valid address for a global,
- // so we can't assume anything.
+ // A global variable in address space 0 is non null unless extern weak
+ // or an absolute symbol reference. Other address spaces may have null as a
+ // valid address for a global, so we can't assume anything.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- return !GV->hasExternalWeakLinkage() &&
+ return !GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
GV->getType()->getAddressSpace() == 0;
// A Load tagged with nonnull metadata is never null.
@@ -3230,6 +3367,9 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
const Instruction *CtxI,
const DominatorTree *DT) {
assert(V->getType()->isPointerTy() && "V must be pointer type");
+ assert(!isa<ConstantData>(V) && "Did not expect ConstantPointerNull");
+ assert(CtxI && "Context instruction required for analysis");
+ assert(DT && "Dominator tree required for analysis");
unsigned NumUsesExplored = 0;
for (auto *U : V->users()) {
@@ -3266,13 +3406,20 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI,
const DominatorTree *DT) {
+ if (isa<ConstantPointerNull>(V) || isa<UndefValue>(V))
+ return false;
+
if (isKnownNonNull(V))
return true;
- return CtxI ? ::isKnownNonNullFromDominatingCondition(V, CtxI, DT) : false;
+ if (!CtxI || !DT)
+ return false;
+
+ return ::isKnownNonNullFromDominatingCondition(V, CtxI, DT);
}
-OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
+OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
+ const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
@@ -3322,7 +3469,8 @@ OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
return OverflowResult::MayOverflow;
}
-OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
+OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS,
+ const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
@@ -3351,9 +3499,13 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
return OverflowResult::MayOverflow;
}
-static OverflowResult computeOverflowForSignedAdd(
- Value *LHS, Value *RHS, AddOperator *Add, const DataLayout &DL,
- AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) {
+static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
+ const Value *RHS,
+ const AddOperator *Add,
+ const DataLayout &DL,
+ AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
if (Add && Add->hasNoSignedWrap()) {
return OverflowResult::NeverOverflows;
}
@@ -3395,7 +3547,8 @@ static OverflowResult computeOverflowForSignedAdd(
return OverflowResult::MayOverflow;
}
-bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) {
+bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II,
+ const DominatorTree &DT) {
#ifndef NDEBUG
auto IID = II->getIntrinsicID();
assert((IID == Intrinsic::sadd_with_overflow ||
@@ -3407,11 +3560,11 @@ bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) {
"Not an overflow intrinsic!");
#endif
- SmallVector<BranchInst *, 2> GuardingBranches;
- SmallVector<ExtractValueInst *, 2> Results;
+ SmallVector<const BranchInst *, 2> GuardingBranches;
+ SmallVector<const ExtractValueInst *, 2> Results;
- for (User *U : II->users()) {
- if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
+ for (const User *U : II->users()) {
+ if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) {
assert(EVI->getNumIndices() == 1 && "Obvious from CI's type");
if (EVI->getIndices()[0] == 0)
@@ -3419,8 +3572,8 @@ bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) {
else {
assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type");
- for (auto *U : EVI->users())
- if (auto *B = dyn_cast<BranchInst>(U)) {
+ for (const auto *U : EVI->users())
+ if (const auto *B = dyn_cast<BranchInst>(U)) {
assert(B->isConditional() && "How else is it using an i1?");
GuardingBranches.push_back(B);
}
@@ -3432,13 +3585,13 @@ bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) {
}
}
- auto AllUsesGuardedByBranch = [&](BranchInst *BI) {
+ auto AllUsesGuardedByBranch = [&](const BranchInst *BI) {
BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1));
if (!NoWrapEdge.isSingleEdge())
return false;
// Check if all users of the add are provably no-wrap.
- for (auto *Result : Results) {
+ for (const auto *Result : Results) {
      // If the extractvalue itself is not executed on overflow, then we don't
// need to check each use separately, since domination is transitive.
if (DT.dominates(NoWrapEdge, Result->getParent()))
@@ -3456,7 +3609,7 @@ bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) {
}
-OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add,
+OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add,
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
@@ -3465,7 +3618,8 @@ OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add,
Add, DL, AC, CxtI, DT);
}
-OverflowResult llvm::computeOverflowForSignedAdd(Value *LHS, Value *RHS,
+OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS,
+ const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
@@ -3502,12 +3656,27 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
return false;
// Calls can throw, or contain an infinite loop, or kill the process.
- if (CallSite CS = CallSite(const_cast<Instruction*>(I))) {
- // Calls which don't write to arbitrary memory are safe.
- // FIXME: Ignoring infinite loops without any side-effects is too aggressive,
- // but it's consistent with other passes. See http://llvm.org/PR965 .
- // FIXME: This isn't aggressive enough; a call which only writes to a
- // global is guaranteed to return.
+ if (auto CS = ImmutableCallSite(I)) {
+ // Call sites that throw have implicit non-local control flow.
+ if (!CS.doesNotThrow())
+ return false;
+
+ // Non-throwing call sites can loop infinitely, call exit/pthread_exit
+ // etc. and thus not return. However, LLVM already assumes that
+ //
+ // - Thread exiting actions are modeled as writes to memory invisible to
+ // the program.
+ //
+ // - Loops that don't have side effects (side effects are volatile/atomic
+ // stores and IO) always terminate (see http://llvm.org/PR965).
+ // Furthermore IO itself is also modeled as writes to memory invisible to
+ // the program.
+ //
+ // We rely on those assumptions here, and use the memory effects of the call
+ // target as a proxy for checking that it always returns.
+
+ // FIXME: This isn't aggressive enough; a call which only writes to a global
+ // is guaranteed to return.
return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() ||
match(I, m_Intrinsic<Intrinsic::assume>());
}
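
The new call-site logic is a pure predicate over call attributes. A condensed standalone model (a hypothetical CallSiteDesc stands in for ImmutableCallSite and its attribute queries):

#include <cassert>

struct CallSiteDesc {
  bool DoesNotThrow;          // nounwind
  bool OnlyReadsMemory;       // readonly / readnone
  bool OnlyAccessesArgMemory; // argmemonly
  bool IsAssume;              // llvm.assume
};

// Mirrors the check above: a potentially-throwing call has non-local control
// flow; a nounwind call is assumed to return when its memory effects rule out
// the invisible "never returns" writes the comments describe.
static bool transfersExecution(const CallSiteDesc &CS) {
  if (!CS.DoesNotThrow)
    return false;
  return CS.OnlyReadsMemory || CS.OnlyAccessesArgMemory || CS.IsAssume;
}

int main() {
  assert(!transfersExecution({false, true, false, false})); // May throw.
  assert(transfersExecution({true, true, false, false}));   // nounwind readonly.
  assert(!transfersExecution({true, false, false, false})); // May not return.
  return 0;
}
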
@@ -3688,7 +3857,7 @@ bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) {
return false;
}
-static bool isKnownNonNaN(Value *V, FastMathFlags FMF) {
+static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
if (FMF.noNaNs())
return true;
@@ -3697,12 +3866,90 @@ static bool isKnownNonNaN(Value *V, FastMathFlags FMF) {
return false;
}
-static bool isKnownNonZero(Value *V) {
+static bool isKnownNonZero(const Value *V) {
if (auto *C = dyn_cast<ConstantFP>(V))
return !C->isZero();
return false;
}
+/// Match non-obvious integer minimum and maximum sequences.
+static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
+ Value *CmpLHS, Value *CmpRHS,
+ Value *TrueVal, Value *FalseVal,
+ Value *&LHS, Value *&RHS) {
+ if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
+ return {SPF_UNKNOWN, SPNB_NA, false};
+
+ // Z = X -nsw Y
+ // (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0)
+ // (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0)
+ if (match(TrueVal, m_Zero()) &&
+ match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
+ }
+
+ // Z = X -nsw Y
+ // (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0)
+ // (X <s Y) ? Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0)
+ if (match(FalseVal, m_Zero()) &&
+ match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
+ }
+
+ const APInt *C1;
+ if (!match(CmpRHS, m_APInt(C1)))
+ return {SPF_UNKNOWN, SPNB_NA, false};
+
+ // An unsigned min/max can be written with a signed compare.
+ const APInt *C2;
+ if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) ||
+ (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) {
+ // Is the sign bit set?
+ // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
+ // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
+ if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue()) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
+ }
+
+ // Is the sign bit clear?
+ // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
+ // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
+ if (Pred == CmpInst::ICMP_SGT && C1->isAllOnesValue() &&
+ C2->isMinSignedValue()) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
+ }
+ }
+
+ // Look through 'not' ops to find disguised signed min/max.
+ // (X >s C) ? ~X : ~C ==> (~X <s ~C) ? ~X : ~C ==> SMIN(~X, ~C)
+ // (X <s C) ? ~X : ~C ==> (~X >s ~C) ? ~X : ~C ==> SMAX(~X, ~C)
+ if (match(TrueVal, m_Not(m_Specific(CmpLHS))) &&
+ match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
+ }
+
+ // (X >s C) ? ~C : ~X ==> (~X <s ~C) ? ~C : ~X ==> SMAX(~C, ~X)
+ // (X <s C) ? ~C : ~X ==> (~X >s ~C) ? ~C : ~X ==> SMIN(~C, ~X)
+ if (match(FalseVal, m_Not(m_Specific(CmpLHS))) &&
+ match(TrueVal, m_APInt(C2)) && ~(*C1) == *C2) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
+ }
+
+ return {SPF_UNKNOWN, SPNB_NA, false};
+}
+
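
The 'not' rewrites above work because bitwise negation reverses signed order (~X == -1 - X). A brute-force i8 confirmation of the first of them, (X >s C) ? ~X : ~C == SMIN(~X, ~C), as a standalone check:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (int X = -128; X <= 127; ++X) {
    for (int C = -128; C <= 127; ++C) {
      int8_t NotX = (int8_t)~(uint8_t)X; // ~X == -1 - X on i8.
      int8_t NotC = (int8_t)~(uint8_t)C;
      int8_t Sel = (X > C) ? NotX : NotC;
      assert(Sel == std::min(NotX, NotC));
    }
  }
  return 0;
}
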
static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
FastMathFlags FMF,
Value *CmpLHS, Value *CmpRHS,
@@ -3801,39 +4048,26 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
}
}
- if (ConstantInt *C1 = dyn_cast<ConstantInt>(CmpRHS)) {
+ const APInt *C1;
+ if (match(CmpRHS, m_APInt(C1))) {
if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) ||
(CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) {
// ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
// NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
- if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) {
+ if (Pred == ICmpInst::ICMP_SGT && (*C1 == 0 || C1->isAllOnesValue())) {
return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
}
// ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
// NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
- if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) {
+ if (Pred == ICmpInst::ICMP_SLT && (*C1 == 0 || *C1 == 1)) {
return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
}
}
-
- // Y >s C ? ~Y : ~C == ~Y <s ~C ? ~Y : ~C = SMIN(~Y, ~C)
- if (const auto *C2 = dyn_cast<ConstantInt>(FalseVal)) {
- if (Pred == ICmpInst::ICMP_SGT && C1->getType() == C2->getType() &&
- ~C1->getValue() == C2->getValue() &&
- (match(TrueVal, m_Not(m_Specific(CmpLHS))) ||
- match(CmpLHS, m_Not(m_Specific(TrueVal))))) {
- LHS = TrueVal;
- RHS = FalseVal;
- return {SPF_SMIN, SPNB_NA, false};
- }
- }
}
- // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
-
- return {SPF_UNKNOWN, SPNB_NA, false};
+ return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
}
static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
@@ -3932,30 +4166,9 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
LHS, RHS);
}
-ConstantRange llvm::getConstantRangeFromMetadata(MDNode &Ranges) {
- const unsigned NumRanges = Ranges.getNumOperands() / 2;
- assert(NumRanges >= 1 && "Must have at least one range!");
- assert(Ranges.getNumOperands() % 2 == 0 && "Must be a sequence of pairs");
-
- auto *FirstLow = mdconst::extract<ConstantInt>(Ranges.getOperand(0));
- auto *FirstHigh = mdconst::extract<ConstantInt>(Ranges.getOperand(1));
-
- ConstantRange CR(FirstLow->getValue(), FirstHigh->getValue());
-
- for (unsigned i = 1; i < NumRanges; ++i) {
- auto *Low = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
- auto *High = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
-
- // Note: unionWith will potentially create a range that contains values not
- // contained in any of the original N ranges.
- CR = CR.unionWith(ConstantRange(Low->getValue(), High->getValue()));
- }
-
- return CR;
-}
-
/// Return true if "icmp Pred LHS RHS" is always true.
-static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+static bool isTruePredicate(CmpInst::Predicate Pred,
+ const Value *LHS, const Value *RHS,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
@@ -3984,7 +4197,8 @@ static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
return true;
// Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
- auto MatchNUWAddsToSameValue = [&](Value *A, Value *B, Value *&X,
+ auto MatchNUWAddsToSameValue = [&](const Value *A, const Value *B,
+ const Value *&X,
const APInt *&CA, const APInt *&CB) {
if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) &&
match(B, m_NUWAdd(m_Specific(X), m_APInt(CB))))
@@ -4004,7 +4218,7 @@ static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
return false;
};
- Value *X;
+ const Value *X;
const APInt *CLHS, *CRHS;
if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS))
return CLHS->ule(*CRHS);
@@ -4017,8 +4231,9 @@ static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
/// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
/// ALHS ARHS" is true. Otherwise, return None.
static Optional<bool>
-isImpliedCondOperands(CmpInst::Predicate Pred, Value *ALHS, Value *ARHS,
- Value *BLHS, Value *BRHS, const DataLayout &DL,
+isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS,
+ const Value *ARHS, const Value *BLHS,
+ const Value *BRHS, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI, const DominatorTree *DT) {
switch (Pred) {
@@ -4045,7 +4260,8 @@ isImpliedCondOperands(CmpInst::Predicate Pred, Value *ALHS, Value *ARHS,
/// Return true if the operands of the two compares match. IsSwappedOps is true
/// when the operands match, but are swapped.
-static bool isMatchingOps(Value *ALHS, Value *ARHS, Value *BLHS, Value *BRHS,
+static bool isMatchingOps(const Value *ALHS, const Value *ARHS,
+ const Value *BLHS, const Value *BRHS,
bool &IsSwappedOps) {
bool IsMatchingOps = (ALHS == BLHS && ARHS == BRHS);
@@ -4057,9 +4273,11 @@ static bool isMatchingOps(Value *ALHS, Value *ARHS, Value *BLHS, Value *BRHS,
/// true. Return false if "icmp1 APred ALHS ARHS" implies "icmp2 BPred BLHS
/// BRHS" is false. Otherwise, return None if we can't infer anything.
static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred,
- Value *ALHS, Value *ARHS,
+ const Value *ALHS,
+ const Value *ARHS,
CmpInst::Predicate BPred,
- Value *BLHS, Value *BRHS,
+ const Value *BLHS,
+ const Value *BRHS,
bool IsSwappedOps) {
// Canonicalize the operands so they're matching.
if (IsSwappedOps) {
@@ -4078,9 +4296,10 @@ static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred,
/// true. Return false if "icmp1 APred ALHS C1" implies "icmp2 BPred BLHS
/// C2" is false. Otherwise, return None if we can't infer anything.
static Optional<bool>
-isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, Value *ALHS,
- ConstantInt *C1, CmpInst::Predicate BPred,
- Value *BLHS, ConstantInt *C2) {
+isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, const Value *ALHS,
+ const ConstantInt *C1,
+ CmpInst::Predicate BPred,
+ const Value *BLHS, const ConstantInt *C2) {
assert(ALHS == BLHS && "LHS operands must match.");
ConstantRange DomCR =
ConstantRange::makeExactICmpRegion(APred, C1->getValue());
@@ -4095,7 +4314,7 @@ isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, Value *ALHS,
return None;
}
-Optional<bool> llvm::isImpliedCondition(Value *LHS, Value *RHS,
+Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
const DataLayout &DL, bool InvertAPred,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index 53e7153a350f..7e598f435ff5 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -107,11 +107,11 @@ unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
// Find the type we're currently indexing into.
gep_type_iterator GEPTI = gep_type_begin(Gep);
- std::advance(GEPTI, LastOperand - 1);
+ std::advance(GEPTI, LastOperand - 2);
// If it's a type with the same allocation size as the result of the GEP we
// can peel off the zero index.
- if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize)
+ if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
break;
--LastOperand;
}
@@ -454,9 +454,10 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
I0->getAllMetadataOtherThanDebugLoc(Metadata);
- for (auto Kind : { LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
- LLVMContext::MD_nontemporal }) {
+ for (auto Kind :
+ {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
+ LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load}) {
MDNode *MD = I0->getMetadata(Kind);
for (int J = 1, E = VL.size(); MD && J != E; ++J) {
@@ -469,13 +470,12 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
case LLVMContext::MD_alias_scope:
MD = MDNode::getMostGenericAliasScope(MD, IMD);
break;
- case LLVMContext::MD_noalias:
- MD = MDNode::intersect(MD, IMD);
- break;
case LLVMContext::MD_fpmath:
MD = MDNode::getMostGenericFPMath(MD, IMD);
break;
+ case LLVMContext::MD_noalias:
case LLVMContext::MD_nontemporal:
+ case LLVMContext::MD_invariant_load:
MD = MDNode::intersect(MD, IMD);
break;
default: