Diffstat (limited to 'lib/Transforms')
61 files changed, 1708 insertions, 609 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index a2c8a32dfe866..25db0eff8848e 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -106,9 +106,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, AttributeList PAL = F->getAttributes(); // First, determine the new argument list - unsigned ArgIndex = 0; + unsigned ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; - ++I, ++ArgIndex) { + ++I, ++ArgNo) { if (ByValArgsToTransform.count(&*I)) { // Simple byval argument? Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); @@ -120,7 +120,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, } else if (!ArgsToPromote.count(&*I)) { // Unchanged argument Params.push_back(I->getType()); - ArgAttrVec.push_back(PAL.getParamAttributes(ArgIndex)); + ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo)); } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) ++NumArgumentsDead; @@ -214,12 +214,12 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, // Loop over the operands, inserting GEP and loads in the caller as // appropriate. CallSite::arg_iterator AI = CS.arg_begin(); - ArgIndex = 1; + ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; - ++I, ++AI, ++ArgIndex) + ++I, ++AI, ++ArgNo) if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { Args.push_back(*AI); // Unmodified argument - ArgAttrVec.push_back(CallPAL.getAttributes(ArgIndex)); + ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } else if (ByValArgsToTransform.count(&*I)) { // Emit a GEP and load for each element of the struct. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); @@ -280,9 +280,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, } // Push any varargs arguments on the list. - for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { + for (; AI != CS.arg_end(); ++AI, ++ArgNo) { Args.push_back(*AI); - ArgAttrVec.push_back(CallPAL.getAttributes(ArgIndex)); + ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } SmallVector<OperandBundleDef, 1> OpBundles; @@ -839,17 +839,12 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, // avoiding a register copy. 
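A note on the index change in the ArgumentPromotion hunks above: AttributeList addresses attributes through a shifted index space in which 0 (AttributeList::ReturnIndex) refers to the return value and parameters start at index 1, while getParamAttributes() takes a plain 0-based parameter number and applies the shift internally. A minimal sketch of the equivalence, assuming the AttributeList API as it appears in this patch (the helper name is illustrative, not part of the patch):

// Both lookups fetch the attributes of parameter ArgNo. The first goes
// through the raw attribute-index space (return value at 0, parameters
// from 1); the second lets AttributeList apply the +1 shift itself.
static AttributeSet paramAttrsBothWays(const AttributeList &PAL,
                                       unsigned ArgNo) {
  AttributeSet ViaIndex = PAL.getAttributes(ArgNo + 1);  // old style
  AttributeSet ViaArgNo = PAL.getParamAttributes(ArgNo); // new style
  (void)ViaIndex; // identical to ViaArgNo for any in-range ArgNo
  return ViaArgNo;
}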
if (PtrArg->hasStructRetAttr()) { unsigned ArgNo = PtrArg->getArgNo(); - F->setAttributes( - F->getAttributes() - .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet) - .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias)); + F->removeAttribute(ArgNo + 1, Attribute::StructRet); + F->addAttribute(ArgNo + 1, Attribute::NoAlias); for (Use &U : F->uses()) { CallSite CS(U.getUser()); - CS.setAttributes( - CS.getAttributes() - .removeAttribute(F->getContext(), ArgNo + 1, - Attribute::StructRet) - .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias)); + CS.removeAttribute(ArgNo + 1, Attribute::StructRet); + CS.addAttribute(ArgNo + 1, Attribute::NoAlias); } } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 9648883b7f275..031c3d8a9ebed 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -855,10 +855,11 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { bool MadeChange = false; for (Function *F : SCCNodes) { - if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy()) + if (F->doesNotAlias(AttributeList::ReturnIndex) || + !F->getReturnType()->isPointerTy()) continue; - F->setDoesNotAlias(0); + F->setDoesNotAlias(AttributeList::ReturnIndex); ++NumNoAlias; MadeChange = true; } diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index d66411f04cc4a..c7ef2494e3b8b 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/Triple.h" +#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/IntrinsicInst.h" @@ -25,7 +26,6 @@ #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" #include "llvm/Object/IRObjectFile.h" -#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/SourceMgr.h" diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index ae9d4ce11e0db..f277a51ae659a 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -239,7 +239,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, // we delete a constant array, we may also be holding pointer to one of its // elements (or an element of one of its elements if we're dealing with an // array of arrays) in the worklist. - SmallVector<WeakVH, 8> WorkList(V->user_begin(), V->user_end()); + SmallVector<WeakTrackingVH, 8> WorkList(V->user_begin(), V->user_end()); while (!WorkList.empty()) { Value *UV = WorkList.pop_back_val(); if (!UV) @@ -1792,7 +1792,9 @@ static void makeAllConstantUsesInstructions(Constant *C) { NewU->insertBefore(UI); UI->replaceUsesOfWith(U, NewU); } - U->dropAllReferences(); + // We've replaced all the uses, so destroy the constant. (destroyConstant + // will update value handles and metadata.) 
+ U->destroyConstant(); } } diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt index 9c83f88b2210c..a8b0f32fd785e 100644 --- a/lib/Transforms/IPO/LLVMBuild.txt +++ b/lib/Transforms/IPO/LLVMBuild.txt @@ -20,4 +20,4 @@ type = Library name = IPO parent = Transforms library_name = ipo -required_libraries = Analysis BitWriter Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation +required_libraries = Analysis BitReader BitWriter Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 771770ddc0600..0e478ba607be2 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -207,11 +207,13 @@ private: /// A work queue of functions that may have been modified and should be /// analyzed again. - std::vector<WeakVH> Deferred; + std::vector<WeakTrackingVH> Deferred; /// Checks the rules of order relation introduced among functions set. /// Returns true, if sanity check has been passed, and false if failed. - bool doSanityCheck(std::vector<WeakVH> &Worklist); +#ifndef NDEBUG + bool doSanityCheck(std::vector<WeakTrackingVH> &Worklist); +#endif /// Insert a ComparableFunction into the FnTree, or merge it away if it's /// equal to one that's already present. @@ -283,7 +285,8 @@ ModulePass *llvm::createMergeFunctionsPass() { return new MergeFunctions(); } -bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { +#ifndef NDEBUG +bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) { if (const unsigned Max = NumFunctionsForSanityCheck) { unsigned TripleNumber = 0; bool Valid = true; @@ -291,10 +294,12 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n"; unsigned i = 0; - for (std::vector<WeakVH>::iterator I = Worklist.begin(), E = Worklist.end(); + for (std::vector<WeakTrackingVH>::iterator I = Worklist.begin(), + E = Worklist.end(); I != E && i < Max; ++I, ++i) { unsigned j = i; - for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) { + for (std::vector<WeakTrackingVH>::iterator J = I; J != E && j < Max; + ++J, ++j) { Function *F1 = cast<Function>(*I); Function *F2 = cast<Function>(*J); int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare(); @@ -312,7 +317,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { continue; unsigned k = j; - for (std::vector<WeakVH>::iterator K = J; K != E && k < Max; + for (std::vector<WeakTrackingVH>::iterator K = J; K != E && k < Max; ++k, ++K, ++TripleNumber) { if (K == J) continue; @@ -351,6 +356,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { } return true; } +#endif bool MergeFunctions::runOnModule(Module &M) { if (skipModule(M)) @@ -381,12 +387,12 @@ bool MergeFunctions::runOnModule(Module &M) { // consider merging it. Otherwise it is dropped and never considered again. 
if ((I != S && std::prev(I)->first == I->first) || (std::next(I) != IE && std::next(I)->first == I->first) ) { - Deferred.push_back(WeakVH(I->second)); + Deferred.push_back(WeakTrackingVH(I->second)); } } do { - std::vector<WeakVH> Worklist; + std::vector<WeakTrackingVH> Worklist; Deferred.swap(Worklist); DEBUG(doSanityCheck(Worklist)); @@ -395,7 +401,7 @@ bool MergeFunctions::runOnModule(Module &M) { DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n'); // Insert functions and merge them. - for (WeakVH &I : Worklist) { + for (WeakTrackingVH &I : Worklist) { if (!I) continue; Function *F = cast<Function>(I); diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 78e71c18fe290..1bb9d654ec1cc 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -16,8 +16,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" @@ -31,13 +35,18 @@ using namespace llvm; #define DEBUG_TYPE "partial-inlining" -STATISTIC(NumPartialInlined, "Number of functions partially inlined"); +STATISTIC(NumPartialInlined, + "Number of callsites functions are partially inlined into."); // Command line option to disable partial-inlining. The default is false: static cl::opt<bool> DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial inlining")); +static cl::opt<unsigned> MaxNumInlineBlocks( + "max-num-inline-blocks", cl::init(5), cl::Hidden, + cl::desc("Max number of blocks to be partially inlined")); + // Command line option to set the maximum number of partial inlining allowed // for the module. The default value of -1 means no limit. static cl::opt<int> MaxNumPartialInlining( @@ -45,20 +54,52 @@ static cl::opt<int> MaxNumPartialInlining( cl::desc("Max number of partial inlining transformations. The default is unlimited")); namespace { + +struct FunctionOutliningInfo { + FunctionOutliningInfo() + : Entries(), ReturnBlock(nullptr), NonReturnBlock(nullptr), + ReturnBlockPreds() {} + // Returns the number of blocks to be inlined including all blocks + // in Entries and one return block. + unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; } + + // A set of blocks including the function entry that guard + // the region to be outlined. + SmallVector<BasicBlock *, 4> Entries; + // The return block that is not included in the outlined region. + BasicBlock *ReturnBlock; + // The dominating block of the region to be outlined. 
+ BasicBlock *NonReturnBlock; + // The set of blocks in Entries that are predecessors to ReturnBlock + SmallVector<BasicBlock *, 4> ReturnBlockPreds; +}; + struct PartialInlinerImpl { - PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(std::move(IFI)) {} + PartialInlinerImpl( + std::function<AssumptionCache &(Function &)> *GetAC, + std::function<TargetTransformInfo &(Function &)> *GTTI, + Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI, + ProfileSummaryInfo *ProfSI) + : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {} bool run(Module &M); Function *unswitchFunction(Function *F); + std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F); + private: - InlineFunctionInfo IFI; int NumPartialInlining = 0; + std::function<AssumptionCache &(Function &)> *GetAssumptionCache; + std::function<TargetTransformInfo &(Function &)> *GetTTI; + Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI; + ProfileSummaryInfo *PSI; + bool shouldPartialInline(CallSite CS, OptimizationRemarkEmitter &ORE); bool IsLimitReached() { return (MaxNumPartialInlining != -1 && NumPartialInlining >= MaxNumPartialInlining); } }; + struct PartialInlinerLegacyPass : public ModulePass { static char ID; // Pass identification, replacement for typeid PartialInlinerLegacyPass() : ModulePass(ID) { @@ -67,91 +108,319 @@ struct PartialInlinerLegacyPass : public ModulePass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); } bool runOnModule(Module &M) override { if (skipModule(M)) return false; AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>(); + TargetTransformInfoWrapperPass *TTIWP = + &getAnalysis<TargetTransformInfoWrapperPass>(); + ProfileSummaryInfo *PSI = + getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&ACT](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; - InlineFunctionInfo IFI(nullptr, &GetAssumptionCache); - return PartialInlinerImpl(IFI).run(M); + + std::function<TargetTransformInfo &(Function &)> GetTTI = + [&TTIWP](Function &F) -> TargetTransformInfo & { + return TTIWP->getTTI(F); + }; + + return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M); } }; } -Function *PartialInlinerImpl::unswitchFunction(Function *F) { - // First, verify that this function is an unswitching candidate... 
- if (F->hasAddressTaken()) - return nullptr; - +std::unique_ptr<FunctionOutliningInfo> +PartialInlinerImpl::computeOutliningInfo(Function *F) { BasicBlock *EntryBlock = &F->front(); BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator()); if (!BR || BR->isUnconditional()) - return nullptr; + return std::unique_ptr<FunctionOutliningInfo>(); + + // Returns true if Succ is BB's successor + auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) { + return is_contained(successors(BB), Succ); + }; + + auto SuccSize = [](BasicBlock *BB) { + return std::distance(succ_begin(BB), succ_end(BB)); + }; + + auto IsReturnBlock = [](BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + return isa<ReturnInst>(TI); + }; + + auto GetReturnBlock = [=](BasicBlock *Succ1, BasicBlock *Succ2) { + if (IsReturnBlock(Succ1)) + return std::make_tuple(Succ1, Succ2); + if (IsReturnBlock(Succ2)) + return std::make_tuple(Succ2, Succ1); + + return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr); + }; + + // Detect a triangular shape: + auto GetCommonSucc = [=](BasicBlock *Succ1, BasicBlock *Succ2) { + if (IsSuccessor(Succ1, Succ2)) + return std::make_tuple(Succ1, Succ2); + if (IsSuccessor(Succ2, Succ1)) + return std::make_tuple(Succ2, Succ1); + + return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr); + }; + + std::unique_ptr<FunctionOutliningInfo> OutliningInfo = + llvm::make_unique<FunctionOutliningInfo>(); + + BasicBlock *CurrEntry = EntryBlock; + bool CandidateFound = false; + do { + // The number of blocks to be inlined has already reached + // the limit. When MaxNumInlineBlocks is set to 0 or 1, this + // disables partial inlining for the function. + if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks) + break; + + if (SuccSize(CurrEntry) != 2) + break; + + BasicBlock *Succ1 = *succ_begin(CurrEntry); + BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1); + + BasicBlock *ReturnBlock, *NonReturnBlock; + std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2); + + if (ReturnBlock) { + OutliningInfo->Entries.push_back(CurrEntry); + OutliningInfo->ReturnBlock = ReturnBlock; + OutliningInfo->NonReturnBlock = NonReturnBlock; + CandidateFound = true; + break; + } + + BasicBlock *CommSucc; + BasicBlock *OtherSucc; + std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2); + + if (!CommSucc) + break; + + OutliningInfo->Entries.push_back(CurrEntry); + CurrEntry = OtherSucc; + + } while (true); + + if (!CandidateFound) + return std::unique_ptr<FunctionOutliningInfo>(); + + // Do a sanity check of the entries: there should not + // be any successors (not in the entry set) other than + // {ReturnBlock, NonReturnBlock} + assert(OutliningInfo->Entries[0] == &F->front()); + DenseSet<BasicBlock *> Entries; + for (BasicBlock *E : OutliningInfo->Entries) + Entries.insert(E); + + // Returns true if BB has a predecessor which is not + // in the Entries set. 
+ auto HasNonEntryPred = [Entries](BasicBlock *BB) { + for (auto Pred : predecessors(BB)) { + if (!Entries.count(Pred)) + return true; + } + return false; + }; + auto CheckAndNormalizeCandidate = + [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) { + for (BasicBlock *E : OutliningInfo->Entries) { + for (auto Succ : successors(E)) { + if (Entries.count(Succ)) + continue; + if (Succ == OutliningInfo->ReturnBlock) + OutliningInfo->ReturnBlockPreds.push_back(E); + else if (Succ != OutliningInfo->NonReturnBlock) + return false; + } + // There should not be any outside incoming edges either: + if (HasNonEntryPred(E)) + return false; + } + return true; + }; + + if (!CheckAndNormalizeCandidate(OutliningInfo.get())) + return std::unique_ptr<FunctionOutliningInfo>(); - BasicBlock *ReturnBlock = nullptr; - BasicBlock *NonReturnBlock = nullptr; - unsigned ReturnCount = 0; - for (BasicBlock *BB : successors(EntryBlock)) { - if (isa<ReturnInst>(BB->getTerminator())) { - ReturnBlock = BB; - ReturnCount++; - } else - NonReturnBlock = BB; + // Now further grow the candidate's inlining region by + // peeling off dominating blocks from the outlining region: + while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) { + BasicBlock *Cand = OutliningInfo->NonReturnBlock; + if (SuccSize(Cand) != 2) + break; + + if (HasNonEntryPred(Cand)) + break; + + BasicBlock *Succ1 = *succ_begin(Cand); + BasicBlock *Succ2 = *(succ_begin(Cand) + 1); + + BasicBlock *ReturnBlock, *NonReturnBlock; + std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2); + if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock) + break; + + if (NonReturnBlock->getSinglePredecessor() != Cand) + break; + + // Now grow and update OutliningInfo: + OutliningInfo->Entries.push_back(Cand); + OutliningInfo->NonReturnBlock = NonReturnBlock; + OutliningInfo->ReturnBlockPreds.push_back(Cand); + Entries.insert(Cand); } - if (ReturnCount != 1) + return OutliningInfo; +} + +bool PartialInlinerImpl::shouldPartialInline(CallSite CS, + OptimizationRemarkEmitter &ORE) { + // TODO: more sharing with shouldInline in Inliner.cpp + using namespace ore; + Instruction *Call = CS.getInstruction(); + Function *Callee = CS.getCalledFunction(); + Function *Caller = CS.getCaller(); + auto &CalleeTTI = (*GetTTI)(*Callee); + InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI, + *GetAssumptionCache, GetBFI, PSI); + + if (IC.isAlways()) { + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) + << NV("Callee", Callee) + << " should always be fully inlined, not partially"); + return false; + } + + if (IC.isNever()) { + ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) + << NV("Callee", Callee) << " not partially inlined into " + << NV("Caller", Caller) + << " because it should never be inlined (cost=never)"); + return false; + } + + if (!IC) { + ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call) + << NV("Callee", Callee) << " not partially inlined into " + << NV("Caller", Caller) << " because too costly to inline (cost=" + << NV("Cost", IC.getCost()) << ", threshold=" + << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); + return false; + } + + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call) + << NV("Callee", Callee) << " can be partially inlined into " + << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost()) + << " (threshold=" + << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); + return true; +} + 
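The remarks emitted by shouldPartialInline() above report the inline threshold as getCostDelta() + getCost(). A short sketch of why that sum recovers the threshold, assuming InlineCost's usual contract that the delta is threshold minus cost (the helper name is illustrative):

// InlineCost models the decision as Cost < Threshold and exposes
// getCostDelta() == Threshold - Cost, so the threshold itself can be
// reconstructed for diagnostics without storing it separately.
static int recoverThreshold(const InlineCost &IC) {
  return IC.getCostDelta() + IC.getCost(); // == Threshold
}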
+Function *PartialInlinerImpl::unswitchFunction(Function *F) { + + if (F->hasAddressTaken()) + return nullptr; + + std::unique_ptr<FunctionOutliningInfo> OutliningInfo = + computeOutliningInfo(F); + + if (!OutliningInfo) return nullptr; // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; Function *DuplicateFunction = CloneFunction(F, VMap); - DuplicateFunction->setLinkage(GlobalValue::InternalLinkage); - BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[EntryBlock]); - BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[ReturnBlock]); - BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[NonReturnBlock]); + BasicBlock *NewReturnBlock = + cast<BasicBlock>(VMap[OutliningInfo->ReturnBlock]); + BasicBlock *NewNonReturnBlock = + cast<BasicBlock>(VMap[OutliningInfo->NonReturnBlock]); + DenseSet<BasicBlock *> NewEntries; + for (BasicBlock *BB : OutliningInfo->Entries) { + NewEntries.insert(cast<BasicBlock>(VMap[BB])); + } // Go ahead and update all uses to the duplicate, so that we can just // use the inliner functionality when we're done hacking. F->replaceAllUsesWith(DuplicateFunction); + auto getFirstPHI = [](BasicBlock *BB) { + BasicBlock::iterator I = BB->begin(); + PHINode *FirstPhi = nullptr; + while (I != BB->end()) { + PHINode *Phi = dyn_cast<PHINode>(I); + if (!Phi) + break; + if (!FirstPhi) { + FirstPhi = Phi; + break; + } + } + return FirstPhi; + }; // Special hackery is needed with PHI nodes that have inputs from more than // one extracted block. For simplicity, just split the PHIs into a two-level // sequence of PHIs, some of which will go in the extracted region, and some // of which will go outside. BasicBlock *PreReturn = NewReturnBlock; - NewReturnBlock = NewReturnBlock->splitBasicBlock( - NewReturnBlock->getFirstNonPHI()->getIterator()); - BasicBlock::iterator I = PreReturn->begin(); - Instruction *Ins = &NewReturnBlock->front(); - while (I != PreReturn->end()) { - PHINode *OldPhi = dyn_cast<PHINode>(I); - if (!OldPhi) - break; - - PHINode *RetPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); - OldPhi->replaceAllUsesWith(RetPhi); - Ins = NewReturnBlock->getFirstNonPHI(); - - RetPhi->addIncoming(&*I, PreReturn); - RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewEntryBlock), - NewEntryBlock); - OldPhi->removeIncomingValue(NewEntryBlock); - - ++I; + // only split block when necessary: + PHINode *FirstPhi = getFirstPHI(PreReturn); + unsigned NumPredsFromEntries = OutliningInfo->ReturnBlockPreds.size(); + if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) { + + NewReturnBlock = NewReturnBlock->splitBasicBlock( + NewReturnBlock->getFirstNonPHI()->getIterator()); + BasicBlock::iterator I = PreReturn->begin(); + Instruction *Ins = &NewReturnBlock->front(); + while (I != PreReturn->end()) { + PHINode *OldPhi = dyn_cast<PHINode>(I); + if (!OldPhi) + break; + + PHINode *RetPhi = + PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins); + OldPhi->replaceAllUsesWith(RetPhi); + Ins = NewReturnBlock->getFirstNonPHI(); + + RetPhi->addIncoming(&*I, PreReturn); + for (BasicBlock *E : OutliningInfo->ReturnBlockPreds) { + BasicBlock *NewE = cast<BasicBlock>(VMap[E]); + RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE); + OldPhi->removeIncomingValue(NewE); + } + ++I; + } + for (auto E : OutliningInfo->ReturnBlockPreds) { + BasicBlock *NewE = cast<BasicBlock>(VMap[E]); + NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock); + } } - NewEntryBlock->getTerminator()->replaceUsesOfWith(PreReturn, 
NewReturnBlock); + // Returns true if the block is to be partially inlined into the caller + // (i.e. not to be extracted to the out-of-line function) + auto ToBeInlined = [=](BasicBlock *BB) { + return BB == NewReturnBlock || NewEntries.count(BB); + }; // Gather up the blocks that we're going to extract. std::vector<BasicBlock *> ToExtract; ToExtract.push_back(NewNonReturnBlock); for (BasicBlock &BB : *DuplicateFunction) if (&BB != NewEntryBlock && &BB != NewReturnBlock && - &BB != NewNonReturnBlock) + if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock) ToExtract.push_back(&BB); // The CodeExtractor needs a dominator tree. @@ -183,16 +452,22 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { if (IsLimitReached()) continue; - NumPartialInlining++; OptimizationRemarkEmitter ORE(CS.getCaller()); + if (!shouldPartialInline(CS, ORE)) + continue; + DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); BasicBlock *Block = CS.getParent(); ORE.emit(OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", DLoc, Block) << ore::NV("Callee", F) << " partially inlined into " << ore::NV("Caller", CS.getCaller())); + InlineFunctionInfo IFI(nullptr, GetAssumptionCache); InlineFunction(CS, IFI); + NumPartialInlining++; + // Update stats + NumPartialInlined++; } // Ditch the duplicate, since we're done with it, and rewrite all remaining @@ -200,7 +475,6 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { DuplicateFunction->replaceAllUsesWith(F); DuplicateFunction->eraseFromParent(); - ++NumPartialInlined; return ExtractedFunction; } @@ -246,6 +520,8 @@ char PartialInlinerLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) @@ -256,12 +532,25 @@ ModulePass *llvm::createPartialInliningPass() { PreservedAnalyses PartialInlinerPass::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & { return FAM.getResult<AssumptionAnalysis>(F); }; - InlineFunctionInfo IFI(nullptr, &GetAssumptionCache); - if (PartialInlinerImpl(IFI).run(M)) + + std::function<BlockFrequencyInfo &(Function &)> GetBFI = + [&FAM](Function &F) -> BlockFrequencyInfo & { + return FAM.getResult<BlockFrequencyAnalysis>(F); + }; + + std::function<TargetTransformInfo &(Function &)> GetTTI = + [&FAM](Function &F) -> TargetTransformInfo & { + return FAM.getResult<TargetIRAnalysis>(F); + }; + + ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); + + if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 0d5910ebbfcc9..203594572618a 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -38,6 +38,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Vectorize.h" using namespace llvm; @@ -137,14 +138,19 @@ static cl::opt<int> 
PreInlineThreshold( "(default = 75)")); static cl::opt<bool> EnableGVNHoist( - "enable-gvn-hoist", cl::init(true), cl::Hidden, - cl::desc("Enable the GVN hoisting pass (default = on)")); + "enable-gvn-hoist", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN hoisting pass (default = off)")); static cl::opt<bool> DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false), cl::Hidden, cl::desc("Disable shrink-wrap library calls")); +static cl::opt<bool> + EnableSimpleLoopUnswitch("enable-simple-loop-unswitch", cl::init(false), + cl::Hidden, + cl::desc("Enable the simple loop unswitch pass.")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -318,7 +324,10 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); MPM.add(createLICMPass()); // Hoist loop invariants - MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + if (EnableSimpleLoopUnswitch) + MPM.add(createSimpleLoopUnswitchLegacyPass()); + else + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); MPM.add(createCFGSimplificationPass()); addInstructionCombiningPass(MPM); MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 9801a0a614165..d3a3c24ce7b44 100644 --- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -30,42 +30,11 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; namespace { -// Produce a unique identifier for this module by taking the MD5 sum of the -// names of the module's strong external symbols. This identifier is -// normally guaranteed to be unique, or the program would fail to link due to -// multiply defined symbols. -// -// If the module has no strong external symbols (such a module may still have a -// semantic effect if it performs global initialization), we cannot produce a -// unique identifier for this module, so we return the empty string, which -// causes the entire module to be written as a regular LTO module. -std::string getModuleId(Module *M) { - MD5 Md5; - bool ExportsSymbols = false; - for (auto &GV : M->global_values()) { - if (GV.isDeclaration() || GV.getName().startswith("llvm.") || - !GV.hasExternalLinkage()) - continue; - ExportsSymbols = true; - Md5.update(GV.getName()); - Md5.update(ArrayRef<uint8_t>{0}); - } - - if (!ExportsSymbols) - return ""; - - MD5::MD5Result R; - Md5.final(R); - - SmallString<32> Str; - MD5::stringifyResult(R, Str); - return ("$" + Str).str(); -} - // Promote each local-linkage entity defined by ExportM and used by ImportM by // changing visibility and appending the given ModuleId. void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { @@ -251,7 +220,7 @@ void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) { void splitAndWriteThinLTOBitcode( raw_ostream &OS, raw_ostream *ThinLinkOS, function_ref<AAResults &(Function &)> AARGetter, Module &M) { - std::string ModuleId = getModuleId(&M); + std::string ModuleId = getUniqueModuleId(&M); if (ModuleId.empty()) { // We couldn't generate a module ID for this module, just write it out as a // regular LTO module. 
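The getModuleId() body deleted from ThinLTOBitcodeWriter.cpp above is not gone; it moved to ModuleUtils as getUniqueModuleId() (hence the new ModuleUtils.h include). A minimal usage sketch under that assumption (the wrapper name is illustrative):

#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

// Returns the "$<md5>" identifier derived from the module's strong
// external symbols, or an empty string when the module exports none,
// in which case it must be written as a regular LTO module.
static std::string moduleIdForThinLTOSplit(llvm::Module &M) {
  return llvm::getUniqueModuleId(&M);
}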
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 030461004f568..4f1f194997683 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -861,7 +861,7 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero, // Find the most significant known 0 other than the sign bit. int BitWidth = Op0KnownZero.getBitWidth(); APInt Op0KnownZeroTemp(Op0KnownZero); - Op0KnownZeroTemp.clearBit(BitWidth - 1); + Op0KnownZeroTemp.clearSignBit(); int Op0ZeroPosition = BitWidth - Op0KnownZeroTemp.countLeadingZeros() - 1; int Op1OnePosition = BitWidth - Op1MaybeOne.countLeadingZeros() - 1; @@ -1037,7 +1037,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return replaceInstUsesWith(I, V); if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC)) + I.hasNoUnsignedWrap(), SQ)) return replaceInstUsesWith(I, V); // (A*B)+(A*C) -> A*(B+C) etc @@ -1358,8 +1358,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = - SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), SQ)) return replaceInstUsesWith(I, V); if (isa<Constant>(RHS)) @@ -1550,7 +1549,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return replaceInstUsesWith(I, V); if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC)) + I.hasNoUnsignedWrap(), SQ)) return replaceInstUsesWith(I, V); // (A*B)-(A*C) -> A*(B-C) etc @@ -1756,8 +1755,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = - SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), SQ)) return replaceInstUsesWith(I, V); // fsub nsz 0, X ==> fsub nsz -0.0, X diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index a97b5a9ec0bb8..c7092bf3a398b 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1213,7 +1213,7 @@ static Instruction *foldAndToXor(BinaryOperator &I, // (~B | A) & (~A | B) --> ~(A ^ B) // (~B | A) & (B | ~A) --> ~(A ^ B) if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) && - match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Value(B)))) + match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B)))) return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); return nullptr; @@ -1254,7 +1254,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyAndInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyAndInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); // See if we can simplify any instructions used by the instruction whose sole @@ -2039,7 +2039,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyOrInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyOrInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); // See if we can simplify any instructions used by the instruction whose sole @@ -2415,7 +2415,7 @@ Instruction 
*InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyXorInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyXorInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); if (Instruction *NewXor = foldXorToXor(I)) @@ -2433,25 +2433,32 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyBSwap(I)) return replaceInstUsesWith(I, V); + // Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand. + Value *X, *Y; + + // We must eliminate the and/or (one-use) for these transforms to not increase + // the instruction count. + // ~(~X & Y) --> (X | ~Y) + // ~(Y & ~X) --> (X | ~Y) + if (match(&I, m_Not(m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y)))))) { + Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not"); + return BinaryOperator::CreateOr(X, NotY); + } + // ~(~X | Y) --> (X & ~Y) + // ~(Y | ~X) --> (X & ~Y) + if (match(&I, m_Not(m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y)))))) { + Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not"); + return BinaryOperator::CreateAnd(X, NotY); + } + // Is this a 'not' (~) fed by a binary operator? BinaryOperator *NotOp; if (match(&I, m_Not(m_BinOp(NotOp)))) { if (NotOp->getOpcode() == Instruction::And || NotOp->getOpcode() == Instruction::Or) { - // ~(~X & Y) --> (X | ~Y) - De Morgan's Law - // ~(~X | Y) === (X & ~Y) - De Morgan's Law - if (dyn_castNotVal(NotOp->getOperand(1))) - NotOp->swapOperands(); - if (Value *Op0NotVal = dyn_castNotVal(NotOp->getOperand(0))) { - Value *NotY = Builder->CreateNot( - NotOp->getOperand(1), NotOp->getOperand(1)->getName() + ".not"); - if (NotOp->getOpcode() == Instruction::And) - return BinaryOperator::CreateOr(Op0NotVal, NotY); - return BinaryOperator::CreateAnd(Op0NotVal, NotY); - } - - // ~(X & Y) --> (~X | ~Y) - De Morgan's Law - // ~(X | Y) === (~X & ~Y) - De Morgan's Law + // Apply DeMorgan's Law when inverts are free: + // ~(X & Y) --> (~X | ~Y) + // ~(X | Y) --> (~X & ~Y) if (IsFreeToInvert(NotOp->getOperand(0), NotOp->getOperand(0)->hasOneUse()) && IsFreeToInvert(NotOp->getOperand(1), diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 313ab13b9e2b7..e9286b1bf1750 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -379,7 +379,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II, for (unsigned i = 0; i != NumSubElts; ++i) { unsigned SubEltIdx = (NumSubElts - 1) - i; auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx)); - Count = Count.shl(BitWidth); + Count <<= BitWidth; Count |= SubElt->getValue().zextOrTrunc(64); } } @@ -1384,17 +1384,17 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { // Create a mask for bits above (ctlz) or below (cttz) the first known one. bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz; - unsigned NumMaskBits = IsTZ ? Known.One.countTrailingZeros() - : Known.One.countLeadingZeros(); - APInt Mask = IsTZ ? APInt::getLowBitsSet(BitWidth, NumMaskBits) - : APInt::getHighBitsSet(BitWidth, NumMaskBits); + unsigned PossibleZeros = IsTZ ? Known.One.countTrailingZeros() + : Known.One.countLeadingZeros(); + unsigned DefiniteZeros = IsTZ ? Known.Zero.countTrailingOnes() + : Known.Zero.countLeadingOnes(); // If all bits above (ctlz) or below (cttz) the first known one are known // zero, this value is constant. 
// FIXME: This should be in InstSimplify because we're replacing an // instruction with a constant. - if (Mask.isSubsetOf(Known.Zero)) { - auto *C = ConstantInt::get(IT, APInt(BitWidth, NumMaskBits)); + if (PossibleZeros == DefiniteZeros) { + auto *C = ConstantInt::get(IT, DefiniteZeros); return IC.replaceInstUsesWith(II, C); } @@ -1818,8 +1818,8 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) { /// lifting. Instruction *InstCombiner::visitCallInst(CallInst &CI) { auto Args = CI.arg_operands(); - if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL, - &TLI, &DT, &AC)) + if (Value *V = + SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), SQ)) return replaceInstUsesWith(CI, V); if (isFreeCall(&CI, &TLI)) diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index d846a631b96f6..60970775de63b 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -190,8 +190,8 @@ static void computeSignedMinMaxValuesFromKnownBits(const KnownBits &Known, Max = Known.One|UnknownBits; if (UnknownBits.isNegative()) { // Sign bit is unknown - Min.setBit(Min.getBitWidth()-1); - Max.clearBit(Max.getBitWidth()-1); + Min.setSignBit(); + Max.clearSignBit(); } } @@ -4269,8 +4269,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Changed = true; } - if (Value *V = - SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, &TLI, &DT, &AC, &I)) + if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // comparing -val or val with non-zero is the same as just comparing val @@ -4778,8 +4778,9 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, - I.getFastMathFlags(), DL, &TLI, &DT, &AC, &I)) + if (Value *V = + SimplifyFCmpInst(I.getPredicate(), Op0, Op1, I.getFastMathFlags(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // Simplify 'fcmp pred X, X' diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 776686d3d1171..3be6419a129a4 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -17,9 +17,11 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -27,10 +29,9 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Pass.h" +#include "llvm/Support/Dwarf.h" #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/IR/DIBuilder.h" #define DEBUG_TYPE "instcombine" @@ -193,7 +194,7 @@ private: TargetLibraryInfo &TLI; DominatorTree &DT; const DataLayout &DL; - + const SimplifyQuery SQ; // Optional analyses. When non-null, these can both be used to do better // combining and will be updated to reflect any changes. 
LoopInfo *LI; @@ -203,11 +204,11 @@ private: public: InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder, bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA, - AssumptionCache &AC, TargetLibraryInfo &TLI, - DominatorTree &DT, const DataLayout &DL, LoopInfo *LI) + AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, + const DataLayout &DL, LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT), - DL(DL), LI(LI), MadeIRChange(false) {} + DL(DL), SQ(DL, &TLI, &DT, &AC), LI(LI), MadeIRChange(false) {} /// \brief Run the combiner over the entire worklist until it is empty. /// @@ -533,6 +534,12 @@ private: /// value, or null if it didn't simplify. Value *SimplifyUsingDistributiveLaws(BinaryOperator &I); + /// This tries to simplify binary operations by factorizing out common terms + /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). + Value *tryFactorization(InstCombiner::BuilderTy *, BinaryOperator &, + Instruction::BinaryOps, Value *, Value *, Value *, + Value *); + /// \brief Attempts to replace V with a simpler value based on the demanded /// bits. Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownBits &Known, diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index ce66581a491a0..face9d9237ae1 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -179,7 +179,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyMulInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyMulInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); if (Value *V = SimplifyUsingDistributiveLaws(I)) @@ -606,8 +606,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (isa<Constant>(Op0)) std::swap(Op0, Op1); - if (Value *V = - SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), SQ)) return replaceInstUsesWith(I, V); bool AllowReassociate = I.hasUnsafeAlgebra(); @@ -1111,7 +1110,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyUDivInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyUDivInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); // Handle the integer div common cases @@ -1184,7 +1183,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifySDivInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifySDivInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); // Handle the integer div common cases @@ -1296,8 +1295,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(), - DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(), SQ)) return replaceInstUsesWith(I, V); if (isa<Constant>(Op0)) @@ -1481,7 +1479,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyURemInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyURemInst(Op0, Op1, SQ)) 
return replaceInstUsesWith(I, V); if (Instruction *common = commonIRemTransforms(I)) @@ -1524,7 +1522,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifySRemInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifySRemInst(Op0, Op1, SQ)) return replaceInstUsesWith(I, V); // Handle the integer rem common cases @@ -1597,8 +1595,7 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(), - DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(), SQ)) return replaceInstUsesWith(I, V); // Handle cases involving: rem X, (select Cond, Y, Z) diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 85e5b6ba2dc2f..1117c11f4f51d 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -880,7 +880,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { - if (Value *V = SimplifyInstruction(&PN, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyInstruction(&PN, SQ)) return replaceInstUsesWith(PN, V); if (Instruction *Result = FoldPHIArgZextsIntoPHI(PN)) diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 76829c5e457bf..7afb8814fe52d 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1121,8 +1121,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *FalseVal = SI.getFalseValue(); Type *SelType = SI.getType(); - if (Value *V = - SimplifySelectInst(CondVal, TrueVal, FalseVal, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, SQ)) return replaceInstUsesWith(SI, V); if (Instruction *I = canonicalizeSelectToShuffle(SI)) diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index f77d713b9b071..219effce7ba56 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -520,7 +520,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (Value *V = SimplifyShlInst(Op0, Op1, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC)) + I.hasNoUnsignedWrap(), SQ)) return replaceInstUsesWith(I, V); if (Instruction *V = commonShiftTransforms(I)) @@ -618,7 +618,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return replaceInstUsesWith(I, V); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyLShrInst(Op0, Op1, I.isExact(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyLShrInst(Op0, Op1, I.isExact(), SQ)) return replaceInstUsesWith(I, V); if (Instruction *R = commonShiftTransforms(I)) @@ -702,7 +702,7 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { return replaceInstUsesWith(I, V); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyAShrInst(Op0, Op1, I.isExact(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyAShrInst(Op0, Op1, I.isExact(), SQ)) return replaceInstUsesWith(I, V); if (Instruction *R = commonShiftTransforms(I)) diff --git 
a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 8d0ed8532779e..0195c5e727c9a 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -589,12 +589,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If LHS is non-negative or has all low bits zero, then the upper bits // are all zero. - if (LHSKnown.Zero.isSignBitSet() || LowBits.isSubsetOf(LHSKnown.Zero)) + if (LHSKnown.isNonNegative() || LowBits.isSubsetOf(LHSKnown.Zero)) Known.Zero |= ~LowBits; // If LHS is negative and not all low bits are zero, then the upper bits // are all one. - if (LHSKnown.One.isSignBitSet() && LowBits.intersects(LHSKnown.One)) + if (LHSKnown.isNegative() && LowBits.intersects(LHSKnown.One)) Known.One |= ~LowBits; assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); @@ -607,8 +607,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (DemandedMask.isSignBitSet()) { computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); // If it's known zero, our sign bit is also zero. - if (LHSKnown.Zero.isSignBitSet()) - Known.Zero.setSignBit(); + if (LHSKnown.isNonNegative()) + Known.makeNonNegative(); } break; case Instruction::URem: { @@ -1537,7 +1537,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane); LaneElts = LaneElts.getLoBits(InnerVWidthPerLane); - LaneElts = LaneElts.shl(InnerVWidthPerLane * (2 * Lane + OpNum)); + LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum); UndefElts |= LaneElts; } } diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index e89b400a4afc8..7fc6774f1849c 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -144,8 +144,8 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { } Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { - if (Value *V = SimplifyExtractElementInst( - EI.getVectorOperand(), EI.getIndexOperand(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyExtractElementInst(EI.getVectorOperand(), + EI.getIndexOperand(), SQ)) return replaceInstUsesWith(EI, V); // If vector val is constant with all elements the same, replace EI with @@ -1140,8 +1140,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { SmallVector<int, 16> Mask = SVI.getShuffleMask(); Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); - if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getMask(), - SVI.getType(), DL, &TLI, &DT, &AC)) + if (auto *V = + SimplifyShuffleVectorInst(LHS, RHS, SVI.getMask(), SVI.getType(), SQ)) return replaceInstUsesWith(SVI, V); bool MadeChange = false; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 4729c79ca4c3d..1eb98b18bfb50 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -256,7 +256,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = I.getOperand(1); // Does "B op C" simplify? 
- if (Value *V = SimplifyBinOp(Opcode, B, C, DL)) { + if (Value *V = SimplifyBinOp(Opcode, B, C, SQ)) { // It simplifies to V. Form "A op V". I.setOperand(0, A); I.setOperand(1, V); @@ -285,7 +285,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = Op1->getOperand(1); // Does "A op B" simplify? - if (Value *V = SimplifyBinOp(Opcode, A, B, DL)) { + if (Value *V = SimplifyBinOp(Opcode, A, B, SQ)) { // It simplifies to V. Form "V op C". I.setOperand(0, V); I.setOperand(1, C); @@ -313,7 +313,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = I.getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, SQ)) { // It simplifies to V. Form "V op B". I.setOperand(0, V); I.setOperand(1, B); @@ -333,7 +333,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = Op1->getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, SQ)) { // It simplifies to V. Form "B op V". I.setOperand(0, B); I.setOperand(1, V); @@ -498,10 +498,10 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode, /// This tries to simplify binary operations by factorizing out common terms /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). -static Value *tryFactorization(InstCombiner::BuilderTy *Builder, - const DataLayout &DL, BinaryOperator &I, - Instruction::BinaryOps InnerOpcode, Value *A, - Value *B, Value *C, Value *D) { +Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder, + BinaryOperator &I, + Instruction::BinaryOps InnerOpcode, + Value *A, Value *B, Value *C, Value *D) { assert(A && B && C && D && "All values must be provided"); Value *V = nullptr; @@ -521,7 +521,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder, std::swap(C, D); // Consider forming "A op' (B op D)". // If "B op D" simplifies then it can be formed with no cost. - V = SimplifyBinOp(TopLevelOpcode, B, D, DL); + V = SimplifyBinOp(TopLevelOpcode, B, D, SQ); // If "B op D" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. if (!V && LHS->hasOneUse() && RHS->hasOneUse()) @@ -540,7 +540,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder, std::swap(C, D); // Consider forming "(A op C) op' B". // If "A op C" simplifies then it can be formed with no cost. - V = SimplifyBinOp(TopLevelOpcode, A, C, DL); + V = SimplifyBinOp(TopLevelOpcode, A, C, SQ); // If "A op C" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. @@ -610,23 +610,23 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { // The instruction has the form "(A op' B) op (C op' D)". Try to factorize // a common term. if (Op0 && Op1 && LHSOpcode == RHSOpcode) - if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, C, D)) + if (Value *V = tryFactorization(Builder, I, LHSOpcode, A, B, C, D)) return V; // The instruction has the form "(A op' B) op (C)". Try to factorize common // term. if (Op0) if (Value *Ident = getIdentityValue(LHSOpcode, RHS)) - if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, RHS, - Ident)) + if (Value *V = + tryFactorization(Builder, I, LHSOpcode, A, B, RHS, Ident)) return V; // The instruction has the form "(B) op (C op' D)". Try to factorize common // term. 
if (Op1) if (Value *Ident = getIdentityValue(RHSOpcode, LHS)) - if (Value *V = tryFactorization(Builder, DL, I, RHSOpcode, LHS, Ident, - C, D)) + if (Value *V = + tryFactorization(Builder, I, RHSOpcode, LHS, Ident, C, D)) return V; } @@ -638,8 +638,8 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' // Do "A op C" and "B op C" both simplify? - if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, DL)) - if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, DL)) { + if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQ)) + if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQ)) { // They do! Return "L op' R". ++NumExpand; C = Builder->CreateBinOp(InnerOpcode, L, R); @@ -655,8 +655,8 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' // Do "A op B" and "A op C" both simplify? - if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, DL)) - if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, DL)) { + if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQ)) + if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQ)) { // They do! Return "L op' R". ++NumExpand; A = Builder->CreateBinOp(InnerOpcode, L, R); @@ -672,14 +672,14 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (SI0->getCondition() == SI1->getCondition()) { Value *SI = nullptr; if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(), - SI1->getFalseValue(), DL, &TLI, &DT, &AC)) + SI1->getFalseValue(), SQ)) SI = Builder->CreateSelect(SI0->getCondition(), Builder->CreateBinOp(TopLevelOpcode, SI0->getTrueValue(), SI1->getTrueValue()), V); if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(), - SI1->getTrueValue(), DL, &TLI, &DT, &AC)) + SI1->getTrueValue(), SQ)) SI = Builder->CreateSelect( SI0->getCondition(), V, Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(), @@ -1399,8 +1399,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end()); - if (Value *V = - SimplifyGEPInst(GEP.getSourceElementType(), Ops, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops, SQ)) return replaceInstUsesWith(GEP, V); Value *PtrOp = GEP.getOperand(0); @@ -1589,7 +1588,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (SO1->getType() != GO1->getType()) return nullptr; - Value* Sum = SimplifyAddInst(GO1, SO1, false, false, DL, &TLI, &DT, &AC); + Value *Sum = SimplifyAddInst(GO1, SO1, false, false, SQ); // Only do the combine when we are sure the cost after the // merge is never more than that before the merge. if (Sum == nullptr) @@ -1949,9 +1948,9 @@ static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo *TLI, return isAllocLikeFn(V, TLI) && V != AI; } -static bool -isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users, - const TargetLibraryInfo *TLI) { +static bool isAllocSiteRemovable(Instruction *AI, + SmallVectorImpl<WeakTrackingVH> &Users, + const TargetLibraryInfo *TLI) { SmallVector<Instruction*, 4> Worklist; Worklist.push_back(AI); @@ -2035,7 +2034,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { // If we have a malloc call which is only used in any amount of comparisons // to null and free calls, delete the calls and replace the comparisons with // true or false as appropriate. 
- SmallVector<WeakVH, 64> Users; + SmallVector<WeakTrackingVH, 64> Users; if (isAllocSiteRemovable(&MI, Users, &TLI)) { for (unsigned i = 0, e = Users.size(); i != e; ++i) { // Lowering all @llvm.objectsize calls first because they may @@ -2304,8 +2303,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { if (!EV.hasIndices()) return replaceInstUsesWith(EV, Agg); - if (Value *V = - SimplifyExtractValueInst(Agg, EV.getIndices(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(), SQ)) return replaceInstUsesWith(EV, V); if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) { diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index b866958e3c4b8..b034ccc469338 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -101,6 +101,10 @@ static const char *const kAsanRegisterImageGlobalsName = "__asan_register_image_globals"; static const char *const kAsanUnregisterImageGlobalsName = "__asan_unregister_image_globals"; +static const char *const kAsanRegisterElfGlobalsName = + "__asan_register_elf_globals"; +static const char *const kAsanUnregisterElfGlobalsName = + "__asan_unregister_elf_globals"; static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; static const char *const kAsanInitName = "__asan_init"; @@ -120,8 +124,11 @@ static const char *const kAsanPoisonStackMemoryName = "__asan_poison_stack_memory"; static const char *const kAsanUnpoisonStackMemoryName = "__asan_unpoison_stack_memory"; + +// ASan version script has __asan_* wildcard. Triple underscore prevents a +// linker (gold) warning about attempting to export a local symbol. static const char *const kAsanGlobalsRegisteredFlagName = - "__asan_globals_registered"; + "___asan_globals_registered"; static const char *const kAsanOptionDetectUseAfterReturn = "__asan_option_detect_stack_use_after_return"; @@ -270,6 +277,13 @@ static cl::opt<bool> "code stripping of globals"), cl::Hidden, cl::init(true)); +// This is on by default even though there is a bug in gold: +// https://sourceware.org/bugzilla/show_bug.cgi?id=19002 +static cl::opt<bool> + ClWithComdat("asan-with-comdat", + cl::desc("Place ASan constructors in comdat sections"), + cl::Hidden, cl::init(true)); + // Debug flags. 
static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, cl::init(0)); @@ -607,10 +621,14 @@ public: private: void initializeCallbacks(Module &M); - bool InstrumentGlobals(IRBuilder<> &IRB, Module &M); + bool InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat); void InstrumentGlobalsCOFF(IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, ArrayRef<Constant *> MetadataInitializers); + void InstrumentGlobalsELF(IRBuilder<> &IRB, Module &M, + ArrayRef<GlobalVariable *> ExtendedGlobals, + ArrayRef<Constant *> MetadataInitializers, + const std::string &UniqueModuleId); void InstrumentGlobalsMachO(IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, ArrayRef<Constant *> MetadataInitializers); @@ -621,7 +639,8 @@ private: GlobalVariable *CreateMetadataGlobal(Module &M, Constant *Initializer, StringRef OriginalName); - void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata); + void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata, + StringRef InternalSuffix); IRBuilder<> CreateAsanModuleDtor(Module &M); bool ShouldInstrumentGlobal(GlobalVariable *G); @@ -647,6 +666,11 @@ private: Function *AsanUnregisterGlobals; Function *AsanRegisterImageGlobals; Function *AsanUnregisterImageGlobals; + Function *AsanRegisterElfGlobals; + Function *AsanUnregisterElfGlobals; + + Function *AsanCtorFunction = nullptr; + Function *AsanDtorFunction = nullptr; }; // Stack poisoning does not play well with exception handling. @@ -1431,8 +1455,13 @@ void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit, void AddressSanitizerModule::createInitializerPoisonCalls( Module &M, GlobalValue *ModuleName) { GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + if (!GV) + return; + + ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer()); + if (!CA) + return; - ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); for (Use &OP : CA->operands()) { if (isa<ConstantAggregateZero>(OP)) continue; ConstantStruct *CS = cast<ConstantStruct>(OP); @@ -1594,12 +1623,22 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) { checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy)); AsanUnregisterImageGlobals->setLinkage(Function::ExternalLinkage); + + AsanRegisterElfGlobals = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kAsanRegisterElfGlobalsName, IRB.getVoidTy(), + IntptrTy, IntptrTy, IntptrTy)); + AsanRegisterElfGlobals->setLinkage(Function::ExternalLinkage); + + AsanUnregisterElfGlobals = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kAsanUnregisterElfGlobalsName, IRB.getVoidTy(), + IntptrTy, IntptrTy, IntptrTy)); + AsanUnregisterElfGlobals->setLinkage(Function::ExternalLinkage); } // Put the metadata and the instrumented global in the same group. This ensures // that the metadata is discarded if the instrumented global is discarded. 
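In API terms the grouping described here reduces to giving both globals the same Comdat; a minimal sketch (the InternalSuffix parameter added below only changes which comdat name is used for internal globals):

  Comdat *C = M.getOrInsertComdat(G->getName());
  G->setComdat(C);        // the instrumented global
  Metadata->setComdat(C); // its metadata: discarded together with G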
void AddressSanitizerModule::SetComdatForGlobalMetadata( - GlobalVariable *G, GlobalVariable *Metadata) { + GlobalVariable *G, GlobalVariable *Metadata, StringRef InternalSuffix) { Module &M = *G->getParent(); Comdat *C = G->getComdat(); if (!C) { @@ -1609,7 +1648,15 @@ void AddressSanitizerModule::SetComdatForGlobalMetadata( assert(G->hasLocalLinkage()); G->setName(Twine(kAsanGenPrefix) + "_anon_global"); } - C = M.getOrInsertComdat(G->getName()); + + if (!InternalSuffix.empty() && G->hasLocalLinkage()) { + std::string Name = G->getName(); + Name += InternalSuffix; + C = M.getOrInsertComdat(Name); + } else { + C = M.getOrInsertComdat(G->getName()); + } + // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF. if (TargetTriple.isOSBinFormatCOFF()) C->setSelectionKind(Comdat::NoDuplicates); @@ -1636,11 +1683,10 @@ AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer, } IRBuilder<> AddressSanitizerModule::CreateAsanModuleDtor(Module &M) { - Function *AsanDtorFunction = + AsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*C), false), GlobalValue::InternalLinkage, kAsanModuleDtorName, &M); BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction); - appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority); return IRBuilder<>(ReturnInst::Create(*C, AsanDtorBB)); } @@ -1665,8 +1711,67 @@ void AddressSanitizerModule::InstrumentGlobalsCOFF( "global metadata will not be padded appropriately"); Metadata->setAlignment(SizeOfGlobalStruct); - SetComdatForGlobalMetadata(G, Metadata); + SetComdatForGlobalMetadata(G, Metadata, ""); + } +} + +void AddressSanitizerModule::InstrumentGlobalsELF( + IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, + ArrayRef<Constant *> MetadataInitializers, + const std::string &UniqueModuleId) { + assert(ExtendedGlobals.size() == MetadataInitializers.size()); + + SmallVector<GlobalValue *, 16> MetadataGlobals(ExtendedGlobals.size()); + for (size_t i = 0; i < ExtendedGlobals.size(); i++) { + GlobalVariable *G = ExtendedGlobals[i]; + GlobalVariable *Metadata = + CreateMetadataGlobal(M, MetadataInitializers[i], G->getName()); + MDNode *MD = MDNode::get(M.getContext(), ValueAsMetadata::get(G)); + Metadata->setMetadata(LLVMContext::MD_associated, MD); + MetadataGlobals[i] = Metadata; + + SetComdatForGlobalMetadata(G, Metadata, UniqueModuleId); } + + // Update llvm.compiler.used, adding the new metadata globals. This is + // needed so that during LTO these variables stay alive. + if (!MetadataGlobals.empty()) + appendToCompilerUsed(M, MetadataGlobals); + + // RegisteredFlag serves two purposes. First, we can pass it to dladdr() + // to look up the loaded image that contains it. Second, we can store in it + // whether registration has already occurred, to prevent duplicate + // registration. + // + // Common linkage ensures that there is only one global per shared library. + GlobalVariable *RegisteredFlag = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::CommonLinkage, + ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName); + RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility); + + // Create start and stop symbols. 
+ GlobalVariable *StartELFMetadata = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr, + "__start_" + getGlobalMetadataSection()); + StartELFMetadata->setVisibility(GlobalVariable::HiddenVisibility); + GlobalVariable *StopELFMetadata = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr, + "__stop_" + getGlobalMetadataSection()); + StopELFMetadata->setVisibility(GlobalVariable::HiddenVisibility); + + // Create a call to register the globals with the runtime. + IRB.CreateCall(AsanRegisterElfGlobals, + {IRB.CreatePointerCast(RegisteredFlag, IntptrTy), + IRB.CreatePointerCast(StartELFMetadata, IntptrTy), + IRB.CreatePointerCast(StopELFMetadata, IntptrTy)}); + + // We also need to unregister globals at the end, e.g., when a shared library + // gets closed. + IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M); + IRB_Dtor.CreateCall(AsanUnregisterElfGlobals, + {IRB.CreatePointerCast(RegisteredFlag, IntptrTy), + IRB.CreatePointerCast(StartELFMetadata, IntptrTy), + IRB.CreatePointerCast(StopELFMetadata, IntptrTy)}); } void AddressSanitizerModule::InstrumentGlobalsMachO( @@ -1756,7 +1861,10 @@ void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray( // This function replaces all global variables with new variables that have // trailing redzones. It also creates a function that poisons // redzones and inserts this function into llvm.global_ctors. -bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { +// Sets *CtorComdat to true if the global registration code emitted into the +// asan constructor is comdat-compatible. +bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat) { + *CtorComdat = false; GlobalsMD.init(M); SmallVector<GlobalVariable *, 16> GlobalsToChange; @@ -1766,7 +1874,10 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { } size_t n = GlobalsToChange.size(); - if (n == 0) return false; + if (n == 0) { + *CtorComdat = true; + return false; + } auto &DL = M.getDataLayout(); @@ -1911,7 +2022,14 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { Initializers[i] = Initializer; } - if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) { + std::string ELFUniqueModuleId = + (UseGlobalsGC && TargetTriple.isOSBinFormatELF()) ? getUniqueModuleId(&M) + : ""; + + if (!ELFUniqueModuleId.empty()) { + InstrumentGlobalsELF(IRB, M, NewGlobals, Initializers, ELFUniqueModuleId); + *CtorComdat = true; + } else if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) { InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers); } else if (UseGlobalsGC && ShouldUseMachOGlobalsSection()) { InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers); @@ -1938,17 +2056,36 @@ bool AddressSanitizerModule::runOnModule(Module &M) { if (CompileKernel) return false; - Function *AsanCtorFunction; + // Create a module constructor. A destructor is created lazily because not all + // platforms, and not all modules need it. std::tie(AsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions( M, kAsanModuleCtorName, kAsanInitName, /*InitArgTypes=*/{}, /*InitArgs=*/{}, kAsanVersionCheckName); - appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority); + bool CtorComdat = true; bool Changed = false; // TODO(glider): temporarily disabled globals instrumentation for KASan. 
if (ClGlobals) { IRBuilder<> IRB(AsanCtorFunction->getEntryBlock().getTerminator()); - Changed |= InstrumentGlobals(IRB, M); + Changed |= InstrumentGlobals(IRB, M, &CtorComdat); + } + + // Put the constructor and destructor in comdat if both + // (1) global instrumentation is not TU-specific and + // (2) the target is ELF. + if (ClWithComdat && TargetTriple.isOSBinFormatELF() && CtorComdat) { + AsanCtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleCtorName)); + appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority, + AsanCtorFunction); + if (AsanDtorFunction) { + AsanDtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleDtorName)); + appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority, + AsanDtorFunction); + } + } else { + appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority); + if (AsanDtorFunction) + appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority); } return Changed; @@ -2586,7 +2723,7 @@ void FunctionStackPoisoner::processStaticAllocas() { Value *NewAllocaPtr = IRB.CreateIntToPtr( IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)), AI->getType()); - replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/false); + replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, DIExpression::NoDeref); AI->replaceAllUsesWith(NewAllocaPtr); } diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index d7eb857cff7e1..493d014586c6a 100644 --- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -771,7 +771,7 @@ public: if (perform(MI)) { Changed = true; ++NumOfPGOMemOPOpt; - DEBUG(dbgs() << "MemOP calls: " << MI->getCalledFunction()->getName() + DEBUG(dbgs() << "MemOP call: " << MI->getCalledFunction()->getName() << " is Transformed.\n"); } } @@ -863,13 +863,23 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { ActualCount = *BBEdgeCount; } + ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals); + DEBUG(dbgs() << "Read one memory intrinsic profile with count " << ActualCount + << "\n"); + DEBUG( + for (auto &VD + : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; }); + if (ActualCount < MemOPCountThreshold) return false; + // Skip if the total value profiled count is 0, in which case we can't + // scale up the counts properly (and there is no profitable transformation).
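The scaling referred to here: size-class counts come from whole-function value profiling, while ActualCount is this call site's block count, so each count is rescaled by ActualCount / SavedTotalCount before the thresholds apply. A hypothetical helper showing just the arithmetic (not the pass's actual code):

  static uint64_t scaleCount(uint64_t Count, uint64_t Num, uint64_t Denom) {
    // Denom == 0 is the degenerate case the comment above describes.
    return Denom == 0 ? 0 : uint64_t(double(Count) * Num / Denom);
  }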
+ if (TotalCount == 0) + return false; - ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals); TotalCount = ActualCount; if (MemOPScaleCount) - DEBUG(dbgs() << "Scale counts: numberator = " << ActualCount + DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount << " denominator = " << SavedTotalCount << "\n"); // Keeping track of the count of the default case: @@ -915,14 +925,10 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { MaxCount = RemainCount; uint64_t SumForOpt = TotalCount - RemainCount; - DEBUG(dbgs() << "Read one memory intrinsic profile: " << SumForOpt << " vs " - << TotalCount << "\n"); - DEBUG( - for (auto &VD - : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; }); DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version - << " Versions\n"); + << " Versions (covering " << SumForOpt << " out of " + << TotalCount << ")\n"); // mem_op(..., size) // ==> diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 190f05db4b0c1..3e480a6df446d 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2643,7 +2643,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { "ByVal argument is not a pointer!"); Size = DL.getTypeAllocSize(A->getType()->getPointerElementType()); if (ArgOffset + Size > kParamTLSSize) break; - unsigned ParamAlignment = CS.getParamAlignment(i + 1); + unsigned ParamAlignment = CS.getParamAlignment(i); unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment); Store = IRB.CreateMemCpy(ArgShadowBase, getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB), @@ -3502,7 +3502,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper { assert(A->getType()->isPointerTy()); Type *RealTy = A->getType()->getPointerElementType(); uint64_t ArgSize = DL.getTypeAllocSize(RealTy); - uint64_t ArgAlign = CS.getParamAlignment(ArgNo + 1); + uint64_t ArgAlign = CS.getParamAlignment(ArgNo); if (ArgAlign < 8) ArgAlign = 8; VAArgOffset = alignTo(VAArgOffset, ArgAlign); diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h index f02b75f0b4560..cd9b3d96a14f7 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.h +++ b/lib/Transforms/ObjCARC/ObjCARC.h @@ -69,6 +69,19 @@ static inline void EraseInstruction(Instruction *CI) { RecursivelyDeleteTriviallyDeadInstructions(OldArg); } +/// If Inst is a RetainRV and its operand is a call or invoke, return the +/// operand. Otherwise return null.
+static inline const Instruction *getreturnRVOperand(const Instruction &Inst, + ARCInstKind Class) { + if (Class != ARCInstKind::RetainRV) + return nullptr; + + const auto *Opnd = Inst.getOperand(0)->stripPointerCasts(); + if (const auto *C = dyn_cast<CallInst>(Opnd)) + return C; + return dyn_cast<InvokeInst>(Opnd); +} + } // end namespace objcarc } // end namespace llvm diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp index c1bbc4e96b16d..d13e941044f14 100644 --- a/lib/Transforms/ObjCARC/PtrState.cpp +++ b/lib/Transforms/ObjCARC/PtrState.cpp @@ -244,6 +244,18 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, ARCInstKind Class) { + auto SetSeqAndInsertReverseInsertPt = [&](Sequence NewSeq){ + assert(!HasReverseInsertPts()); + SetSeq(NewSeq); + // If this is an invoke instruction, we're scanning it as part of + // one of its successor blocks, since we can't insert code after it + // in its own block, and we don't want to split critical edges. + if (isa<InvokeInst>(Inst)) + InsertReverseInsertPt(&*BB->getFirstInsertionPt()); + else + InsertReverseInsertPt(&*++Inst->getIterator()); + }; + // Check for possible direct uses. switch (GetSeq()) { case S_Release: @@ -251,26 +263,18 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst, if (CanUse(Inst, Ptr, PA, Class)) { DEBUG(dbgs() << " CanUse: Seq: " << GetSeq() << "; " << *Ptr << "\n"); - assert(!HasReverseInsertPts()); - // If this is an invoke instruction, we're scanning it as part of - // one of its successor blocks, since we can't insert code after it - // in its own block, and we don't want to split critical edges. - if (isa<InvokeInst>(Inst)) - InsertReverseInsertPt(&*BB->getFirstInsertionPt()); - else - InsertReverseInsertPt(&*++Inst->getIterator()); - SetSeq(S_Use); + SetSeqAndInsertReverseInsertPt(S_Use); } else if (Seq == S_Release && IsUser(Class)) { DEBUG(dbgs() << " PreciseReleaseUse: Seq: " << GetSeq() << "; " << *Ptr << "\n"); // Non-movable releases depend on any possible objc pointer use. - SetSeq(S_Stop); - assert(!HasReverseInsertPts()); - // As above; handle invoke specially. 
- if (isa<InvokeInst>(Inst)) - InsertReverseInsertPt(&*BB->getFirstInsertionPt()); - else - InsertReverseInsertPt(&*++Inst->getIterator()); + SetSeqAndInsertReverseInsertPt(S_Stop); + } else if (const auto *Call = getreturnRVOperand(*Inst, Class)) { + if (CanUse(Call, Ptr, PA, GetBasicARCInstKind(Call))) { + DEBUG(dbgs() << " ReleaseUse: Seq: " << GetSeq() << "; " + << *Ptr << "\n"); + SetSeqAndInsertReverseInsertPt(S_Stop); + } } break; case S_Stop: diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index b323ab3bd443c..523390758769a 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -55,6 +55,7 @@ add_llvm_library(LLVMScalarOpts Scalar.cpp Scalarizer.cpp SeparateConstOffsetFromGEP.cpp + SimpleLoopUnswitch.cpp SimplifyCFGPass.cpp Sink.cpp SpeculativeExecution.cpp diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index b5a4cc2f39533..dc864f48bf1f8 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -151,7 +151,7 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI, Changed = true; } - if (Value *V = SimplifyInstruction(P, SQ.getWithInstruction(P))) { + if (Value *V = SimplifyInstruction(P, SQ)) { P->replaceAllUsesWith(V); P->eraseFromParent(); Changed = true; @@ -565,25 +565,14 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) { return false; LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI(); - auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; - auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); - auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; - auto *ACWP = getAnalysisIfAvailable<AssumptionCacheTracker>(); - auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr; - const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC); - return runImpl(F, LVI, SQ); + return runImpl(F, LVI, getBestSimplifyQuery(*this, F)); } PreservedAnalyses CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) { LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F); - auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F); - auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F); - auto *AC = AM.getCachedResult<AssumptionAnalysis>(F); - const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC); - bool Changed = runImpl(F, LVI, SQ); + bool Changed = runImpl(F, LVI, getBestSimplifyQuery(AM, F)); if (!Changed) return PreservedAnalyses::all(); diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 04479b6e49ac8..d8f8a58a5fdfa 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -253,6 +253,7 @@ public: const TargetTransformInfo &TTI; DominatorTree &DT; AssumptionCache &AC; + const SimplifyQuery SQ; MemorySSA *MSSA; std::unique_ptr<MemorySSAUpdater> MSSAUpdater; typedef RecyclingAllocator< @@ -315,9 +316,10 @@ public: unsigned CurrentGeneration; /// \brief Set up the EarlyCSE runner for a particular function. 
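The SQ member added here bundles the DataLayout and the three optional analyses once at construction, the same pattern this patch applies across the tree; call sites then pass one argument instead of four. Minimal sketch:

  // was: SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)
  const SimplifyQuery SQ(DL, &TLI, &DT, &AC);
  if (Value *V = SimplifyInstruction(Inst, SQ))
    Inst->replaceAllUsesWith(V);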
- EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI, - DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA) - : TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA), + EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI, + const TargetTransformInfo &TTI, DominatorTree &DT, + AssumptionCache &AC, MemorySSA *MSSA) + : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA), MSSAUpdater(make_unique<MemorySSAUpdater>(MSSA)), CurrentGeneration(0) { } @@ -616,8 +618,6 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { /// stores which can occur in bitfield code among other things. Instruction *LastStore = nullptr; - const DataLayout &DL = BB->getModule()->getDataLayout(); - // See if any instructions in the block can be eliminated. If so, do it. If // not, add them to AvailableValues. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { @@ -635,10 +635,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Skip assume intrinsics, they don't really have side effects (although // they're marked as such to ensure preservation of control dependencies), - // and this pass will not disturb any of the assumption's control - // dependencies. + // and this pass will not bother with its removal. However, we should mark + // its condition as true for all dominated blocks. if (match(Inst, m_Intrinsic<Intrinsic::assume>())) { - DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n'); + auto *CondI = + dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0)); + if (CondI && SimpleValue::canHandle(CondI)) { + DEBUG(dbgs() << "EarlyCSE considering assumption: " << *Inst << '\n'); + AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext())); + } else + DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n'); continue; } @@ -658,10 +664,25 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { if (match(Inst, m_Intrinsic<Intrinsic::experimental_guard>())) { if (auto *CondI = dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) { - // The condition we're on guarding here is true for all dominated - // locations. - if (SimpleValue::canHandle(CondI)) + if (SimpleValue::canHandle(CondI)) { + // Do we already know the actual value of this condition? + if (auto *KnownCond = AvailableValues.lookup(CondI)) { + // Is the condition known to be true? + if (isa<ConstantInt>(KnownCond) && + cast<ConstantInt>(KnownCond)->isOneValue()) { + DEBUG(dbgs() << "EarlyCSE removing guard: " << *Inst << '\n'); + removeMSSA(Inst); + Inst->eraseFromParent(); + Changed = true; + continue; + } else + // Use the known value if it wasn't true. + cast<CallInst>(Inst)->setArgOperand(0, KnownCond); + } + // The condition we're on guarding here is true for all dominated + // locations. AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext())); + } } // Guard intrinsics read all memory, but don't write any memory. @@ -673,7 +694,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // If the instruction can be simplified (e.g. X+0 = X) then replace it with // its simpler value. - if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) { + if (Value *V = SimplifyInstruction(Inst, SQ)) { DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n'); bool Killed = false; if (!Inst->use_empty()) { @@ -964,7 +985,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F, auto *MSSA = UseMemorySSA ? 
&AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr; - EarlyCSE CSE(TLI, TTI, DT, AC, MSSA); + EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA); if (!CSE.run()) return PreservedAnalyses::all(); @@ -1008,7 +1029,7 @@ public: auto *MSSA = UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr; - EarlyCSE CSE(TLI, TTI, DT, AC, MSSA); + EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA); return CSE.run(); } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index be696df548d52..c04646eed49a8 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1687,7 +1687,7 @@ bool GVN::processInstruction(Instruction *I) { // example if it determines that %y is equal to %x then the instruction // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. const DataLayout &DL = I->getModule()->getDataLayout(); - if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) { + if (Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC})) { bool Changed = false; if (!I->use_empty()) { I->replaceAllUsesWith(V); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index dcb2a4a0c6e6b..3953198fe6052 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -97,7 +97,7 @@ class IndVarSimplify { TargetLibraryInfo *TLI; const TargetTransformInfo *TTI; - SmallVector<WeakVH, 16> DeadInsts; + SmallVector<WeakTrackingVH, 16> DeadInsts; bool Changed = false; bool isValidRewrite(Value *FromVal, Value *ToVal); @@ -415,8 +415,8 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { Compare->getName()); // In the following deletions, PN may become dead and may be deleted. - // Use a WeakVH to observe whether this happens. - WeakVH WeakPH = PN; + // Use a WeakTrackingVH to observe whether this happens. + WeakTrackingVH WeakPH = PN; // Delete the old floating point exit comparison. The branch starts using the // new comparison. @@ -451,7 +451,7 @@ void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { // BasicBlock *Header = L->getHeader(); - SmallVector<WeakVH, 8> PHIs; + SmallVector<WeakTrackingVH, 8> PHIs; for (BasicBlock::iterator I = Header->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) PHIs.push_back(PN); @@ -901,7 +901,7 @@ class WidenIV { PHINode *WidePhi; Instruction *WideInc; const SCEV *WideIncExpr; - SmallVectorImpl<WeakVH> &DeadInsts; + SmallVectorImpl<WeakTrackingVH> &DeadInsts; SmallPtrSet<Instruction *,16> Widened; SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; @@ -941,20 +941,13 @@ class WidenIV { } public: - WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, - ScalarEvolution *SEv, DominatorTree *DTree, - SmallVectorImpl<WeakVH> &DI, bool HasGuards) : - OrigPhi(WI.NarrowIV), - WideType(WI.WidestNativeType), - LI(LInfo), - L(LI->getLoopFor(OrigPhi->getParent())), - SE(SEv), - DT(DTree), - HasGuards(HasGuards), - WidePhi(nullptr), - WideInc(nullptr), - WideIncExpr(nullptr), - DeadInsts(DI) { + WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, + DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, + bool HasGuards) + : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), + L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), + HasGuards(HasGuards), WidePhi(nullptr), WideInc(nullptr), + WideIncExpr(nullptr), DeadInsts(DI) { assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); ExtendKindMap[OrigPhi] = WI.IsSigned ? 
SignExtended : ZeroExtended; } diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp index 9e2563879da2b..5e116ef2fe75e 100644 --- a/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -138,7 +138,7 @@ private: // Tries to infer the specific address space of each address expression in // Postorder. - void inferAddressSpaces(const std::vector<Value *> &Postorder, + void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder, ValueToAddrSpaceMapTy *InferredAddrSpace) const; bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const; @@ -147,7 +147,7 @@ private: // address spaces if InferredAddrSpace says so. Postorder is the postorder of // all flat expressions in the use-def graph of function F. bool - rewriteWithNewAddressSpaces(const std::vector<Value *> &Postorder, + rewriteWithNewAddressSpaces(ArrayRef<WeakTrackingVH> Postorder, const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const; @@ -162,7 +162,7 @@ private: std::vector<std::pair<Value *, bool>> &PostorderStack, DenseSet<Value *> &Visited) const; - std::vector<Value *> collectFlatAddressExpressions(Function &F) const; + std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const; Value *cloneValueWithNewAddressSpace( Value *V, unsigned NewAddrSpace, @@ -274,16 +274,36 @@ void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack( Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack, DenseSet<Value *> &Visited) const { assert(V->getType()->isPointerTy()); + + // Generic addressing expressions may be hidden in nested constant + // expressions. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + // TODO: Look in non-address parts, like icmp operands. + if (isAddressExpression(*CE) && Visited.insert(CE).second) + PostorderStack.push_back(std::make_pair(CE, false)); + + return; + } + if (isAddressExpression(*V) && V->getType()->getPointerAddressSpace() == FlatAddrSpace) { - if (Visited.insert(V).second) + if (Visited.insert(V).second) { PostorderStack.push_back(std::make_pair(V, false)); + + Operator *Op = cast<Operator>(V); + for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) { + if (isAddressExpression(*CE) && Visited.insert(CE).second) + PostorderStack.emplace_back(CE, false); + } + } + } } } // Returns all flat address expressions in function F. The elements are // ordered in postorder. -std::vector<Value *> +std::vector<WeakTrackingVH> InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { // This function implements a non-recursive postorder traversal of a partial // use-def graph of function F. @@ -326,21 +346,25 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { PushPtrOperand(Cmp->getOperand(0)); PushPtrOperand(Cmp->getOperand(1)); } + } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) { + if (!ASC->getType()->isVectorTy()) + PushPtrOperand(ASC->getPointerOperand()); } } - std::vector<Value *> Postorder; // The resultant postorder. + std::vector<WeakTrackingVH> Postorder; // The resultant postorder. while (!PostorderStack.empty()) { + Value *TopVal = PostorderStack.back().first; // If the operands of the expression on the top are already explored, // adds that expression to the resultant postorder.
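Holding the postorder as WeakTrackingVH matters because the rewrite stage may RAUW a ConstantExpr that is still queued behind it. An illustrative case, assuming an AMDGPU-style layout where 0 is the flat space and 3 is LDS (IR in comments):

  // @lds lives in addrspace(3); a flat access through nested constant
  // expressions puts both levels into the postorder:
  //   getelementptr ([4 x float], [4 x float]* addrspacecast
  //     ([4 x float] addrspace(3)* @lds to [4 x float]*), i64 0, i64 0)
  // Rewriting the inner addrspacecast replaces the enclosing ConstantExpr
  // too; a plain Value* entry would dangle, while the tracking handle
  // follows the replacement.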
if (PostorderStack.back().second) { - Postorder.push_back(PostorderStack.back().first); + Postorder.push_back(TopVal); PostorderStack.pop_back(); continue; } // Otherwise, adds its operands to the stack and explores them. PostorderStack.back().second = true; - for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) { + for (Value *PtrOperand : getPointerOperands(*TopVal)) { appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack, Visited); } @@ -559,7 +583,7 @@ bool InferAddressSpaces::runOnFunction(Function &F) { return false; // Collects all flat address expressions in postorder. - std::vector<Value *> Postorder = collectFlatAddressExpressions(F); + std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F); // Runs a data-flow analysis to refine the address spaces of every expression // in Postorder. @@ -571,8 +595,10 @@ bool InferAddressSpaces::runOnFunction(Function &F) { return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F); } +// Constants need to be tracked through RAUW to handle cases with nested +// constant expressions, so wrap values in WeakTrackingVH. void InferAddressSpaces::inferAddressSpaces( - const std::vector<Value *> &Postorder, + ArrayRef<WeakTrackingVH> Postorder, ValueToAddrSpaceMapTy *InferredAddrSpace) const { SetVector<Value *> Worklist(Postorder.begin(), Postorder.end()); // Initially, all expressions are in the uninitialized address space. @@ -784,8 +810,8 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I, } bool InferAddressSpaces::rewriteWithNewAddressSpaces( - const std::vector<Value *> &Postorder, - const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const { + ArrayRef<WeakTrackingVH> Postorder, + const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const { // For each address expression to be modified, creates a clone of it with its // pointer operands converted to the new address space. Since the pointer // operands are converted, the clone is naturally in the new address space by @@ -812,8 +838,12 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get())); } + SmallVector<Instruction *, 16> DeadInstructions; + // Replaces the uses of the old address expressions with the new ones. - for (Value *V : Postorder) { + for (const WeakTrackingVH &WVH : Postorder) { + assert(WVH && "value was unexpectedly deleted"); + Value *V = WVH; Value *NewV = ValueWithNewAddrSpace.lookup(V); if (NewV == nullptr) continue; @@ -821,6 +851,17 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( DEBUG(dbgs() << "Replacing the uses of " << *V << "\n with\n " << *NewV << '\n'); + if (Constant *C = dyn_cast<Constant>(V)) { + Constant *Replace = ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV), + C->getType()); + if (C != Replace) { + DEBUG(dbgs() << "Inserting replacement const cast: " + << Replace << ": " << *Replace << '\n'); + C->replaceAllUsesWith(Replace); + V = Replace; + } + } + Value::use_iterator I, E, Next; for (I = V->use_begin(), E = V->use_end(); I != E; ) { Use &U = *I; @@ -881,6 +922,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( } } + if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) { + unsigned NewAS = NewV->getType()->getPointerAddressSpace(); + if (ASC->getDestAddressSpace() == NewAS) { + ASC->replaceAllUsesWith(NewV); + DeadInstructions.push_back(ASC); + continue; + } + } + // Otherwise, replaces the use with flat(NewV). 
if (Instruction *I = dyn_cast<Instruction>(V)) { BasicBlock::iterator InsertPos = std::next(I->getIterator()); @@ -894,10 +944,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( } } - if (V->use_empty()) - RecursivelyDeleteTriviallyDeadInstructions(V); + if (V->use_empty()) { + if (Instruction *I = dyn_cast<Instruction>(V)) + DeadInstructions.push_back(I); + } } + for (Instruction *I : DeadInstructions) + RecursivelyDeleteTriviallyDeadInstructions(I); + return true; } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index a0da81605a809..7dacaba1193e8 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -557,7 +557,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( Value *LHS = PN->getIncomingValue(i); Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB); - Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL); + Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, {DL}); if (!Res) { if (!isa<Constant>(RHS)) continue; @@ -1250,37 +1250,53 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, BasicBlock *OnlyDest = nullptr; BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL; + Constant *OnlyVal = nullptr; + Constant *MultipleVal = (Constant *)(intptr_t)~0ULL; + unsigned PredWithKnownDest = 0; for (const auto &PredValue : PredValues) { BasicBlock *Pred = PredValue.second; if (!SeenPreds.insert(Pred).second) continue; // Duplicate predecessor entry. - // If the predecessor ends with an indirect goto, we can't change its - // destination. - if (isa<IndirectBrInst>(Pred->getTerminator())) - continue; - Constant *Val = PredValue.first; BasicBlock *DestBB; if (isa<UndefValue>(Val)) DestBB = nullptr; - else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) + else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { + assert(isa<ConstantInt>(Val) && "Expecting a constant integer"); DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero()); - else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { + assert(isa<ConstantInt>(Val) && "Expecting a constant integer"); DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor(); } else { assert(isa<IndirectBrInst>(BB->getTerminator()) && "Unexpected terminator"); + assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress"); DestBB = cast<BlockAddress>(Val)->getBasicBlock(); } // If we have exactly one destination, remember it for efficiency below. - if (PredToDestList.empty()) + if (PredToDestList.empty()) { OnlyDest = DestBB; - else if (OnlyDest != DestBB) - OnlyDest = MultipleDestSentinel; + OnlyVal = Val; + } else { + if (OnlyDest != DestBB) + OnlyDest = MultipleDestSentinel; + // It is possible that we have the same destination, but a different + // value, e.g. the default case of a SwitchInst. + if (Val != OnlyVal) + OnlyVal = MultipleVal; + } + + // We know where this predecessor is going. + ++PredWithKnownDest; + + // If the predecessor ends with an indirect goto, we can't change its + // destination. + if (isa<IndirectBrInst>(Pred->getTerminator())) + continue; PredToDestList.push_back(std::make_pair(Pred, DestBB)); } @@ -1293,7 +1309,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, // not thread.
By doing so, we do not need to duplicate the current block and // also do not miss potential opportunities in case we don't/can't duplicate. if (OnlyDest && OnlyDest != MultipleDestSentinel) { - if (PredToDestList.size() == + if (PredWithKnownDest == (size_t)std::distance(pred_begin(BB), pred_end(BB))) { bool SeenFirstBranchToOnlyDest = false; for (BasicBlock *SuccBB : successors(BB)) { @@ -1310,11 +1326,18 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, // If the condition is now dead due to the removal of the old terminator, // erase it. - auto *CondInst = dyn_cast<Instruction>(Cond); - if (CondInst && CondInst->use_empty()) - CondInst->eraseFromParent(); - // FIXME: in case this instruction is defined in the current BB and it - // resolves to a single value from all predecessors, we can do RAUW. + if (auto *CondInst = dyn_cast<Instruction>(Cond)) { + if (CondInst->use_empty() && !CondInst->mayHaveSideEffects()) + CondInst->eraseFromParent(); + else if (OnlyVal && OnlyVal != MultipleVal && + CondInst->getParent() == BB) { + // If we just learned Cond is the same value for all uses of the + // condition, replace it with a constant value. + CondInst->replaceAllUsesWith(OnlyVal); + if (!CondInst->mayHaveSideEffects()) + CondInst->eraseFromParent(); + } + } return true; } } @@ -1883,8 +1906,9 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( // If this instruction can be simplified after the operands are updated, // just use the simplified value instead. This frequently happens due to // phi translation. - if (Value *IV = - SimplifyInstruction(New, BB->getModule()->getDataLayout())) { + if (Value *IV = SimplifyInstruction( + New, + {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) { ValueMapping[&*BI] = IV; if (!New->mayHaveSideEffects()) { delete New; diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 5042fc18d7c49..410fbb03068f4 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -499,7 +499,7 @@ bool LoopIdiomRecognize::runOnLoopBlock( Instruction *Inst = &*I++; // Look for memset instructions, which may be optimized to a larger memset. if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) { - WeakVH InstPtr(&*I); + WeakTrackingVH InstPtr(&*I); if (!processLoopMemSet(MSI, BECount)) continue; MadeChange = true; @@ -856,7 +856,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( /// If the stored value is a strided load in the same loop with the same stride /// this may be transformable into a memcpy. This kicks in for stuff like -/// for (i) A[i] = B[i]; +/// for (i) A[i] = B[i]; bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount) { assert(SI->isSimple() && "Expected only non-volatile stores."); diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 28e71ca05436c..af095560cc025 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -77,7 +77,7 @@ static bool SimplifyLoopInst(Loop *L, DominatorTree *DT, LoopInfo *LI, // Don't bother simplifying unused instructions. if (!I->use_empty()) { - Value *V = SimplifyInstruction(I, DL, TLI, DT, AC); + Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC}); if (V && LI->replacementPreservesLCSSAForm(I, V)) { // Mark all uses for resimplification next time round the loop.
for (User *U : I->users()) diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 8ce96cf1b7a67..2ba9265566a8c 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -341,7 +341,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // With the operands remapped, see if the instruction constant folds or is // otherwise simplifyable. This commonly occurs because the entry from PHI // nodes allows icmps and other instructions to fold. - Value *V = SimplifyInstruction(C, SQ.getWithInstruction(C)); + Value *V = SimplifyInstruction(C, SQ); if (V && LI->replacementPreservesLCSSAForm(C, V)) { // If so, then delete the temporary instruction and stick the folded value // in the map. @@ -670,8 +670,9 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM, LPMUpdater &) { int Threshold = EnableHeaderDuplication ? DefaultRotationThreshold : 0; const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); - const SimplifyQuery SQ(DL, &AR.TLI, &AR.DT, &AR.AC); + const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL); - LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, SQ); + LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, + SQ); bool Changed = LR.processLoop(&L); if (!Changed) @@ -714,10 +715,7 @@ public: auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); auto *SE = SEWP ? &SEWP->getSE() : nullptr; - auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); - auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; - const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - const SimplifyQuery SQ(DL, TLI, DT, AC); + const SimplifyQuery SQ = getBestSimplifyQuery(*this, F); LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE, SQ); return LR.processLoop(L); } diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index a5a81c33a8ebd..35c05e84fd68d 100644 --- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -40,7 +40,7 @@ static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI) { bool Changed = false; // Copy blocks into a temporary array to avoid iterator invalidation issues // as we remove them. - SmallVector<WeakVH, 16> Blocks(L.blocks()); + SmallVector<WeakTrackingVH, 16> Blocks(L.blocks()); for (auto &Block : Blocks) { // Attempt to merge blocks in the trivial case. Don't modify blocks which diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index af137f6faa634..ccedb98d7fa15 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -900,7 +900,7 @@ static bool isHighCostExpansion(const SCEV *S, /// If any of the instructions in the specified set are trivially dead, delete /// them and see if this makes any of their operands subsequently dead.
static bool -DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { +DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakTrackingVH> &DeadInsts) { bool Changed = false; while (!DeadInsts.empty()) { @@ -1845,7 +1845,7 @@ class LSRInstance { void FinalizeChain(IVChain &Chain); void CollectChains(); void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts); + SmallVectorImpl<WeakTrackingVH> &DeadInsts); void CollectInterestingTypesAndFactors(); void CollectFixupsAndInitialFormulae(); @@ -1920,19 +1920,15 @@ class LSRInstance { const LSRUse &LU, SCEVExpander &Rewriter) const; - Value *Expand(const LSRUse &LU, const LSRFixup &LF, - const Formula &F, - BasicBlock::iterator IP, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const; + Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F, + BasicBlock::iterator IP, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF, - const Formula &F, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const; - void Rewrite(const LSRUse &LU, const LSRFixup &LF, - const Formula &F, + const Formula &F, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; + void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const; + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution); public: @@ -3014,7 +3010,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, /// Generate an add or subtract for each IVInc in a chain to materialize the IV /// user's operand from the previous IV user's operand. void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) { + SmallVectorImpl<WeakTrackingVH> &DeadInsts) { // Find the new IVOperand for the head of the chain. It may have been replaced // by LSR. const IVInc &Head = Chain.Incs[0]; @@ -4759,12 +4755,10 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, /// Emit instructions for the leading candidate expression for this LSRUse (this /// is called "expanding"). -Value *LSRInstance::Expand(const LSRUse &LU, - const LSRFixup &LF, - const Formula &F, - BasicBlock::iterator IP, +Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, + const Formula &F, BasicBlock::iterator IP, SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const { + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { if (LU.RigidFormula) return LF.OperandValToReplace; @@ -4939,12 +4933,9 @@ Value *LSRInstance::Expand(const LSRUse &LU, /// Helper for Rewrite. PHI nodes are special because the use of their operands /// effectively happens in their predecessor blocks, so the expression may need /// to be expanded in multiple places. 
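A compact picture of why PHIs need multi-site expansion (IR in comments; %e stands for the expression being rewritten):

  //   exit1:                      exit2:
  //     br label %merge             br label %merge
  //   merge:
  //     %p = phi i64 [ %e, %exit1 ], [ %e, %exit2 ]
  // The use of %e logically occurs at the end of exit1 and exit2, so the
  // expansion is materialized in each predecessor; the Inserted map in the
  // function below caches one expansion per block to avoid duplicates.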
-void LSRInstance::RewriteForPHI(PHINode *PN, - const LSRUse &LU, - const LSRFixup &LF, - const Formula &F, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const { +void LSRInstance::RewriteForPHI( + PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F, + SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { DenseMap<BasicBlock *, Value *> Inserted; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == LF.OperandValToReplace) { @@ -5016,11 +5007,9 @@ void LSRInstance::RewriteForPHI(PHINode *PN, /// Emit instructions for the leading candidate expression for this LSRUse (this /// is called "expanding"), and update the UserInst to reference the newly /// expanded value. -void LSRInstance::Rewrite(const LSRUse &LU, - const LSRFixup &LF, - const Formula &F, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const { +void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, + const Formula &F, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { // First, find an insertion point that dominates UserInst. For PHI nodes, // find the nearest block which dominates all the relevant uses. if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) { @@ -5058,7 +5047,7 @@ void LSRInstance::ImplementSolution( const SmallVectorImpl<const Formula *> &Solution) { // Keep track of instructions we may have made dead, so that // we can remove them after we are done working. - SmallVector<WeakVH, 16> DeadInsts; + SmallVector<WeakTrackingVH, 16> DeadInsts; SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr"); @@ -5308,7 +5297,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, // Remove any extra phis created by processing inner loops. Changed |= DeleteDeadPHIs(L->getHeader()); if (EnablePhiElim && L->isLoopSimplifyForm()) { - SmallVector<WeakVH, 16> DeadInsts; + SmallVector<WeakTrackingVH, 16> DeadInsts; const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); SCEVExpander Rewriter(SE, DL, "lsr"); #ifndef NDEBUG diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 8fa806a7e8bcc..6ef1464e9338e 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -1231,11 +1231,12 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, LoopProcessWorklist.push_back(NewLoop); redoLoop = true; - // Keep a WeakVH holding onto LIC. If the first call to RewriteLoopBody + // Keep a WeakTrackingVH holding onto LIC. If the first call to + // RewriteLoopBody // deletes the instruction (for example by simplifying a PHI that feeds into // the condition that we're unswitching on), we don't rewrite the second // iteration. - WeakVH LICHandle(LIC); + WeakTrackingVH LICHandle(LIC); // Now we rewrite the original code to know that the condition is true and the // new code to know that the condition is false. @@ -1262,7 +1263,7 @@ static void RemoveFromWorklist(Instruction *I, static void ReplaceUsesOfWith(Instruction *I, Value *V, std::vector<Instruction*> &Worklist, Loop *L, LPPassManager *LPM) { - DEBUG(dbgs() << "Replace with '" << *V << "': " << *I); + DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n"); // Add uses to the worklist, which may be dead now. 
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) @@ -1275,7 +1276,8 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V, LPM->deleteSimpleAnalysisValue(I, L); RemoveFromWorklist(I, Worklist); I->replaceAllUsesWith(V); - I->eraseFromParent(); + if (!I->mayHaveSideEffects()) + I->eraseFromParent(); ++NumSimplify; } @@ -1431,7 +1433,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { // Simple DCE. if (isInstructionTriviallyDead(I)) { - DEBUG(dbgs() << "Remove dead instruction '" << *I); + DEBUG(dbgs() << "Remove dead instruction '" << *I << "\n"); // Add uses to the worklist, which may be dead now. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index a3f3f25c1e0f6..21a632073da7a 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1323,7 +1323,7 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) { // Get the alignment of the byval. If the call doesn't specify the alignment, // then it is some target specific value that we can't know. - unsigned ByValAlign = CS.getParamAlignment(ArgNo+1); + unsigned ByValAlign = CS.getParamAlignment(ArgNo); if (ByValAlign == 0) return false; // If it is greater than the memcpy, then we check to see if we can force the diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp index c5bf2f28d1852..d0bfe36038973 100644 --- a/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/lib/Transforms/Scalar/NaryReassociate.cpp @@ -211,7 +211,8 @@ bool NaryReassociatePass::doOneIteration(Function &F) { Changed = true; SE->forgetValue(&*I); I->replaceAllUsesWith(NewI); - // If SeenExprs constains I's WeakVH, that entry will be replaced with + // If SeenExprs contains I's WeakTrackingVH, that entry will be + // replaced with // nullptr. RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI); I = NewI->getIterator(); @@ -219,7 +220,7 @@ bool NaryReassociatePass::doOneIteration(Function &F) { // Add the rewritten instruction to SeenExprs; the original instruction // is deleted. const SCEV *NewSCEV = SE->getSCEV(&*I); - SeenExprs[NewSCEV].push_back(WeakVH(&*I)); + SeenExprs[NewSCEV].push_back(WeakTrackingVH(&*I)); // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I) // is equivalent to I. However, ScalarEvolution::getSCEV may // weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose @@ -239,7 +240,7 @@ bool NaryReassociatePass::doOneIteration(Function &F) { // // This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll. if (NewSCEV != OldSCEV) - SeenExprs[OldSCEV].push_back(WeakVH(&*I)); + SeenExprs[OldSCEV].push_back(WeakTrackingVH(&*I)); } } } @@ -494,7 +495,8 @@ NaryReassociatePass::findClosestMatchingDominator(const SCEV *CandidateExpr, // future instruction either. Therefore, we pop it out of the stack. This // optimization makes the algorithm O(n). while (!Candidates.empty()) { - // Candidates stores WeakVHs, so a candidate can be nullptr if it's removed + // Candidates stores WeakTrackingVHs, so a candidate can be nullptr if it's + // removed // during rewriting.
if (Value *Candidate = Candidates.back()) { Instruction *CandidateInstruction = cast<Instruction>(Candidate); diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index a014ddd9ba0a4..162d91beae764 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -395,7 +395,6 @@ namespace { class NewGVN { Function &F; DominatorTree *DT; - AssumptionCache *AC; const TargetLibraryInfo *TLI; AliasAnalysis *AA; MemorySSA *MSSA; @@ -405,6 +404,7 @@ class NewGVN { BumpPtrAllocator ExpressionAllocator; ArrayRecycler<Value *> ArgRecycler; TarjanSCC SCCFinder; + const SimplifyQuery SQ; // Number of function arguments, used by ranking unsigned int NumFuncArgs; @@ -504,8 +504,9 @@ public: NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA, const DataLayout &DL) - : F(F), DT(DT), AC(AC), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL), - PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)) {} + : F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL), + PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)), SQ(DL, TLI, DT, AC) { + } bool runGVN(); private: @@ -782,8 +783,7 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, E->op_push_back(lookupOperandLeader(Arg1)); E->op_push_back(lookupOperandLeader(Arg2)); - Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), DL, TLI, - DT, AC); + Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, nullptr, V)) return SimplifiedE; return E; @@ -864,8 +864,8 @@ const Expression *NewGVN::createExpression(Instruction *I) { "Wrong types on cmp instruction"); assert((E->getOperand(0)->getType() == I->getOperand(0)->getType() && E->getOperand(1)->getType() == I->getOperand(1)->getType())); - Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), - DL, TLI, DT, AC); + Value *V = + SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (isa<SelectInst>(I)) { @@ -874,23 +874,23 @@ const Expression *NewGVN::createExpression(Instruction *I) { assert(E->getOperand(1)->getType() == I->getOperand(1)->getType() && E->getOperand(2)->getType() == I->getOperand(2)->getType()); Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1), - E->getOperand(2), DL, TLI, DT, AC); + E->getOperand(2), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } } else if (I->isBinaryOp()) { - Value *V = SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), - DL, TLI, DT, AC); + Value *V = + SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (auto *BI = dyn_cast<BitCastInst>(I)) { - Value *V = SimplifyInstruction(BI, DL, TLI, DT, AC); + Value *V = + SimplifyCastInst(BI->getOpcode(), BI->getOperand(0), BI->getType(), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (isa<GetElementPtrInst>(I)) { - Value *V = SimplifyGEPInst(E->getType(), - ArrayRef<Value *>(E->op_begin(), E->op_end()), - DL, TLI, DT, AC); + Value *V = SimplifyGEPInst( + E->getType(), ArrayRef<Value *>(E->op_begin(), E->op_end()), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if 
(AllConstant) { @@ -1628,15 +1628,15 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) { if (PBranch->TrueEdge) { // If we know the previous predicate is true and we are in the true // edge then we may be implied true or false. - if (CmpInst::isImpliedTrueByMatchingCmp(OurPredicate, - BranchPredicate)) { + if (CmpInst::isImpliedTrueByMatchingCmp(BranchPredicate, + OurPredicate)) { addPredicateUsers(PI, I); return createConstantExpression( ConstantInt::getTrue(CI->getType())); } - if (CmpInst::isImpliedFalseByMatchingCmp(OurPredicate, - BranchPredicate)) { + if (CmpInst::isImpliedFalseByMatchingCmp(BranchPredicate, + OurPredicate)) { addPredicateUsers(PI, I); return createConstantExpression( ConstantInt::getFalse(CI->getType())); diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 3dcab60907896..ef29d4141600a 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -982,7 +982,7 @@ static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i, /// Emit a tree of add instructions, summing Ops together /// and returning the result. Insert the tree before I. static Value *EmitAddTreeOfValues(Instruction *I, - SmallVectorImpl<WeakVH> &Ops){ + SmallVectorImpl<WeakTrackingVH> &Ops) { if (Ops.size() == 1) return Ops.back(); Value *V1 = Ops.back(); @@ -1559,7 +1559,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I, ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal) : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal); - SmallVector<WeakVH, 4> NewMulOps; + SmallVector<WeakTrackingVH, 4> NewMulOps; for (unsigned i = 0; i != Ops.size(); ++i) { // Only try to remove factors from expressions we're allowed to. BinaryOperator *BOp = diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index f344eb151464a..c11247c06b856 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1128,39 +1128,23 @@ normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, // Create new attribute set containing only attributes which can be transferred // from original call to the safepoint. -static AttributeList legalizeCallAttributes(AttributeList AS) { - AttributeList Ret; - - for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) { - unsigned Index = AS.getSlotIndex(Slot); - - if (Index == AttributeList::ReturnIndex || - Index == AttributeList::FunctionIndex) { - - for (Attribute Attr : make_range(AS.begin(Slot), AS.end(Slot))) { - - // Do not allow certain attributes - just skip them - // Safepoint can not be read only or read none. - if (Attr.hasAttribute(Attribute::ReadNone) || - Attr.hasAttribute(Attribute::ReadOnly)) - continue; - - // These attributes control the generation of the gc.statepoint call / - // invoke itself; and once the gc.statepoint is in place, they're of no - // use. - if (isStatepointDirectiveAttr(Attr)) - continue; - - Ret = Ret.addAttributes( - AS.getContext(), Index, - AttributeList::get(AS.getContext(), Index, AttrBuilder(Attr))); - } - } - - // Just skip parameter attributes for now - } - - return Ret; +static AttributeList legalizeCallAttributes(AttributeList AL) { + if (AL.isEmpty()) + return AL; + + // Remove the readonly, readnone, and statepoint function attributes. 
+ AttrBuilder FnAttrs = AL.getFnAttributes(); + FnAttrs.removeAttribute(Attribute::ReadNone); + FnAttrs.removeAttribute(Attribute::ReadOnly); + for (Attribute A : AL.getFnAttributes()) { + if (isStatepointDirectiveAttr(A)) + FnAttrs.remove(A); + } + + // Just skip parameter and return attributes for now + LLVMContext &Ctx = AL.getContext(); + return AttributeList::get(Ctx, AttributeList::FunctionIndex, + AttributeSet::get(Ctx, FnAttrs)); } /// Helper function to place all gc relocates necessary for the given @@ -1402,13 +1386,10 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */ Call->setCallingConv(ToReplace->getCallingConv()); // Currently we will fail on parameter attributes and on certain - // function attributes. - AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes()); - // In case if we can handle this set of attributes - set up function attrs - // directly on statepoint and return attrs later for gc_result intrinsic. - Call->setAttributes(AttributeList::get(Call->getContext(), - AttributeList::FunctionIndex, - NewAttrs.getFnAttributes())); + // function attributes. If we can handle this set of attributes, set up the + // function attrs directly on the statepoint and return attrs later for the + // gc_result intrinsic. + Call->setAttributes(legalizeCallAttributes(ToReplace->getAttributes())); Token = Call; @@ -1431,13 +1412,10 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */ Invoke->setCallingConv(ToReplace->getCallingConv()); // Currently we will fail on parameter attributes and on certain - // function attributes. - AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes()); - // In case if we can handle this set of attributes - set up function attrs - // directly on statepoint and return attrs later for gc_result intrinsic. - Invoke->setAttributes(AttributeList::get(Invoke->getContext(), - AttributeList::FunctionIndex, - NewAttrs.getFnAttributes())); + // function attributes. If we can handle this set of attributes, set up the + // function attrs directly on the statepoint and return attrs later for the + // gc_result intrinsic. + Invoke->setAttributes(legalizeCallAttributes(ToReplace->getAttributes())); Token = Invoke; diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index d01e91a7f2356..1d9beffaf06bf 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -25,6 +25,7 @@ #include "llvm/Transforms/Scalar/SROA.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" @@ -2186,8 +2187,8 @@ class llvm::sroa::AllocaSliceRewriter Instruction *OldPtr; // Track post-rewrite users which are PHI nodes and Selects. - SmallPtrSetImpl<PHINode *> &PHIUsers; - SmallPtrSetImpl<SelectInst *> &SelectUsers; + SmallSetVector<PHINode *, 8> &PHIUsers; + SmallSetVector<SelectInst *, 8> &SelectUsers; // Utility IR builder, whose name prefix is setup for each visited use, and // the insertion point is set to point to the user.
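The SROA hunk above swaps SmallPtrSet for SmallSetVector when tracking PHI and Select users. A plausible reading, not spelled out in the patch itself: SmallPtrSet iterates in pointer order, which can vary between runs, while SmallSetVector still deduplicates but iterates in insertion order, so any decision derived from the walk stays deterministic. A minimal sketch of the difference, using a hypothetical collectPHIUsers helper:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper, for illustration only: gather PHI users with a
// stable, first-seen iteration order.
static void collectPHIUsers(ArrayRef<PHINode *> Found) {
  SmallSetVector<PHINode *, 8> PHIUsers;
  for (PHINode *PN : Found)
    PHIUsers.insert(PN); // duplicates are dropped; insertion order is kept
  for (PHINode *PN : PHIUsers) {
    // Visitation here follows discovery order on every run; iterating a
    // SmallPtrSet instead would follow pointer values, which may differ
    // from run to run.
    (void)PN;
  }
}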
@@ -2199,8 +2200,8 @@ public: uint64_t NewAllocaBeginOffset, uint64_t NewAllocaEndOffset, bool IsIntegerPromotable, VectorType *PromotableVecTy, - SmallPtrSetImpl<PHINode *> &PHIUsers, - SmallPtrSetImpl<SelectInst *> &SelectUsers) + SmallSetVector<PHINode *, 8> &PHIUsers, + SmallSetVector<SelectInst *, 8> &SelectUsers) : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI), NewAllocaBeginOffset(NewAllocaBeginOffset), NewAllocaEndOffset(NewAllocaEndOffset), @@ -3880,8 +3881,8 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // fact scheduled for promotion. unsigned PPWOldSize = PostPromotionWorklist.size(); unsigned NumUses = 0; - SmallPtrSet<PHINode *, 8> PHIUsers; - SmallPtrSet<SelectInst *, 8> SelectUsers; + SmallSetVector<PHINode *, 8> PHIUsers; + SmallSetVector<SelectInst *, 8> SelectUsers; AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(), P.endOffset(), IsIntegerPromotable, VecTy, @@ -3902,19 +3903,16 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // Now that we've processed all the slices in the new partition, check if any // PHIs or Selects would block promotion. - for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(), - E = PHIUsers.end(); - I != E; ++I) - if (!isSafePHIToSpeculate(**I)) { + for (PHINode *PHI : PHIUsers) + if (!isSafePHIToSpeculate(*PHI)) { Promotable = false; PHIUsers.clear(); SelectUsers.clear(); break; } - for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(), - E = SelectUsers.end(); - I != E; ++I) - if (!isSafeSelectToSpeculate(**I)) { + + for (SelectInst *Sel : SelectUsers) + if (!isSafeSelectToSpeculate(*Sel)) { Promotable = false; PHIUsers.clear(); SelectUsers.clear(); diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 00e3c95f6f06d..52201d8f3e51a 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" @@ -83,6 +84,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeCFGSimplifyPassPass(Registry); initializeLateCFGSimplifyPassPass(Registry); initializeStructurizeCFGPass(Registry); + initializeSimpleLoopUnswitchLegacyPassPass(Registry); initializeSinkingLegacyPassPass(Registry); initializeTailCallElimPass(Registry); initializeSeparateConstOffsetFromGEPPass(Registry); diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 4d594532c3651..cde659b9d189f 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1138,7 +1138,7 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) { // Add I to DominatingExprs if it's an add/sub that can't sign overflow. 
if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) || match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) { - if (isKnownNotFullPoison(I)) { + if (programUndefinedIfFullPoison(I)) { const SCEV *Key = SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS)); DominatingExprs[Key].push_back(I); diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp new file mode 100644 index 0000000000000..fb1b47c48276c --- /dev/null +++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -0,0 +1,626 @@ +//===-- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +#define DEBUG_TYPE "simple-loop-unswitch" + +using namespace llvm; + +STATISTIC(NumBranches, "Number of branches unswitched"); +STATISTIC(NumSwitches, "Number of switches unswitched"); +STATISTIC(NumTrivial, "Number of unswitches that are trivial"); + +static void replaceLoopUsesWithConstant(Loop &L, Value &LIC, + Constant &Replacement) { + assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?"); + + // Replace uses of LIC in the loop with the given constant. + for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) { + // Grab the use and walk past it so we can clobber it in the use list. + Use *U = &*UI++; + Instruction *UserI = dyn_cast<Instruction>(U->getUser()); + if (!UserI || !L.contains(UserI)) + continue; + + // Replace this use within the loop body. + *U = &Replacement; + } +} + +/// Update the dominator tree after removing one exiting predecessor of a loop +/// exit block. +static void updateLoopExitIDom(BasicBlock *LoopExitBB, Loop &L, + DominatorTree &DT) { + assert(pred_begin(LoopExitBB) != pred_end(LoopExitBB) && + "Cannot have empty predecessors of the loop exit block if we split " + "off a block to unswitch!"); + + BasicBlock *IDom = *pred_begin(LoopExitBB); + // Walk all of the other predecessors finding the nearest common dominator + // until all predecessors are covered or we reach the loop header. The loop + // header necessarily dominates all loop exit blocks in loop simplified form + // so we can early-exit the moment we hit that block. + for (auto PI = std::next(pred_begin(LoopExitBB)), PE = pred_end(LoopExitBB); + PI != PE && IDom != L.getHeader(); ++PI) + IDom = DT.findNearestCommonDominator(IDom, *PI); + + DT.changeImmediateDominator(LoopExitBB, IDom); +} + +/// Update the dominator tree after unswitching a particular former exit block. 
+/// +/// This handles the full update of the dominator tree after hoisting a block +/// that previously was an exit block (or split off of an exit block) up to be +/// reached from the new immediate dominator of the preheader. +/// +/// The common case is simple -- we just move the unswitched block to have an +/// immediate dominator of the old preheader. But in complex cases, there may +/// be other blocks reachable from the unswitched block that are immediately +/// dominated by some node between the unswitched one and the old preheader. +/// All of these also need to be hoisted in the dominator tree. We also want to +/// minimize queries to the dominator tree because each step of this +/// invalidates any DFS numbers that would make queries fast. +static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, + DominatorTree &DT) { + DomTreeNode *OldPHNode = DT[OldPH]; + DomTreeNode *UnswitchedNode = DT[UnswitchedBB]; + // If the dominator tree has already been updated for this unswitched node, + // we're done. This makes it easier to use this routine if there are multiple + // paths to the same unswitched destination. + if (UnswitchedNode->getIDom() == OldPHNode) + return; + + // First collect the domtree nodes that we are hoisting over. These are the + // set of nodes which may have children that need to be hoisted as well. + SmallPtrSet<DomTreeNode *, 4> DomChain; + for (auto *IDom = UnswitchedNode->getIDom(); IDom != OldPHNode; + IDom = IDom->getIDom()) + DomChain.insert(IDom); + + // The unswitched block ends up immediately dominated by the old preheader -- + // regardless of whether it is the loop exit block or split off of the loop + // exit block. + DT.changeImmediateDominator(UnswitchedNode, OldPHNode); + + // Blocks reachable from the unswitched block may need to change their IDom + // as well. + SmallSetVector<BasicBlock *, 4> Worklist; + for (auto *SuccBB : successors(UnswitchedBB)) + Worklist.insert(SuccBB); + + // Walk the worklist. We grow the list in the loop and so must recompute size. + for (int i = 0; i < (int)Worklist.size(); ++i) { + auto *BB = Worklist[i]; + + DomTreeNode *Node = DT[BB]; + assert(!DomChain.count(Node) && + "Cannot be dominated by a block you can reach!"); + // If this block doesn't have an immediate dominator somewhere in the chain + // we hoisted over, then its position in the domtree hasn't changed. Either + // it is above the region hoisted and still valid, or it is below the + // hoisted block and so was trivially updated. This also applies to + // everything reachable from this block so we're completely done with + // it. + if (!DomChain.count(Node->getIDom())) + continue; + + // We need to change the IDom for this node but also walk its successors + // which could have similar dominance position. + DT.changeImmediateDominator(Node, OldPHNode); + for (auto *SuccBB : successors(BB)) + Worklist.insert(SuccBB); + } +} + +/// Unswitch a trivial branch if the condition is loop invariant. +/// +/// This routine should only be called when loop code leading to the branch has +/// been validated as trivial (no side effects). This routine checks if the +/// condition is invariant and one of the successors is a loop exit. This +/// allows us to unswitch without duplicating the loop, making it trivial. +/// +/// If this routine fails to unswitch the branch it returns false. +/// +/// If the branch can be unswitched, this routine splits the preheader and +/// hoists the branch above that split.
Preserves loop simplified form +/// (splitting the exit block as necessary). It simplifies the branch within +/// the loop to an unconditional branch but doesn't remove it entirely. Further +/// cleanup can be done with some simplify-cfg like pass. +static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, + LoopInfo &LI) { + assert(BI.isConditional() && "Can only unswitch a conditional branch!"); + DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n"); + + Value *LoopCond = BI.getCondition(); + + // Need a trivial loop condition to unswitch. + if (!L.isLoopInvariant(LoopCond)) + return false; + + // FIXME: We should compute this once at the start and update it! + SmallVector<BasicBlock *, 16> ExitBlocks; + L.getExitBlocks(ExitBlocks); + SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + + // Check to see if a successor of the branch is guaranteed to + // exit through a unique exit block without having any + // side-effects. If so, determine the value of Cond that causes + // it to do this. + ConstantInt *CondVal = ConstantInt::getTrue(BI.getContext()); + ConstantInt *Replacement = ConstantInt::getFalse(BI.getContext()); + int LoopExitSuccIdx = 0; + auto *LoopExitBB = BI.getSuccessor(0); + if (!ExitBlockSet.count(LoopExitBB)) { + std::swap(CondVal, Replacement); + LoopExitSuccIdx = 1; + LoopExitBB = BI.getSuccessor(1); + if (!ExitBlockSet.count(LoopExitBB)) + return false; + } + auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx); + assert(L.contains(ContinueBB) && + "Cannot have both successors exit and still be in the loop!"); + + // If the loop exit block contains phi nodes, this isn't trivial. + // FIXME: We should examine the PHI to determine whether or not we can handle + // it trivially. + if (isa<PHINode>(LoopExitBB->begin())) + return false; + + DEBUG(dbgs() << " unswitching trivial branch when: " << CondVal + << " == " << LoopCond << "\n"); + + // Split the preheader, so that we know that there is a safe place to insert + // the conditional branch. We will change the preheader to have a conditional + // branch on LoopCond. + BasicBlock *OldPH = L.getLoopPreheader(); + BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); + + // Now that we have a place to insert the conditional branch, create a place + // to branch to: this is the exit block out of the loop that we are + // unswitching. We need to split this if there are other loop predecessors. + // Because the loop is in simplified form, *any* other predecessor is enough. + BasicBlock *UnswitchedBB; + if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) { + (void)PredBB; + assert(PredBB == BI.getParent() && "A branch's parent isn't a predecessor!"); + UnswitchedBB = LoopExitBB; + } else { + UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI); + } + + BasicBlock *ParentBB = BI.getParent(); + + // Now splice the branch to gate reaching the new preheader and re-point its + // successors. + OldPH->getInstList().splice(std::prev(OldPH->end()), + BI.getParent()->getInstList(), BI); + OldPH->getTerminator()->eraseFromParent(); + BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB); + BI.setSuccessor(1 - LoopExitSuccIdx, NewPH); + + // Create a new unconditional branch that will continue the loop as a new + // terminator. + BranchInst::Create(ContinueBB, ParentBB); + + // Now we need to update the dominator tree.
+ updateDTAfterUnswitch(UnswitchedBB, OldPH, DT); + // But if we split something off of the loop exit block then we also removed + // one of the predecessors for the loop exit block and may need to update its + // idom. + if (UnswitchedBB != LoopExitBB) + updateLoopExitIDom(LoopExitBB, L, DT); + + // Since this is an i1 condition we can also trivially replace uses of it + // within the loop with a constant. + replaceLoopUsesWithConstant(L, *LoopCond, *Replacement); + + ++NumTrivial; + ++NumBranches; + return true; +} + +/// Unswitch a trivial switch if the condition is loop invariant. +/// +/// This routine should only be called when loop code leading to the switch has +/// been validated as trivial (no side effects). This routine checks if the +/// condition is invariant and that at least one of the successors is a loop +/// exit. This allows us to unswitch without duplicating the loop, making it +/// trivial. +/// +/// If this routine fails to unswitch the switch it returns false. +/// +/// If the switch can be unswitched, this routine splits the preheader and +/// copies the switch above that split. If the default case is one of the +/// exiting cases, it copies the non-exiting cases and points them at the new +/// preheader. If the default case is not exiting, it copies the exiting cases +/// and points the default at the preheader. It preserves loop simplified form +/// (splitting the exit blocks as necessary). It simplifies the switch within +/// the loop by removing now-dead cases. If the default case is one of those +/// unswitched, it replaces its destination with a new basic block containing +/// only unreachable. Such basic blocks, while technically loop exits, are not +/// considered for unswitching so this is a stable transform and the same +/// switch will not be revisited. If after unswitching there is only a single +/// in-loop successor, the switch is further simplified to an unconditional +/// branch. Still more cleanup can be done with some simplify-cfg like pass. +static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, + LoopInfo &LI) { + DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n"); + Value *LoopCond = SI.getCondition(); + + // If this isn't switching on an invariant condition, we can't unswitch it. + if (!L.isLoopInvariant(LoopCond)) + return false; + + // FIXME: We should compute this once at the start and update it! + SmallVector<BasicBlock *, 16> ExitBlocks; + L.getExitBlocks(ExitBlocks); + SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + + SmallVector<int, 4> ExitCaseIndices; + for (auto Case : SI.cases()) { + auto *SuccBB = Case.getCaseSuccessor(); + if (ExitBlockSet.count(SuccBB) && !isa<PHINode>(SuccBB->begin())) + ExitCaseIndices.push_back(Case.getCaseIndex()); + } + BasicBlock *DefaultExitBB = nullptr; + if (ExitBlockSet.count(SI.getDefaultDest()) && + !isa<PHINode>(SI.getDefaultDest()->begin()) && + !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator())) + DefaultExitBB = SI.getDefaultDest(); + else if (ExitCaseIndices.empty()) + return false; + + DEBUG(dbgs() << " unswitching trivial cases...\n"); + + SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases; + ExitCases.reserve(ExitCaseIndices.size()); + // We walk the case indices backwards so that we remove the last case first + // and don't disrupt the earlier indices. + for (unsigned Index : reverse(ExitCaseIndices)) { + auto CaseI = SI.case_begin() + Index; + // Save the value of this case. 
+ ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()}); + // Delete the unswitched cases. + SI.removeCase(CaseI); + } + + // Check if after this all of the remaining cases point at the same + // successor. + BasicBlock *CommonSuccBB = nullptr; + if (SI.getNumCases() > 0 && + std::all_of(std::next(SI.case_begin()), SI.case_end(), + [&SI](const SwitchInst::CaseHandle &Case) { + return Case.getCaseSuccessor() == + SI.case_begin()->getCaseSuccessor(); + })) + CommonSuccBB = SI.case_begin()->getCaseSuccessor(); + + if (DefaultExitBB) { + // We can't remove the default edge so replace it with an edge to either + // the single common remaining successor (if we have one) or an unreachable + // block. + if (CommonSuccBB) { + SI.setDefaultDest(CommonSuccBB); + } else { + BasicBlock *ParentBB = SI.getParent(); + BasicBlock *UnreachableBB = BasicBlock::Create( + ParentBB->getContext(), + Twine(ParentBB->getName()) + ".unreachable_default", + ParentBB->getParent()); + new UnreachableInst(ParentBB->getContext(), UnreachableBB); + SI.setDefaultDest(UnreachableBB); + DT.addNewBlock(UnreachableBB, ParentBB); + } + } else { + // If we're not unswitching the default, it must match the cases' common + // successor for a common successor to exist; if there are no cases, the + // default is itself the common successor. + if (SI.getNumCases() == 0) + CommonSuccBB = SI.getDefaultDest(); + else if (SI.getDefaultDest() != CommonSuccBB) + CommonSuccBB = nullptr; + } + + // Split the preheader, so that we know that there is a safe place to insert + // the switch. + BasicBlock *OldPH = L.getLoopPreheader(); + BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); + OldPH->getTerminator()->eraseFromParent(); + + // Now add the unswitched switch. + auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH); + + // Split any exit blocks with remaining in-loop predecessors. We walk in + // reverse so that we split in the same order as the cases appeared. This is + // purely for convenience of reading the resulting IR, but it doesn't cost + // anything really. + SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap; + // Handle the default exit if necessary. + // FIXME: It'd be great if we could merge this with the loop below but LLVM's + // ranges aren't quite powerful enough yet. + if (DefaultExitBB && !pred_empty(DefaultExitBB)) { + auto *SplitBB = + SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI); + updateLoopExitIDom(DefaultExitBB, L, DT); + DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB; + } + // Note that we must use a reference in the for loop so that we update the + // container. + for (auto &CasePair : reverse(ExitCases)) { + // Grab a reference to the exit block in the pair so that we can update it. + BasicBlock *&ExitBB = CasePair.second; + + // If this case is the last edge into the exit block, we can simply reuse it + // as it will no longer be a loop exit. No mapping necessary. + if (pred_empty(ExitBB)) + continue; + + // Otherwise we need to split the exit block so that we retain an exit + // block from the loop and a target for the unswitched condition. + BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB]; + if (!SplitExitBB) { + // If this is the first time we see this, do the split and remember it. + SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI); + updateLoopExitIDom(ExitBB, L, DT); + } + ExitBB = SplitExitBB; + } + + // Now add the unswitched cases. We do this in reverse order as we built them + // in reverse order.
+ for (auto CasePair : reverse(ExitCases)) { + ConstantInt *CaseVal = CasePair.first; + BasicBlock *UnswitchedBB = CasePair.second; + + NewSI->addCase(CaseVal, UnswitchedBB); + updateDTAfterUnswitch(UnswitchedBB, OldPH, DT); + } + + // If the default was unswitched, re-point it and add explicit cases for + // entering the loop. + if (DefaultExitBB) { + NewSI->setDefaultDest(DefaultExitBB); + updateDTAfterUnswitch(DefaultExitBB, OldPH, DT); + + // We removed all the exit cases, so we just copy the cases to the + // unswitched switch. + for (auto Case : SI.cases()) + NewSI->addCase(Case.getCaseValue(), NewPH); + } + + // If we ended up with a common successor for every path through the switch + // after unswitching, rewrite it to an unconditional branch to make it easy + // to recognize. Otherwise we potentially have to recognize the default case + // pointing at unreachable and other complexity. + if (CommonSuccBB) { + BasicBlock *BB = SI.getParent(); + SI.eraseFromParent(); + BranchInst::Create(CommonSuccBB, BB); + } + + DT.verifyDomTree(); + ++NumTrivial; + ++NumSwitches; + return true; +} + +/// This routine scans the loop to find a branch or switch which occurs before +/// any side effects occur. These can potentially be unswitched without +/// duplicating the loop. If a branch or switch is successfully unswitched the +/// scanning continues to see if subsequent branches or switches have become +/// trivial. Once all trivial candidates have been unswitched, this routine +/// returns. +/// +/// The return value indicates whether anything was unswitched (and therefore +/// changed). +static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, + LoopInfo &LI) { + bool Changed = false; + + // If the loop header has only one reachable successor we should keep looking + // for trivial condition candidates in the successor as well. An alternative + // is to constant fold conditions and merge successors into the loop header + // (then we only need to check the header's terminator). The reason for not + // doing this in the LoopUnswitch pass is that it could potentially break + // LoopPassManager's invariants. Folding dead branches could either eliminate + // the current loop or make other loops unreachable. LCSSA form might also + // not be preserved after deleting branches. The following code keeps + // traversing the loop header's successors until it finds a trivial condition + // candidate (a condition that is not a constant). Since unswitching generates + // branches with constant conditions, this scenario could be very common in + // practice. + BasicBlock *CurrentBB = L.getHeader(); + SmallPtrSet<BasicBlock *, 8> Visited; + Visited.insert(CurrentBB); + do { + // Check if there are any side-effecting instructions (e.g. stores, calls, + // volatile loads) in the part of the loop that the code *would* execute + // without unswitching. + if (llvm::any_of(*CurrentBB, + [](Instruction &I) { return I.mayHaveSideEffects(); })) + return Changed; + + TerminatorInst *CurrentTerm = CurrentBB->getTerminator(); + + if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) { + // Don't bother trying to unswitch past a switch with a constant + // condition. This should be removed prior to running this pass by + // simplify-cfg. + if (isa<Constant>(SI->getCondition())) + return Changed; + + if (!unswitchTrivialSwitch(L, *SI, DT, LI)) + // Couldn't unswitch this one so we're done. + return Changed; + + // Mark that we managed to unswitch something.
+ Changed = true; + + // If unswitching turned the terminator into an unconditional branch then + // we can continue. The unswitching logic specifically works to fold any + // cases it can into an unconditional branch to make it easier to + // recognize here. + auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator()); + if (!BI || BI->isConditional()) + return Changed; + + CurrentBB = BI->getSuccessor(0); + continue; + } + + auto *BI = dyn_cast<BranchInst>(CurrentTerm); + if (!BI) + // We do not understand other terminator instructions. + return Changed; + + // Don't bother trying to unswitch past an unconditional branch or a branch + // with a constant value. These should be removed by simplify-cfg prior to + // running this pass. + if (!BI->isConditional() || isa<Constant>(BI->getCondition())) + return Changed; + + // Found a trivial condition candidate: non-foldable conditional branch. If + // we fail to unswitch this, we can't do anything else that is trivial. + if (!unswitchTrivialBranch(L, *BI, DT, LI)) + return Changed; + + // Mark that we managed to unswitch something. + Changed = true; + + // We unswitched the branch. This should always leave us with an + // unconditional branch that we can follow now. + BI = cast<BranchInst>(CurrentBB->getTerminator()); + assert(!BI->isConditional() && + "Cannot form a conditional branch by unswitching!"); + CurrentBB = BI->getSuccessor(0); + + // When continuing, if we exit the loop or reach a previously visited block, + // then we cannot reach any trivial condition candidates (unfoldable + // branch instructions or switch instructions) and no unswitch can happen. + } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second); + + return Changed; +} + +/// Unswitch control flow predicated on loop invariant conditions. +/// +/// This first hoists all branches or switches which are trivial (i.e., do not +/// require duplicating any part of the loop) out of the loop body. It then +/// looks at other loop invariant control flows and tries to unswitch those as +/// well by cloning the loop if the result is small enough. +static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, + AssumptionCache &AC) { + assert(L.isLCSSAForm(DT) && + "Loops must be in LCSSA form before unswitching."); + bool Changed = false; + + // Must be in loop simplified form: we need a preheader and dedicated exits. + if (!L.isLoopSimplifyForm()) + return false; + + // Try trivial unswitching first before looping over other basic blocks in + // the loop. + Changed |= unswitchAllTrivialConditions(L, DT, LI); + + // FIXME: Add support for non-trivial unswitching by cloning the loop. + + return Changed; +} + +PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + Function &F = *L.getHeader()->getParent(); + (void)F; + + DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n"); + + if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC)) + return PreservedAnalyses::all(); + +#ifndef NDEBUG + // Historically this pass has had issues with the dominator tree so verify it + // in asserts builds.
+ AR.DT.verifyDomTree(); +#endif + return getLoopPassPreservedAnalyses(); +} + +namespace { +class SimpleLoopUnswitchLegacyPass : public LoopPass { +public: + static char ID; // Pass ID, replacement for typeid + explicit SimpleLoopUnswitchLegacyPass() : LoopPass(ID) { + initializeSimpleLoopUnswitchLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + getLoopAnalysisUsage(AU); + } +}; +} // namespace + +bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { + if (skipLoop(L)) + return false; + + Function &F = *L->getHeader()->getParent(); + + DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n"); + + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + + bool Changed = unswitchLoop(*L, DT, LI, AC); + +#ifndef NDEBUG + // Historically this pass has had issues with the dominator tree so verify it + // in asserts builds. + DT.verifyDomTree(); +#endif + return Changed; +} + +char SimpleLoopUnswitchLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", + "Simple unswitch loops", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(LoopPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", + "Simple unswitch loops", false, false) + +Pass *llvm::createSimpleLoopUnswitchLegacyPass() { + return new SimpleLoopUnswitchLegacyPass(); +} diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 22af21d55c019..3d5cbfc93f2e6 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -78,8 +78,8 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { // Recursively deleting a PHI may cause multiple PHIs to be deleted - // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete. - SmallVector<WeakVH, 8> PHIs; + // or RAUW'd undef, so use an array of WeakTrackingVH for the PHIs to delete. + SmallVector<WeakTrackingVH, 8> PHIs; for (BasicBlock::iterator I = BB->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) PHIs.push_back(PN); diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 385c12302e040..d5124ac89016f 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -245,7 +245,7 @@ namespace { void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst, std::vector<const BasicBlock*> &ToClone){ - WeakVH &BBEntry = VMap[BB]; + WeakTrackingVH &BBEntry = VMap[BB]; // Have we already cloned this block? if (BBEntry) return; @@ -547,7 +547,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Make a second pass over the PHINodes now that all of them have been // remapped into the new function, simplifying the PHINode and performing any // recursive simplifications exposed. This will transparently update the - // WeakVH in the VMap. Notably, we rely on that so that if we coalesce + // WeakTrackingVH in the VMap. 
Notably, we rely on that so that if we coalesce // two PHINodes, the iteration over the old PHIs remains valid, and the // mapping will just map us to the new node (which may not even be a PHI // node). diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 5d6fbc3325fff..6d56e08af99f9 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -1640,7 +1640,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // modify the struct. if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, - CalledFunc->getParamAlignment(ArgNo+1)); + CalledFunc->getParamAlignment(ArgNo)); if (ActualArg != *AI) ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); } @@ -2302,7 +2302,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, AssumptionCache *AC = IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr; auto &DL = Caller->getParent()->getDataLayout(); - if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr, AC)) { + if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } diff --git a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp index fe93d6927c638..42aca757c2afd 100644 --- a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp +++ b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp @@ -33,6 +33,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -48,16 +49,6 @@ using namespace llvm; STATISTIC(NumWrappedOneCond, "Number of One-Condition Wrappers Inserted"); STATISTIC(NumWrappedTwoCond, "Number of Two-Condition Wrappers Inserted"); -static cl::opt<bool> LibCallsShrinkWrapDoDomainError( - "libcalls-shrinkwrap-domain-error", cl::init(true), cl::Hidden, - cl::desc("Perform shrink-wrap on lib calls with domain errors")); -static cl::opt<bool> LibCallsShrinkWrapDoRangeError( - "libcalls-shrinkwrap-range-error", cl::init(true), cl::Hidden, - cl::desc("Perform shrink-wrap on lib calls with range errors")); -static cl::opt<bool> LibCallsShrinkWrapDoPoleError( - "libcalls-shrinkwrap-pole-error", cl::init(true), cl::Hidden, - cl::desc("Perform shrink-wrap on lib calls with pole errors")); - namespace { class LibCallsShrinkWrapLegacyPass : public FunctionPass { public: @@ -82,10 +73,11 @@ INITIALIZE_PASS_END(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap", namespace { class LibCallsShrinkWrap : public InstVisitor<LibCallsShrinkWrap> { public: - LibCallsShrinkWrap(const TargetLibraryInfo &TLI) : TLI(TLI), Changed(false){}; - bool isChanged() const { return Changed; } + LibCallsShrinkWrap(const TargetLibraryInfo &TLI, DominatorTree *DT) + : TLI(TLI), DT(DT){}; void visitCallInst(CallInst &CI) { checkCandidate(CI); } - void perform() { + bool perform() { + bool Changed = false; for (auto &CI : WorkList) { DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName() << "\n"); @@ -94,6 +86,7 @@ public: DEBUG(dbgs() << "Transformed\n"); } } + return Changed; } private: @@ -134,8 +127,8 @@ private: } const TargetLibraryInfo &TLI; + DominatorTree *DT; SmallVector<CallInst *, 16> WorkList; - bool Changed; }; } // end anonymous namespace @@ -241,8 +234,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, case LibFunc_atanhf: // Same 
as atanh case LibFunc_atanhl: // Same as atanh { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) - return false; ++NumWrappedTwoCond; Cond = createOrCond(CI, CmpInst::FCMP_OLE, -1.0f, CmpInst::FCMP_OGE, 1.0f); break; @@ -262,8 +253,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, case LibFunc_logbf: // Same as log case LibFunc_logbl: // Same as log { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) - return false; ++NumWrappedOneCond; Cond = createCond(CI, CmpInst::FCMP_OLE, 0.0f); break; @@ -274,8 +263,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, case LibFunc_log1pf: // Same as log1p case LibFunc_log1pl: // Same as log1p { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) - return false; ++NumWrappedOneCond; Cond = createCond(CI, CmpInst::FCMP_OLE, -1.0f); break; @@ -285,9 +272,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, // RangeError: overflow or underflow case LibFunc_powf: case LibFunc_powl: { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError || - !LibCallsShrinkWrapDoRangeError) - return false; Cond = generateCondForPow(CI, Func); if (Cond == nullptr) return false; @@ -346,7 +330,7 @@ Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI, UpperBound = 11356.0f; break; default: - llvm_unreachable("Should be reach here"); + llvm_unreachable("Unhandled library call!"); } ++NumWrappedOneCond; @@ -410,7 +394,7 @@ Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI, UpperBound = 11383.0f; break; default: - llvm_unreachable("Should be reach here"); + llvm_unreachable("Unhandled library call!"); } ++NumWrappedTwoCond; @@ -499,14 +483,17 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI, // Wrap conditions that can potentially generate errno to the library call. 
void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) { - assert(Cond != nullptr && "hrinkWrapCI is not expecting an empty call inst"); + assert(Cond != nullptr && "ShrinkWrapCI is not expecting an empty call inst"); MDNode *BranchWeights = MDBuilder(CI->getContext()).createBranchWeights(1, 2000); + TerminatorInst *NewInst = - SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights); + SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, DT); BasicBlock *CallBB = NewInst->getParent(); CallBB->setName("cdce.call"); - CallBB->getSingleSuccessor()->setName("cdce.end"); + BasicBlock *SuccBB = CallBB->getSingleSuccessor(); + assert(SuccBB && "The split block should have a single successor"); + SuccBB->setName("cdce.end"); CI->removeFromParent(); CallBB->getInstList().insert(CallBB->getFirstInsertionPt(), CI); DEBUG(dbgs() << "== Basic Block After =="); @@ -522,32 +509,38 @@ bool LibCallsShrinkWrap::perform(CallInst *CI) { TLI.getLibFunc(*Callee, Func); assert(Func && "perform() is not expecting an empty function"); - if (LibCallsShrinkWrapDoDomainError && performCallDomainErrorOnly(CI, Func)) - return true; - - if (LibCallsShrinkWrapDoRangeError && performCallRangeErrorOnly(CI, Func)) + if (performCallDomainErrorOnly(CI, Func) || performCallRangeErrorOnly(CI, Func)) return true; - return performCallErrors(CI, Func); } void LibCallsShrinkWrapLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); } -static bool runImpl(Function &F, const TargetLibraryInfo &TLI) { +static bool runImpl(Function &F, const TargetLibraryInfo &TLI, + DominatorTree *DT) { if (F.hasFnAttribute(Attribute::OptimizeForSize)) return false; - LibCallsShrinkWrap CCDCE(TLI); + LibCallsShrinkWrap CCDCE(TLI, DT); CCDCE.visit(F); - CCDCE.perform(); - return CCDCE.isChanged(); + bool Changed = CCDCE.perform(); + +// Verify the dominator tree after we've updated it locally. +#ifndef NDEBUG + if (DT) + DT->verifyDomTree(); +#endif + return Changed; } bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) { auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - return runImpl(F, TLI); + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + return runImpl(F, TLI, DT); } namespace llvm { @@ -561,11 +554,12 @@ FunctionPass *createLibCallsShrinkWrapPass() { PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F, FunctionAnalysisManager &FAM) { auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); - bool Changed = runImpl(F, TLI); - if (!Changed) + auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); + if (!runImpl(F, TLI, DT)) return PreservedAnalyses::all(); auto PA = PreservedAnalyses(); PA.preserve<GlobalsAA>(); + PA.preserve<DominatorTreeAnalysis>(); return PA; } } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index d3002c5fb7502..ce6b703f3528d 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -562,7 +562,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) { // that can be removed.
BB->removePredecessor(Pred, true); - WeakVH PhiIt = &BB->front(); + WeakTrackingVH PhiIt = &BB->front(); while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) { PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt)); Value *OldPhiIt = PhiIt; @@ -1259,49 +1259,6 @@ void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) { DbgValues.push_back(DVI); } -static void appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset) { - if (Offset > 0) { - Ops.push_back(dwarf::DW_OP_plus); - Ops.push_back(Offset); - } else if (Offset < 0) { - Ops.push_back(dwarf::DW_OP_minus); - Ops.push_back(-Offset); - } -} - -enum { WithStackValue = true }; - -/// Prepend \p DIExpr with a deref and offset operation and optionally turn it -/// into a stack value. -static DIExpression *prependDIExpr(DIBuilder &Builder, DIExpression *DIExpr, - bool Deref, int64_t Offset = 0, - bool StackValue = false) { - if (!Deref && !Offset && !StackValue) - return DIExpr; - - SmallVector<uint64_t, 8> Ops; - appendOffset(Ops, Offset); - if (Deref) - Ops.push_back(dwarf::DW_OP_deref); - if (DIExpr) - for (auto Op : DIExpr->expr_ops()) { - // A DW_OP_stack_value comes at the end, but before a DW_OP_LLVM_fragment. - if (StackValue) { - if (Op.getOp() == dwarf::DW_OP_stack_value) - StackValue = false; - else if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) { - Ops.push_back(dwarf::DW_OP_stack_value); - StackValue = false; - } - } - Ops.push_back(Op.getOp()); - for (unsigned I = 0; I < Op.getNumArgs(); ++I) - Ops.push_back(Op.getArg(I)); - } - if (StackValue) - Ops.push_back(dwarf::DW_OP_stack_value); - return Builder.createExpression(Ops); -} bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, Instruction *InsertBefore, DIBuilder &Builder, @@ -1313,9 +1270,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, auto *DIVar = DDI->getVariable(); auto *DIExpr = DDI->getExpression(); assert(DIVar && "Missing variable"); - - DIExpr = prependDIExpr(Builder, DIExpr, Deref, Offset); - + DIExpr = DIExpression::prepend(DIExpr, Deref, Offset); // Insert llvm.dbg.declare immediately after the original alloca, and remove // old llvm.dbg.declare. Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore); @@ -1348,7 +1303,7 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, if (Offset) { SmallVector<uint64_t, 4> Ops; Ops.push_back(dwarf::DW_OP_deref); - appendOffset(Ops, Offset); + DIExpression::appendOffset(Ops, Offset); Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end()); DIExpr = Builder.createExpression(Ops); } @@ -1398,8 +1353,9 @@ void llvm::salvageDebugInfo(Instruction &I) { auto *DIExpr = DVI->getExpression(); DIBuilder DIB(M, /*AllowUnresolved*/ false); // GEP offsets are i32 and thus always fit into an int64_t. - DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue(), - WithStackValue); + DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, + Offset.getSExtValue(), + DIExpression::WithStackValue); DVI->setOperand(0, MDWrap(I.getOperand(0))); DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); @@ -1411,7 +1367,7 @@ void llvm::salvageDebugInfo(Instruction &I) { // Rewrite the load into DW_OP_deref. 
auto *DIExpr = DVI->getExpression(); DIBuilder DIB(M, /*AllowUnresolved*/ false); - DIExpr = prependDIExpr(DIB, DIExpr, WithDeref); + DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref); DVI->setOperand(0, MDWrap(I.getOperand(0))); DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); @@ -1520,7 +1476,7 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, II->setAttributes(CI->getAttributes()); // Make sure that anything using the call now uses the invoke! This also - // updates the CallGraph if present, because it uses a WeakVH. + // updates the CallGraph if present, because it uses a WeakTrackingVH. CI->replaceAllUsesWith(II); // Delete the original call diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index e7ba19665d591..72c06aef80370 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -210,7 +210,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; - if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { + if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); PN->eraseFromParent(); @@ -628,7 +628,7 @@ ReprocessLoop: PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast<PHINode>(I++)); ) - if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { + if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) { if (SE) SE->forgetValue(PN); if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) { PN->replaceAllUsesWith(V); diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 43ab725b07691..4ab4d7949d233 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -757,7 +757,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, // Simplify any new induction variables in the partially unrolled loop. 
if (SE && !CompletelyUnroll && Count > 1) { - SmallVector<WeakVH, 16> DeadInsts; + SmallVector<WeakTrackingVH, 16> DeadInsts; simplifyLoopIVs(L, SE, DT, LI, DeadInsts); // Aggressively clean up dead instructions that simplifyLoopIVs already @@ -777,7 +777,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { Instruction *Inst = &*I++; - if (Value *V = SimplifyInstruction(Inst, DL)) + if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC})) if (LI->replacementPreservesLCSSAForm(Inst, V)) Inst->replaceAllUsesWith(V); if (isInstructionTriviallyDead(Inst)) diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index dbe42c201dd4f..29d334f2968f1 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -237,3 +237,35 @@ void llvm::filterDeadComdatFunctions( ComdatEntriesCovered.end(); }); } + +std::string llvm::getUniqueModuleId(Module *M) { + MD5 Md5; + bool ExportsSymbols = false; + auto AddGlobal = [&](GlobalValue &GV) { + if (GV.isDeclaration() || GV.getName().startswith("llvm.") || + !GV.hasExternalLinkage()) + return; + ExportsSymbols = true; + Md5.update(GV.getName()); + Md5.update(ArrayRef<uint8_t>{0}); + }; + + for (auto &F : *M) + AddGlobal(F); + for (auto &GV : M->globals()) + AddGlobal(GV); + for (auto &GA : M->aliases()) + AddGlobal(GA); + for (auto &IF : M->ifuncs()) + AddGlobal(IF); + + if (!ExportsSymbols) + return ""; + + MD5::MD5Result R; + Md5.final(R); + + SmallString<32> Str; + MD5::stringifyResult(R, Str); + return ("$" + Str).str(); } diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index a33b85c4ee69a..cdba982e6641f 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -225,10 +225,10 @@ struct PromoteMem2Reg { std::vector<AllocaInst *> Allocas; DominatorTree &DT; DIBuilder DIB; - /// A cache of @llvm.assume intrinsics used by SimplifyInstruction. AssumptionCache *AC; + const SimplifyQuery SQ; /// Reverse mapping of Allocas. DenseMap<AllocaInst *, unsigned> AllocaLookup; @@ -270,7 +270,8 @@ public: AssumptionCache *AC) : Allocas(Allocas.begin(), Allocas.end()), DT(DT), DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false), - AC(AC) {} + AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(), + nullptr, &DT, AC) {} void run(); @@ -673,8 +674,6 @@ void PromoteMem2Reg::run() { A->eraseFromParent(); } - const DataLayout &DL = F.getParent()->getDataLayout(); - // Remove alloca's dbg.declare intrinsics from the function. for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) @@ -699,7 +698,7 @@ void PromoteMem2Reg::run() { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. - if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) { + if (Value *V = SimplifyInstruction(PN, SQ)) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); NewPhiNodes.erase(I++); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index f86e97b6cc72f..7a3e8b9ae915d 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2231,7 +2231,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, } // Check for trivial simplification.
- if (Value *V = SimplifyInstruction(N, DL)) { + if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) { if (!BBI->use_empty()) TranslateMap[&*BBI] = V; if (!N->mayHaveSideEffects()) { @@ -2307,7 +2307,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); - if (Value *V = SimplifyInstruction(PN, DL)) { + if (Value *V = SimplifyInstruction(PN, {DL, PN})) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); continue; @@ -3545,7 +3545,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( assert(VVal && "Should have a unique destination value"); ICI->setOperand(0, VVal); - if (Value *V = SimplifyInstruction(ICI, DL)) { + if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) { ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index a4cc6a031ad4c..02a5d3dbeadfb 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -51,13 +51,13 @@ namespace { ScalarEvolution *SE; DominatorTree *DT; - SmallVectorImpl<WeakVH> &DeadInsts; + SmallVectorImpl<WeakTrackingVH> &DeadInsts; bool Changed; public: SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI,SmallVectorImpl<WeakVH> &Dead) + LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) { assert(LI && "IV simplification requires LoopInfo"); } @@ -701,7 +701,7 @@ void IVVisitor::anchor() { } /// Simplify instructions that use this induction variable /// by using ScalarEvolution to analyze the IV's recurrence. bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead, + LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead, IVVisitor *V) { SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead); SIV.simplifyUsers(CurrIV, V); @@ -711,7 +711,7 @@ bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, /// Simplify users of induction variables within this /// loop. This does not actually change or add IVs. bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead) { + LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) { bool Changed = false; for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead); diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 27373427d4f7d..2509b5f22046b 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -54,8 +54,7 @@ static bool runImpl(Function &F, const SimplifyQuery &SQ, // Don't waste time simplifying unused instructions. if (!I->use_empty()) { - if (Value *V = - SimplifyInstruction(I, SQ.getWithInstruction(I), ORE)) { + if (Value *V = SimplifyInstruction(I, SQ, ORE)) { // Mark all uses for resimplification next time round the loop. 
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index 27373427d4f7d..2509b5f22046b 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -54,8 +54,7 @@ static bool runImpl(Function &F, const SimplifyQuery &SQ,
 
         // Don't waste time simplifying unused instructions.
         if (!I->use_empty()) {
-          if (Value *V =
-                  SimplifyInstruction(I, SQ.getWithInstruction(I), ORE)) {
+          if (Value *V = SimplifyInstruction(I, SQ, ORE)) {
             // Mark all uses for resimplification next time round the loop.
             for (User *U : I->users())
               Next->insert(cast<Instruction>(U));
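The WeakVH-to-WeakTrackingVH renames below rely on the handle's defining behavior: it follows a value through replaceAllUsesWith and is nulled when the value is deleted, which is exactly what hasValueBeenRAUWed checks by comparing the original Value pointers against the handles. A toy registry-based model of that behavior, with all names invented for illustration:

#include <algorithm>
#include <cassert>
#include <vector>

struct Value;

struct Handle {
  Value *V = nullptr;
  Handle() = default;
  explicit Handle(Value *V);
  ~Handle();
  Handle(const Handle &) = delete;            // non-copyable, for brevity
  Handle &operator=(const Handle &) = delete;
};

struct Value {
  std::vector<Handle *> Handles;              // handles watching this value
  void replaceAllUsesWith(Value *New) {
    for (Handle *H : Handles) {               // retarget every live handle
      H->V = New;
      New->Handles.push_back(H);
    }
    Handles.clear();
  }
  ~Value() {
    for (Handle *H : Handles)
      H->V = nullptr;                         // deletion nulls the handle
  }
};

Handle::Handle(Value *V) : V(V) {
  if (V)
    V->Handles.push_back(this);
}
Handle::~Handle() {
  if (!V)
    return;
  auto &Hs = V->Handles;
  Hs.erase(std::remove(Hs.begin(), Hs.end(), this), Hs.end());
}

int main() {
  Value A, B;
  Handle H(&A);
  A.replaceAllUsesWith(&B);
  assert(H.V == &B);          // the handle followed the replacement
}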
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5549444047088..f112c555205c3 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3899,11 +3899,13 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
 }
 
 /// \brief Check that the Values in the slice in VL array are still existent in
-/// the WeakVH array.
+/// the WeakTrackingVH array.
 /// Vectorization of part of the VL array may cause later values in the VL array
-/// to become invalid. We track when this has happened in the WeakVH array.
-static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH,
-                               unsigned SliceBegin, unsigned SliceSize) {
+/// to become invalid. We track when this has happened in the WeakTrackingVH
+/// array.
+static bool hasValueBeenRAUWed(ArrayRef<Value *> VL,
+                               ArrayRef<WeakTrackingVH> VH, unsigned SliceBegin,
+                               unsigned SliceSize) {
   VL = VL.slice(SliceBegin, SliceSize);
   VH = VH.slice(SliceBegin, SliceSize);
   return !std::equal(VL.begin(), VL.end(), VH.begin());
@@ -3921,7 +3923,7 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
     return false;
 
   // Keep track of values that were deleted by vectorizing in the loop below.
-  SmallVector<WeakVH, 8> TrackValues(Chain.begin(), Chain.end());
+  SmallVector<WeakTrackingVH, 8> TrackValues(Chain.begin(), Chain.end());
 
   bool Changed = false;
   // Look for profitable vectorizable trees at all offsets, starting at zero.
@@ -4107,7 +4109,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
   bool Changed = false;
 
   // Keep track of values that were deleted by vectorizing in the loop below.
-  SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
+  SmallVector<WeakTrackingVH, 8> TrackValues(VL.begin(), VL.end());
 
   unsigned NextInst = 0, MaxInst = VL.size();
   for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF;
@@ -4734,7 +4736,7 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
 
 namespace {
 /// Tracks instructions and their children.
-class WeakVHWithLevel final : public CallbackVH {
+class WeakTrackingVHWithLevel final : public CallbackVH {
   /// Operand index of the instruction currently being analyzed.
   unsigned Level = 0;
   /// Is this the instruction that should be vectorized, or are we now
@@ -4743,8 +4745,8 @@ class WeakVHWithLevel final : public CallbackVH {
   bool IsInitial = true;
 
 public:
-  explicit WeakVHWithLevel() = default;
-  WeakVHWithLevel(Value *V) : CallbackVH(V){};
+  explicit WeakTrackingVHWithLevel() = default;
+  WeakTrackingVHWithLevel(Value *V) : CallbackVH(V){};
   /// Restart children analysis each time it is replaced by the new instruction.
   void allUsesReplacedWith(Value *New) override {
     setValPtr(New);
@@ -4771,7 +4773,7 @@ public:
            cast<Instruction>(getValPtr())->getNumOperands() > Level);
     return cast<Instruction>(getValPtr())->getOperand(Level++);
   }
-  virtual ~WeakVHWithLevel() = default;
+  virtual ~WeakTrackingVHWithLevel() = default;
 };
 } // namespace
@@ -4793,7 +4795,7 @@ static bool canBeVectorized(
   if (Root->getParent() != BB)
     return false;
-  SmallVector<WeakVHWithLevel, 8> Stack(1, Root);
+  SmallVector<WeakTrackingVHWithLevel, 8> Stack(1, Root);
   SmallSet<Value *, 8> VisitedInstrs;
   bool Res = false;
   while (!Stack.empty()) {
@@ -5069,7 +5071,8 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
     SetVector<Value *> Candidates(GEPList.begin(), GEPList.end());
 
     // Some of the candidates may have already been vectorized after we
-    // initially collected them. If so, the WeakVHs will have nullified the
+    // initially collected them. If so, the WeakTrackingVHs will have
+    // nullified the
     // values, so remove them from the set of candidates.
     Candidates.remove(nullptr);
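WeakTrackingVHWithLevel above layers traversal state on CallbackVH, whose virtual hooks fire when the tracked value is RAUWed or deleted; overriding allUsesReplacedWith to reset Level is what lets the SLP traversal restart cleanly on the replacement instruction. A compact sketch of that callback-handle idea, using hypothetical class names rather than LLVM's CallbackVH:

#include <cassert>

struct Value { int NumOperands = 0; };

class CallbackHandle {
  Value *V;
protected:
  void setValPtr(Value *New) { V = New; }
public:
  explicit CallbackHandle(Value *V = nullptr) : V(V) {}
  Value *get() const { return V; }
  // In LLVM the RAUW machinery invokes this; here we call it by hand.
  virtual void allUsesReplacedWith(Value *New) { setValPtr(New); }
  virtual ~CallbackHandle() = default;
};

class HandleWithLevel final : public CallbackHandle {
  unsigned Level = 0;                     // next operand index to visit
public:
  using CallbackHandle::CallbackHandle;
  void allUsesReplacedWith(Value *New) override {
    setValPtr(New);
    Level = 0;                            // restart analysis on the new value
  }
  bool isFinal() const { return Level == unsigned(get()->NumOperands); }
  unsigned nextOperandIndex() { return Level++; }
};

int main() {
  Value Old{2}, New{3};
  HandleWithLevel H(&Old);
  H.nextOperandIndex();                    // traversal is partway through Old
  H.allUsesReplacedWith(&New);             // simulate replaceAllUsesWith
  assert(H.get() == &New && !H.isFinal()); // cursor reset to operand 0
}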