author     Dimitry Andric <dim@FreeBSD.org>   2017-12-18 20:10:56 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2017-12-18 20:10:56 +0000
commit     044eb2f6afba375a914ac9d8024f8f5142bb912e (patch)
tree       1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/Transforms/Utils
parent     eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff)
Diffstat (limited to 'lib/Transforms/Utils'): 41 files changed, 2875 insertions, 1487 deletions
diff --git a/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index df9d5da9e26e..364878dc588d 100644
--- a/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -36,9 +36,11 @@ static inline bool CompareVars(const ASanStackVariableDescription &a,
 // with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars.
 static const size_t kMinAlignment = 16;
 
+// We want to add a full redzone after every variable.
 // The larger the variable Size the larger is the redzone.
 // The resulting frame size is a multiple of Alignment.
-static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) {
+static size_t VarAndRedzoneSize(size_t Size, size_t Granularity,
+                                size_t Alignment) {
   size_t Res = 0;
   if (Size <= 4)  Res = 16;
   else if (Size <= 16) Res = 32;
@@ -46,7 +48,7 @@ static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) {
   else if (Size <= 512) Res = Size + 64;
   else if (Size <= 4096) Res = Size + 128;
   else                   Res = Size + 256;
-  return alignTo(Res, Alignment);
+  return alignTo(std::max(Res, 2 * Granularity), Alignment);
 }
 
 ASanStackFrameLayout
@@ -80,7 +82,8 @@ ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
     assert(Size > 0);
     size_t NextAlignment = IsLast ? Granularity
                                   : std::max(Granularity, Vars[i + 1].Alignment);
-    size_t SizeWithRedzone = VarAndRedzoneSize(Size, NextAlignment);
+    size_t SizeWithRedzone = VarAndRedzoneSize(Size, Granularity,
+                                               NextAlignment);
     Vars[i].Offset = Offset;
     Offset += SizeWithRedzone;
   }
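The new Granularity parameter feeds the std::max clamp, which guarantees a full redzone of at least two shadow granules after every variable, even when the size table alone would produce something smaller. A self-contained sketch of the sizing rule (editorial illustration, not part of the commit; the middle tier Size <= 128 -> Size + 32 falls between the two visible hunks, and alignTo is reimplemented here):

#include <algorithm>
#include <cstddef>
#include <cstdio>

// Round Size up to the next multiple of Alignment (assumed a power of two).
static std::size_t alignTo(std::size_t Size, std::size_t Alignment) {
  return (Size + Alignment - 1) & ~(Alignment - 1);
}

// Mirrors the patched VarAndRedzoneSize.
static std::size_t varAndRedzoneSize(std::size_t Size, std::size_t Granularity,
                                     std::size_t Alignment) {
  std::size_t Res = 0;
  if (Size <= 4) Res = 16;
  else if (Size <= 16) Res = 32;
  else if (Size <= 128) Res = Size + 32;
  else if (Size <= 512) Res = Size + 64;
  else if (Size <= 4096) Res = Size + 128;
  else Res = Size + 256;
  return alignTo(std::max(Res, 2 * Granularity), Alignment);
}

int main() {
  // With a 64-byte shadow granularity, a 4-byte variable now reserves
  // max(16, 2 * 64) = 128 bytes; the old code reserved only alignTo(16, 16) = 16.
  std::printf("%zu\n", varAndRedzoneSize(4, 64, 16)); // prints 128
}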
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp
index 4c9746b8c691..0f0668f24db5 100644
--- a/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -50,31 +50,45 @@
 //
 // For more details about DWARF discriminators, please visit
 // http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
+//
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/AddDiscriminators.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
+#include <utility>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "add-discriminators"
 
+// Command line option to disable discriminator generation even in the
+// presence of debug information. This is only needed when debugging
+// debug info generation issues.
+static cl::opt<bool> NoDiscriminators(
+    "no-discriminators", cl::init(false),
+    cl::desc("Disable generation of discriminator information."));
+
 namespace {
 
+// The legacy pass of AddDiscriminators.
 struct AddDiscriminatorsLegacyPass : public FunctionPass {
   static char ID; // Pass identification, replacement for typeid
+
   AddDiscriminatorsLegacyPass() : FunctionPass(ID) {
     initializeAddDiscriminatorsLegacyPassPass(*PassRegistry::getPassRegistry());
   }
@@ -85,18 +99,12 @@ struct AddDiscriminatorsLegacyPass : public FunctionPass {
 } // end anonymous namespace
 
 char AddDiscriminatorsLegacyPass::ID = 0;
+
 INITIALIZE_PASS_BEGIN(AddDiscriminatorsLegacyPass, "add-discriminators",
                       "Add DWARF path discriminators", false, false)
 INITIALIZE_PASS_END(AddDiscriminatorsLegacyPass, "add-discriminators",
                     "Add DWARF path discriminators", false, false)
 
-// Command line option to disable discriminator generation even in the
-// presence of debug information. This is only needed when debugging
-// debug info generation issues.
-static cl::opt<bool> NoDiscriminators(
-    "no-discriminators", cl::init(false),
-    cl::desc("Disable generation of discriminator information."));
-
 // Create the legacy AddDiscriminatorsPass.
 FunctionPass *llvm::createAddDiscriminatorsPass() {
   return new AddDiscriminatorsLegacyPass();
@@ -166,11 +174,11 @@ static bool addDiscriminators(Function &F) {
 
   bool Changed = false;
 
-  typedef std::pair<StringRef, unsigned> Location;
-  typedef DenseSet<const BasicBlock *> BBSet;
-  typedef DenseMap<Location, BBSet> LocationBBMap;
-  typedef DenseMap<Location, unsigned> LocationDiscriminatorMap;
-  typedef DenseSet<Location> LocationSet;
+  using Location = std::pair<StringRef, unsigned>;
+  using BBSet = DenseSet<const BasicBlock *>;
+  using LocationBBMap = DenseMap<Location, BBSet>;
+  using LocationDiscriminatorMap = DenseMap<Location, unsigned>;
+  using LocationSet = DenseSet<Location>;
 
   LocationBBMap LBM;
   LocationDiscriminatorMap LDM;
@@ -242,6 +250,7 @@ static bool addDiscriminators(Function &F) {
 bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) {
   return addDiscriminators(F);
 }
+
 PreservedAnalyses AddDiscriminatorsPass::run(Function &F,
                                              FunctionAnalysisManager &AM) {
   if (!addDiscriminators(F))
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 3d5cbfc93f2e..606bd8baccaa 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1,4 +1,4 @@
-//===-- BasicBlockUtils.cpp - BasicBlock Utilities -------------------------==//
+//===- BasicBlockUtils.cpp - BasicBlock Utilities --------------------------==//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -13,22 +13,36 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/Casting.h" #include "llvm/Transforms/Utils/Local.h" -#include <algorithm> +#include <cassert> +#include <cstdint> +#include <string> +#include <utility> +#include <vector> + using namespace llvm; void llvm::DeleteDeadBlock(BasicBlock *BB) { @@ -130,8 +144,16 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, } // Begin by getting rid of unneeded PHIs. - if (isa<PHINode>(BB->front())) + SmallVector<Value *, 4> IncomingValues; + if (isa<PHINode>(BB->front())) { + for (auto &I : *BB) + if (PHINode *PN = dyn_cast<PHINode>(&I)) { + if (PN->getIncomingValue(0) != PN) + IncomingValues.push_back(PN->getIncomingValue(0)); + } else + break; FoldSingleEntryPHINodes(BB, MemDep); + } // Delete the unconditional branch from the predecessor... PredBB->getInstList().pop_back(); @@ -143,6 +165,21 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, // Move all definitions in the successor to the predecessor... PredBB->getInstList().splice(PredBB->end(), BB->getInstList()); + // Eliminate duplicate dbg.values describing the entry PHI node post-splice. + for (auto *Incoming : IncomingValues) { + if (isa<Instruction>(Incoming)) { + SmallVector<DbgValueInst *, 2> DbgValues; + SmallDenseSet<std::pair<DILocalVariable *, DIExpression *>, 2> + DbgValueSet; + llvm::findDbgValues(DbgValues, Incoming); + for (auto &DVI : DbgValues) { + auto R = DbgValueSet.insert({DVI->getVariable(), DVI->getExpression()}); + if (!R.second) + DVI->eraseFromParent(); + } + } + } + // Inherit predecessors name if it exists. if (!PredBB->hasName()) PredBB->takeName(BB); @@ -454,7 +491,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // node becomes an incoming value for BB's phi node. However, if the Preds // list is empty, we need to insert dummy entries into the PHI nodes in BB to // account for the newly created predecessor. - if (Preds.size() == 0) { + if (Preds.empty()) { // Insert dummy values as the incoming value. 
@@ -454,7 +491,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
   // node becomes an incoming value for BB's phi node. However, if the Preds
   // list is empty, we need to insert dummy entries into the PHI nodes in BB to
   // account for the newly created predecessor.
-  if (Preds.size() == 0) {
+  if (Preds.empty()) {
     // Insert dummy values as the incoming value.
     for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
       cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
@@ -675,7 +712,6 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
   ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
 }
 
-
 Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
                             BasicBlock *&IfFalse) {
   PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 175cbd2ce0df..3653c307619b 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -16,9 +16,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/BreakCriticalEdges.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/CFG.h"
@@ -28,6 +30,8 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
 
 using namespace llvm;
 
 #define DEBUG_TYPE "break-crit-edges"
@@ -198,59 +202,23 @@ llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
   if (!DT && !LI)
     return NewBB;
 
-  // Now update analysis information. Since the only predecessor of NewBB is
-  // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate
-  // anything, as there are other successors of DestBB. However, if all other
-  // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a
-  // loop header) then NewBB dominates DestBB.
-  SmallVector<BasicBlock*, 8> OtherPreds;
-
-  // If there is a PHI in the block, loop over predecessors with it, which is
-  // faster than iterating pred_begin/end.
-  if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
-    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-      if (PN->getIncomingBlock(i) != NewBB)
-        OtherPreds.push_back(PN->getIncomingBlock(i));
-  } else {
-    for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB);
-         I != E; ++I) {
-      BasicBlock *P = *I;
-      if (P != NewBB)
-        OtherPreds.push_back(P);
-    }
-  }
-
-  bool NewBBDominatesDestBB = true;
-
-  // Should we update DominatorTree information?
   if (DT) {
-    DomTreeNode *TINode = DT->getNode(TIBB);
-
-    // The new block is not the immediate dominator for any other nodes, but
-    // TINode is the immediate dominator for the new node.
+    // Update the DominatorTree.
+    //       ---> NewBB -----\
+    //      /                 V
+    //  TIBB -------\\------> DestBB
     //
-    if (TINode) {       // Don't break unreachable code!
-      DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
-      DomTreeNode *DestBBNode = nullptr;
-
-      // If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
-      if (!OtherPreds.empty()) {
-        DestBBNode = DT->getNode(DestBB);
-        while (!OtherPreds.empty() && NewBBDominatesDestBB) {
-          if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back()))
-            NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode);
-          OtherPreds.pop_back();
-        }
-        OtherPreds.clear();
-      }
-
-      // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it
-      // doesn't dominate anything.
-      if (NewBBDominatesDestBB) {
-        if (!DestBBNode) DestBBNode = DT->getNode(DestBB);
-        DT->changeImmediateDominator(DestBBNode, NewBBNode);
-      }
-    }
+    // First, inform the DT about the new path from TIBB to DestBB via NewBB,
+    // then delete the old edge from TIBB to DestBB. By doing this in that order
+    // DestBB stays reachable in the DT the whole time and its subtree doesn't
+    // get disconnected.
+    SmallVector<DominatorTree::UpdateType, 3> Updates;
+    Updates.push_back({DominatorTree::Insert, TIBB, NewBB});
+    Updates.push_back({DominatorTree::Insert, NewBB, DestBB});
+    if (llvm::find(successors(TIBB), DestBB) == succ_end(TIBB))
+      Updates.push_back({DominatorTree::Delete, TIBB, DestBB});
+
+    DT->applyUpdates(Updates);
   }
 
   // Update LoopInfo if it is around.
@@ -326,3 +294,159 @@ llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
 
   return NewBB;
 }
+
+// Return the unique indirectbr predecessor of a block. This may return null
+// even if such a predecessor exists, if it's not useful for splitting.
+// If a predecessor is found, OtherPreds will contain all other (non-indirectbr)
+// predecessors of BB.
+static BasicBlock *
+findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
+  // If the block doesn't have any PHIs, we don't care about it, since there's
+  // no point in splitting it.
+  PHINode *PN = dyn_cast<PHINode>(BB->begin());
+  if (!PN)
+    return nullptr;
+
+  // Verify we have exactly one IBR predecessor.
+  // Conservatively bail out if one of the other predecessors is not a "regular"
+  // terminator (that is, not a switch or a br).
+  BasicBlock *IBB = nullptr;
+  for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
+    BasicBlock *PredBB = PN->getIncomingBlock(Pred);
+    TerminatorInst *PredTerm = PredBB->getTerminator();
+    switch (PredTerm->getOpcode()) {
+    case Instruction::IndirectBr:
+      if (IBB)
+        return nullptr;
+      IBB = PredBB;
+      break;
+    case Instruction::Br:
+    case Instruction::Switch:
+      OtherPreds.push_back(PredBB);
+      continue;
+    default:
+      return nullptr;
+    }
+  }
+
+  return IBB;
+}
+
+bool llvm::SplitIndirectBrCriticalEdges(Function &F,
+                                        BranchProbabilityInfo *BPI,
+                                        BlockFrequencyInfo *BFI) {
+  // Check whether the function has any indirectbrs, and collect which blocks
+  // they may jump to. Since most functions don't have indirect branches,
+  // this lowers the common case's overhead to O(Blocks) instead of O(Edges).
+  SmallSetVector<BasicBlock *, 16> Targets;
+  for (auto &BB : F) {
+    auto *IBI = dyn_cast<IndirectBrInst>(BB.getTerminator());
+    if (!IBI)
+      continue;
+
+    for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ)
+      Targets.insert(IBI->getSuccessor(Succ));
+  }
+
+  if (Targets.empty())
+    return false;
+
+  bool ShouldUpdateAnalysis = BPI && BFI;
+  bool Changed = false;
+  for (BasicBlock *Target : Targets) {
+    SmallVector<BasicBlock *, 16> OtherPreds;
+    BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds);
+    // If we did not find an indirectbr, or the indirectbr is the only
+    // incoming edge, this isn't the kind of edge we're looking for.
+    if (!IBRPred || OtherPreds.empty())
+      continue;
+
+    // Don't even think about ehpads/landingpads.
+    Instruction *FirstNonPHI = Target->getFirstNonPHI();
+    if (FirstNonPHI->isEHPad() || Target->isLandingPad())
+      continue;
+
+    BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
+    if (ShouldUpdateAnalysis) {
+      // Copy the BFI/BPI from Target to BodyBlock.
+      for (unsigned I = 0, E = BodyBlock->getTerminator()->getNumSuccessors();
+           I < E; ++I)
+        BPI->setEdgeProbability(BodyBlock, I,
+                                BPI->getEdgeProbability(Target, I));
+      BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target).getFrequency());
+    }
+    // It's possible Target was its own successor through an indirectbr.
+    // In this case, the indirectbr now comes from BodyBlock.
+    if (IBRPred == Target)
+      IBRPred = BodyBlock;
+
+    // At this point Target only has PHIs, and BodyBlock has the rest of the
+    // block's body. Create a copy of Target that will be used by the "direct"
+    // preds.
+    ValueToValueMapTy VMap;
+    BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
+
+    BlockFrequency BlockFreqForDirectSucc;
+    for (BasicBlock *Pred : OtherPreds) {
+      // If the target is a loop to itself, then the terminator of the split
+      // block (BodyBlock) needs to be updated.
+      BasicBlock *Src = Pred != Target ? Pred : BodyBlock;
+      Src->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
+      if (ShouldUpdateAnalysis)
+        BlockFreqForDirectSucc += BFI->getBlockFreq(Src) *
+                                  BPI->getEdgeProbability(Src, DirectSucc);
+    }
+    if (ShouldUpdateAnalysis) {
+      BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc.getFrequency());
+      BlockFrequency NewBlockFreqForTarget =
+          BFI->getBlockFreq(Target) - BlockFreqForDirectSucc;
+      BFI->setBlockFreq(Target, NewBlockFreqForTarget.getFrequency());
+      BPI->eraseBlock(Target);
+    }
+
+    // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
+    // they are clones, so the number of PHIs is the same.
+    // (a) Remove the edge coming from IBRPred from the "Direct" PHI
+    // (b) Leave that as the only edge in the "Indirect" PHI.
+    // (c) Merge the two in the body block.
+    BasicBlock::iterator Indirect = Target->begin(),
+                         End = Target->getFirstNonPHI()->getIterator();
+    BasicBlock::iterator Direct = DirectSucc->begin();
+    BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt();
+
+    assert(&*End == Target->getTerminator() &&
+           "Block was expected to only contain PHIs");
+
+    while (Indirect != End) {
+      PHINode *DirPHI = cast<PHINode>(Direct);
+      PHINode *IndPHI = cast<PHINode>(Indirect);
+
+      // Now, clean up - the direct block shouldn't get the indirect value,
+      // and vice versa.
+      DirPHI->removeIncomingValue(IBRPred);
+      Direct++;
+
+      // Advance the pointer here, to avoid invalidation issues when the old
+      // PHI is erased.
+      Indirect++;
+
+      PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI);
+      NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred),
+                             IBRPred);
+
+      // Create a PHI in the body block, to merge the direct and indirect
+      // predecessors.
+      PHINode *MergePHI =
+          PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
+      MergePHI->addIncoming(NewIndPHI, Target);
+      MergePHI->addIncoming(DirPHI, DirectSucc);
+
+      IndPHI->replaceAllUsesWith(MergePHI);
+      IndPHI->eraseFromParent();
+    }
+
+    Changed = true;
+  }
+
+  return Changed;
+}
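SplitIndirectBrCriticalEdges cannot split an edge leaving an indirectbr the usual way, so it clones the PHI-only target block instead: direct (br/switch) predecessors are retargeted to the ".clone" copy, the indirectbr keeps the original, and both fall through into the ".split" body block where the PHIs are merged. A sketch of how a caller might drive it (hypothetical pass boilerplate; assuming the declaration lives in BasicBlockUtils.h alongside SplitCriticalEdge, and BPI/BFI may be null to skip the profile updates):

#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

static bool splitIBRCriticalEdges(Function &F, BranchProbabilityInfo *BPI,
                                  BlockFrequencyInfo *BFI) {
  // Returns true if at least one indirectbr target block was rewritten.
  return SplitIndirectBrCriticalEdges(F, BPI, BFI);
}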
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index 83ec7f55d1af..f711b192f604 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -1,4 +1,4 @@
-//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
+//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -17,27 +17,32 @@
 
 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include <cassert>
+#include <cstdint>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "bypass-slow-division"
 
 namespace {
 
-  struct DivOpInfo {
-    bool SignedOp;
-    Value *Dividend;
-    Value *Divisor;
-
-    DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
-        : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
-  };
 
   struct QuotRemPair {
     Value *Quotient;
@@ -55,38 +60,11 @@ namespace {
     Value *Quotient = nullptr;
     Value *Remainder = nullptr;
   };
-}
-
-namespace llvm {
-  template<>
-  struct DenseMapInfo<DivOpInfo> {
-    static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
-      return Val1.SignedOp == Val2.SignedOp &&
-             Val1.Dividend == Val2.Dividend &&
-             Val1.Divisor == Val2.Divisor;
-    }
-
-    static DivOpInfo getEmptyKey() {
-      return DivOpInfo(false, nullptr, nullptr);
-    }
-
-    static DivOpInfo getTombstoneKey() {
-      return DivOpInfo(true, nullptr, nullptr);
-    }
+using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;
+using BypassWidthsTy = DenseMap<unsigned, unsigned>;
+using VisitedSetTy = SmallPtrSet<Instruction *, 4>;
 
-    static unsigned getHashValue(const DivOpInfo &Val) {
-      return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
-                        reinterpret_cast<uintptr_t>(Val.Divisor)) ^
-             (unsigned)Val.SignedOp;
-    }
-  };
-
-  typedef DenseMap<DivOpInfo, QuotRemPair> DivCacheTy;
-  typedef DenseMap<unsigned, unsigned> BypassWidthsTy;
-  typedef SmallPtrSet<Instruction *, 4> VisitedSetTy;
-}
-
-namespace {
 enum ValueRange {
   /// Operand definitely fits into BypassType. No runtime checks are needed.
   VALRNG_KNOWN_SHORT,
@@ -116,17 +94,21 @@ class FastDivInsertionTask {
     return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
           SlowDivOrRem->getOpcode() == Instruction::SRem;
  }
+
  bool isDivisionOp() {
    return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
           SlowDivOrRem->getOpcode() == Instruction::UDiv;
  }
+
  Type *getSlowType() { return SlowDivOrRem->getType(); }
 
 public:
   FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
+
   Value *getReplacement(DivCacheTy &Cache);
 };
-} // anonymous namespace
+
+} // end anonymous namespace
 
 FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
                                            const BypassWidthsTy &BypassWidths) {
@@ -175,7 +157,7 @@ Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
   // Then, look for a value in Cache.
   Value *Dividend = SlowDivOrRem->getOperand(0);
   Value *Divisor = SlowDivOrRem->getOperand(1);
-  DivOpInfo Key(isSignedOp(), Dividend, Divisor);
+  DivRemMapKey Key(isSignedOp(), Dividend, Divisor);
   auto CacheI = Cache.find(Key);
 
   if (CacheI == Cache.end()) {
@@ -225,7 +207,7 @@ bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
     C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
     return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
   }
-  case Instruction::PHI: {
+  case Instruction::PHI:
     // Stop IR traversal in case of a crazy input code. This limits recursion
     // depth.
     if (Visited.size() >= 16)
@@ -241,7 +223,6 @@ bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
       return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
              isa<UndefValue>(V);
     });
-  }
   default:
     return false;
   }
@@ -371,11 +352,6 @@ Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
   Value *Dividend = SlowDivOrRem->getOperand(0);
   Value *Divisor = SlowDivOrRem->getOperand(1);
 
-  if (isa<ConstantInt>(Divisor)) {
-    // Keep division by a constant for DAGCombiner.
-    return None;
-  }
-
   VisitedSetTy SetL;
   ValueRange DividendRange = getValueRange(Dividend, SetL);
   if (DividendRange == VALRNG_LIKELY_LONG)
@@ -391,7 +367,9 @@ Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
 
   if (DividendShort && DivisorShort) {
     // If both operands are known to be short then just replace the long
-    // division with a short one in-place.
+    // division with a short one in-place. Since we're not introducing control
+    // flow in this case, narrowing the division is always a win, even if the
+    // divisor is a constant (and will later get replaced by a multiplication).
 
     IRBuilder<> Builder(SlowDivOrRem);
     Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
@@ -401,7 +379,16 @@ Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
     Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
     Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
     return QuotRemPair(ExtDiv, ExtRem);
-  } else if (DividendShort && !isSignedOp()) {
+  }
+
+  if (isa<ConstantInt>(Divisor)) {
+    // If the divisor is a constant, DAGCombiner will convert the division
+    // into a multiplication by a magic constant. It isn't clear if it is
+    // worth introducing control flow to get a narrower multiply.
+    return None;
+  }
+
+  if (DividendShort && !isSignedOp()) {
     // If the division is unsigned and Dividend is known to be short, then
     // either
     // 1) Divisor is less or equal to Dividend, and the result can be computed
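The transformation this file performs can be pictured at the C level: guard the expensive wide division with a cheap check that both operands fit in the narrow bypass type. A behavioral sketch of the emitted control flow (illustration only, not the IRBuilder code itself; 64/32 is the common x86 bypass pair, and the OR-based test mirrors the pass's combined operand check):

#include <cstdint>

// What the inserted fast path computes for a 64-bit udiv with a 32-bit
// bypass type: if both operands fit in 32 bits, use the narrow divide.
uint64_t bypassUDiv(uint64_t A, uint64_t B) {
  if (((A | B) >> 32) == 0)               // both operands are "short"
    return uint32_t(A) / uint32_t(B);     // fast narrow division
  return A / B;                           // slow wide division
}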
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 83bc05d0311c..972e47f9270a 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -5,12 +5,13 @@ add_llvm_library(LLVMTransformUtils
   BreakCriticalEdges.cpp
   BuildLibCalls.cpp
   BypassSlowDivision.cpp
+  CallPromotionUtils.cpp
   CloneFunction.cpp
   CloneModule.cpp
-  CmpInstAnalysis.cpp
   CodeExtractor.cpp
   CtorUtils.cpp
   DemoteRegToStack.cpp
+  EntryExitInstrumenter.cpp
   EscapeEnumerator.cpp
   Evaluator.cpp
   FlattenCFG.cpp
diff --git a/lib/Transforms/Utils/CallPromotionUtils.cpp b/lib/Transforms/Utils/CallPromotionUtils.cpp
new file mode 100644
index 000000000000..eb3139ce4293
--- /dev/null
+++ b/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -0,0 +1,328 @@
+//===- CallPromotionUtils.cpp - Utilities for call promotion ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities useful for promoting indirect call sites to
+// direct call sites.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "call-promotion-utils"
+
+/// Fix-up phi nodes in an invoke instruction's normal destination.
+///
+/// After versioning an invoke instruction, values coming from the original
+/// block will now either be coming from the original block or the "else" block.
+static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
+                                      BasicBlock *ElseBlock,
+                                      Instruction *NewInst) {
+  for (auto &I : *Invoke->getNormalDest()) {
+    auto *Phi = dyn_cast<PHINode>(&I);
+    if (!Phi)
+      break;
+    int Idx = Phi->getBasicBlockIndex(OrigBlock);
+    if (Idx == -1)
+      continue;
+    Value *V = Phi->getIncomingValue(Idx);
+    if (dyn_cast<Instruction>(V) == Invoke) {
+      Phi->setIncomingBlock(Idx, ElseBlock);
+      Phi->addIncoming(NewInst, OrigBlock);
+      continue;
+    }
+    Phi->addIncoming(V, ElseBlock);
+  }
+}
+
+/// Fix-up phi nodes in an invoke instruction's unwind destination.
+///
+/// After versioning an invoke instruction, values coming from the original
+/// block will now be coming from either the "then" block or the "else" block.
+static void fixupPHINodeForUnwindDest(InvokeInst *Invoke, BasicBlock *OrigBlock,
+                                      BasicBlock *ThenBlock,
+                                      BasicBlock *ElseBlock) {
+  for (auto &I : *Invoke->getUnwindDest()) {
+    auto *Phi = dyn_cast<PHINode>(&I);
+    if (!Phi)
+      break;
+    int Idx = Phi->getBasicBlockIndex(OrigBlock);
+    if (Idx == -1)
+      continue;
+    auto *V = Phi->getIncomingValue(Idx);
+    Phi->setIncomingBlock(Idx, ThenBlock);
+    Phi->addIncoming(V, ElseBlock);
+  }
+}
+
+/// Determine whether a phi node already merges the returned value of a call
+/// or invoke instruction, i.e. has it as its operand.
+static bool getRetPhiNode(Instruction *Inst, BasicBlock *Block) {
+  BasicBlock *FromBlock = Inst->getParent();
+  for (auto &I : *Block) {
+    PHINode *PHI = dyn_cast<PHINode>(&I);
+    if (!PHI)
+      break;
+    int Idx = PHI->getBasicBlockIndex(FromBlock);
+    if (Idx == -1)
+      continue;
+    auto *V = PHI->getIncomingValue(Idx);
+    if (V == Inst)
+      return true;
+  }
+  return false;
+}
+
+/// Create a phi node for the returned value of a call or invoke instruction.
+///
+/// After versioning a call or invoke instruction that returns a value, we have
+/// to merge the value of the original and new instructions. We do this by
+/// creating a phi node and replacing uses of the original instruction with this
+/// phi node.
+static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst) {
+
+  if (OrigInst->getType()->isVoidTy() || OrigInst->use_empty())
+    return;
+
+  BasicBlock *RetValBB = NewInst->getParent();
+  if (auto *Invoke = dyn_cast<InvokeInst>(NewInst))
+    RetValBB = Invoke->getNormalDest();
+  BasicBlock *PhiBB = RetValBB->getSingleSuccessor();
+
+  if (getRetPhiNode(OrigInst, PhiBB))
+    return;
+
+  IRBuilder<> Builder(&PhiBB->front());
+  PHINode *Phi = Builder.CreatePHI(OrigInst->getType(), 0);
+  SmallVector<User *, 16> UsersToUpdate;
+  for (User *U : OrigInst->users())
+    UsersToUpdate.push_back(U);
+  for (User *U : UsersToUpdate)
+    U->replaceUsesOfWith(OrigInst, Phi);
+  Phi->addIncoming(OrigInst, OrigInst->getParent());
+  Phi->addIncoming(NewInst, RetValBB);
+}
+
+/// Cast a call or invoke instruction to the given type.
+///
+/// When promoting a call site, the return type of the call site might not
+/// match that of the callee. If this is the case, we have to cast the
+/// returned value to the correct type. The location of the cast depends on
+/// whether we have a call or invoke instruction.
+Instruction *createRetBitCast(CallSite CS, Type *RetTy) {
+
+  // Save the users of the calling instruction. These uses will be changed to
+  // use the bitcast after we create it.
+  SmallVector<User *, 16> UsersToUpdate;
+  for (User *U : CS.getInstruction()->users())
+    UsersToUpdate.push_back(U);
+
+  // Determine an appropriate location to create the bitcast for the return
+  // value. The location depends on whether we have a call or invoke
+  // instruction.
+  Instruction *InsertBefore = nullptr;
+  if (auto *Invoke = dyn_cast<InvokeInst>(CS.getInstruction()))
+    InsertBefore = &*Invoke->getNormalDest()->getFirstInsertionPt();
+  else
+    InsertBefore = &*std::next(CS.getInstruction()->getIterator());
+
+  // Bitcast the return value to the correct type.
+  auto *Cast = CastInst::Create(Instruction::BitCast, CS.getInstruction(),
+                                RetTy, "", InsertBefore);
+
+  // Replace all the original uses of the calling instruction with the bitcast.
+  for (User *U : UsersToUpdate)
+    U->replaceUsesOfWith(CS.getInstruction(), Cast);
+
+  return Cast;
+}
+
+/// Predicate and clone the given call site.
+///
+/// This function creates an if-then-else structure at the location of the call
+/// site. The "if" condition compares the call site's called value to the given
+/// callee. The original call site is moved into the "else" block, and a clone
+/// of the call site is placed in the "then" block. The cloned instruction is
+/// returned.
+static Instruction *versionCallSite(CallSite CS, Value *Callee,
+                                    MDNode *BranchWeights,
+                                    BasicBlock *&ThenBlock,
+                                    BasicBlock *&ElseBlock,
+                                    BasicBlock *&MergeBlock) {
+
+  IRBuilder<> Builder(CS.getInstruction());
+  Instruction *OrigInst = CS.getInstruction();
+
+  // Create the compare. The called value and callee must have the same type
+  // to be compared.
+  auto *LHS =
+      Builder.CreateBitCast(CS.getCalledValue(), Builder.getInt8PtrTy());
+  auto *RHS = Builder.CreateBitCast(Callee, Builder.getInt8PtrTy());
+  auto *Cond = Builder.CreateICmpEQ(LHS, RHS);
+
+  // Create an if-then-else structure. The original instruction is moved into
+  // the "else" block, and a clone of the original instruction is placed in the
+  // "then" block.
+  TerminatorInst *ThenTerm = nullptr;
+  TerminatorInst *ElseTerm = nullptr;
+  SplitBlockAndInsertIfThenElse(Cond, CS.getInstruction(), &ThenTerm, &ElseTerm,
+                                BranchWeights);
+  ThenBlock = ThenTerm->getParent();
+  ElseBlock = ElseTerm->getParent();
+  MergeBlock = OrigInst->getParent();
+
+  ThenBlock->setName("if.true.direct_targ");
+  ElseBlock->setName("if.false.orig_indirect");
+  MergeBlock->setName("if.end.icp");
+
+  Instruction *NewInst = OrigInst->clone();
+  OrigInst->moveBefore(ElseTerm);
+  NewInst->insertBefore(ThenTerm);
+
+  // If the original call site is an invoke instruction, we have extra work to
+  // do since invoke instructions are terminating.
+  if (auto *OrigInvoke = dyn_cast<InvokeInst>(OrigInst)) {
+    auto *NewInvoke = cast<InvokeInst>(NewInst);
+
+    // Invoke instructions are terminating, so we don't need the terminator
+    // instructions that were just created.
+    ThenTerm->eraseFromParent();
+    ElseTerm->eraseFromParent();
+
+    // Branch from the "merge" block to the original normal destination.
+    Builder.SetInsertPoint(MergeBlock);
+    Builder.CreateBr(OrigInvoke->getNormalDest());
+
+    // Now set the normal destination of the new invoke instruction to be the
+    // "merge" block.
+    NewInvoke->setNormalDest(MergeBlock);
+  }
+
+  return NewInst;
+}
+
+bool llvm::isLegalToPromote(CallSite CS, Function *Callee,
+                            const char **FailureReason) {
+  assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
+
+  // Check the return type. The callee's return value type must be bitcast
+  // compatible with the call site's type.
+  Type *CallRetTy = CS.getInstruction()->getType();
+  Type *FuncRetTy = Callee->getReturnType();
+  if (CallRetTy != FuncRetTy)
+    if (!CastInst::isBitCastable(FuncRetTy, CallRetTy)) {
+      if (FailureReason)
+        *FailureReason = "Return type mismatch";
+      return false;
+    }
+
+  // The number of formal arguments of the callee.
+  unsigned NumParams = Callee->getFunctionType()->getNumParams();
+
+  // Check the number of arguments. The callee and call site must agree on the
+  // number of arguments.
+  if (CS.arg_size() != NumParams && !Callee->isVarArg()) {
+    if (FailureReason)
+      *FailureReason = "The number of arguments mismatch";
+    return false;
+  }
+
+  // Check the argument types. The callee's formal argument types must be
+  // bitcast compatible with the corresponding actual argument types of the
+  // call site.
+  for (unsigned I = 0; I < NumParams; ++I) {
+    Type *FormalTy = Callee->getFunctionType()->getFunctionParamType(I);
+    Type *ActualTy = CS.getArgument(I)->getType();
+    if (FormalTy == ActualTy)
+      continue;
+    if (!CastInst::isBitCastable(ActualTy, FormalTy)) {
+      if (FailureReason)
+        *FailureReason = "Argument type mismatch";
+      return false;
+    }
+  }
+
+  return true;
+}
+
+static void promoteCall(CallSite CS, Function *Callee, Instruction *&Cast) {
+  assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
+
+  // Set the called function of the call site to be the given callee.
+  CS.setCalledFunction(Callee);
+
+  // Since the call site will no longer be indirect, we must clear metadata
+  // that is only appropriate for indirect calls. This includes !prof and
+  // !callees metadata.
+  CS.getInstruction()->setMetadata(LLVMContext::MD_prof, nullptr);
+  CS.getInstruction()->setMetadata(LLVMContext::MD_callees, nullptr);
+
+  // If the function type of the call site matches that of the callee, no
+  // additional work is required.
+  if (CS.getFunctionType() == Callee->getFunctionType())
+    return;
+
+  // Save the return types of the call site and callee.
+  Type *CallSiteRetTy = CS.getInstruction()->getType();
+  Type *CalleeRetTy = Callee->getReturnType();
+
+  // Change the function type of the call site to match that of the callee.
+  CS.mutateFunctionType(Callee->getFunctionType());
+
+  // Inspect the arguments of the call site. If an argument's type doesn't
+  // match the corresponding formal argument's type in the callee, bitcast it
+  // to the correct type.
+  for (Use &U : CS.args()) {
+    unsigned ArgNo = CS.getArgumentNo(&U);
+    Type *FormalTy = Callee->getFunctionType()->getParamType(ArgNo);
+    Type *ActualTy = U.get()->getType();
+    if (FormalTy != ActualTy) {
+      auto *Cast = CastInst::Create(Instruction::BitCast, U.get(), FormalTy, "",
+                                    CS.getInstruction());
+      CS.setArgument(ArgNo, Cast);
+    }
+  }
+
+  // If the return type of the call site doesn't match that of the callee, cast
+  // the returned value to the appropriate type.
+  if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy)
+    Cast = createRetBitCast(CS, CallSiteRetTy);
+}
+
+Instruction *llvm::promoteCallWithIfThenElse(CallSite CS, Function *Callee,
+                                             MDNode *BranchWeights) {
+
+  // Version the indirect call site. If the called value is equal to the given
+  // callee, 'NewInst' will be executed, otherwise the original call site will
+  // be executed.
+  BasicBlock *ThenBlock, *ElseBlock, *MergeBlock;
+  Instruction *NewInst = versionCallSite(CS, Callee, BranchWeights, ThenBlock,
+                                         ElseBlock, MergeBlock);
+
+  // Promote 'NewInst' so that it directly calls the desired function.
+  Instruction *Cast = NewInst;
+  promoteCall(CallSite(NewInst), Callee, Cast);
+
+  // If the original call site is an invoke instruction, we have to fix-up phi
+  // nodes in the invoke's normal and unwind destinations.
+  if (auto *OrigInvoke = dyn_cast<InvokeInst>(CS.getInstruction())) {
+    fixupPHINodeForNormalDest(OrigInvoke, MergeBlock, ElseBlock, Cast);
+    fixupPHINodeForUnwindDest(OrigInvoke, MergeBlock, ThenBlock, ElseBlock);
+  }
+
+  // Create a phi node for the returned value of the call site.
+  createRetPHINode(CS.getInstruction(), Cast ? Cast : NewInst);
+
+  // Return the new direct call.
+  return NewInst;
+}
+
+#undef DEBUG_TYPE
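Taken together, the two public entry points are meant to be used in sequence: check legality, then version and promote. A hypothetical caller (sketch only; the MDNode branch-weights argument may be null when no profile data is available):

#include "llvm/IR/CallSite.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"

using namespace llvm;

// Try to promote an indirect call site CS to a direct call of Callee.
static bool tryPromote(CallSite CS, Function *Callee, MDNode *BranchWeights) {
  const char *Reason = nullptr;
  if (!isLegalToPromote(CS, Callee, &Reason))
    return false; // Reason now points at e.g. "Return type mismatch".
  // The returned instruction is the new direct call in the "then" block;
  // the original indirect call survives in the "else" block.
  promoteCallWithIfThenElse(CS, Callee, BranchWeights);
  return true;
}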
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 9c4e13903ed7..3b19ba1b50f2 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -747,7 +747,7 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
   Function *F = OrigLoop->getHeader()->getParent();
   Loop *ParentLoop = OrigLoop->getParentLoop();
 
-  Loop *NewLoop = new Loop();
+  Loop *NewLoop = LI->AllocateLoop();
   if (ParentLoop)
     ParentLoop->addChildLoop(NewLoop);
   else
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index e5392b53050d..8fee10854229 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm-c/Core.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Module.h"
diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp
deleted file mode 100644
index d9294c499309..000000000000
--- a/lib/Transforms/Utils/CmpInstAnalysis.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-//===- CmpInstAnalysis.cpp - Utils to help fold compares ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file holds routines to help analyse compare instructions
-// and fold them into constants or other compare instructions
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instructions.h"
-
-using namespace llvm;
-
-unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) {
-  ICmpInst::Predicate Pred = InvertPred ? ICI->getInversePredicate()
-                                        : ICI->getPredicate();
-  switch (Pred) {
-      // False -> 0
-    case ICmpInst::ICMP_UGT: return 1;  // 001
-    case ICmpInst::ICMP_SGT: return 1;  // 001
-    case ICmpInst::ICMP_EQ:  return 2;  // 010
-    case ICmpInst::ICMP_UGE: return 3;  // 011
-    case ICmpInst::ICMP_SGE: return 3;  // 011
-    case ICmpInst::ICMP_ULT: return 4;  // 100
-    case ICmpInst::ICMP_SLT: return 4;  // 100
-    case ICmpInst::ICMP_NE:  return 5;  // 101
-    case ICmpInst::ICMP_ULE: return 6;  // 110
-    case ICmpInst::ICMP_SLE: return 6;  // 110
-      // True -> 7
-    default:
-      llvm_unreachable("Invalid ICmp predicate!");
-  }
-}
-
-Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
-                          CmpInst::Predicate &NewICmpPred) {
-  switch (Code) {
-    default: llvm_unreachable("Illegal ICmp code!");
-    case 0: // False.
-      return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
-    case 1: NewICmpPred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
-    case 2: NewICmpPred = ICmpInst::ICMP_EQ; break;
-    case 3: NewICmpPred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
-    case 4: NewICmpPred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
-    case 5: NewICmpPred = ICmpInst::ICMP_NE; break;
-    case 6: NewICmpPred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
-    case 7: // True.
-      return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
-  }
-  return nullptr;
-}
-
-bool llvm::PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
-  return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
-         (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
-         (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
-}
-
-bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred,
-                                Value *&X, Value *&Y, Value *&Z) {
-  ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1));
-  if (!C)
-    return false;
-
-  switch (I->getPredicate()) {
-  default:
-    return false;
-  case ICmpInst::ICMP_SLT:
-    // X < 0 is equivalent to (X & SignMask) != 0.
-    if (!C->isZero())
-      return false;
-    Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth()));
-    Pred = ICmpInst::ICMP_NE;
-    break;
-  case ICmpInst::ICMP_SGT:
-    // X > -1 is equivalent to (X & SignMask) == 0.
-    if (!C->isMinusOne())
-      return false;
-    Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth()));
-    Pred = ICmpInst::ICMP_EQ;
-    break;
-  case ICmpInst::ICMP_ULT:
-    // X <u 2^n is equivalent to (X & ~(2^n-1)) == 0.
-    if (!C->getValue().isPowerOf2())
-      return false;
-    Y = ConstantInt::get(I->getContext(), -C->getValue());
-    Pred = ICmpInst::ICMP_EQ;
-    break;
-  case ICmpInst::ICMP_UGT:
-    // X >u 2^n-1 is equivalent to (X & ~(2^n-1)) != 0.
-    if (!(C->getValue() + 1).isPowerOf2())
-      return false;
-    Y = ConstantInt::get(I->getContext(), ~C->getValue());
-    Pred = ICmpInst::ICMP_NE;
-    break;
-  }
-
-  X = I->getOperand(0);
-  Z = ConstantInt::getNullValue(C->getType());
-  return true;
-}
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 1189714dfab1..7a404241cb14 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -14,34 +14,57 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/CodeExtractor.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/RegionInfo.h"
-#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <map> #include <set> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "code-extractor" @@ -55,7 +78,8 @@ AggregateArgsOpt("aggregate-extracted-args", cl::Hidden, cl::desc("Aggregate arguments to code-extracted functions")); /// \brief Test whether a block is valid for extraction. -bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) { +bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB, + bool AllowVarArgs) { // Landing pads must be in the function where they were inserted for cleanup. if (BB.isEHPad()) return false; @@ -87,14 +111,19 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) { } } - // Don't hoist code containing allocas, invokes, or vastarts. + // Don't hoist code containing allocas or invokes. If explicitly requested, + // allow vastart. for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) { if (isa<AllocaInst>(I) || isa<InvokeInst>(I)) return false; if (const CallInst *CI = dyn_cast<CallInst>(I)) if (const Function *F = CI->getCalledFunction()) - if (F->getIntrinsicID() == Intrinsic::vastart) - return false; + if (F->getIntrinsicID() == Intrinsic::vastart) { + if (AllowVarArgs) + continue; + else + return false; + } } return true; @@ -102,21 +131,21 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) { /// \brief Build a set of blocks to extract if the input blocks are viable. static SetVector<BasicBlock *> -buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT) { +buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, + bool AllowVarArgs) { assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); SetVector<BasicBlock *> Result; // Loop over the blocks, adding them to our set-vector, and aborting with an // empty set if we encounter invalid blocks. for (BasicBlock *BB : BBs) { - // If this block is dead, don't process it. if (DT && !DT->isReachableFromEntry(BB)) continue; if (!Result.insert(BB)) llvm_unreachable("Repeated basic blocks in extraction input"); - if (!CodeExtractor::isBlockValidForExtraction(*BB)) { + if (!CodeExtractor::isBlockValidForExtraction(*BB, AllowVarArgs)) { Result.clear(); return Result; } @@ -138,16 +167,18 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT) { CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI) + BranchProbabilityInfo *BPI, bool AllowVarArgs) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)), NumExitBlocks(~0U) {} + BPI(BPI), AllowVarArgs(AllowVarArgs), + Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs)) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)), - NumExitBlocks(~0U) {} + BPI(BPI), AllowVarArgs(false), + Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, + /* AllowVarArgs */ false)) {} /// definedInRegion - Return true if the specified value is defined in the /// extracted region. 
@@ -202,7 +233,6 @@ bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
     if (Blocks.count(&BB))
       continue;
     for (Instruction &II : BB) {
-
       if (isa<DbgInfoIntrinsic>(II))
         continue;
@@ -287,7 +317,9 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
   BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock(
       CommonExitBlock->getFirstNonPHI()->getIterator());
 
-  for (auto *Pred : predecessors(CommonExitBlock)) {
+  for (auto PI = pred_begin(CommonExitBlock), PE = pred_end(CommonExitBlock);
+       PI != PE;) {
+    BasicBlock *Pred = *PI++;
     if (Blocks.count(Pred))
       continue;
     Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock);
@@ -373,7 +405,6 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
     // Follow the bitcast.
     Instruction *MarkerAddr = nullptr;
     for (User *U : AI->users()) {
-
       if (U->stripInBoundsConstantOffsets() == AI) {
         SinkLifeStart = false;
         HoistLifeEnd = false;
@@ -407,7 +438,6 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
 
 void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
                                       const ValueSet &SinkCands) const {
-
   for (BasicBlock *BB : Blocks) {
     // If a used value is defined outside the region, it's an input.  If an
     // instruction is used outside the region, it's an output.
@@ -457,7 +487,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
   // containing PHI nodes merging values from outside of the region, and a
   // second that contains all of the code for the block and merges back any
   // incoming values from inside of the region.
-  BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT);
+  BasicBlock *NewBB = SplitBlock(Header, Header->getFirstNonPHI(), DT);
 
   // We only want to code extract the second block now, and it becomes the new
   // header of the region.
@@ -525,7 +555,6 @@ void CodeExtractor::splitReturnBlocks() {
 
 /// constructFunction - make a function based on inputs and outputs, as follows:
 /// f(in0, ..., inN, out0, ..., outN)
-///
 Function *CodeExtractor::constructFunction(const ValueSet &inputs,
                                            const ValueSet &outputs,
                                            BasicBlock *header,
@@ -544,7 +573,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
   default: RetTy = Type::getInt16Ty(header->getContext()); break;
   }
 
-  std::vector<Type*> paramTy;
+  std::vector<Type *> paramTy;
 
   // Add the types of the input values to the function's argument list
   for (Value *value : inputs) {
@@ -575,7 +604,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
     paramTy.push_back(PointerType::getUnqual(StructTy));
   }
   FunctionType *funcType =
-                  FunctionType::get(RetTy, paramTy, false);
+                  FunctionType::get(RetTy, paramTy,
+                                    AllowVarArgs && oldFunction->isVarArg());
 
   // Create the new function
   Function *newFunction = Function::Create(funcType,
@@ -620,7 +650,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
     } else
       RewriteVal = &*AI++;
 
-    std::vector<User*> Users(inputs[i]->user_begin(), inputs[i]->user_end());
+    std::vector<User *> Users(inputs[i]->user_begin(), inputs[i]->user_end());
     for (User *use : Users)
       if (Instruction *inst = dyn_cast<Instruction>(use))
        if (Blocks.count(inst->getParent()))
@@ -639,7 +669,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
   // Rewrite branches to basic blocks outside of the loop to new dummy blocks
   // within the new function. This must be done before we lose track of which
   // blocks were originally in the code region.
-  std::vector<User*> Users(header->user_begin(), header->user_end());
+  std::vector<User *> Users(header->user_begin(), header->user_end());
   for (unsigned i = 0, e = Users.size(); i != e; ++i)
     // The BasicBlock which contains the branch is not in the region
     // modify the branch target to a new block
@@ -651,19 +681,6 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
   return newFunction;
 }
 
-/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI
-/// that uses the value within the basic block, and return the predecessor
-/// block associated with that use, or return 0 if none is found.
-static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) {
-  for (Use &U : Used->uses()) {
-     PHINode *P = dyn_cast<PHINode>(U.getUser());
-     if (P && P->getParent() == BB)
-       return P->getIncomingBlock(U);
-  }
-
-  return nullptr;
-}
-
 /// emitCallAndSwitchStatement - This method sets up the caller side by adding
 /// the call instruction, splitting any PHI nodes in the header block as
 /// necessary.
@@ -672,7 +689,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
                            ValueSet &inputs, ValueSet &outputs) {
   // Emit a call to the new function, passing in: *pointer to struct (if
   // aggregating parameters), or plain inputs and allocated memory for outputs
-  std::vector<Value*> params, StructValues, ReloadOutputs, Reloads;
+  std::vector<Value *> params, StructValues, ReloadOutputs, Reloads;
 
   Module *M = newFunction->getParent();
   LLVMContext &Context = M->getContext();
@@ -702,7 +719,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
   StructType *StructArgTy = nullptr;
   AllocaInst *Struct = nullptr;
   if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
-    std::vector<Type*> ArgTypes;
+    std::vector<Type *> ArgTypes;
     for (ValueSet::iterator v = StructValues.begin(),
            ve = StructValues.end(); v != ve; ++v)
       ArgTypes.push_back((*v)->getType());
@@ -729,6 +746,14 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
   // Emit the call to the function
   CallInst *call = CallInst::Create(newFunction, params,
                                     NumExitBlocks > 1 ? "targetBlock" : "");
+  // Add debug location to the new call, if the original function has debug
+  // info. In that case, the terminator of the entry block of the extracted
+  // function contains the first debug location of the extracted function,
+  // set in extractCodeRegion.
+  if (codeReplacer->getParent()->getSubprogram()) {
+    if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc())
+      call->setDebugLoc(DL);
+  }
   codeReplacer->getInstList().push_back(call);
 
   Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
@@ -736,7 +761,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
   if (!AggregateArgs)
     std::advance(OutputArgBegin, inputs.size());
 
-  // Reload the outputs passed in by reference
+  // Reload the outputs passed in by reference.
+  Function::arg_iterator OAI = OutputArgBegin;
   for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
     Value *Output = nullptr;
     if (AggregateArgs) {
@@ -753,12 +779,40 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
     LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
     Reloads.push_back(load);
     codeReplacer->getInstList().push_back(load);
-    std::vector<User*> Users(outputs[i]->user_begin(), outputs[i]->user_end());
+    std::vector<User *> Users(outputs[i]->user_begin(), outputs[i]->user_end());
     for (unsigned u = 0, e = Users.size(); u != e; ++u) {
       Instruction *inst = cast<Instruction>(Users[u]);
       if (!Blocks.count(inst->getParent()))
         inst->replaceUsesOfWith(outputs[i], load);
     }
+
+    // Store to argument right after the definition of output value.
+    auto *OutI = dyn_cast<Instruction>(outputs[i]);
+    if (!OutI)
+      continue;
+    // Find proper insertion point.
+    Instruction *InsertPt = OutI->getNextNode();
+    // Assume that no other non-PHI instructions are interleaved among the
+    // PHIs.
+    if (isa<PHINode>(InsertPt))
+      InsertPt = InsertPt->getParent()->getFirstNonPHI();
+
+    assert(OAI != newFunction->arg_end() &&
+           "Number of output arguments should match "
+           "the amount of defined values");
+    if (AggregateArgs) {
+      Value *Idx[2];
+      Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
+      GetElementPtrInst *GEP = GetElementPtrInst::Create(
+          StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), InsertPt);
+      new StoreInst(outputs[i], GEP, InsertPt);
+      // Since there should be only one struct argument aggregating
+      // all the output values, we shouldn't increment OAI, which always
+      // points to the struct argument, in this case.
+    } else {
+      new StoreInst(outputs[i], &*OAI, InsertPt);
+      ++OAI;
+    }
   }
 
   // Now we can emit a switch statement using the call as a value.
@@ -771,7 +825,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
   // over all of the blocks in the extracted region, updating any terminator
   // instructions in the to-be-extracted region that branch to blocks that are
   // not in the region to be extracted.
-  std::map<BasicBlock*, BasicBlock*> ExitBlockMap;
+  std::map<BasicBlock *, BasicBlock *> ExitBlockMap;
 
   unsigned switchVal = 0;
   for (BasicBlock *Block : Blocks) {
@@ -801,75 +855,12 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
           break;
         }
 
-        ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget);
+        ReturnInst::Create(Context, brVal, NewTarget);
 
         // Update the switch instruction.
         TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
                                             SuccNum),
                            OldTarget);
-
-        // Restore values just before we exit
-        Function::arg_iterator OAI = OutputArgBegin;
-        for (unsigned out = 0, e = outputs.size(); out != e; ++out) {
-          // For an invoke, the normal destination is the only one that is
-          // dominated by the result of the invocation
-          BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent();
-
-          bool DominatesDef = true;
-
-          BasicBlock *NormalDest = nullptr;
-          if (auto *Invoke = dyn_cast<InvokeInst>(outputs[out]))
-            NormalDest = Invoke->getNormalDest();
-
-          if (NormalDest) {
-            DefBlock = NormalDest;
-
-            // Make sure we are looking at the original successor block, not
-            // at a newly inserted exit block, which won't be in the dominator
-            // info.
- for (const auto &I : ExitBlockMap) - if (DefBlock == I.second) { - DefBlock = I.first; - break; - } - - // In the extract block case, if the block we are extracting ends - // with an invoke instruction, make sure that we don't emit a - // store of the invoke value for the unwind block. - if (!DT && DefBlock != OldTarget) - DominatesDef = false; - } - - if (DT) { - DominatesDef = DT->dominates(DefBlock, OldTarget); - - // If the output value is used by a phi in the target block, - // then we need to test for dominance of the phi's predecessor - // instead. Unfortunately, this a little complicated since we - // have already rewritten uses of the value to uses of the reload. - BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out], - OldTarget); - if (pred && DT && DT->dominates(DefBlock, pred)) - DominatesDef = true; - } - - if (DominatesDef) { - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), - FirstOut+out); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*OAI, Idx, "gep_" + outputs[out]->getName(), - NTRet); - new StoreInst(outputs[out], GEP, NTRet); - } else { - new StoreInst(outputs[out], &*OAI, NTRet); - } - } - // Advance output iterator even if we don't emit a store - if (!AggregateArgs) ++OAI; - } } // rewrite the original branch instruction with this new target @@ -940,8 +931,8 @@ void CodeExtractor::calculateNewCallTerminatorWeights( BasicBlock *CodeReplacer, DenseMap<BasicBlock *, BlockFrequency> &ExitWeights, BranchProbabilityInfo *BPI) { - typedef BlockFrequencyInfoImplBase::Distribution Distribution; - typedef BlockFrequencyInfoImplBase::BlockNode BlockNode; + using Distribution = BlockFrequencyInfoImplBase::Distribution; + using BlockNode = BlockFrequencyInfoImplBase::BlockNode; // Update the branch weights for the exit block. TerminatorInst *TI = CodeReplacer->getTerminator(); @@ -985,12 +976,31 @@ Function *CodeExtractor::extractCodeRegion() { if (!isEligible()) return nullptr; - ValueSet inputs, outputs, SinkingCands, HoistingCands; - BasicBlock *CommonExit = nullptr; - // Assumption: this is a single-entry code region, and the header is the first // block in the region. BasicBlock *header = *Blocks.begin(); + Function *oldFunction = header->getParent(); + + // For functions with varargs, check that varargs handling is only done in the + // outlined function, i.e vastart and vaend are only used in outlined blocks. + if (AllowVarArgs && oldFunction->getFunctionType()->isVarArg()) { + auto containsVarArgIntrinsic = [](Instruction &I) { + if (const CallInst *CI = dyn_cast<CallInst>(&I)) + if (const Function *F = CI->getCalledFunction()) + return F->getIntrinsicID() == Intrinsic::vastart || + F->getIntrinsicID() == Intrinsic::vaend; + return false; + }; + + for (auto &BB : *oldFunction) { + if (Blocks.count(&BB)) + continue; + if (llvm::any_of(BB, containsVarArgIntrinsic)) + return nullptr; + } + } + ValueSet inputs, outputs, SinkingCands, HoistingCands; + BasicBlock *CommonExit = nullptr; // Calculate the entry frequency of the new function before we change the root // block. @@ -1012,8 +1022,6 @@ Function *CodeExtractor::extractCodeRegion() { // that the return is not in the region. 
splitReturnBlocks(); - Function *oldFunction = header->getParent(); - // This takes place of the original loop BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, @@ -1023,7 +1031,22 @@ Function *CodeExtractor::extractCodeRegion() { // head of the region, but the entry node of a function cannot have preds. BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), "newFuncRoot"); - newFuncRoot->getInstList().push_back(BranchInst::Create(header)); + auto *BranchI = BranchInst::Create(header); + // If the original function has debug info, we have to add a debug location + // to the new branch instruction from the artificial entry block. + // We use the debug location of the first instruction in the extracted + // blocks, as there is no other equivalent line in the source code. + if (oldFunction->getSubprogram()) { + any_of(Blocks, [&BranchI](const BasicBlock *BB) { + return any_of(*BB, [&BranchI](const Instruction &I) { + if (!I.getDebugLoc()) + return false; + BranchI->setDebugLoc(I.getDebugLoc()); + return true; + }); + }); + } + newFuncRoot->getInstList().push_back(BranchI); findAllocas(SinkingCands, HoistingCands, CommonExit); assert(HoistingCands.empty() || CommonExit); @@ -1044,7 +1067,7 @@ Function *CodeExtractor::extractCodeRegion() { } // Calculate the exit blocks for the extracted region and the total exit - // weights for each of those blocks. + // weights for each of those blocks. DenseMap<BasicBlock *, BlockFrequency> ExitWeights; SmallPtrSet<BasicBlock *, 1> ExitBlocks; for (BasicBlock *Block : Blocks) { @@ -1097,8 +1120,8 @@ Function *CodeExtractor::extractCodeRegion() { // Look at all successors of the codeReplacer block. If any of these blocks // had PHI nodes in them, we need to update the "from" block to be the code // replacer, not the original block in the extracted region. - std::vector<BasicBlock*> Succs(succ_begin(codeReplacer), - succ_end(codeReplacer)); + std::vector<BasicBlock *> Succs(succ_begin(codeReplacer), + succ_end(codeReplacer)); for (unsigned i = 0, e = Succs.size(); i != e; ++i) for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp index 6642a97a29c2..82b67c293102 100644 --- a/lib/Transforms/Utils/CtorUtils.cpp +++ b/lib/Transforms/Utils/CtorUtils.cpp @@ -16,7 +16,6 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/lib/Transforms/Utils/EntryExitInstrumenter.cpp new file mode 100644 index 000000000000..421663f82565 --- /dev/null +++ b/lib/Transforms/Utils/EntryExitInstrumenter.cpp @@ -0,0 +1,163 @@ +//===- EntryExitInstrumenter.cpp - Function Entry/Exit Instrumentation ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/EntryExitInstrumenter.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" +using namespace llvm; + +static void insertCall(Function &CurFn, StringRef Func, + Instruction *InsertionPt, DebugLoc DL) { + Module &M = *InsertionPt->getParent()->getParent()->getParent(); + LLVMContext &C = InsertionPt->getParent()->getContext(); + + if (Func == "mcount" || + Func == ".mcount" || + Func == "\01__gnu_mcount_nc" || + Func == "\01_mcount" || + Func == "\01mcount" || + Func == "__mcount" || + Func == "_mcount" || + Func == "__cyg_profile_func_enter_bare") { + Constant *Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C)); + CallInst *Call = CallInst::Create(Fn, "", InsertionPt); + Call->setDebugLoc(DL); + return; + } + + if (Func == "__cyg_profile_func_enter" || Func == "__cyg_profile_func_exit") { + Type *ArgTypes[] = {Type::getInt8PtrTy(C), Type::getInt8PtrTy(C)}; + + Constant *Fn = M.getOrInsertFunction( + Func, FunctionType::get(Type::getVoidTy(C), ArgTypes, false)); + + Instruction *RetAddr = CallInst::Create( + Intrinsic::getDeclaration(&M, Intrinsic::returnaddress), + ArrayRef<Value *>(ConstantInt::get(Type::getInt32Ty(C), 0)), "", + InsertionPt); + RetAddr->setDebugLoc(DL); + + Value *Args[] = {ConstantExpr::getBitCast(&CurFn, Type::getInt8PtrTy(C)), + RetAddr}; + + CallInst *Call = + CallInst::Create(Fn, ArrayRef<Value *>(Args), "", InsertionPt); + Call->setDebugLoc(DL); + return; + } + + // We only know how to call a fixed set of instrumentation functions, because + // they all expect different arguments, etc. + report_fatal_error(Twine("Unknown instrumentation function: '") + Func + "'"); +} + +static bool runOnFunction(Function &F, bool PostInlining) { + StringRef EntryAttr = PostInlining ? "instrument-function-entry-inlined" + : "instrument-function-entry"; + + StringRef ExitAttr = PostInlining ? "instrument-function-exit-inlined" + : "instrument-function-exit"; + + StringRef EntryFunc = F.getFnAttribute(EntryAttr).getValueAsString(); + StringRef ExitFunc = F.getFnAttribute(ExitAttr).getValueAsString(); + + bool Changed = false; + + // If the attribute is specified, insert instrumentation and then "consume" + // the attribute so that it's not inserted again if the pass should happen to + // run later for some reason. 
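To make the attribute handling concrete, here is a hedged sketch of the rewrite performed for the entry case; the function @foo and its body are hypothetical, while the attribute string and callee name are among those recognized above:

    ; before
    define void @foo() #0 {
      ret void
    }
    attributes #0 = { "instrument-function-entry-inlined"="mcount" }

    ; after: call inserted at the first insertion point, attribute consumed
    define void @foo() {
      call void @mcount()
      ret void
    }

For the __cyg_profile_func_enter/__cyg_profile_func_exit pair, insertCall instead builds a two-argument call, passing the instrumented function's address and the return address obtained through the llvm.returnaddress intrinsic, as the code above shows.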
+ + if (!EntryFunc.empty()) { + DebugLoc DL; + if (auto SP = F.getSubprogram()) + DL = DebugLoc::get(SP->getScopeLine(), 0, SP); + + insertCall(F, EntryFunc, &*F.begin()->getFirstInsertionPt(), DL); + Changed = true; + F.removeAttribute(AttributeList::FunctionIndex, EntryAttr); + } + + if (!ExitFunc.empty()) { + for (BasicBlock &BB : F) { + TerminatorInst *T = BB.getTerminator(); + DebugLoc DL; + if (DebugLoc TerminatorDL = T->getDebugLoc()) + DL = TerminatorDL; + else if (auto SP = F.getSubprogram()) + DL = DebugLoc::get(0, 0, SP); + + if (isa<ReturnInst>(T)) { + insertCall(F, ExitFunc, T, DL); + Changed = true; + } + } + F.removeAttribute(AttributeList::FunctionIndex, ExitAttr); + } + + return Changed; +} + +namespace { +struct EntryExitInstrumenter : public FunctionPass { + static char ID; + EntryExitInstrumenter() : FunctionPass(ID) { + initializeEntryExitInstrumenterPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<GlobalsAAWrapperPass>(); + } + bool runOnFunction(Function &F) override { return ::runOnFunction(F, false); } +}; +char EntryExitInstrumenter::ID = 0; + +struct PostInlineEntryExitInstrumenter : public FunctionPass { + static char ID; + PostInlineEntryExitInstrumenter() : FunctionPass(ID) { + initializePostInlineEntryExitInstrumenterPass( + *PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<GlobalsAAWrapperPass>(); + } + bool runOnFunction(Function &F) override { return ::runOnFunction(F, true); } +}; +char PostInlineEntryExitInstrumenter::ID = 0; +} + +INITIALIZE_PASS( + EntryExitInstrumenter, "ee-instrument", + "Instrument function entry/exit with calls to e.g. mcount() (pre inlining)", + false, false) +INITIALIZE_PASS(PostInlineEntryExitInstrumenter, "post-inline-ee-instrument", + "Instrument function entry/exit with calls to e.g. 
mcount() " + "(post inlining)", + false, false) + +FunctionPass *llvm::createEntryExitInstrumenterPass() { + return new EntryExitInstrumenter(); +} + +FunctionPass *llvm::createPostInlineEntryExitInstrumenterPass() { + return new PostInlineEntryExitInstrumenter(); +} + +PreservedAnalyses +llvm::EntryExitInstrumenterPass::run(Function &F, FunctionAnalysisManager &AM) { + runOnFunction(F, PostInlining); + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; +} diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp index 1328f2f3ec01..3c5e299fae98 100644 --- a/lib/Transforms/Utils/Evaluator.cpp +++ b/lib/Transforms/Utils/Evaluator.cpp @@ -12,19 +12,33 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Evaluator.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include <iterator> #define DEBUG_TYPE "evaluator" @@ -193,7 +207,7 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) { bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB) { // This is the main evaluation loop. - while (1) { + while (true) { Constant *InstResult = nullptr; DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); @@ -318,7 +332,6 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult << "\n"); } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { - if (!LI->isSimple()) { DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); return false; // no volatile/atomic accesses. @@ -344,9 +357,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, return false; // Cannot handle array allocs. } Type *Ty = AI->getAllocatedType(); - AllocaTmps.push_back( - make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage, - UndefValue::get(Ty), AI->getName())); + AllocaTmps.push_back(llvm::make_unique<GlobalVariable>( + Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), + AI->getName())); InstResult = AllocaTmps.back().get(); DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) { @@ -420,6 +433,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, DEBUG(dbgs() << "Skipping assume intrinsic.\n"); ++CurInst; continue; + } else if (II->getIntrinsicID() == Intrinsic::sideeffect) { + DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n"); + ++CurInst; + continue; } DEBUG(dbgs() << "Unknown intrinsic. 
Can not evaluate.\n"); @@ -559,7 +576,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, BasicBlock::iterator CurInst = CurBB->begin(); - while (1) { + while (true) { BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings. DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n"); @@ -594,4 +611,3 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, CurBB = NextBB; } } - diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp index 435eff3bef47..5fdcc6d1d727 100644 --- a/lib/Transforms/Utils/FlattenCFG.cpp +++ b/lib/Transforms/Utils/FlattenCFG.cpp @@ -1,4 +1,4 @@ -//===- FlatternCFG.cpp - Code to perform CFG flattening ---------------===// +//===- FlatternCFG.cpp - Code to perform CFG flattening -------------------===// // // The LLVM Compiler Infrastructure // @@ -14,25 +14,37 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include <cassert> + using namespace llvm; #define DEBUG_TYPE "flattencfg" namespace { + class FlattenCFGOpt { AliasAnalysis *AA; + /// \brief Use parallel-and or parallel-or to generate conditions for /// conditional branches. bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder); + /// \brief If \param BB is the merge block of an if-region, attempt to merge /// the if-region with an adjacent if-region upstream if two if-regions /// contain identical instructions. bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder); + /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which /// are from two if-regions whose entry blocks are \p Head1 and \p /// Head2. \returns true if \p Block1 and \p Block2 contain identical @@ -43,9 +55,11 @@ class FlattenCFGOpt { public: FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {} + bool run(BasicBlock *BB); }; -} + +} // end anonymous namespace /// If \param [in] BB has more than one predecessor that is a conditional /// branch, attempt to use parallel and/or for the branch condition. \returns @@ -120,7 +134,6 @@ public: /// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as /// its predecessor. In Case 2, \param BB (BB3) only has conditional branches /// as its predecessors. -/// bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) { PHINode *PHI = dyn_cast<PHINode>(BB->begin()); if (PHI) @@ -237,8 +250,8 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) { // Do branch inversion. BasicBlock *CurrBlock = LastCondBlock; bool EverChanged = false; - for (;CurrBlock != FirstCondBlock; - CurrBlock = CurrBlock->getSinglePredecessor()) { + for (; CurrBlock != FirstCondBlock; + CurrBlock = CurrBlock->getSinglePredecessor()) { BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator()); CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); if (!CI) @@ -309,7 +322,6 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) { // in the 2nd if-region to compare. 
\returns true if \param Block1 and \param /// Block2 have identical instructions and do not have memory reference alias /// with \param Head2. -/// bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, BasicBlock *Block1, BasicBlock *Block2) { @@ -330,7 +342,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, BasicBlock::iterator iter2 = Block2->begin(); BasicBlock::iterator end2 = Block2->getTerminator()->getIterator(); - while (1) { + while (true) { if (iter1 == end1) { if (iter2 != end2) return false; @@ -384,7 +396,6 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, /// To: /// if (a || b) /// statement; -/// bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { BasicBlock *IfTrue2, *IfFalse2; Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2); @@ -475,8 +486,7 @@ bool FlattenCFGOpt::run(BasicBlock *BB) { /// FlattenCFG - This function is used to flatten a CFG. For /// example, it uses parallel-and and parallel-or mode to collapse -// if-conditions and merge if-regions with identical statements. -/// +/// if-conditions and merge if-regions with identical statements. bool llvm::FlattenCFG(BasicBlock *BB, AliasAnalysis *AA) { return FlattenCFGOpt(AA).run(BB); } diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp index 4a2be3a53176..bddcbd86e914 100644 --- a/lib/Transforms/Utils/FunctionComparator.cpp +++ b/lib/Transforms/Utils/FunctionComparator.cpp @@ -13,13 +13,41 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/FunctionComparator.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <utility> using namespace llvm; @@ -160,7 +188,6 @@ int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L, /// For more details see declaration comments. 
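A note on the idiom used by cmpConstants and the other cmp* helpers below: each returns -1, 0, or 1, and callers chain comparisons so that a later key is consulted only when the earlier one ties, producing the total order that MergeFunctions sorts by. A self-contained sketch of the pattern (cmpTwoKeys is a hypothetical name; cmpNumbers mirrors the real helper's semantics):

    static int cmpNumbers(uint64_t L, uint64_t R) {
      if (L < R) return -1;
      if (L > R) return 1;
      return 0;
    }

    // Fall through to the secondary key only when the primary key is equal.
    static int cmpTwoKeys(uint64_t L1, uint64_t R1, uint64_t L2, uint64_t R2) {
      if (int Res = cmpNumbers(L1, R1))
        return Res;
      return cmpNumbers(L2, R2);
    }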
int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) const { - Type *TyL = L->getType(); Type *TyR = R->getType(); @@ -226,8 +253,8 @@ int FunctionComparator::cmpConstants(const Constant *L, if (!L->isNullValue() && R->isNullValue()) return -1; - auto GlobalValueL = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(L)); - auto GlobalValueR = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(R)); + auto GlobalValueL = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(L)); + auto GlobalValueR = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(R)); if (GlobalValueL && GlobalValueR) { return cmpGlobalValues(GlobalValueL, GlobalValueR); } @@ -401,10 +428,9 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { case Type::TokenTyID: return 0; - case Type::PointerTyID: { + case Type::PointerTyID: assert(PTyL && PTyR && "Both types must be pointers here."); return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace()); - } case Type::StructTyID: { StructType *STyL = cast<StructType>(TyL); @@ -637,7 +663,6 @@ int FunctionComparator::cmpOperations(const Instruction *L, // Read method declaration comments for more details. int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR) const { - unsigned int ASL = GEPL->getPointerAddressSpace(); unsigned int ASR = GEPR->getPointerAddressSpace(); @@ -869,15 +894,19 @@ namespace { // buffer. class HashAccumulator64 { uint64_t Hash; + public: // Initialize to random constant, so the state isn't zero. HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; } + void add(uint64_t V) { - Hash = llvm::hashing::detail::hash_16_bytes(Hash, V); + Hash = hashing::detail::hash_16_bytes(Hash, V); } + // No finishing is required, because the entire hash value is used. uint64_t getHash() { return Hash; } }; + } // end anonymous namespace // A function hash is calculated by considering only the number of arguments and @@ -919,5 +948,3 @@ FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) { } return H.getHash(); } - - diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp index a98d07237b47..6b5f593073b4 100644 --- a/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -13,9 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/FunctionImportUtils.h" -#include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" using namespace llvm; /// Checks if we should import SGV as a definition, otherwise import as a @@ -23,21 +21,15 @@ using namespace llvm; bool FunctionImportGlobalProcessing::doImportAsDefinition( const GlobalValue *SGV, SetVector<GlobalValue *> *GlobalsToImport) { - // For alias, we tie the definition to the base object. Extract it and recurse - if (auto *GA = dyn_cast<GlobalAlias>(SGV)) { - if (GA->isInterposable()) - return false; - const GlobalObject *GO = GA->getBaseObject(); - if (!GO->hasLinkOnceODRLinkage()) - return false; - return FunctionImportGlobalProcessing::doImportAsDefinition( - GO, GlobalsToImport); - } // Only import the globals requested for importing. - if (GlobalsToImport->count(const_cast<GlobalValue *>(SGV))) - return true; - // Otherwise no. - return false; + if (!GlobalsToImport->count(const_cast<GlobalValue *>(SGV))) + return false; + + assert(!isa<GlobalAlias>(SGV) && + "Unexpected global alias in the import list."); + + // Otherwise yes. 
+ return true; } bool FunctionImportGlobalProcessing::doImportAsDefinition( @@ -132,8 +124,10 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV, return SGV->getLinkage(); switch (SGV->getLinkage()) { + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: case GlobalValue::ExternalLinkage: - // External defnitions are converted to available_externally + // External and linkonce definitions are converted to available_externally // definitions upon import, so that they are available for inlining // and/or optimization, but are turned into declarations later // during the EliminateAvailableExternally pass. @@ -150,12 +144,6 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV, // An imported available_externally declaration stays that way. return SGV->getLinkage(); - case GlobalValue::LinkOnceAnyLinkage: - case GlobalValue::LinkOnceODRLinkage: - // These both stay the same when importing the definition. - // The ThinLTO pass will eventually force-import their definitions. - return SGV->getLinkage(); - case GlobalValue::WeakAnyLinkage: // Can't import weak_any definitions correctly, or we might change the // program semantics, since the linker will pick the first weak_any @@ -213,6 +201,23 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV, } void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) { + + // Check the summaries to see if the symbol gets resolved to a known local + // definition. + if (GV.hasName()) { + ValueInfo VI = ImportIndex.getValueInfo(GV.getGUID()); + if (VI) { + // Need to check all summaries are local in case of hash collisions. + bool IsLocal = VI.getSummaryList().size() && + llvm::all_of(VI.getSummaryList(), + [](const std::unique_ptr<GlobalValueSummary> &Summary) { + return Summary->isDSOLocal(); + }); + if (IsLocal) + GV.setDSOLocal(true); + } + } + bool DoPromote = false; if (GV.hasLocalLinkage() && ((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) { diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 2a18c140c788..fedf6e100d6c 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -12,11 +12,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -26,25 +30,46 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Attributes.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" 
#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <limits> +#include <string> +#include <utility> +#include <vector> using namespace llvm; @@ -62,28 +87,37 @@ bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI, AAResults *CalleeAAR, bool InsertLifetime) { return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime); } + bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, AAResults *CalleeAAR, bool InsertLifetime) { return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime); } namespace { + /// A class for recording information about inlining a landing pad. class LandingPadInliningInfo { - BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind. - BasicBlock *InnerResumeDest; ///< Destination for the callee's resume. - LandingPadInst *CallerLPad; ///< LandingPadInst associated with the invoke. - PHINode *InnerEHValuesPHI; ///< PHI for EH values from landingpad insts. + /// Destination of the invoke's unwind. + BasicBlock *OuterResumeDest; + + /// Destination for the callee's resume. + BasicBlock *InnerResumeDest = nullptr; + + /// LandingPadInst associated with the invoke. + LandingPadInst *CallerLPad = nullptr; + + /// PHI for EH values from landingpad insts. + PHINode *InnerEHValuesPHI = nullptr; + SmallVector<Value*, 8> UnwindDestPHIValues; public: LandingPadInliningInfo(InvokeInst *II) - : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr), - CallerLPad(nullptr), InnerEHValuesPHI(nullptr) { + : OuterResumeDest(II->getUnwindDest()) { // If there are PHI nodes in the unwind destination block, we need to keep // track of which values came into them from the invoke before removing // the edge from this block. - llvm::BasicBlock *InvokeBB = II->getParent(); + BasicBlock *InvokeBB = II->getParent(); BasicBlock::iterator I = OuterResumeDest->begin(); for (; isa<PHINode>(I); ++I) { // Save the value to use for this edge. @@ -126,7 +160,8 @@ namespace { } } }; -} // anonymous namespace + +} // end anonymous namespace /// Get or create a target for the branch from ResumeInsts. BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { @@ -189,7 +224,7 @@ static Value *getParentPad(Value *EHPad) { return cast<CatchSwitchInst>(EHPad)->getParentPad(); } -typedef DenseMap<Instruction *, Value *> UnwindDestMemoTy; +using UnwindDestMemoTy = DenseMap<Instruction *, Value *>; /// Helper for getUnwindDestToken that does the descendant-ward part of /// the search. @@ -617,7 +652,7 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, // track of which values came into them from the invoke before removing the // edge from this block. SmallVector<Value *, 8> UnwindDestPHIValues; - llvm::BasicBlock *InvokeBB = II->getParent(); + BasicBlock *InvokeBB = II->getParent(); for (Instruction &I : *UnwindDest) { // Save the value to use for this edge. 
PHINode *PHI = dyn_cast<PHINode>(&I); @@ -1359,6 +1394,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, } } } + /// Update the block frequencies of the caller after a callee has been inlined. /// /// Each block cloned into the caller has its block frequency scaled by the @@ -1454,7 +1490,8 @@ static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB, /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, - AAResults *CalleeAAR, bool InsertLifetime) { + AAResults *CalleeAAR, bool InsertLifetime, + Function *ForwardVarArgsTo) { Instruction *TheCall = CS.getInstruction(); assert(TheCall->getParent() && TheCall->getFunction() && "Instruction not in function!"); @@ -1464,8 +1501,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, Function *CalledFunc = CS.getCalledFunction(); if (!CalledFunc || // Can't inline external function or indirect - CalledFunc->isDeclaration() || // call, or call to a vararg function! - CalledFunc->getFunctionType()->isVarArg()) return false; + CalledFunc->isDeclaration() || + (!ForwardVarArgsTo && CalledFunc->isVarArg())) // call, or call to a vararg function! + return false; // The inliner does not know how to inline through calls with operand bundles // in general ... @@ -1592,8 +1630,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, auto &DL = Caller->getParent()->getDataLayout(); - assert(CalledFunc->arg_size() == CS.arg_size() && - "No varargs calls can be inlined!"); + assert((CalledFunc->arg_size() == CS.arg_size() || ForwardVarArgsTo) && + "Varargs calls can only be inlined if the Varargs are forwarded!"); // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. @@ -1772,9 +1810,15 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Move any dbg.declares describing the allocas into the entry basic block. DIBuilder DIB(*Caller->getParent()); for (auto &AI : IFI.StaticAllocas) - replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false); + replaceDbgDeclareForAlloca(AI, AI, DIB, DIExpression::NoDeref, 0, + DIExpression::NoDeref); } + SmallVector<Value*,4> VarArgsToForward; + for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); + i < CS.getNumArgOperands(); i++) + VarArgsToForward.push_back(CS.getArgOperand(i)); + bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; if (InlinedFunctionInfo.ContainsCalls) { CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; @@ -1783,7 +1827,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) { - for (Instruction &I : *BB) { + for (auto II = BB->begin(); II != BB->end();) { + Instruction &I = *II++; CallInst *CI = dyn_cast<CallInst>(&I); if (!CI) continue; @@ -1806,7 +1851,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // f -> g -> musttail f ==> f -> f // f -> g -> tail f ==> f -> f CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); - ChildTCK = std::min(CallSiteTailKind, ChildTCK); + if (ChildTCK != CallInst::TCK_NoTail) + ChildTCK = std::min(CallSiteTailKind, ChildTCK); CI->setTailCallKind(ChildTCK); InlinedMustTailCalls |= CI->isMustTailCall(); @@ -1814,6 +1860,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // 'nounwind'. 
if (MarkNoUnwind) CI->setDoesNotThrow(); + + if (ForwardVarArgsTo && !VarArgsToForward.empty() && + CI->getCalledFunction() == ForwardVarArgsTo) { + SmallVector<Value*, 6> Params(CI->arg_operands()); + Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); + CallInst *Call = CallInst::Create(CI->getCalledFunction(), Params, "", CI); + Call->setDebugLoc(CI->getDebugLoc()); + CI->replaceAllUsesWith(Call); + CI->eraseFromParent(); + } } } } @@ -1848,8 +1904,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Check that array size doesn't saturate uint64_t and doesn't // overflow when it's multiplied by type size. - if (AllocaArraySize != ~0ULL && - UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { + if (AllocaArraySize != std::numeric_limits<uint64_t>::max() && + std::numeric_limits<uint64_t>::max() / AllocaArraySize >= + AllocaTypeSize) { AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), AllocaArraySize * AllocaTypeSize); } @@ -1980,7 +2037,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // match the callee's return type, we also need to change the return type of // the intrinsic. if (Caller->getReturnType() == TheCall->getType()) { - auto NewEnd = remove_if(Returns, [](ReturnInst *RI) { + auto NewEnd = llvm::remove_if(Returns, [](ReturnInst *RI) { return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; }); Returns.erase(NewEnd, Returns.end()); diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index 089f2b5f3b18..ae0e2bb6c280 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -56,9 +56,10 @@ static bool VerifyLoopLCSSA = true; #else static bool VerifyLoopLCSSA = false; #endif -static cl::opt<bool,true> -VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA), - cl::desc("Verify loop lcssa form (time consuming)")); +static cl::opt<bool, true> + VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA), + cl::Hidden, + cl::desc("Verify loop lcssa form (time consuming)")); /// Return true if the specified block is in the list. 
static bool isExitBlock(BasicBlock *BB, diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 74610613001c..a1961eecb391 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -1,4 +1,4 @@ -//===-- Local.cpp - Functions to perform local transformations ------------===// +//===- Local.cpp - Functions to perform local transformations -------------===// // // The LLVM Compiler Infrastructure // @@ -13,42 +13,74 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/GlobalObject.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <climits> +#include <cstdint> +#include <iterator> +#include <map> +#include <utility> + using namespace llvm; using namespace llvm::PatternMatch; @@ -282,7 +314,6 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, return false; } - //===----------------------------------------------------------------------===// // Local dead code elimination. // @@ -541,7 +572,6 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, // Control Flow Graph Restructuring. // - /// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this /// method is called when we're about to delete Pred as a predecessor of BB. If /// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred. 
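A worked example of the PHI maintenance just described, with hypothetical block and value names: when %pred is about to be removed as a predecessor of %bb,

    bb:
      %x = phi i32 [ 0, %pred ], [ 1, %other ]

the [ 0, %pred ] entry is dropped, leaving a single-entry PHI

    bb:
      %x = phi i32 [ 1, %other ]

which is then folded by replacing all uses of %x with its sole incoming value.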
@@ -578,12 +608,10 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) { } } - /// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its /// predecessor is known to have one successor (DestBB!). Eliminate the edge /// between them, moving the instructions in the predecessor into DestBB and /// deleting the predecessor block. -/// void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { // If BB has single-entry PHI nodes, fold them. while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { @@ -602,7 +630,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { if (DestBB->hasAddressTaken()) { BlockAddress *BA = BlockAddress::get(DestBB); Constant *Replacement = - ConstantInt::get(llvm::Type::getInt32Ty(BA->getContext()), 1); + ConstantInt::get(Type::getInt32Ty(BA->getContext()), 1); BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement, BA->getType())); BA->destroyConstant(); @@ -621,9 +649,13 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { DestBB->moveAfter(PredBB); if (DT) { - BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock(); - DT->changeImmediateDominator(DestBB, PredBBIDom); - DT->eraseNode(PredBB); + // For some irreducible CFG we end up having forward-unreachable blocks + // so check if getNode returns a valid node before updating the domtree. + if (DomTreeNode *DTN = DT->getNode(PredBB)) { + BasicBlock *PredBBIDom = DTN->getIDom()->getBlock(); + DT->changeImmediateDominator(DestBB, PredBBIDom); + DT->eraseNode(PredBB); + } } // Nuke BB. PredBB->eraseFromParent(); @@ -640,7 +672,6 @@ static bool CanMergeValues(Value *First, Value *Second) { /// almost-empty BB ending in an unconditional branch to Succ, into Succ. /// /// Assumption: Succ is the single successor for BB. -/// static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); @@ -696,8 +727,8 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { return true; } -typedef SmallVector<BasicBlock *, 16> PredBlockVector; -typedef DenseMap<BasicBlock *, Value *> IncomingValueMap; +using PredBlockVector = SmallVector<BasicBlock *, 16>; +using IncomingValueMap = DenseMap<BasicBlock *, Value *>; /// \brief Determines the value to use as the phi node input for a block. /// @@ -927,7 +958,6 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { /// nodes in this block. This doesn't try to be clever about PHI nodes /// which differ only in the order of the incoming values, but instcombine /// orders them so it usually won't matter. -/// bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { // This implementation doesn't currently consider undef operands // specially. Theoretically, two phis which are identical except for @@ -937,9 +967,11 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { static PHINode *getEmptyKey() { return DenseMapInfo<PHINode *>::getEmptyKey(); } + static PHINode *getTombstoneKey() { return DenseMapInfo<PHINode *>::getTombstoneKey(); } + static unsigned getHashValue(PHINode *PN) { // Compute a hash value on the operands. 
Instcombine will likely have // sorted them, which helps expose duplicates, but we have to check all @@ -948,6 +980,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { hash_combine_range(PN->value_op_begin(), PN->value_op_end()), hash_combine_range(PN->block_begin(), PN->block_end()))); } + static bool isEqual(PHINode *LHS, PHINode *RHS) { if (LHS == getEmptyKey() || LHS == getTombstoneKey() || RHS == getEmptyKey() || RHS == getTombstoneKey()) @@ -984,7 +1017,6 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { /// often possible though. If alignment is important, a more reliable approach /// is to simply align all global variables and allocation instructions to /// their preferred alignment from the beginning. -/// static unsigned enforceKnownAlignment(Value *V, unsigned Align, unsigned PrefAlign, const DataLayout &DL) { @@ -1068,12 +1100,11 @@ static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr, // Since we can't guarantee that the original dbg.declare instrinsic // is removed by LowerDbgDeclare(), we need to make sure that we are // not inserting the same dbg.value intrinsic over and over. - llvm::BasicBlock::InstListType::iterator PrevI(I); + BasicBlock::InstListType::iterator PrevI(I); if (PrevI != I->getParent()->getInstList().begin()) { --PrevI; if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI)) if (DVI->getValue() == I->getOperand(0) && - DVI->getOffset() == 0 && DVI->getVariable() == DIVar && DVI->getExpression() == DIExpr) return true; @@ -1092,7 +1123,6 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar, findDbgValues(DbgValues, APN); for (auto *DVI : DbgValues) { assert(DVI->getValue() == APN); - assert(DVI->getOffset() == 0); if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr)) return true; } @@ -1100,12 +1130,13 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar, } /// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value -/// that has an associated llvm.dbg.decl intrinsic. -void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, +/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic. +void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII, StoreInst *SI, DIBuilder &Builder) { - auto *DIVar = DDI->getVariable(); + assert(DII->isAddressOfVariable()); + auto *DIVar = DII->getVariable(); assert(DIVar && "Missing variable"); - auto *DIExpr = DDI->getExpression(); + auto *DIExpr = DII->getExpression(); Value *DV = SI->getOperand(0); // If an argument is zero extended then use argument directly. The ZExt @@ -1116,7 +1147,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0)); if (ExtendedArg) { - // If this DDI was already describing only a fragment of a variable, ensure + // If this DII was already describing only a fragment of a variable, ensure // that fragment is appropriately narrowed here. 
// But if a fragment wasn't used, describe the value as the original // argument (rather than the zext or sext) so that it remains described even @@ -1129,23 +1160,23 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, DIExpr->elements_end() - 3); Ops.push_back(dwarf::DW_OP_LLVM_fragment); Ops.push_back(FragmentOffset); - const DataLayout &DL = DDI->getModule()->getDataLayout(); + const DataLayout &DL = DII->getModule()->getDataLayout(); Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); DIExpr = Builder.createExpression(Ops); } DV = ExtendedArg; } if (!LdStHasDebugValue(DIVar, DIExpr, SI)) - Builder.insertDbgValueIntrinsic(DV, 0, DIVar, DIExpr, DDI->getDebugLoc(), + Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, DII->getDebugLoc(), SI); } /// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value -/// that has an associated llvm.dbg.decl intrinsic. -void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, +/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic. +void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII, LoadInst *LI, DIBuilder &Builder) { - auto *DIVar = DDI->getVariable(); - auto *DIExpr = DDI->getExpression(); + auto *DIVar = DII->getVariable(); + auto *DIExpr = DII->getExpression(); assert(DIVar && "Missing variable"); if (LdStHasDebugValue(DIVar, DIExpr, LI)) @@ -1156,16 +1187,16 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, // preferable to keep tracking both the loaded value and the original // address in case the alloca can not be elided. Instruction *DbgValue = Builder.insertDbgValueIntrinsic( - LI, 0, DIVar, DIExpr, DDI->getDebugLoc(), (Instruction *)nullptr); + LI, DIVar, DIExpr, DII->getDebugLoc(), (Instruction *)nullptr); DbgValue->insertAfter(LI); } -/// Inserts a llvm.dbg.value intrinsic after a phi -/// that has an associated llvm.dbg.decl intrinsic. -void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, +/// Inserts a llvm.dbg.value intrinsic after a phi that has an associated +/// llvm.dbg.declare or llvm.dbg.addr intrinsic. +void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII, PHINode *APN, DIBuilder &Builder) { - auto *DIVar = DDI->getVariable(); - auto *DIExpr = DDI->getExpression(); + auto *DIVar = DII->getVariable(); + auto *DIExpr = DII->getExpression(); assert(DIVar && "Missing variable"); if (PhiHasDebugValue(DIVar, DIExpr, APN)) @@ -1178,7 +1209,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, // insertion point. // FIXME: Insert dbg.value markers in the successors when appropriate. if (InsertionPt != BB->end()) - Builder.insertDbgValueIntrinsic(APN, 0, DIVar, DIExpr, DDI->getDebugLoc(), + Builder.insertDbgValueIntrinsic(APN, DIVar, DIExpr, DII->getDebugLoc(), &*InsertionPt); } @@ -1222,7 +1253,7 @@ bool llvm::LowerDbgDeclare(Function &F) { // This is a call by-value or some other instruction that // takes a pointer to the variable. Insert a *value* // intrinsic that describes the alloca. - DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(), + DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DDI->getExpression(), DDI->getDebugLoc(), CI); } @@ -1233,16 +1264,25 @@ bool llvm::LowerDbgDeclare(Function &F) { return true; } -/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the -/// alloca 'V', if any. 
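To illustrate the conversion these ConvertDebugDeclareToDebugValue overloads perform, and the signature change threaded through this patch (dbg.value no longer carries an offset operand, only value, variable, and expression), a hedged IR sketch with hypothetical names:

    call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !var, metadata !expr)
    ...
    store i32 %v, i32* %x.addr

gains, at the store, a value-based description of the same variable:

    call void @llvm.dbg.value(metadata i32 %v, metadata !var, metadata !expr)
    store i32 %v, i32* %x.addr

The LdStHasDebugValue check above exists precisely so this dbg.value is not inserted twice.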
-DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { - if (auto *L = LocalAsMetadata::getIfExists(V)) - if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L)) - for (User *U : MDV->users()) - if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U)) - return DDI; +/// Finds all intrinsics declaring local variables as living in the memory that +/// 'V' points to. This may include a mix of dbg.declare and +/// dbg.addr intrinsics. +TinyPtrVector<DbgInfoIntrinsic *> llvm::FindDbgAddrUses(Value *V) { + auto *L = LocalAsMetadata::getIfExists(V); + if (!L) + return {}; + auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L); + if (!MDV) + return {}; + + TinyPtrVector<DbgInfoIntrinsic *> Declares; + for (User *U : MDV->users()) { + if (auto *DII = dyn_cast<DbgInfoIntrinsic>(U)) + if (DII->isAddressOfVariable()) + Declares.push_back(DII); + } - return nullptr; + return Declares; } void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) { @@ -1253,29 +1293,40 @@ void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) { DbgValues.push_back(DVI); } +static void findDbgUsers(SmallVectorImpl<DbgInfoIntrinsic *> &DbgUsers, + Value *V) { + if (auto *L = LocalAsMetadata::getIfExists(V)) + if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L)) + for (User *U : MDV->users()) + if (DbgInfoIntrinsic *DII = dyn_cast<DbgInfoIntrinsic>(U)) + DbgUsers.push_back(DII); +} bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, Instruction *InsertBefore, DIBuilder &Builder, - bool Deref, int Offset) { - DbgDeclareInst *DDI = FindAllocaDbgDeclare(Address); - if (!DDI) - return false; - DebugLoc Loc = DDI->getDebugLoc(); - auto *DIVar = DDI->getVariable(); - auto *DIExpr = DDI->getExpression(); - assert(DIVar && "Missing variable"); - DIExpr = DIExpression::prepend(DIExpr, Deref, Offset); - // Insert llvm.dbg.declare immediately after the original alloca, and remove - // old llvm.dbg.declare. - Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore); - DDI->eraseFromParent(); - return true; + bool DerefBefore, int Offset, bool DerefAfter) { + auto DbgAddrs = FindDbgAddrUses(Address); + for (DbgInfoIntrinsic *DII : DbgAddrs) { + DebugLoc Loc = DII->getDebugLoc(); + auto *DIVar = DII->getVariable(); + auto *DIExpr = DII->getExpression(); + assert(DIVar && "Missing variable"); + DIExpr = DIExpression::prepend(DIExpr, DerefBefore, Offset, DerefAfter); + // Insert llvm.dbg.declare immediately after InsertBefore, and remove old + // llvm.dbg.declare. 
+ Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore); + if (DII == InsertBefore) + InsertBefore = &*std::next(InsertBefore->getIterator()); + DII->eraseFromParent(); + } + return !DbgAddrs.empty(); } bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, - DIBuilder &Builder, bool Deref, int Offset) { + DIBuilder &Builder, bool DerefBefore, + int Offset, bool DerefAfter) { return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder, - Deref, Offset); + DerefBefore, Offset, DerefAfter); } static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, @@ -1302,8 +1353,7 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, DIExpr = Builder.createExpression(Ops); } - Builder.insertDbgValueIntrinsic(NewAddress, DVI->getOffset(), DIVar, DIExpr, - Loc, DVI); + Builder.insertDbgValueIntrinsic(NewAddress, DIVar, DIExpr, Loc, DVI); DVI->eraseFromParent(); } @@ -1322,17 +1372,28 @@ void llvm::salvageDebugInfo(Instruction &I) { SmallVector<DbgValueInst *, 1> DbgValues; auto &M = *I.getModule(); - auto MDWrap = [&](Value *V) { + auto wrapMD = [&](Value *V) { return MetadataAsValue::get(I.getContext(), ValueAsMetadata::get(V)); }; - if (isa<BitCastInst>(&I)) { - findDbgValues(DbgValues, &I); - for (auto *DVI : DbgValues) { - // Bitcasts are entirely irrelevant for debug info. Rewrite the dbg.value - // to use the cast's source. - DVI->setOperand(0, MDWrap(I.getOperand(0))); - DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); + auto applyOffset = [&](DbgValueInst *DVI, uint64_t Offset) { + auto *DIExpr = DVI->getExpression(); + DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset, + DIExpression::NoDeref, + DIExpression::WithStackValue); + DVI->setOperand(0, wrapMD(I.getOperand(0))); + DVI->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr)); + DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); + }; + + if (isa<BitCastInst>(&I) || isa<IntToPtrInst>(&I)) { + // Bitcasts are entirely irrelevant for debug info. Rewrite dbg.value, + // dbg.addr, and dbg.declare to use the cast's source. + SmallVector<DbgInfoIntrinsic *, 1> DbgUsers; + findDbgUsers(DbgUsers, &I); + for (auto *DII : DbgUsers) { + DII->setOperand(0, wrapMD(I.getOperand(0))); + DEBUG(dbgs() << "SALVAGE: " << *DII << '\n'); } } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) { findDbgValues(DbgValues, &I); @@ -1343,27 +1404,27 @@ void llvm::salvageDebugInfo(Instruction &I) { // Rewrite a constant GEP into a DIExpression. Since we are performing // arithmetic to compute the variable's *value* in the DIExpression, we // need to mark the expression with a DW_OP_stack_value. - if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) { - auto *DIExpr = DVI->getExpression(); - DIBuilder DIB(M, /*AllowUnresolved*/ false); + if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) // GEP offsets are i32 and thus always fit into an int64_t. 
- DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, - Offset.getSExtValue(), - DIExpression::WithStackValue); - DVI->setOperand(0, MDWrap(I.getOperand(0))); - DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); - DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); - } + applyOffset(DVI, Offset.getSExtValue()); } + } else if (auto *BI = dyn_cast<BinaryOperator>(&I)) { + if (BI->getOpcode() == Instruction::Add) + if (auto *ConstInt = dyn_cast<ConstantInt>(I.getOperand(1))) + if (ConstInt->getBitWidth() <= 64) { + APInt Offset = ConstInt->getValue(); + findDbgValues(DbgValues, &I); + for (auto *DVI : DbgValues) + applyOffset(DVI, Offset.getSExtValue()); + } } else if (isa<LoadInst>(&I)) { findDbgValues(DbgValues, &I); for (auto *DVI : DbgValues) { // Rewrite the load into DW_OP_deref. auto *DIExpr = DVI->getExpression(); - DIBuilder DIB(M, /*AllowUnresolved*/ false); DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref); - DVI->setOperand(0, MDWrap(I.getOperand(0))); - DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); + DVI->setOperand(0, wrapMD(I.getOperand(0))); + DVI->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr)); DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); } } @@ -1480,7 +1541,6 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, static bool markAliveBlocks(Function &F, SmallPtrSetImpl<BasicBlock*> &Reachable) { - SmallVector<BasicBlock*, 128> Worklist; BasicBlock *BB = &F.front(); Worklist.push_back(BB); @@ -1586,13 +1646,16 @@ static bool markAliveBlocks(Function &F, static CatchPadInst *getEmptyKey() { return DenseMapInfo<CatchPadInst *>::getEmptyKey(); } + static CatchPadInst *getTombstoneKey() { return DenseMapInfo<CatchPadInst *>::getTombstoneKey(); } + static unsigned getHashValue(CatchPadInst *CatchPad) { return static_cast<unsigned>(hash_combine_range( CatchPad->value_op_begin(), CatchPad->value_op_end())); } + static bool isEqual(CatchPadInst *LHS, CatchPadInst *RHS) { if (LHS == getEmptyKey() || LHS == getTombstoneKey() || RHS == getEmptyKey() || RHS == getTombstoneKey()) @@ -1832,7 +1895,8 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, return ::replaceDominatedUsesWith(From, To, BB, ProperlyDominates); } -bool llvm::callsGCLeafFunction(ImmutableCallSite CS) { +bool llvm::callsGCLeafFunction(ImmutableCallSite CS, + const TargetLibraryInfo &TLI) { // Check if the function is specifically marked as a gc leaf function. if (CS.hasFnAttr("gc-leaf-function")) return true; @@ -1846,6 +1910,14 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) { IID != Intrinsic::experimental_deoptimize; } + // Lib calls can be materialized by some passes, and won't be + // marked as 'gc-leaf-function.' All available Libcalls are + // GC-leaf. + LibFunc LF; + if (TLI.getLibFunc(CS, LF)) { + return TLI.has(LF); + } + return false; } @@ -1893,6 +1965,7 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, } namespace { + /// A potential constituent of a bitreverse or bswap expression. See /// collectBitParts for a fuller explanation. struct BitPart { @@ -1902,12 +1975,14 @@ struct BitPart { /// The Value that this is a bitreverse/bswap of. Value *Provider; + /// The "provenance" of each bit. Provenance[A] = B means that bit A /// in Provider becomes bit B in the result of this expression. SmallVector<int8_t, 32> Provenance; // int8_t means max size is i128. 
enum { Unset = -1 }; }; + } // end anonymous namespace /// Analyze the specified subexpression and see if it is capable of providing @@ -1933,7 +2008,6 @@ struct BitPart { /// /// Because we pass around references into \c BPS, we must use a container that /// does not invalidate internal references (std::map instead of DenseMap). -/// static const Optional<BitPart> & collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, std::map<Value *, Optional<BitPart>> &BPS) { @@ -2069,8 +2143,6 @@ static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, return From == BitWidth - To - 1; } -/// Given an OR instruction, check to see if this is a bitreverse -/// idiom. If so, insert the new intrinsic and return true. bool llvm::recognizeBSwapOrBitReverseIdiom( Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl<Instruction *> &InsertedInsts) { diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index e21e34df8ded..f43af9772771 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -258,7 +258,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, placeSplitBlockCarefully(NewBB, OuterLoopPreds, L); // Create the new outer loop. - Loop *NewOuter = new Loop(); + Loop *NewOuter = LI->AllocateLoop(); // Change the parent loop to use the outer loop as its child now. if (Loop *Parent = L->getParentLoop()) diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index f2527f89e83e..dc98a39adcc5 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -21,8 +21,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" @@ -68,9 +67,23 @@ static inline void remapInstruction(Instruction *I, ValueToValueMapTy &VMap) { for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { Value *Op = I->getOperand(op); + + // Unwrap arguments of dbg.value intrinsics. + bool Wrapped = false; + if (auto *V = dyn_cast<MetadataAsValue>(Op)) + if (auto *Unwrapped = dyn_cast<ValueAsMetadata>(V->getMetadata())) { + Op = Unwrapped->getValue(); + Wrapped = true; + } + + auto wrap = [&](Value *V) { + auto &C = I->getContext(); + return Wrapped ? MetadataAsValue::get(C, ValueAsMetadata::get(V)) : V; + }; + ValueToValueMapTy::iterator It = VMap.find(Op); if (It != VMap.end()) - I->setOperand(op, It->second); + I->setOperand(op, wrap(It->second)); } if (PHINode *PN = dyn_cast<PHINode>(I)) { @@ -200,7 +213,7 @@ const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB, assert(OriginalBB == OldLoop->getHeader() && "Header should be first in RPO"); - NewLoop = new Loop(); + NewLoop = LI->AllocateLoop(); Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop()); if (NewLoopParent) @@ -255,8 +268,7 @@ static bool isEpilogProfitable(Loop *L) { return false; } -/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true -/// if unrolling was successful, or false if the loop was unmodified. Unrolling +/// Unroll the given loop by Count. The loop must be in LCSSA form. 
Unrolling /// can only fail when the loop's latch block is not terminated by a conditional /// branch instruction. However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. @@ -285,37 +297,36 @@ static bool isEpilogProfitable(Loop *L) { /// runtime-unroll the loop if computing RuntimeTripCount will be expensive and /// AllowExpensiveTripCount is false. /// -/// If we want to perform PGO-based loop peeling, PeelCount is set to the +/// If we want to perform PGO-based loop peeling, PeelCount is set to the /// number of iterations we want to peel off. /// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and /// DominatorTree if they are non-null. -bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, - bool AllowRuntime, bool AllowExpensiveTripCount, - bool PreserveCondBr, bool PreserveOnlyFirst, - unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC, OptimizationRemarkEmitter *ORE, - bool PreserveLCSSA) { +LoopUnrollResult llvm::UnrollLoop( + Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, + bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst, + unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder, + LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + OptimizationRemarkEmitter *ORE, bool PreserveLCSSA) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); - return false; + return LoopUnrollResult::Unmodified; } BasicBlock *LatchBlock = L->getLoopLatch(); if (!LatchBlock) { DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n"); - return false; + return LoopUnrollResult::Unmodified; } // Loops with indirectbr cannot be cloned. if (!L->isSafeToClone()) { DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n"); - return false; + return LoopUnrollResult::Unmodified; } // The current loop unroll pass can only unroll loops with a single latch @@ -329,7 +340,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional branch.\n"); - return false; + return LoopUnrollResult::Unmodified; } auto CheckSuccessors = [&](unsigned S1, unsigned S2) { @@ -339,14 +350,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) { DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch" " exiting the loop can be unrolled\n"); - return false; + return LoopUnrollResult::Unmodified; } if (Header->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << " Won't unroll loop: address of header block is taken.\n"); - return false; + return LoopUnrollResult::Unmodified; } if (TripCount != 0) @@ -362,7 +373,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, // Don't enter the unroll code if there is nothing to do. 
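// Editor's sketch (assumed caller, not part of the patch): with the bool
// return type replaced by LoopUnrollResult in this hunk, a caller can
// distinguish full from partial unrolling instead of testing a flag,
// roughly:
//
//   LoopUnrollResult Result = UnrollLoop(
//       L, Count, TripCount, Force, AllowRuntime, AllowExpensiveTripCount,
//       PreserveCondBr, PreserveOnlyFirst, TripMultiple, PeelCount,
//       /*UnrollRemainder*/ false, LI, SE, DT, AC, &ORE, PreserveLCSSA);
//   if (Result == LoopUnrollResult::Unmodified)
//     return false;              // nothing changed; analyses remain valid
//   if (Result == LoopUnrollResult::FullyUnrolled)
//     L = nullptr;               // the Loop object was erased from LoopInfo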
if (TripCount == 0 && Count < 2 && PeelCount == 0) {
   DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
-    return false;
+    return LoopUnrollResult::Unmodified;
   }
   assert(Count > 0);
@@ -395,8 +406,19 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
          "Did not expect runtime trip-count unrolling "
          "and peeling for the same loop");
-  if (PeelCount)
-    peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
+  if (PeelCount) {
+    bool Peeled = peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
+
+    // Successful peeling may result in a change in the loop preheader/trip
+    // counts. If we later unroll the loop, we want these to be updated.
+    if (Peeled) {
+      BasicBlock *ExitingBlock = L->getExitingBlock();
+      assert(ExitingBlock && "Loop without exiting block?");
+      Preheader = L->getLoopPreheader();
+      TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
+      TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+    }
+  }
   // Loops containing convergent instructions must have a count that divides
   // their TripMultiple.
@@ -418,15 +440,15 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
   if (RuntimeTripCount && TripMultiple % Count != 0 &&
       !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
-                                  EpilogProfitability, LI, SE, DT,
-                                  PreserveLCSSA)) {
+                                  EpilogProfitability, UnrollRemainder, LI, SE,
+                                  DT, AC, PreserveLCSSA)) {
     if (Force)
       RuntimeTripCount = false;
     else {
       DEBUG(
           dbgs() << "Won't unroll; remainder loop could not be generated "
                     "when assuming runtime trip count\n");
-      return false;
+      return LoopUnrollResult::Unmodified;
     }
   }
@@ -450,36 +472,53 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
   // Report the unrolling decision.
   if (CompletelyUnroll) {
     DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
-          << " with trip count " << TripCount << "!\n");
-    ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
-                                 L->getHeader())
-              << "completely unrolled loop with "
-              << NV("UnrollCount", TripCount) << " iterations");
+                 << " with trip count " << TripCount << "!\n");
+    if (ORE)
+      ORE->emit([&]() {
+        return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
+                                  L->getHeader())
+               << "completely unrolled loop with "
+               << NV("UnrollCount", TripCount) << " iterations";
+      });
   } else if (PeelCount) {
     DEBUG(dbgs() << "PEELING loop %" << Header->getName()
                  << " with iteration count " << PeelCount << "!\n");
-    ORE->emit(OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
-                                 L->getHeader())
-              << " peeled loop by " << NV("PeelCount", PeelCount)
-              << " iterations");
+    if (ORE)
+      ORE->emit([&]() {
+        return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
+                                  L->getHeader())
+               << " peeled loop by " << NV("PeelCount", PeelCount)
+               << " iterations";
+      });
   } else {
-    OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
-                            L->getHeader());
-    Diag << "unrolled loop by a factor of " << NV("UnrollCount", Count);
+    auto DiagBuilder = [&]() {
+      OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
                              L->getHeader());
+      return Diag << "unrolled loop by a factor of "
+                  << NV("UnrollCount", Count);
+    };
     DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << Count);
     if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
       DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
-      ORE->emit(Diag << " with a breakout at trip "
-                << NV("BreakoutTrip", BreakoutTrip));
+      if (ORE)
+        ORE->emit([&]() {
+ return DiagBuilder() << " with a breakout at trip " + << NV("BreakoutTrip", BreakoutTrip); + }); } else if (TripMultiple != 1) { DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); - ORE->emit(Diag << " with " << NV("TripMultiple", TripMultiple) - << " trips per branch"); + if (ORE) + ORE->emit([&]() { + return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple) + << " trips per branch"; + }); } else if (RuntimeTripCount) { DEBUG(dbgs() << " with run-time trip count"); - ORE->emit(Diag << " with run-time trip count"); + if (ORE) + ORE->emit( + [&]() { return DiagBuilder() << " with run-time trip count"; }); } DEBUG(dbgs() << "!\n"); } @@ -523,8 +562,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, if (Header->getParent()->isDebugInfoForProfiling()) for (BasicBlock *BB : L->getBlocks()) for (Instruction &I : *BB) - if (const DILocation *DIL = I.getDebugLoc()) - I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count)); + if (!isa<DbgInfoIntrinsic>(&I)) + if (const DILocation *DIL = I.getDebugLoc()) + I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count)); for (unsigned It = 1; It != Count; ++It) { std::vector<BasicBlock*> NewBlocks; @@ -796,7 +836,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, Loop *OuterL = L->getParentLoop(); // Update LoopInfo if the loop is completely removed. if (CompletelyUnroll) - LI->markAsRemoved(L); + LI->erase(L); // After complete unrolling most of the blocks should be contained in OuterL. // However, some of them might happen to be out of OuterL (e.g. if they @@ -821,7 +861,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, if (NeedToFixLCSSA) { // LCSSA must be performed on the outermost affected loop. The unrolled // loop's last loop latch is guaranteed to be in the outermost loop - // after LoopInfo's been updated by markAsRemoved. + // after LoopInfo's been updated by LoopInfo::erase. Loop *LatchLoop = LI->getLoopFor(Latches.back()); Loop *FixLCSSALoop = OuterL; if (!FixLCSSALoop->contains(LatchLoop)) @@ -844,7 +884,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, } } - return true; + return CompletelyUnroll ? 
LoopUnrollResult::FullyUnrolled + : LoopUnrollResult::PartiallyUnrolled; } /// Given an llvm.loop loop id metadata node, returns the loop hint metadata diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index 5c21490793e7..4273ce0b6200 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -1,4 +1,4 @@ -//===-- UnrollLoopPeel.cpp - Loop peeling utilities -----------------------===// +//===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===// // // The LLVM Compiler Infrastructure // @@ -13,29 +13,42 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include <algorithm> +#include <cassert> +#include <cstdint> +#include <limits> using namespace llvm; #define DEBUG_TYPE "loop-unroll" + STATISTIC(NumPeeled, "Number of loops peeled"); static cl::opt<unsigned> UnrollPeelMaxCount( @@ -49,7 +62,8 @@ static cl::opt<unsigned> UnrollForcePeelCount( // Designates that a Phi is estimated to become invariant after an "infinite" // number of loop iterations (i.e. only may become an invariant if the loop is // fully unrolled). -static const unsigned InfiniteIterationsToInvariance = UINT_MAX; +static const unsigned InfiniteIterationsToInvariance = + std::numeric_limits<unsigned>::max(); // Check whether we are capable of peeling this loop. static bool canPeel(Loop *L) { @@ -210,8 +224,6 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n"); } } - - return; } /// \brief Update the branch weights of the latch of a peeled-off loop @@ -236,7 +248,6 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, unsigned IterNumber, unsigned AvgIters, uint64_t &PeeledHeaderWeight) { - // FIXME: Pick a more realistic distribution. 
// Currently the proportion of weight we assign to the fall-through // side of the branch drops linearly with the iteration number, and we use @@ -272,7 +283,6 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI) { - BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); BasicBlock *PreHeader = L->getLoopPreheader(); diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index d43ce7abb7cd..efff06f79cb7 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -25,7 +25,6 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopIterator.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/BasicBlock.h" @@ -294,7 +293,8 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, /// Return the new cloned loop that is created when CreateRemainderLoop is true. static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, - const bool UseEpilogRemainder, BasicBlock *InsertTop, + const bool UseEpilogRemainder, const bool UnrollRemainder, + BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { @@ -393,35 +393,14 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, if (CreateRemainderLoop) { Loop *NewLoop = NewLoops[L]; assert(NewLoop && "L should have been cloned"); - // Add unroll disable metadata to disable future unrolling for this loop. - SmallVector<Metadata *, 4> MDs; - // Reserve first location for self reference to the LoopID metadata node. - MDs.push_back(nullptr); - MDNode *LoopID = NewLoop->getLoopID(); - if (LoopID) { - // First remove any existing loop unrolling metadata. - for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { - bool IsUnrollMetadata = false; - MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); - if (MD) { - const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); - IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); - } - if (!IsUnrollMetadata) - MDs.push_back(LoopID->getOperand(i)); - } - } - LLVMContext &Context = NewLoop->getHeader()->getContext(); - SmallVector<Metadata *, 1> DisableOperands; - DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); - MDNode *DisableNode = MDNode::get(Context, DisableOperands); - MDs.push_back(DisableNode); + // Only add loop metadata if the loop is not going to be completely + // unrolled. + if (UnrollRemainder) + return NewLoop; - MDNode *NewLoopID = MDNode::get(Context, MDs); - // Set operand 0 to refer to the loop id itself. - NewLoopID->replaceOperandWith(0, NewLoopID); - NewLoop->setLoopID(NewLoopID); + // Add unroll disable metadata to disable future unrolling for this loop. + NewLoop->setLoopAlreadyUnrolled(); return NewLoop; } else @@ -435,12 +414,9 @@ canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit, bool PreserveLCSSA, bool UseEpilogRemainder) { - // Support runtime unrolling for multiple exit blocks and multiple exiting - // blocks. 
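// Editor's note (illustrative, not part of the patch): the hunks below split
// the old single UnrollRuntimeMultiExit gate into two separate predicates,
// so a caller now asks the safety and profitability questions independently,
// roughly:
//
//   bool Enabled =
//       canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit,
//                                    PreserveLCSSA, UseEpilogRemainder) &&
//       canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit,
//                                        PreserveLCSSA, UseEpilogRemainder);
//
// Safety covers LCSSA preservation and the exit-block structure;
// profitability currently admits at most two exiting blocks, preferring a
// lone side exit that lands in a deoptimize block.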
-  if (!UnrollRuntimeMultiExit)
-    return false;
-  // Even if runtime multi exit is enabled, we currently have some correctness
-  // constrains in unrolling a multi-exit loop.
+  // We currently have some correctness constraints in unrolling a multi-exit
+  // loop. Check for these below.
+
   // We rely on LCSSA form being preserved when the exit blocks are transformed.
   if (!PreserveLCSSA)
     return false;
@@ -470,7 +446,54 @@ canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
   return true;
 }

+/// Returns true if we can profitably unroll the multi-exit loop L. Currently,
+/// we return true only if UnrollRuntimeMultiExit is set to true.
+static bool canProfitablyUnrollMultiExitLoop(
+    Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
+    bool PreserveLCSSA, bool UseEpilogRemainder) {
+
+#if !defined(NDEBUG)
+  SmallVector<BasicBlock *, 8> OtherExitsDummyCheck;
+  assert(canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit,
+                                      PreserveLCSSA, UseEpilogRemainder) &&
+         "Should be safe to unroll before checking profitability!");
+#endif
+
+  // Priority goes to UnrollRuntimeMultiExit if it's supplied.
+  if (UnrollRuntimeMultiExit.getNumOccurrences())
+    return UnrollRuntimeMultiExit;
+
+  // The main pain point with multi-exit loop unrolling is that once unrolled,
+  // we will not be able to merge all blocks into a straight line code.
+  // There are branches within the unrolled loop that go to the OtherExits.
+  // The second point is the increase in code size, but this is true
+  // irrespective of multiple exits.
+
+  // Note: Both the heuristics below are coarse grained. We are essentially
+  // enabling unrolling of loops that have a single side exit other than the
+  // normal LatchExit (i.e. exiting into a deoptimize block).
+  // The heuristics considered are:
+  // 1. low number of branches in the unrolled version.
+  // 2. high predictability of these extra branches.
+  // We avoid unrolling loops that have more than two exiting blocks. This
+  // limits the total number of branches in the unrolled loop to be at most
+  // the unroll factor (since one of the exiting blocks is the latch block).
+  SmallVector<BasicBlock*, 4> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+  if (ExitingBlocks.size() > 2)
+    return false;
+  // The second heuristic is that L has one exit other than the LatchExit and
+  // that exit is a deoptimize block. We know that deoptimize blocks are rarely
+  // taken, which also implies the branch leading to the deoptimize block is
+  // highly predictable.
+  return (OtherExits.size() == 1 &&
+          OtherExits[0]->getTerminatingDeoptimizeCall());
+  // TODO: These can be fine-tuned further to consider code size or deopt states
+  // that are captured by the deoptimize exit block.
+  // Also, we can extend this to support more cases, if we actually
+  // know of kinds of multiexit loops that would benefit from unrolling.
+}

 /// Insert code in the prolog/epilog code when unrolling a loop with a
 /// run-time trip-count.
@@ -513,10 +536,14 @@ canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
 bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                       bool AllowExpensiveTripCount,
                                       bool UseEpilogRemainder,
+                                      bool UnrollRemainder,
                                       LoopInfo *LI, ScalarEvolution *SE,
-                                      DominatorTree *DT, bool PreserveLCSSA) {
+                                      DominatorTree *DT, AssumptionCache *AC,
+                                      bool PreserveLCSSA) {
   DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
   DEBUG(L->dump());
+  DEBUG(UseEpilogRemainder ?
dbgs() << "Using epilog remainder.\n" : + dbgs() << "Using prolog remainder.\n"); // Make sure the loop is in canonical form. if (!L->isLoopSimplifyForm()) { @@ -538,8 +565,11 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, "one of the loop latch successors should be the exit block!"); // These are exit blocks other than the target of the latch exiting block. SmallVector<BasicBlock *, 4> OtherExits; - bool isMultiExitUnrollingEnabled = canSafelyUnrollMultiExitLoop( - L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder); + bool isMultiExitUnrollingEnabled = + canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, + UseEpilogRemainder) && + canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, + UseEpilogRemainder); // Support only single exit and exiting block unless multi-exit loop unrolling is enabled. if (!isMultiExitUnrollingEnabled && (!L->getExitingBlock() || OtherExits.size())) { @@ -724,7 +754,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; Loop *remainderLoop = CloneLoopBlocks( - L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot, + L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder, + InsertTop, InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); // Insert the cloned blocks into the function. @@ -753,11 +784,15 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // Add the incoming values from the remainder code to the end of the phi // node. for (unsigned i =0; i < oldNumOperands; i++){ - Value *newVal = VMap[Phi->getIncomingValue(i)]; + Value *newVal = VMap.lookup(Phi->getIncomingValue(i)); // newVal can be a constant or derived from values outside the loop, and - // hence need not have a VMap value. - if (!newVal) + // hence need not have a VMap value. Also, since lookup already generated + // a default "null" VMap entry for this value, we need to populate that + // VMap entry correctly, with the mapped entry being itself. + if (!newVal) { newVal = Phi->getIncomingValue(i); + VMap[Phi->getIncomingValue(i)] = Phi->getIncomingValue(i); + } Phi->addIncoming(newVal, cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)])); } @@ -868,6 +903,16 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA); } + if (remainderLoop && UnrollRemainder) { + DEBUG(dbgs() << "Unrolling remainder loop\n"); + UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1, + /*Force*/ false, /*AllowRuntime*/ false, + /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true, + /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1, + /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC, + /*ORE*/ nullptr, PreserveLCSSA); + } + NumRuntimeUnrolled++; return true; } diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index 3c522786641a..c3fa05a11a24 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -432,7 +432,7 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, InstDesc &Prev, bool HasFunNoNaNAttr) { bool FP = I->getType()->isFloatingPointTy(); Instruction *UAI = Prev.getUnsafeAlgebraInst(); - if (!UAI && FP && !I->hasUnsafeAlgebra()) + if (!UAI && FP && !I->isFast()) UAI = I; // Found an unsafe (unvectorizable) algebra instruction. 
switch (I->getOpcode()) {
@@ -565,7 +565,8 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(
   auto *I = Phi->user_back();
   if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
       DT->dominates(Previous, I->user_back())) {
-    SinkAfter[I] = Previous;
+    if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking.
+      SinkAfter[I] = Previous;
     return true;
   }
 }
@@ -659,11 +660,11 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
     break;
   }
-  // We only match FP sequences with unsafe algebra, so we can unconditionally
+  // We only match FP sequences that are 'fast', so we can unconditionally
   // set it on any generated instructions.
   IRBuilder<>::FastMathFlagGuard FMFG(Builder);
   FastMathFlags FMF;
-  FMF.setUnsafeAlgebra();
+  FMF.setFast();
   Builder.setFastMathFlags(FMF);
   Value *Cmp;
@@ -677,7 +678,8 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
 }
 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
-                                         const SCEV *Step, BinaryOperator *BOp)
+                                         const SCEV *Step, BinaryOperator *BOp,
+                                         SmallVectorImpl<Instruction *> *Casts)
     : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
   assert(IK != IK_NoInduction && "Not an induction");
@@ -704,6 +706,12 @@ InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
           (InductionBinOp->getOpcode() == Instruction::FAdd ||
            InductionBinOp->getOpcode() == Instruction::FSub))) &&
          "Binary opcode should be specified for FP induction");
+
+  if (Casts) {
+    for (auto &Inst : *Casts) {
+      RedundantCasts.push_back(Inst);
+    }
+  }
 }
 int InductionDescriptor::getConsecutiveDirection() const {
@@ -767,7 +775,7 @@ Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
   // Floating point operations had to be 'fast' to enable the induction.
   FastMathFlags Flags;
-  Flags.setUnsafeAlgebra();
+  Flags.setFast();
   Value *MulExp = B.CreateFMul(StepValue, Index);
   if (isa<Instruction>(MulExp))
@@ -807,7 +815,7 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
     StartValue = Phi->getIncomingValue(1);
   } else {
     assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
-           "Unexpected Phi node in the loop");
+           "Unexpected Phi node in the loop");
     BEValue = Phi->getIncomingValue(1);
     StartValue = Phi->getIncomingValue(0);
   }
@@ -840,6 +848,110 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
   return true;
 }
+/// This function is called when we suspect that the update-chain of a phi node
+/// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts
+/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
+/// predicate P under which the SCEV expression for the phi can be the
+/// AddRecurrence \p AR; see createAddRecFromPHIWithCast). We want to find the
+/// cast instructions that are involved in the update-chain of this induction.
+/// A caller that adds the required runtime predicate can be free to drop these
+/// cast instructions, and compute the phi using \p AR (instead of some scev
+/// expression with casts).
+/// +/// For example, without a predicate the scev expression can take the following +/// form: +/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy) +/// +/// It corresponds to the following IR sequence: +/// %for.body: +/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ] +/// %casted_phi = "ExtTrunc i64 %x" +/// %add = add i64 %casted_phi, %step +/// +/// where %x is given in \p PN, +/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate, +/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of +/// several forms, for example, such as: +/// ExtTrunc1: %casted_phi = and %x, 2^n-1 +/// or: +/// ExtTrunc2: %t = shl %x, m +/// %casted_phi = ashr %t, m +/// +/// If we are able to find such sequence, we return the instructions +/// we found, namely %casted_phi and the instructions on its use-def chain up +/// to the phi (not including the phi). +bool getCastsForInductionPHI( + PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev, + const SCEVAddRecExpr *AR, SmallVectorImpl<Instruction *> &CastInsts) { + + assert(CastInsts.empty() && "CastInsts is expected to be empty."); + auto *PN = cast<PHINode>(PhiScev->getValue()); + assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression"); + const Loop *L = AR->getLoop(); + + // Find any cast instructions that participate in the def-use chain of + // PhiScev in the loop. + // FORNOW/TODO: We currently expect the def-use chain to include only + // two-operand instructions, where one of the operands is an invariant. + // createAddRecFromPHIWithCasts() currently does not support anything more + // involved than that, so we keep the search simple. This can be + // extended/generalized as needed. + + auto getDef = [&](const Value *Val) -> Value * { + const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val); + if (!BinOp) + return nullptr; + Value *Op0 = BinOp->getOperand(0); + Value *Op1 = BinOp->getOperand(1); + Value *Def = nullptr; + if (L->isLoopInvariant(Op0)) + Def = Op1; + else if (L->isLoopInvariant(Op1)) + Def = Op0; + return Def; + }; + + // Look for the instruction that defines the induction via the + // loop backedge. + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return false; + Value *Val = PN->getIncomingValueForBlock(Latch); + if (!Val) + return false; + + // Follow the def-use chain until the induction phi is reached. + // If on the way we encounter a Value that has the same SCEV Expr as the + // phi node, we can consider the instructions we visit from that point + // as part of the cast-sequence that can be ignored. + bool InCastSequence = false; + auto *Inst = dyn_cast<Instruction>(Val); + while (Val != PN) { + // If we encountered a phi node other than PN, or if we left the loop, + // we bail out. + if (!Inst || !L->contains(Inst)) { + return false; + } + auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val)); + if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR)) + InCastSequence = true; + if (InCastSequence) { + // Only the last instruction in the cast sequence is expected to have + // uses outside the induction def-use chain. 
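+  // Editor's example (illustrative, not part of the patch): for the
+  // ExtTrunc1 form documented above, say
+  //   %x          = phi i64 [ 0, %ph ], [ %add, %for.body ]
+  //   %casted_phi = and i64 %x, 4294967295
+  //   %add        = add i64 %casted_phi, %step
+  // the walk starts from the latch value %add (its SCEV does not yet match
+  // AR, so it is not collected), steps through its loop-variant operand
+  // %casted_phi, whose SCEV equals AR under the predicate, and stops at the
+  // phi, returning CastInsts = {%casted_phi}.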
+      if (!CastInsts.empty())
+        if (!Inst->hasOneUse())
+          return false;
+      CastInsts.push_back(Inst);
+    }
+    Val = getDef(Val);
+    if (!Val)
+      return false;
+    Inst = dyn_cast<Instruction>(Val);
+  }
+
+  return InCastSequence;
+}
+
 bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                          PredicatedScalarEvolution &PSE,
                                          InductionDescriptor &D,
@@ -869,13 +981,26 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
     return false;
   }
+  // Record any Cast instructions that participate in the induction update.
+  const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
+  // If we started from an UnknownSCEV, and managed to build an addRecurrence
+  // only after enabling Assume with PSCEV, this means we may have encountered
+  // cast instructions that required adding a runtime check in order to
+  // guarantee the correctness of the AddRecurrence representation of the
+  // induction.
+  if (PhiScev != AR && SymbolicPhi) {
+    SmallVector<Instruction *, 2> Casts;
+    if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
+      return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
+  }
+
   return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
 }
-bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
-                                         ScalarEvolution *SE,
-                                         InductionDescriptor &D,
-                                         const SCEV *Expr) {
+bool InductionDescriptor::isInductionPHI(
+    PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
+    InductionDescriptor &D, const SCEV *Expr,
+    SmallVectorImpl<Instruction *> *CastsToIgnore) {
   Type *PhiTy = Phi->getType();
   // We only handle integer and pointer induction variables.
   if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
@@ -894,7 +1019,7 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
     // FIXME: We should treat this as a uniform. Unfortunately, we
     // don't currently know how to handle uniform PHIs.
     DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
-    return false;
+    return false;
   Value *StartValue =
@@ -907,7 +1032,8 @@ ...
     return false;
   if (PhiTy->isIntegerTy()) {
-    D = InductionDescriptor(StartValue, IK_IntInduction, Step);
+    D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr,
+                            CastsToIgnore);
     return true;
   }
@@ -1115,6 +1241,149 @@ Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
   return None;
 }
+/// Does a BFS from a given node to all of its children inside a given loop.
+/// The returned vector of nodes includes the starting point.
+SmallVector<DomTreeNode *, 16>
+llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {
+  SmallVector<DomTreeNode *, 16> Worklist;
+  auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
+    // Only include subregions in the top level loop.
+    BasicBlock *BB = DTN->getBlock();
+    if (CurLoop->contains(BB))
+      Worklist.push_back(DTN);
+  };
+
+  AddRegionToWorklist(N);
+
+  for (size_t I = 0; I < Worklist.size(); I++)
+    for (DomTreeNode *Child : Worklist[I]->getChildren())
+      AddRegionToWorklist(Child);
+
+  return Worklist;
+}
+
+void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
+                          ScalarEvolution *SE = nullptr,
+                          LoopInfo *LI = nullptr) {
+  assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
+  auto *Preheader = L->getLoopPreheader();
+  assert(Preheader && "Preheader should exist!");
+
+  // Now that we know the removal is safe, remove the loop by changing the
+  // branch from the preheader to go to the single exit block.
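+  // Editor's sketch (assumed caller, not part of the patch): the asserts in
+  // this function spell out the preconditions a deletion pass should
+  // establish before handing the loop over, e.g.
+  //   if (L->getLoopPreheader() && L->getUniqueExitBlock() &&
+  //       L->hasDedicatedExits() && (!DT || L->isLCSSAForm(*DT)))
+  //     deleteDeadLoop(L, DT, SE, LI);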
+  //
+  // Because we're deleting a large chunk of code at once, the sequence in which
+  // we remove things is very important to avoid invalidation issues.

+  // Tell ScalarEvolution that the loop is deleted. Do this before
+  // deleting the loop so that ScalarEvolution can look at the loop
+  // to determine what it needs to clean up.
+  if (SE)
+    SE->forgetLoop(L);
+
+  auto *ExitBlock = L->getUniqueExitBlock();
+  assert(ExitBlock && "Should have a unique exit block!");
+  assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
+
+  auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator());
+  assert(OldBr && "Preheader must end with a branch");
+  assert(OldBr->isUnconditional() && "Preheader must have a single successor");
+  // Connect the preheader to the exit block. Keep the old edge to the header
+  // around to perform the dominator tree update in two separate steps
+  // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
+  // preheader -> header.
+  //
+  //
+  // 0. Preheader          1. Preheader           2. Preheader
+  //        |                    |   |                  |
+  //        V                    |   V                  |
+  //      Header <--\            | Header <--\          | Header <--\
+  //        |   |   |            |   |   |   |          |   |   |   |
+  //        |   V   |            |   |   V   |          |   |   V   |
+  //        |  Body --/          |   |  Body --/        |   |  Body --/
+  //        V                    V   V                  V   V
+  //      Exit                  Exit                   Exit
+  //
+  // By doing this in two separate steps we can perform the dominator tree
+  // update without using the batch update API.
+  //
+  // Even when the loop is never executed, we cannot remove the edge from the
+  // source block to the exit block. Consider the case where the unexecuted loop
+  // branches back to an outer loop. If we deleted the loop and removed the edge
+  // coming to this inner loop, this will break the outer loop structure (by
+  // deleting the backedge of the outer loop). If the outer loop is indeed a
+  // non-loop, it will be deleted in a future iteration of the loop deletion
+  // pass.
+  IRBuilder<> Builder(OldBr);
+  Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
+  // Remove the old branch. The conditional branch becomes a new terminator.
+  OldBr->eraseFromParent();
+
+  // Rewrite phis in the exit block to get their inputs from the Preheader
+  // instead of the exiting block.
+  BasicBlock::iterator BI = ExitBlock->begin();
+  while (PHINode *P = dyn_cast<PHINode>(BI)) {
+    // Set the zero'th element of Phi to be from the preheader and remove all
+    // other incoming values. Given the loop has dedicated exits, all other
+    // incoming values must be from the exiting blocks.
+    int PredIndex = 0;
+    P->setIncomingBlock(PredIndex, Preheader);
+    // Removes all incoming values from all other exiting blocks (including
+    // duplicate values from an exiting block).
+    // Nuke all entries except the zero'th entry which is the preheader entry.
+    // NOTE! We need to remove Incoming Values in the reverse order as done
+    // below, to keep the indices valid for deletion (removeIncomingValues
+    // updates getNumIncomingValues and shifts all values down into the operand
+    // being deleted).
+    for (unsigned i = 0, e = P->getNumIncomingValues() - 1; i != e; ++i)
+      P->removeIncomingValue(e - i, false);
+
+    assert((P->getNumIncomingValues() == 1 &&
+            P->getIncomingBlock(PredIndex) == Preheader) &&
+           "Should have exactly one value and that's from the preheader!");
+    ++BI;
+  }
+
+  // Disconnect the loop body by branching directly to its exit.
+  Builder.SetInsertPoint(Preheader->getTerminator());
+  Builder.CreateBr(ExitBlock);
+  // Remove the old branch.
+ Preheader->getTerminator()->eraseFromParent(); + + if (DT) { + // Update the dominator tree by informing it about the new edge from the + // preheader to the exit. + DT->insertEdge(Preheader, ExitBlock); + // Inform the dominator tree about the removed edge. + DT->deleteEdge(Preheader, L->getHeader()); + } + + // Remove the block from the reference counting scheme, so that we can + // delete it freely later. + for (auto *Block : L->blocks()) + Block->dropAllReferences(); + + if (LI) { + // Erase the instructions and the blocks without having to worry + // about ordering because we already dropped the references. + // NOTE: This iteration is safe because erasing the block does not remove + // its entry from the loop's block list. We do that in the next section. + for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end(); + LpI != LpE; ++LpI) + (*LpI)->eraseFromParent(); + + // Finally, the blocks from loopinfo. This has to happen late because + // otherwise our loop iterators won't work. + + SmallPtrSet<BasicBlock *, 8> blocks; + blocks.insert(L->block_begin(), L->block_end()); + for (BasicBlock *BB : blocks) + LI->removeBlock(BB); + + // The last step is to update LoopInfo now that we've eliminated this loop. + LI->erase(L); + } +} + /// Returns true if the instruction in a loop is guaranteed to execute at least /// once. bool llvm::isGuaranteedToExecute(const Instruction &Inst, @@ -1194,7 +1463,7 @@ Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) { static Value *addFastMathFlag(Value *V) { if (isa<FPMathOperator>(V)) { FastMathFlags Flags; - Flags.setUnsafeAlgebra(); + Flags.setFast(); cast<Instruction>(V)->setFastMathFlags(Flags); } return V; @@ -1256,8 +1525,8 @@ Value *llvm::createSimpleTargetReduction( using RD = RecurrenceDescriptor; RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; // TODO: Support creating ordered reductions. - FastMathFlags FMFUnsafe; - FMFUnsafe.setUnsafeAlgebra(); + FastMathFlags FMFFast; + FMFFast.setFast(); switch (Opcode) { case Instruction::Add: @@ -1278,14 +1547,14 @@ Value *llvm::createSimpleTargetReduction( case Instruction::FAdd: BuildFunc = [&]() { auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src); - cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe); + cast<CallInst>(Rdx)->setFastMathFlags(FMFFast); return Rdx; }; break; case Instruction::FMul: BuildFunc = [&]() { auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src); - cast<CallInst>(Rdx)->setFastMathFlags(FMFUnsafe); + cast<CallInst>(Rdx)->setFastMathFlags(FMFFast); return Rdx; }; break; @@ -1321,55 +1590,39 @@ Value *llvm::createSimpleTargetReduction( } /// Create a vector reduction using a given recurrence descriptor. -Value *llvm::createTargetReduction(IRBuilder<> &Builder, +Value *llvm::createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, RecurrenceDescriptor &Desc, Value *Src, bool NoNaN) { // TODO: Support in-order reductions based on the recurrence descriptor. 
- RecurrenceDescriptor::RecurrenceKind RecKind = Desc.getRecurrenceKind(); + using RD = RecurrenceDescriptor; + RD::RecurrenceKind RecKind = Desc.getRecurrenceKind(); TargetTransformInfo::ReductionFlags Flags; Flags.NoNaN = NoNaN; - auto getSimpleRdx = [&](unsigned Opc) { - return createSimpleTargetReduction(Builder, TTI, Opc, Src, Flags); - }; switch (RecKind) { - case RecurrenceDescriptor::RK_FloatAdd: - return getSimpleRdx(Instruction::FAdd); - case RecurrenceDescriptor::RK_FloatMult: - return getSimpleRdx(Instruction::FMul); - case RecurrenceDescriptor::RK_IntegerAdd: - return getSimpleRdx(Instruction::Add); - case RecurrenceDescriptor::RK_IntegerMult: - return getSimpleRdx(Instruction::Mul); - case RecurrenceDescriptor::RK_IntegerAnd: - return getSimpleRdx(Instruction::And); - case RecurrenceDescriptor::RK_IntegerOr: - return getSimpleRdx(Instruction::Or); - case RecurrenceDescriptor::RK_IntegerXor: - return getSimpleRdx(Instruction::Xor); - case RecurrenceDescriptor::RK_IntegerMinMax: { - switch (Desc.getMinMaxRecurrenceKind()) { - case RecurrenceDescriptor::MRK_SIntMax: - Flags.IsSigned = true; - Flags.IsMaxOp = true; - break; - case RecurrenceDescriptor::MRK_UIntMax: - Flags.IsMaxOp = true; - break; - case RecurrenceDescriptor::MRK_SIntMin: - Flags.IsSigned = true; - break; - case RecurrenceDescriptor::MRK_UIntMin: - break; - default: - llvm_unreachable("Unhandled MRK"); - } - return getSimpleRdx(Instruction::ICmp); + case RD::RK_FloatAdd: + return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags); + case RD::RK_FloatMult: + return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags); + case RD::RK_IntegerAdd: + return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags); + case RD::RK_IntegerMult: + return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags); + case RD::RK_IntegerAnd: + return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags); + case RD::RK_IntegerOr: + return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags); + case RD::RK_IntegerXor: + return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags); + case RD::RK_IntegerMinMax: { + RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind(); + Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax); + Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin); + return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags); } - case RecurrenceDescriptor::RK_FloatMinMax: { - Flags.IsMaxOp = - Desc.getMinMaxRecurrenceKind() == RecurrenceDescriptor::MRK_FloatMax; - return getSimpleRdx(Instruction::FCmp); + case RD::RK_FloatMinMax: { + Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax; + return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags); } default: llvm_unreachable("Unhandled RecKind"); diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp index 900450b40061..57dc225e9dab 100644 --- a/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -168,13 +168,14 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType); assert(ILengthType && "expected size argument to memcpy to be an integer type!"); + Type *Int8Type = Type::getInt8Ty(Ctx); + bool LoopOpIsInt8 = LoopOpType == Int8Type; ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize); - Value 
*RuntimeLoopCount = PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
-  Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
-  Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
-
+  Value *RuntimeLoopCount = LoopOpIsInt8 ?
+                            CopyLen :
+                            PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
   BasicBlock *LoopBB =
-      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, nullptr);
+      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
   IRBuilder<> LoopBuilder(LoopBB);
   PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
@@ -189,11 +190,15 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
       LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
   LoopIndex->addIncoming(NewIndex, LoopBB);
-  Type *Int8Type = Type::getInt8Ty(Ctx);
-  if (LoopOpType != Int8Type) {
+  if (!LoopOpIsInt8) {
+    // Add in the residual byte count left over after the wide main loop.
+    Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
+    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
+
     // Loop body for the residual copy.
     BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
-                                               PreLoopBB->getParent(), nullptr);
+                                               PreLoopBB->getParent(),
+                                               PostLoopBB);
     // Residual loop header.
     BasicBlock *ResHeaderBB = BasicBlock::Create(
         Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);
@@ -258,61 +263,6 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
   }
 }
-void llvm::createMemCpyLoop(Instruction *InsertBefore,
-                            Value *SrcAddr, Value *DstAddr, Value *CopyLen,
-                            unsigned SrcAlign, unsigned DestAlign,
-                            bool SrcIsVolatile, bool DstIsVolatile) {
-  Type *TypeOfCopyLen = CopyLen->getType();
-
-  BasicBlock *OrigBB = InsertBefore->getParent();
-  Function *F = OrigBB->getParent();
-  BasicBlock *NewBB =
-      InsertBefore->getParent()->splitBasicBlock(InsertBefore, "split");
-  BasicBlock *LoopBB = BasicBlock::Create(F->getContext(), "loadstoreloop",
-                                          F, NewBB);
-
-  IRBuilder<> Builder(OrigBB->getTerminator());
-
-  // SrcAddr and DstAddr are expected to be pointer types,
-  // so no check is made here.
-  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
-  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
-
-  // Cast pointers to (char *)
-  SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS));
-  DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS));
-
-  Builder.CreateCondBr(
-      Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
-      LoopBB);
-  OrigBB->getTerminator()->eraseFromParent();
-
-  IRBuilder<> LoopBuilder(LoopBB);
-  PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
-  LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
-
-  // load from SrcAddr+LoopIndex
-  // TODO: we can leverage the align parameter of llvm.memcpy for more efficient
-  // word-sized loads and stores.
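// Editor's example (illustrative, not part of the patch): with a 4-byte
// loop operand type and a runtime length %len, the expansion above computes
//   %loop_count = udiv i64 %len, 4   ; iterations of the wide main loop
//   %residual   = urem i64 %len, 4   ; bytes left for the byte-wise loop
// and the hunk's LoopOpIsInt8 fast path skips both the udiv and the entire
// residual loop when the loop operand type is already i8.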
-  Value *Element =
-      LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP(
-                                 LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex),
-                             SrcIsVolatile);
-  // store at DstAddr+LoopIndex
-  LoopBuilder.CreateStore(Element,
-                          LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(),
-                                                        DstAddr, LoopIndex),
-                          DstIsVolatile);
-
-  // The value for LoopIndex coming from backedge is (LoopIndex + 1)
-  Value *NewIndex =
-      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
-  LoopIndex->addIncoming(NewIndex, LoopBB);
-
-  LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
-                           NewBB);
-}
-
 // Lower memmove to IR. memmove is required to correctly copy overlapping memory
 // regions; therefore, it has to check the relative positions of the source and
 // destination pointers and choose the copy direction accordingly.
@@ -454,38 +404,26 @@ static void createMemSetLoop(Instruction *InsertBefore,
 void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
                               const TargetTransformInfo &TTI) {
-  // Original implementation
-  if (!TTI.useWideIRMemcpyLoopLowering()) {
-    createMemCpyLoop(/* InsertBefore */ Memcpy,
-                     /* SrcAddr */ Memcpy->getRawSource(),
-                     /* DstAddr */ Memcpy->getRawDest(),
-                     /* CopyLen */ Memcpy->getLength(),
-                     /* SrcAlign */ Memcpy->getAlignment(),
-                     /* DestAlign */ Memcpy->getAlignment(),
-                     /* SrcIsVolatile */ Memcpy->isVolatile(),
-                     /* DstIsVolatile */ Memcpy->isVolatile());
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
+    createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy,
+                              /* SrcAddr */ Memcpy->getRawSource(),
+                              /* DstAddr */ Memcpy->getRawDest(),
+                              /* CopyLen */ CI,
+                              /* SrcAlign */ Memcpy->getAlignment(),
+                              /* DestAlign */ Memcpy->getAlignment(),
+                              /* SrcIsVolatile */ Memcpy->isVolatile(),
+                              /* DstIsVolatile */ Memcpy->isVolatile(),
+                              /* TargetTransformInfo */ TTI);
   } else {
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
-      createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy,
+    createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy,
                                 /* SrcAddr */ Memcpy->getRawSource(),
                                 /* DstAddr */ Memcpy->getRawDest(),
-                                /* CopyLen */ CI,
+                                /* CopyLen */ Memcpy->getLength(),
                                 /* SrcAlign */ Memcpy->getAlignment(),
                                 /* DestAlign */ Memcpy->getAlignment(),
                                 /* SrcIsVolatile */ Memcpy->isVolatile(),
                                 /* DstIsVolatile */ Memcpy->isVolatile(),
-                                /* TargetTransformInfo */ TTI);
-    } else {
-      createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy,
-                                  /* SrcAddr */ Memcpy->getRawSource(),
-                                  /* DstAddr */ Memcpy->getRawDest(),
-                                  /* CopyLen */ Memcpy->getLength(),
-                                  /* SrcAlign */ Memcpy->getAlignment(),
-                                  /* DestAlign */ Memcpy->getAlignment(),
-                                  /* SrcIsVolatile */ Memcpy->isVolatile(),
-                                  /* DstIsVolatile */ Memcpy->isVolatile(),
-                                  /* TargetTransfomrInfo */ TTI);
-    }
+                                /* TargetTransformInfo */ TTI);
   }
 }
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 890afbc46e63..344cb35df986 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -13,46 +13,65 @@
 //
 //===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Value.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include
"llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <limits> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "lower-switch" namespace { + struct IntRange { int64_t Low, High; }; - // Return true iff R is covered by Ranges. - static bool IsInRanges(const IntRange &R, - const std::vector<IntRange> &Ranges) { - // Note: Ranges must be sorted, non-overlapping and non-adjacent. - - // Find the first range whose High field is >= R.High, - // then check if the Low field is <= R.Low. If so, we - // have a Range that covers R. - auto I = std::lower_bound( - Ranges.begin(), Ranges.end(), R, - [](const IntRange &A, const IntRange &B) { return A.High < B.High; }); - return I != Ranges.end() && I->Low <= R.Low; - } + +} // end anonymous namespace + +// Return true iff R is covered by Ranges. +static bool IsInRanges(const IntRange &R, + const std::vector<IntRange> &Ranges) { + // Note: Ranges must be sorted, non-overlapping and non-adjacent. + + // Find the first range whose High field is >= R.High, + // then check if the Low field is <= R.Low. If so, we + // have a Range that covers R. + auto I = std::lower_bound( + Ranges.begin(), Ranges.end(), R, + [](const IntRange &A, const IntRange &B) { return A.High < B.High; }); + return I != Ranges.end() && I->Low <= R.Low; +} + +namespace { /// Replace all SwitchInst instructions with chained branch instructions. class LowerSwitch : public FunctionPass { public: - static char ID; // Pass identification, replacement for typeid + // Pass identification, replacement for typeid + static char ID; + LowerSwitch() : FunctionPass(ID) { initializeLowerSwitchPass(*PassRegistry::getPassRegistry()); } @@ -68,8 +87,9 @@ namespace { : Low(low), High(high), BB(bb) {} }; - typedef std::vector<CaseRange> CaseVector; - typedef std::vector<CaseRange>::iterator CaseItr; + using CaseVector = std::vector<CaseRange>; + using CaseItr = std::vector<CaseRange>::iterator; + private: void processSwitchInst(SwitchInst *SI, SmallPtrSetImpl<BasicBlock*> &DeleteList); @@ -86,22 +106,24 @@ namespace { /// The comparison function for sorting the switch case values in the vector. /// WARNING: Case ranges should be disjoint! struct CaseCmp { - bool operator () (const LowerSwitch::CaseRange& C1, - const LowerSwitch::CaseRange& C2) { - + bool operator()(const LowerSwitch::CaseRange& C1, + const LowerSwitch::CaseRange& C2) { const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low); const ConstantInt* CI2 = cast<const ConstantInt>(C2.High); return CI1->getValue().slt(CI2->getValue()); } }; -} + +} // end anonymous namespace char LowerSwitch::ID = 0; -INITIALIZE_PASS(LowerSwitch, "lowerswitch", - "Lower SwitchInst's to branches", false, false) // Publicly exposed interface to pass... char &llvm::LowerSwitchID = LowerSwitch::ID; + +INITIALIZE_PASS(LowerSwitch, "lowerswitch", + "Lower SwitchInst's to branches", false, false) + // createLowerSwitchPass - Interface to this file... 
FunctionPass *llvm::createLowerSwitchPass() { return new LowerSwitch(); @@ -136,6 +158,7 @@ bool LowerSwitch::runOnFunction(Function &F) { static raw_ostream& operator<<(raw_ostream &O, const LowerSwitch::CaseVector &C) LLVM_ATTRIBUTE_USED; + static raw_ostream& operator<<(raw_ostream &O, const LowerSwitch::CaseVector &C) { O << "["; @@ -186,7 +209,7 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, } // Remove incoming values in the reverse order to prevent invalidating // *successive* index. - for (unsigned III : reverse(Indices)) + for (unsigned III : llvm::reverse(Indices)) PN->removeIncomingValue(III); } } @@ -294,8 +317,7 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, /// value, so the jump to the "default" branch is warranted. BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, BasicBlock* OrigBlock, - BasicBlock* Default) -{ + BasicBlock* Default) { Function* F = OrigBlock->getParent(); BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock"); F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf); @@ -442,7 +464,8 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, unsigned MaxPop = 0; BasicBlock *PopSucc = nullptr; - IntRange R = { INT64_MIN, INT64_MAX }; + IntRange R = {std::numeric_limits<int64_t>::min(), + std::numeric_limits<int64_t>::max()}; UnreachableRanges.push_back(R); for (const auto &I : Cases) { int64_t Low = I.Low->getSExtValue(); @@ -457,8 +480,8 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, assert(Low > LastRange.Low); LastRange.High = Low - 1; } - if (High != INT64_MAX) { - IntRange R = { High + 1, INT64_MAX }; + if (High != std::numeric_limits<int64_t>::max()) { + IntRange R = { High + 1, std::numeric_limits<int64_t>::max() }; UnreachableRanges.push_back(R); } @@ -487,8 +510,8 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, assert(MaxPop > 0 && PopSucc); Default = PopSucc; Cases.erase( - remove_if(Cases, - [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }), + llvm::remove_if( + Cases, [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }), Cases.end()); // If there are no cases left, just branch. 
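(Editor's example, illustrative and not part of the patch: the
numeric_limits hunks above sit in processSwitchInst's handling of an
unreachable default. Given

  switch i32 %v, label %unreachable [ i32 0, label %a
                                      i32 1, label %b
                                      i32 2, label %b ]

%b is the most popular successor, so the pass promotes %b to be the new
default, erases the two cases that target it via remove_if, and records the
uncovered intervals {INT64_MIN..-1} and {3..INT64_MAX} in UnreachableRanges,
starting from the full-range seed and splitting it as cases are scanned.)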
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index b659a2e4463f..29f289b62da0 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -15,12 +15,17 @@ #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" -#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include <vector> + using namespace llvm; #define DEBUG_TYPE "mem2reg" @@ -33,7 +38,7 @@ static bool promoteMemoryToRegister(Function &F, DominatorTree &DT, BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function bool Changed = false; - while (1) { + while (true) { Allocas.clear(); // Find allocas that are safe to promote, by looking at all instructions in @@ -65,15 +70,17 @@ PreservedAnalyses PromotePass::run(Function &F, FunctionAnalysisManager &AM) { } namespace { + struct PromoteLegacyPass : public FunctionPass { - static char ID; // Pass identification, replacement for typeid + // Pass identification, replacement for typeid + static char ID; + PromoteLegacyPass() : FunctionPass(ID) { initializePromoteLegacyPassPass(*PassRegistry::getPassRegistry()); } // runOnFunction - To run this pass, first we calculate the alloca // instructions that are safe for promotion, then we promote each one. - // bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; @@ -89,10 +96,12 @@ struct PromoteLegacyPass : public FunctionPass { AU.addRequired<DominatorTreeWrapperPass>(); AU.setPreservesCFG(); } - }; -} // end of anonymous namespace +}; + +} // end anonymous namespace char PromoteLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(PromoteLegacyPass, "mem2reg", "Promote Memory to " "Register", false, false) @@ -102,7 +111,6 @@ INITIALIZE_PASS_END(PromoteLegacyPass, "mem2reg", "Promote Memory to Register", false, false) // createPromoteMemoryToRegister - Provide an entry point to create this pass. -// FunctionPass *llvm::createPromoteMemoryToRegisterPass() { return new PromoteLegacyPass(); } diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp index 9f2ad540c83d..0f7bd76c03ca 100644 --- a/lib/Transforms/Utils/MetaRenamer.cpp +++ b/lib/Transforms/Utils/MetaRenamer.cpp @@ -15,16 +15,30 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/TypeFinder.h" #include "llvm/Pass.h" #include "llvm/Transforms/IPO.h" + using namespace llvm; +static const char *const metaNames[] = { + // See http://en.wikipedia.org/wiki/Metasyntactic_variable + "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge", + "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam" +}; + namespace { // This PRNG is from the ISO C spec. 
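For reference, the "PRNG from the ISO C spec" mentioned in the comment above is the example rand()/srand() from the C standard: a bare linear congruential generator. Quality is irrelevant for a renamer; what matters is identical behavior on every host, so renaming is reproducible. A standalone sketch (the name-plus-counter scheme here is this sketch's own way of keeping results unique, not a claim about Renamer::newName):

#include <cstdio>

struct PRNG {
  unsigned long Next = 1;
  void srand(unsigned Seed) { Next = Seed; }
  unsigned rand() {
    // The ISO C standard's example generator.
    Next = Next * 1103515245 + 12345;
    return (Next / 65536) % 32768;
  }
};

// A few of the metasyntactic names used by the pass.
static const char *const MetaNames[] = {"foo", "bar", "baz", "quux"};

int main() {
  PRNG Prng;
  Prng.srand(42);
  for (int I = 0; I != 4; ++I) // same four names on every run and platform
    std::printf("%s%d\n", MetaNames[Prng.rand() % 4], I);
}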
It is intentionally simple and
@@ -43,12 +57,6 @@ namespace {
     }
   };

-  static const char *const metaNames[] = {
-    // See http://en.wikipedia.org/wiki/Metasyntactic_variable
-    "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
-    "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
-  };
-
   struct Renamer {
     Renamer(unsigned int seed) {
       prng.srand(seed);
@@ -62,7 +70,9 @@ namespace {
   };

   struct MetaRenamer : public ModulePass {
-    static char ID; // Pass identification, replacement for typeid
+    // Pass identification, replacement for typeid
+    static char ID;
+
     MetaRenamer() : ModulePass(ID) {
       initializeMetaRenamerPass(*PassRegistry::getPassRegistry());
     }
@@ -123,7 +133,11 @@ namespace {
           TLI.getLibFunc(F, Tmp))
         continue;

-      F.setName(renamer.newName());
+      // Leave @main alone. The output of -metarenamer might be passed to
+      // lli for execution and the latter needs a main entry point.
+      if (Name != "main")
+        F.setName(renamer.newName());
+
       runOnFunction(F);
     }
     return true;
@@ -144,14 +158,17 @@ namespace {
       return true;
     }
   };
-}
+
+} // end anonymous namespace

 char MetaRenamer::ID = 0;
+
 INITIALIZE_PASS_BEGIN(MetaRenamer, "metarenamer",
                       "Assign new names to everything", false, false)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(MetaRenamer, "metarenamer",
                     "Assign new names to everything", false, false)
+
 //===----------------------------------------------------------------------===//
 //
 // MetaRenamer - Rename everything with metasyntactic names.
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index 2ef3d6336ae2..ba4b7f3cc263 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -243,7 +243,7 @@ std::string llvm::getUniqueModuleId(Module *M) {
   bool ExportsSymbols = false;
   auto AddGlobal = [&](GlobalValue &GV) {
     if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
-        !GV.hasExternalLinkage())
+        !GV.hasExternalLinkage() || GV.hasComdat())
       return;
     ExportsSymbols = true;
     Md5.update(GV.getName());
diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp
index d4cdaede6b86..d47be6ea566b 100644
--- a/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/lib/Transforms/Utils/PredicateInfo.cpp
@@ -49,9 +49,10 @@ INITIALIZE_PASS_END(PredicateInfoPrinterLegacyPass, "print-predicateinfo",
 static cl::opt<bool> VerifyPredicateInfo(
     "verify-predicateinfo", cl::init(false), cl::Hidden,
     cl::desc("Verify PredicateInfo in legacy printer pass."));
-namespace {
 DEBUG_COUNTER(RenameCounter, "predicateinfo-rename",
-              "Controls which variables are renamed with predicateinfo")
+              "Controls which variables are renamed with predicateinfo");
+
+namespace {
 // Given a predicate info that is a type of branching terminator, get the
 // branching block.
 const BasicBlock *getBranchBlock(const PredicateBase *PB) {
@@ -610,7 +611,12 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
     }
     convertUsesToDFSOrdered(Op, OrderedUses);
-    std::sort(OrderedUses.begin(), OrderedUses.end(), Compare);
+    // Here we require a stable sort because we do not bother to try to
+    // assign an order to the operands the uses represent. Thus, two
+    // uses in the same instruction do not have a strict sort order
+    // currently and will be considered equal. We could get rid of the
+    // stable sort by creating one if we wanted.
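The std::stable_sort call that the comment above guards follows next. The difference it buys, in a standalone C++ sketch: when the comparator treats two distinct elements as equal, std::sort may order them differently between runs or standard-library implementations, while std::stable_sort pins them to insertion order, keeping the pass deterministic.

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  struct Use { int DFSIn; const char *Name; };
  // "a"/"c" and "b"/"d" compare equal under the key-only comparator.
  std::vector<Use> Uses = {{2, "a"}, {1, "b"}, {2, "c"}, {1, "d"}};
  std::stable_sort(Uses.begin(), Uses.end(),
                   [](const Use &L, const Use &R) { return L.DFSIn < R.DFSIn; });
  for (const Use &U : Uses)
    std::printf("%d %s\n", U.DFSIn, U.Name); // always: 1 b, 1 d, 2 a, 2 c
}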
+ std::stable_sort(OrderedUses.begin(), OrderedUses.end(), Compare); SmallVector<ValueDFS, 8> RenameStack; // For each use, sorted into dfs order, push values and replaces uses with // top of stack, which will represent the reaching def. diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index cdba982e6641..fcd3bd08482a 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -21,25 +21,38 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Metadata.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/Support/Casting.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include <algorithm> +#include <cassert> +#include <iterator> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "mem2reg" @@ -103,7 +116,7 @@ struct AllocaInfo { bool OnlyUsedInOneBlock; Value *AllocaPointerVal; - DbgDeclareInst *DbgDeclare; + TinyPtrVector<DbgInfoIntrinsic *> DbgDeclares; void clear() { DefiningBlocks.clear(); @@ -112,7 +125,7 @@ struct AllocaInfo { OnlyBlock = nullptr; OnlyUsedInOneBlock = true; AllocaPointerVal = nullptr; - DbgDeclare = nullptr; + DbgDeclares.clear(); } /// Scan the uses of the specified alloca, filling in the AllocaInfo used @@ -147,27 +160,21 @@ struct AllocaInfo { } } - DbgDeclare = FindAllocaDbgDeclare(AI); + DbgDeclares = FindDbgAddrUses(AI); } }; // Data package used by RenamePass() class RenamePassData { public: - typedef std::vector<Value *> ValVector; + using ValVector = std::vector<Value *>; + + RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V) + : BB(B), Pred(P), Values(std::move(V)) {} - RenamePassData() : BB(nullptr), Pred(nullptr), Values() {} - RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V) - : BB(B), Pred(P), Values(V) {} BasicBlock *BB; BasicBlock *Pred; ValVector Values; - - void swap(RenamePassData &RHS) { - std::swap(BB, RHS.BB); - std::swap(Pred, RHS.Pred); - Values.swap(RHS.Values); - } }; /// \brief This assigns and keeps a per-bb relative ordering of load/store @@ -223,12 +230,15 @@ public: struct PromoteMem2Reg { /// The alloca instructions being promoted. std::vector<AllocaInst *> Allocas; + DominatorTree &DT; DIBuilder DIB; + /// A cache of @llvm.assume intrinsics used by SimplifyInstruction. AssumptionCache *AC; const SimplifyQuery SQ; + /// Reverse mapping of Allocas. 
DenseMap<AllocaInst *, unsigned> AllocaLookup; @@ -252,10 +262,9 @@ struct PromoteMem2Reg { /// For each alloca, we keep track of the dbg.declare intrinsic that /// describes it, if any, so that we can convert it to a dbg.value /// intrinsic if the alloca gets promoted. - SmallVector<DbgDeclareInst *, 8> AllocaDbgDeclares; + SmallVector<TinyPtrVector<DbgInfoIntrinsic *>, 8> AllocaDbgDeclares; /// The set of basic blocks the renamer has already visited. - /// SmallPtrSet<BasicBlock *, 16> Visited; /// Contains a stable numbering of basic blocks to avoid non-determinstic @@ -298,7 +307,7 @@ private: bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); }; -} // end of anonymous namespace +} // end anonymous namespace /// Given a LoadInst LI this adds assume(LI != null) after it. static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) { @@ -345,8 +354,8 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { /// and thus must be phi-ed with undef. We fall back to the standard alloca /// promotion algorithm in that case. static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, - LargeBlockInfo &LBI, DominatorTree &DT, - AssumptionCache *AC) { + LargeBlockInfo &LBI, const DataLayout &DL, + DominatorTree &DT, AssumptionCache *AC) { StoreInst *OnlyStore = Info.OnlyStore; bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); BasicBlock *StoreBB = OnlyStore->getParent(); @@ -380,7 +389,6 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, Info.UsingBlocks.push_back(StoreBB); continue; } - } else if (LI->getParent() != StoreBB && !DT.dominates(StoreBB, LI->getParent())) { // If the load and store are in different blocks, use BB dominance to @@ -402,7 +410,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, // that information when we erase this Load. So we preserve // it with an assume. if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !llvm::isKnownNonNullAt(ReplVal, LI, &DT)) + !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) addAssumeNonNull(AC, LI); LI->replaceAllUsesWith(ReplVal); @@ -416,11 +424,11 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, // Record debuginfo for the store and remove the declaration's // debuginfo. - if (DbgDeclareInst *DDI = Info.DbgDeclare) { + for (DbgInfoIntrinsic *DII : Info.DbgDeclares) { DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); - ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB); - DDI->eraseFromParent(); - LBI.deleteValue(DDI); + ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB); + DII->eraseFromParent(); + LBI.deleteValue(DII); } // Remove the (now dead) store and alloca. Info.OnlyStore->eraseFromParent(); @@ -449,6 +457,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, /// } static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, LargeBlockInfo &LBI, + const DataLayout &DL, DominatorTree &DT, AssumptionCache *AC) { // The trickiest case to handle is when we have large blocks. Because of this, @@ -457,7 +466,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // make it efficient to get the index of various operations in the block. // Walk the use-def list of the alloca, getting the locations of all stores. 
- typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy; + using StoresByIndexTy = SmallVector<std::pair<unsigned, StoreInst *>, 64>; StoresByIndexTy StoresByIndex; for (User *U : AI->users()) @@ -497,7 +506,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // information when we erase it. So we preserve it with an assume. Value *ReplVal = std::prev(I)->second->getOperand(0); if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !llvm::isKnownNonNullAt(ReplVal, LI, &DT)) + !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) addAssumeNonNull(AC, LI); LI->replaceAllUsesWith(ReplVal); @@ -511,9 +520,9 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, while (!AI->use_empty()) { StoreInst *SI = cast<StoreInst>(AI->user_back()); // Record debuginfo for the store before removing it. - if (DbgDeclareInst *DDI = Info.DbgDeclare) { + for (DbgInfoIntrinsic *DII : Info.DbgDeclares) { DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); - ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + ConvertDebugDeclareToDebugValue(DII, SI, DIB); } SI->eraseFromParent(); LBI.deleteValue(SI); @@ -523,9 +532,9 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, LBI.deleteValue(AI); // The alloca's debuginfo can be removed as well. - if (DbgDeclareInst *DDI = Info.DbgDeclare) { - DDI->eraseFromParent(); - LBI.deleteValue(DDI); + for (DbgInfoIntrinsic *DII : Info.DbgDeclares) { + DII->eraseFromParent(); + LBI.deleteValue(DII); } ++NumLocalPromoted; @@ -567,7 +576,7 @@ void PromoteMem2Reg::run() { // If there is only a single store to this value, replace any loads of // it that are directly dominated by the definition with the value stored. if (Info.DefiningBlocks.size() == 1) { - if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AC)) { + if (rewriteSingleStoreAlloca(AI, Info, LBI, SQ.DL, DT, AC)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); ++NumSingleStore; @@ -578,7 +587,7 @@ void PromoteMem2Reg::run() { // If the alloca is only read and written in one basic block, just perform a // linear sweep over the block to eliminate it. if (Info.OnlyUsedInOneBlock && - promoteSingleBlockAlloca(AI, Info, LBI, DT, AC)) { + promoteSingleBlockAlloca(AI, Info, LBI, SQ.DL, DT, AC)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); continue; @@ -593,8 +602,8 @@ void PromoteMem2Reg::run() { } // Remember the dbg.declare intrinsic describing this alloca, if any. - if (Info.DbgDeclare) - AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; + if (!Info.DbgDeclares.empty()) + AllocaDbgDeclares[AllocaNum] = Info.DbgDeclares; // Keep the reverse mapping of the 'Allocas' array for the rename pass. AllocaLookup[Allocas[AllocaNum]] = AllocaNum; @@ -604,7 +613,6 @@ void PromoteMem2Reg::run() { // nodes and see if we can optimize out some work by avoiding insertion of // dead phi nodes. - // Unique the set of defining blocks for efficient lookup. 
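The heart of promoteSingleBlockAlloca above: record (instruction index, stored value) for every store, sorted by index, then answer "what does a load at index L see?" with a lower_bound. A self-contained sketch of that query over plain pairs (the real code compares pairs with llvm::less_first and returns undef when no store precedes the load):

#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>
#include <vector>

// Returns the value of the last store strictly before LoadIdx, or nullptr
// if the load would read the alloca before any write (i.e. undef).
static const int *valueAtLoad(
    const std::vector<std::pair<unsigned, int>> &StoresByIndex,
    unsigned LoadIdx) {
  auto I = std::lower_bound(
      StoresByIndex.begin(), StoresByIndex.end(), std::make_pair(LoadIdx, 0),
      [](const std::pair<unsigned, int> &A, const std::pair<unsigned, int> &B) {
        return A.first < B.first;
      });
  if (I == StoresByIndex.begin())
    return nullptr;
  return &std::prev(I)->second;
}

int main() {
  std::vector<std::pair<unsigned, int>> Stores = {{3, 7}, {9, 42}};
  assert(valueAtLoad(Stores, 1) == nullptr); // load before the first store
  assert(*valueAtLoad(Stores, 5) == 7);
  assert(*valueAtLoad(Stores, 12) == 42);
}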
SmallPtrSet<BasicBlock *, 32> DefBlocks; DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); @@ -629,8 +637,8 @@ void PromoteMem2Reg::run() { }); unsigned CurrentVersion = 0; - for (unsigned i = 0, e = PHIBlocks.size(); i != e; ++i) - QueuePhiNode(PHIBlocks[i], AllocaNum, CurrentVersion); + for (BasicBlock *BB : PHIBlocks) + QueuePhiNode(BB, AllocaNum, CurrentVersion); } if (Allocas.empty()) @@ -641,19 +649,16 @@ void PromoteMem2Reg::run() { // Set the incoming values for the basic block to be null values for all of // the alloca's. We do this in case there is a load of a value that has not // been stored yet. In this case, it will get this null value. - // RenamePassData::ValVector Values(Allocas.size()); for (unsigned i = 0, e = Allocas.size(); i != e; ++i) Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); // Walks all basic blocks in the function performing the SSA rename algorithm // and inserting the phi nodes we marked as necessary - // std::vector<RenamePassData> RenamePassWorkList; RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values)); do { - RenamePassData RPD; - RPD.swap(RenamePassWorkList.back()); + RenamePassData RPD = std::move(RenamePassWorkList.back()); RenamePassWorkList.pop_back(); // RenamePass may add new worklist entries. RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); @@ -663,9 +668,7 @@ void PromoteMem2Reg::run() { Visited.clear(); // Remove the allocas themselves from the function. - for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { - Instruction *A = Allocas[i]; - + for (Instruction *A : Allocas) { // If there are any uses of the alloca instructions left, they must be in // unreachable basic blocks that were not processed by walking the dominator // tree. Just delete the users now. @@ -675,9 +678,9 @@ void PromoteMem2Reg::run() { } // Remove alloca's dbg.declare instrinsics from the function. - for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) - if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) - DDI->eraseFromParent(); + for (auto &Declares : AllocaDbgDeclares) + for (auto *DII : Declares) + DII->eraseFromParent(); // Loop over all of the PHI nodes and see if there are any that we can get // rid of because they merge all of the same incoming values. This can @@ -714,7 +717,6 @@ void PromoteMem2Reg::run() { // hasn't traversed. If this is the case, the PHI nodes may not // have incoming values for all predecessors. Loop over all PHI nodes we have // created, inserting undef values if they are missing any incoming values. - // for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); @@ -762,8 +764,8 @@ void PromoteMem2Reg::run() { while ((SomePHI = dyn_cast<PHINode>(BBI++)) && SomePHI->getNumIncomingValues() == NumBadPreds) { Value *UndefVal = UndefValue::get(SomePHI->getType()); - for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred) - SomePHI->addIncoming(UndefVal, Preds[pred]); + for (BasicBlock *Pred : Preds) + SomePHI->addIncoming(UndefVal, Pred); } } @@ -779,7 +781,6 @@ void PromoteMem2Reg::ComputeLiveInBlocks( AllocaInst *AI, AllocaInfo &Info, const SmallPtrSetImpl<BasicBlock *> &DefBlocks, SmallPtrSetImpl<BasicBlock *> &LiveInBlocks) { - // To determine liveness, we must iterate through the predecessors of blocks // where the def is live. Blocks are added to the worklist if we need to // check their predecessors. Start with all the using blocks. 
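The rename driver above avoids recursion (deep CFGs would overflow the call stack) by keeping an explicit worklist, and the patch's RenamePassData move constructor lets entries be moved out of the worklist instead of swapped. The control skeleton, as a standalone sketch over a toy CFG:

#include <cstdio>
#include <set>
#include <utility>
#include <vector>

struct Block { int Id; std::vector<Block *> Succs; };

static void renameAll(Block *Entry) {
  struct WorkItem { Block *BB; std::vector<int> Values; };
  std::set<Block *> Visited;
  std::vector<WorkItem> Worklist;
  Worklist.push_back({Entry, {0}});
  do {
    // Move out of the back slot, then pop: no copy of Values is made.
    WorkItem Item = std::move(Worklist.back());
    Worklist.pop_back();
    if (!Visited.insert(Item.BB).second)
      continue; // already renamed along another path
    std::printf("visit block %d\n", Item.BB->Id);
    for (Block *S : Item.BB->Succs)
      Worklist.push_back({S, Item.Values});
  } while (!Worklist.empty());
}

int main() {
  Block C{2, {}}, B{1, {&C}}, A{0, {&B, &C}};
  renameAll(&A);
}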
@@ -834,9 +835,7 @@ void PromoteMem2Reg::ComputeLiveInBlocks( // Since the value is live into BB, it is either defined in a predecessor or // live into it to. Add the preds to the worklist unless they are a // defining block. - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *P = *PI; - + for (BasicBlock *P : predecessors(BB)) { // The value is not live into a predecessor if it defines the value. if (DefBlocks.count(P)) continue; @@ -906,8 +905,8 @@ NextIteration: // The currently active variable for this block is now the PHI. IncomingVals[AllocaNo] = APN; - if (DbgDeclareInst *DDI = AllocaDbgDeclares[AllocaNo]) - ConvertDebugDeclareToDebugValue(DDI, APN, DIB); + for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[AllocaNo]) + ConvertDebugDeclareToDebugValue(DII, APN, DIB); // Get the next phi node. ++PNI; @@ -943,7 +942,7 @@ NextIteration: // that information when we erase this Load. So we preserve // it with an assume. if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !llvm::isKnownNonNullAt(V, LI, &DT)) + !isKnownNonZero(V, SQ.DL, 0, AC, LI, &DT)) addAssumeNonNull(AC, LI); // Anything using the load now uses the current value. @@ -963,8 +962,8 @@ NextIteration: // what value were we writing? IncomingVals[ai->second] = SI->getOperand(0); // Record debuginfo for the store before removing it. - if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) - ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[ai->second]) + ConvertDebugDeclareToDebugValue(DII, SI, DIB); BB->getInstList().erase(SI); } } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 6ccf54e49dd3..e4b20b0faa15 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/BasicBlock.h" @@ -39,12 +38,13 @@ using namespace llvm; #define DEBUG_TYPE "ssaupdater" -typedef DenseMap<BasicBlock*, Value*> AvailableValsTy; +using AvailableValsTy = DenseMap<BasicBlock *, Value *>; + static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast<AvailableValsTy*>(AV); } -SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) +SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode *> *NewPHI) : InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { @@ -72,7 +72,7 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { } static bool IsEquivalentPHI(PHINode *PHI, - SmallDenseMap<BasicBlock*, Value*, 8> &ValueMapping) { + SmallDenseMap<BasicBlock *, Value *, 8> &ValueMapping) { unsigned PHINumValues = PHI->getNumIncomingValues(); if (PHINumValues != ValueMapping.size()) return false; @@ -100,7 +100,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // Otherwise, we have the hard case. Get the live-in values for each // predecessor. - SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues; + SmallVector<std::pair<BasicBlock *, Value *>, 8> PredValues; Value *SingularValue = nullptr; // We can get our predecessor info by walking the pred_iterator list, but it @@ -145,8 +145,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // Otherwise, we do need a PHI: check to see if we already have one available // in this block that produces the right value. 
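ComputeLiveInBlocks, shown ending above, is a plain backward reachability walk: start from the blocks that use the value and flood through predecessors, stopping at blocks that define it. A standalone sketch of that core (the real code additionally handles blocks that both define and use the value):

#include <cstdio>
#include <set>
#include <vector>

struct Block { int Id; std::vector<Block *> Preds; };

static std::set<Block *> liveInBlocks(const std::vector<Block *> &UseBlocks,
                                      const std::set<Block *> &DefBlocks) {
  std::set<Block *> LiveIn;
  std::vector<Block *> Worklist(UseBlocks.begin(), UseBlocks.end());
  while (!Worklist.empty()) {
    Block *BB = Worklist.back();
    Worklist.pop_back();
    if (!LiveIn.insert(BB).second)
      continue;
    for (Block *P : BB->Preds)
      if (!DefBlocks.count(P)) // a def kills the upward exposure
        Worklist.push_back(P);
  }
  return LiveIn;
}

int main() {
  Block Entry{0, {}}, Def{1, {&Entry}}, Mid{2, {&Def}}, Use{3, {&Mid}};
  auto Live = liveInBlocks({&Use}, {&Def});
  std::printf("%zu live-in blocks\n", Live.size()); // Use and Mid
}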
if (isa<PHINode>(BB->begin())) { - SmallDenseMap<BasicBlock*, Value*, 8> ValueMapping(PredValues.begin(), - PredValues.end()); + SmallDenseMap<BasicBlock *, Value *, 8> ValueMapping(PredValues.begin(), + PredValues.end()); PHINode *SomePHI; for (BasicBlock::iterator It = BB->begin(); (SomePHI = dyn_cast<PHINode>(It)); ++It) { @@ -218,11 +218,11 @@ namespace llvm { template<> class SSAUpdaterTraits<SSAUpdater> { public: - typedef BasicBlock BlkT; - typedef Value *ValT; - typedef PHINode PhiT; + using BlkT = BasicBlock; + using ValT = Value *; + using PhiT = PHINode; + using BlkSucc_iterator = succ_iterator; - typedef succ_iterator BlkSucc_iterator; static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); } static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); } @@ -253,7 +253,7 @@ public: /// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds /// vector, set Info->NumPreds, and allocate space in Info->Preds. static void FindPredecessorBlocks(BasicBlock *BB, - SmallVectorImpl<BasicBlock*> *Preds) { + SmallVectorImpl<BasicBlock *> *Preds) { // We can get our predecessor info by walking the pred_iterator list, // but it is relatively slow. If we already have PHI nodes in this // block, walk one of them to get the predecessor list instead. @@ -293,7 +293,6 @@ public: } /// ValueIsPHI - Check if a value is a PHI. - /// static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) { return dyn_cast<PHINode>(Val); } @@ -333,7 +332,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { //===----------------------------------------------------------------------===// LoadAndStorePromoter:: -LoadAndStorePromoter(ArrayRef<const Instruction*> Insts, +LoadAndStorePromoter(ArrayRef<const Instruction *> Insts, SSAUpdater &S, StringRef BaseName) : SSA(S) { if (Insts.empty()) return; @@ -349,11 +348,11 @@ LoadAndStorePromoter(ArrayRef<const Instruction*> Insts, } void LoadAndStorePromoter:: -run(const SmallVectorImpl<Instruction*> &Insts) const { +run(const SmallVectorImpl<Instruction *> &Insts) const { // First step: bucket up uses of the alloca by the block they occur in. // This is important because we have to handle multiple defs/uses in a block // ourselves: SSAUpdater is purely for cross-block references. - DenseMap<BasicBlock*, TinyPtrVector<Instruction*>> UsesByBlock; + DenseMap<BasicBlock *, TinyPtrVector<Instruction *>> UsesByBlock; for (Instruction *User : Insts) UsesByBlock[User->getParent()].push_back(User); @@ -361,12 +360,12 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { // Okay, now we can iterate over all the blocks in the function with uses, // processing them. Keep track of which loads are loading a live-in value. // Walk the uses in the use-list order to be determinstic. - SmallVector<LoadInst*, 32> LiveInLoads; - DenseMap<Value*, Value*> ReplacedLoads; + SmallVector<LoadInst *, 32> LiveInLoads; + DenseMap<Value *, Value *> ReplacedLoads; for (Instruction *User : Insts) { BasicBlock *BB = User->getParent(); - TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB]; + TinyPtrVector<Instruction *> &BlockUses = UsesByBlock[BB]; // If this block has already been processed, ignore this repeat use. 
if (BlockUses.empty()) continue; @@ -489,7 +488,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { bool LoadAndStorePromoter::isInstInList(Instruction *I, - const SmallVectorImpl<Instruction*> &Insts) + const SmallVectorImpl<Instruction *> &Insts) const { return is_contained(Insts, I); } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 8784b9702141..f02f80cc1b78 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -22,12 +22,14 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" @@ -35,8 +37,8 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" @@ -53,6 +55,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" @@ -73,6 +76,7 @@ #include <iterator> #include <map> #include <set> +#include <tuple> #include <utility> #include <vector> @@ -141,12 +145,13 @@ namespace { // The first field contains the value that the switch produces when a certain // case group is selected, and the second field is a vector containing the // cases composing the case group. -typedef SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2> - SwitchCaseResultVectorTy; +using SwitchCaseResultVectorTy = + SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>; + // The first field contains the phi node that generates a result of the switch // and the second field contains the value generated for a certain case in the // switch for that PHI. -typedef SmallVector<std::pair<PHINode *, Constant *>, 4> SwitchCaseResultsTy; +using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>; /// ValueEqualityComparisonCase - Represents a case of a switch. struct ValueEqualityComparisonCase { @@ -167,11 +172,9 @@ struct ValueEqualityComparisonCase { class SimplifyCFGOpt { const TargetTransformInfo &TTI; const DataLayout &DL; - unsigned BonusInstThreshold; - AssumptionCache *AC; SmallPtrSetImpl<BasicBlock *> *LoopHeaders; - // See comments in SimplifyCFGOpt::SimplifySwitch. 
- bool LateSimplifyCFG; + const SimplifyCFGOptions &Options; + Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases( TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases); @@ -194,11 +197,9 @@ class SimplifyCFGOpt { public: SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL, - unsigned BonusInstThreshold, AssumptionCache *AC, SmallPtrSetImpl<BasicBlock *> *LoopHeaders, - bool LateSimplifyCFG) - : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC), - LoopHeaders(LoopHeaders), LateSimplifyCFG(LateSimplifyCFG) {} + const SimplifyCFGOptions &Opts) + : TTI(TTI), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {} bool run(BasicBlock *BB); }; @@ -438,18 +439,24 @@ namespace { /// fail. struct ConstantComparesGatherer { const DataLayout &DL; - Value *CompValue; /// Value found for the switch comparison - Value *Extra; /// Extra clause to be checked before the switch - SmallVector<ConstantInt *, 8> Vals; /// Set of integers to match in switch - unsigned UsedICmps; /// Number of comparisons matched in the and/or chain + + /// Value found for the switch comparison + Value *CompValue = nullptr; + + /// Extra clause to be checked before the switch + Value *Extra = nullptr; + + /// Set of integers to match in switch + SmallVector<ConstantInt *, 8> Vals; + + /// Number of comparisons matched in the and/or chain + unsigned UsedICmps = 0; /// Construct and compute the result for the comparison instruction Cond - ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) - : DL(DL), CompValue(nullptr), Extra(nullptr), UsedICmps(0) { + ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) { gather(Cond); } - /// Prevent copy ConstantComparesGatherer(const ConstantComparesGatherer &) = delete; ConstantComparesGatherer & operator=(const ConstantComparesGatherer &) = delete; @@ -487,7 +494,6 @@ private: // (x & ~2^z) == y --> x == y || x == y|2^z // This undoes a transformation done by instcombine to fuse 2 compares. if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) { - // It's a little bit hard to see why the following transformations are // correct. Here is a CVC3 program to verify them for 64-bit values: @@ -770,6 +776,31 @@ static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1, return false; } +// Set branch weights on SwitchInst. This sets the metadata if there is at +// least one non-zero weight. +static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights) { + // Check that there is at least one non-zero weight. Otherwise, pass + // nullptr to setMetadata which will erase the existing metadata. + MDNode *N = nullptr; + if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; })) + N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights); + SI->setMetadata(LLVMContext::MD_prof, N); +} + +// Similar to the above, but for branch and select instructions that take +// exactly 2 weights. +static void setBranchWeights(Instruction *I, uint32_t TrueWeight, + uint32_t FalseWeight) { + assert(isa<BranchInst>(I) || isa<SelectInst>(I)); + // Check that there is at least one non-zero weight. Otherwise, pass + // nullptr to setMetadata which will erase the existing metadata. 
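The two setBranchWeights helpers introduced next centralize one pattern: all-zero weights carry no information, so rather than attaching a degenerate !prof node the helper clears the metadata (setMetadata with nullptr erases). A standalone analogue of the decision, using std::optional (C++17) to stand in for "metadata or none":

#include <cstdint>
#include <optional>
#include <vector>

using Weights = std::vector<uint32_t>;

static std::optional<Weights> makeBranchWeights(const Weights &W) {
  for (uint32_t X : W)
    if (X != 0)
      return W;        // at least one non-zero weight: keep the profile data
  return std::nullopt; // all zero: caller should erase existing metadata
}

int main() {
  return makeBranchWeights({0, 0}).has_value() ? 1 : 0; // exits 0: no payload
}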
+ MDNode *N = nullptr; + if (TrueWeight || FalseWeight) + N = MDBuilder(I->getParent()->getContext()) + .createBranchWeights(TrueWeight, FalseWeight); + I->setMetadata(LLVMContext::MD_prof, N); +} + /// If TI is known to be a terminator instruction and its block is known to /// only have a single predecessor block, check to see if that predecessor is /// also a value comparison with the same value, and if that comparison @@ -859,9 +890,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( } } if (HasWeight && Weights.size() >= 2) - SI->setMetadata(LLVMContext::MD_prof, - MDBuilder(SI->getParent()->getContext()) - .createBranchWeights(Weights)); + setBranchWeights(SI, Weights); DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; @@ -1166,9 +1195,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); - NewSI->setMetadata( - LLVMContext::MD_prof, - MDBuilder(BB->getContext()).createBranchWeights(MDWeights)); + setBranchWeights(NewSI, MDWeights); } EraseTerminatorInstAndDCECond(PTI); @@ -1279,9 +1306,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, // I1 and I2 are being combined into a single instruction. Its debug // location is the merged locations of the original instructions. - if (!isa<CallInst>(I1)) - I1->setDebugLoc( - DILocation::getMergedLocation(I1->getDebugLoc(), I2->getDebugLoc())); + I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); I2->eraseFromParent(); Changed = true; @@ -1535,20 +1560,20 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) { I0->getOperandUse(O).set(NewOperands[O]); I0->moveBefore(&*BBEnd->getFirstInsertionPt()); - // The debug location for the "common" instruction is the merged locations of - // all the commoned instructions. We start with the original location of the - // "common" instruction and iteratively merge each location in the loop below. - const DILocation *Loc = I0->getDebugLoc(); - // Update metadata and IR flags, and merge debug locations. for (auto *I : Insts) if (I != I0) { - Loc = DILocation::getMergedLocation(Loc, I->getDebugLoc()); + // The debug location for the "common" instruction is the merged locations + // of all the commoned instructions. We start with the original location + // of the "common" instruction and iteratively merge each location in the + // loop below. + // This is an N-way merge, which will be inefficient if I0 is a CallInst. + // However, as N-way merge for CallInst is rare, so we use simplified API + // instead of using complex API for N-way merge. + I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc()); combineMetadataForCSE(I0, I); I0->andIRFlags(I); } - if (!isa<CallInst>(I0)) - I0->setDebugLoc(Loc); if (!isa<StoreInst>(I0)) { // canSinkLastInstruction checked that all instructions were used by @@ -1582,9 +1607,9 @@ namespace { ArrayRef<BasicBlock*> Blocks; SmallVector<Instruction*,4> Insts; bool Fail; + public: - LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : - Blocks(Blocks) { + LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) { reset(); } @@ -1608,7 +1633,7 @@ namespace { return !Fail; } - void operator -- () { + void operator--() { if (Fail) return; for (auto *&Inst : Insts) { @@ -1916,6 +1941,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // - All of their uses are in CondBB. 
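The LockstepReverseIterator defined above walks several blocks' instruction lists backwards in unison for sink-from-the-end matching. Its shape, reduced to plain vectors (a sketch of the iteration protocol only; the real class also re-checks that the instructions still match):

#include <cstdio>
#include <vector>

struct LockstepReverse {
  const std::vector<std::vector<int>> &Lists;
  std::vector<size_t> Pos; // per-list index of the current element
  bool Fail = false;

  explicit LockstepReverse(const std::vector<std::vector<int>> &L) : Lists(L) {
    for (const auto &V : L) {
      if (V.empty()) { Fail = true; return; }
      Pos.push_back(V.size() - 1);
    }
  }
  bool isValid() const { return !Fail; }
  void operator--() {
    for (size_t &P : Pos) {
      if (P == 0) { Fail = true; return; } // any list exhausted: stop
      --P;
    }
  }
};

int main() {
  std::vector<std::vector<int>> Blocks = {{1, 2, 3}, {9, 3}};
  for (LockstepReverse It(Blocks); It.isValid(); --It)
    std::printf("%d %d\n", Blocks[0][It.Pos[0]], Blocks[1][It.Pos[1]]);
  // Prints "3 3" then "2 9", then stops: the second list ran out.
}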
SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts; + SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics; + unsigned SpeculationCost = 0; Value *SpeculatedStoreValue = nullptr; StoreInst *SpeculatedStore = nullptr; @@ -1924,8 +1951,10 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, BBI != BBE; ++BBI) { Instruction *I = &*BBI; // Skip debug info. - if (isa<DbgInfoIntrinsic>(I)) + if (isa<DbgInfoIntrinsic>(I)) { + SpeculatedDbgIntrinsics.push_back(I); continue; + } // Only speculatively execute a single instruction (not counting the // terminator) for now. @@ -2030,11 +2059,10 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, if (Invert) std::swap(TrueV, FalseV); Value *S = Builder.CreateSelect( - BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName(), BI); + BrCond, TrueV, FalseV, "spec.store.select", BI); SpeculatedStore->setOperand(0, S); - SpeculatedStore->setDebugLoc( - DILocation::getMergedLocation( - BI->getDebugLoc(), SpeculatedStore->getDebugLoc())); + SpeculatedStore->applyMergedLocation(BI->getDebugLoc(), + SpeculatedStore->getDebugLoc()); } // Metadata can be dependent on the condition we are hoisting above. @@ -2066,11 +2094,17 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, if (Invert) std::swap(TrueV, FalseV); Value *V = Builder.CreateSelect( - BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName(), BI); + BrCond, TrueV, FalseV, "spec.select", BI); PN->setIncomingValue(OrigI, V); PN->setIncomingValue(ThenI, V); } + // Remove speculated dbg intrinsics. + // FIXME: Is it possible to do this in a more elegant way? Moving/merging the + // dbg value for the different flows and inserting it after the select. + for (Instruction *I : SpeculatedDbgIntrinsics) + I->eraseFromParent(); + ++NumSpeculations; return true; } @@ -2507,7 +2541,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { else { // For unconditional branch, check for a simple CFG pattern, where // BB has a single predecessor and BB's successor is also its predecessor's - // successor. If such pattern exisits, check for CSE between BB and its + // successor. If such pattern exists, check for CSE between BB and its // predecessor. 
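The store-speculation trick above ("spec.store.select") in source-level terms: a conditional store becomes an unconditional store of a selected value, so the then-block can be flattened away. Legal only when the location is known dereferenceable and writable on both paths; the pass checks that, this sketch does not:

#include <cassert>

static void conditionalStore(bool C, int &Slot, int V) {
  // Before: if (C) Slot = V;
  int SpecStoreSelect = C ? V : Slot; // the "spec.store.select" value
  Slot = SpecStoreSelect;             // the store now executes on both paths
}

int main() {
  int S = 1;
  conditionalStore(false, S, 9);
  assert(S == 1); // old value rewritten in place: no visible change
  conditionalStore(true, S, 9);
  assert(S == 9);
}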
if (BasicBlock *PB = BB->getSinglePredecessor()) if (BranchInst *PBI = dyn_cast<BranchInst>(PB->getTerminator())) @@ -2725,9 +2759,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end()); - PBI->setMetadata( - LLVMContext::MD_prof, - MDBuilder(BI->getContext()).createBranchWeights(MDWeights)); + setBranchWeights(PBI, MDWeights[0], MDWeights[1]); } else PBI->setMetadata(LLVMContext::MD_prof, nullptr); } else { @@ -2860,7 +2892,8 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, - bool InvertPCond, bool InvertQCond) { + bool InvertPCond, bool InvertQCond, + const DataLayout &DL) { auto IsaBitcastOfPointerType = [](const Instruction &I) { return Operator::getOpcode(&I) == Instruction::BitCast && I.getType()->isPointerTy(); @@ -2887,7 +2920,9 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, else return false; } - return N <= PHINodeFoldingThreshold; + // The store we want to merge is counted in N, so add 1 to make sure + // we're counting the instructions that would be left. + return N <= (PHINodeFoldingThreshold + 1); }; if (!MergeCondStoresAggressively && @@ -2966,6 +3001,29 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, PStore->getAAMetadata(AAMD, /*Merge=*/false); PStore->getAAMetadata(AAMD, /*Merge=*/true); SI->setAAMetadata(AAMD); + unsigned PAlignment = PStore->getAlignment(); + unsigned QAlignment = QStore->getAlignment(); + unsigned TypeAlignment = + DL.getABITypeAlignment(SI->getValueOperand()->getType()); + unsigned MinAlignment; + unsigned MaxAlignment; + std::tie(MinAlignment, MaxAlignment) = std::minmax(PAlignment, QAlignment); + // Choose the minimum alignment. If we could prove both stores execute, we + // could use biggest one. In this case, though, we only know that one of the + // stores executes. And we don't know it's safe to take the alignment from a + // store that doesn't execute. + if (MinAlignment != 0) { + // Choose the minimum of all non-zero alignments. + SI->setAlignment(MinAlignment); + } else if (MaxAlignment != 0) { + // Choose the minimal alignment between the non-zero alignment and the ABI + // default alignment for the type of the stored value. + SI->setAlignment(std::min(MaxAlignment, TypeAlignment)); + } else { + // If both alignments are zero, use ABI default alignment for the type of + // the stored value. + SI->setAlignment(TypeAlignment); + } QStore->eraseFromParent(); PStore->eraseFromParent(); @@ -2973,7 +3031,8 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, return true; } -static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { +static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, + const DataLayout &DL) { // The intention here is to find diamonds or triangles (see below) where each // conditional block contains a store to the same address. Both of these // stores are conditional, so they can't be unconditionally sunk. But it may @@ -3001,7 +3060,6 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { // We model triangles as a type of diamond with a nullptr "true" block. // Triangles are canonicalized so that the fallthrough edge is represented by // a true condition, as in the diagram above. 
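The alignment logic added to mergeConditionalStoreToAddress above, as a standalone function with 0 meaning "alignment unknown". Since only one of the two original stores executes, the safe choice is the minimum of whatever is known, bounded by the type's ABI alignment when in doubt:

#include <algorithm>
#include <cassert>
#include <tuple>

static unsigned mergedAlignment(unsigned A, unsigned B, unsigned ABIAlign) {
  unsigned MinA, MaxA;
  std::tie(MinA, MaxA) = std::minmax(A, B);
  if (MinA != 0)
    return MinA;                     // both known: take the smaller one
  if (MaxA != 0)
    return std::min(MaxA, ABIAlign); // one known: clamp by the ABI default
  return ABIAlign;                   // neither known: ABI default
}

int main() {
  assert(mergedAlignment(8, 16, 4) == 8);
  assert(mergedAlignment(0, 16, 4) == 4);
  assert(mergedAlignment(0, 2, 4) == 2);
  assert(mergedAlignment(0, 0, 4) == 4);
}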
- // BasicBlock *PTB = PBI->getSuccessor(0); BasicBlock *PFB = PBI->getSuccessor(1); BasicBlock *QTB = QBI->getSuccessor(0); @@ -3076,7 +3134,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { bool Changed = false; for (auto *Address : CommonAddresses) Changed |= mergeConditionalStoreToAddress( - PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond); + PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL); return Changed; } @@ -3141,7 +3199,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // If both branches are conditional and both contain stores to the same // address, remove the stores from the conditionals and create a conditional // merged store at the end. - if (MergeCondStores && mergeConditionalStores(PBI, BI)) + if (MergeCondStores && mergeConditionalStores(PBI, BI, DL)) return true; // If this is a conditional branch in an empty block, and if any @@ -3270,9 +3328,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // Halve the weights if any of them cannot fit in an uint32_t FitWeights(NewWeights); - PBI->setMetadata(LLVMContext::MD_prof, - MDBuilder(BI->getContext()) - .createBranchWeights(NewWeights[0], NewWeights[1])); + setBranchWeights(PBI, NewWeights[0], NewWeights[1]); } // OtherDest may have phi nodes. If so, add an entry from PBI's @@ -3310,9 +3366,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, FitWeights(NewWeights); - NV->setMetadata(LLVMContext::MD_prof, - MDBuilder(BI->getContext()) - .createBranchWeights(NewWeights[0], NewWeights[1])); + setBranchWeights(NV, NewWeights[0], NewWeights[1]); } } } @@ -3367,9 +3421,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, // Create a conditional branch sharing the condition of the select. BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB); if (TrueWeight != FalseWeight) - NewBI->setMetadata(LLVMContext::MD_prof, - MDBuilder(OldTerm->getContext()) - .createBranchWeights(TrueWeight, FalseWeight)); + setBranchWeights(NewBI, TrueWeight, FalseWeight); } } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) { // Neither of the selected blocks were successors, so this @@ -3464,10 +3516,9 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { /// /// We prefer to split the edge to 'end' so that there is a true/false entry to /// the PHI, merging the third icmp into the switch. -static bool TryToSimplifyUncondBranchWithICmpInIt( +static bool tryToSimplifyUncondBranchWithICmpInIt( ICmpInst *ICI, IRBuilder<> &Builder, const DataLayout &DL, - const TargetTransformInfo &TTI, unsigned BonusInstThreshold, - AssumptionCache *AC) { + const TargetTransformInfo &TTI, const SimplifyCFGOptions &Options) { BasicBlock *BB = ICI->getParent(); // If the block has any PHIs in it or the icmp has multiple uses, it is too @@ -3502,7 +3553,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( ICI->eraseFromParent(); } // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } // Ok, the block is reachable from the default dest. If the constant we're @@ -3518,7 +3569,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. 
- return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } // The use of the icmp has to be in the 'end' block, by the only PHI node in @@ -3556,9 +3607,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( Weights.push_back(Weights[0]); SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); - SI->setMetadata( - LLVMContext::MD_prof, - MDBuilder(SI->getContext()).createBranchWeights(MDWeights)); + setBranchWeights(SI, MDWeights); } } SI->addCase(Cst, NewBB); @@ -4285,10 +4334,7 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { TrueWeight /= 2; FalseWeight /= 2; } - NewBI->setMetadata(LLVMContext::MD_prof, - MDBuilder(SI->getContext()) - .createBranchWeights((uint32_t)TrueWeight, - (uint32_t)FalseWeight)); + setBranchWeights(NewBI, TrueWeight, FalseWeight); } } @@ -4316,7 +4362,7 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { /// Compute masked bits for the condition of a switch /// and use it to remove dead cases. -static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, +static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, const DataLayout &DL) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); @@ -4385,9 +4431,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, } if (HasWeight && Weights.size() >= 2) { SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); - SI->setMetadata(LLVMContext::MD_prof, - MDBuilder(SI->getParent()->getContext()) - .createBranchWeights(MDWeights)); + setBranchWeights(SI, MDWeights); } return !DeadCases.empty(); @@ -4429,38 +4473,59 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, /// Try to forward the condition of a switch instruction to a phi node /// dominated by the switch, if that would mean that some of the destination -/// blocks of the switch can be folded away. -/// Returns true if a change is made. +/// blocks of the switch can be folded away. Return true if a change is made. static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { - typedef DenseMap<PHINode *, SmallVector<int, 4>> ForwardingNodesMap; - ForwardingNodesMap ForwardingNodes; + using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>; - for (auto Case : SI->cases()) { + ForwardingNodesMap ForwardingNodes; + BasicBlock *SwitchBlock = SI->getParent(); + bool Changed = false; + for (auto &Case : SI->cases()) { ConstantInt *CaseValue = Case.getCaseValue(); BasicBlock *CaseDest = Case.getCaseSuccessor(); - int PhiIndex; - PHINode *PHI = - FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIndex); - if (!PHI) - continue; + // Replace phi operands in successor blocks that are using the constant case + // value rather than the switch condition variable: + // switchbb: + // switch i32 %x, label %default [ + // i32 17, label %succ + // ... + // succ: + // %r = phi i32 ... [ 17, %switchbb ] ... + // --> + // %r = phi i32 ... [ %x, %switchbb ] ... + + for (Instruction &InstInCaseDest : *CaseDest) { + auto *Phi = dyn_cast<PHINode>(&InstInCaseDest); + if (!Phi) break; + + // This only works if there is exactly 1 incoming edge from the switch to + // a phi. If there is >1, that means multiple cases of the switch map to 1 + // value in the phi, and that phi value is not the switch condition. 
Thus, + // this transform would not make sense (the phi would be invalid because + // a phi can't have different incoming values from the same block). + int SwitchBBIdx = Phi->getBasicBlockIndex(SwitchBlock); + if (Phi->getIncomingValue(SwitchBBIdx) == CaseValue && + count(Phi->blocks(), SwitchBlock) == 1) { + Phi->setIncomingValue(SwitchBBIdx, SI->getCondition()); + Changed = true; + } + } - ForwardingNodes[PHI].push_back(PhiIndex); + // Collect phi nodes that are indirectly using this switch's case constants. + int PhiIdx; + if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx)) + ForwardingNodes[Phi].push_back(PhiIdx); } - bool Changed = false; - - for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(), - E = ForwardingNodes.end(); - I != E; ++I) { - PHINode *Phi = I->first; - SmallVectorImpl<int> &Indexes = I->second; - + for (auto &ForwardingNode : ForwardingNodes) { + PHINode *Phi = ForwardingNode.first; + SmallVectorImpl<int> &Indexes = ForwardingNode.second; if (Indexes.size() < 2) continue; - for (size_t I = 0, E = Indexes.size(); I != E; ++I) - Phi->setIncomingValue(Indexes[I], SI->getCondition()); + for (int Index : Indexes) + Phi->setIncomingValue(Index, SI->getCondition()); Changed = true; } @@ -4743,8 +4808,8 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, /// If the switch is only used to initialize one or more /// phi nodes in a common successor block with only two different /// constant values, replace the switch with select. -static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, - AssumptionCache *AC, const DataLayout &DL, +static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder, + const DataLayout &DL, const TargetTransformInfo &TTI) { Value *const Cond = SI->getCondition(); PHINode *PHI = nullptr; @@ -4816,18 +4881,18 @@ private: } Kind; // For SingleValueKind, this is the single value. - Constant *SingleValue; + Constant *SingleValue = nullptr; // For BitMapKind, this is the bitmap. - ConstantInt *BitMap; - IntegerType *BitMapElementTy; + ConstantInt *BitMap = nullptr; + IntegerType *BitMapElementTy = nullptr; // For LinearMapKind, these are the constants used to derive the value. - ConstantInt *LinearOffset; - ConstantInt *LinearMultiplier; + ConstantInt *LinearOffset = nullptr; + ConstantInt *LinearMultiplier = nullptr; // For ArrayKind, this is the array. 
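Back in eliminateDeadSwitchCases above: computeKnownBits proves some bits of the switch condition, and any case value that disagrees with a proven bit can never be selected. The test itself, in a standalone sketch with explicit known-zero/known-one masks:

#include <cassert>
#include <cstdint>

static bool caseIsDead(uint64_t CaseVal, uint64_t KnownZero,
                       uint64_t KnownOne) {
  return (CaseVal & KnownZero) != 0 ||  // case sets a bit proven zero
         ((~CaseVal) & KnownOne) != 0;  // case clears a bit proven one
}

int main() {
  // Suppose the condition is known to end in binary "10":
  // bit 1 is proven one, bit 0 is proven zero.
  uint64_t KnownOne = 0x2, KnownZero = 0x1;
  assert(caseIsDead(5, KnownZero, KnownOne));  // 0b101 sets bit 0
  assert(caseIsDead(8, KnownZero, KnownOne));  // 0b1000 clears bit 1
  assert(!caseIsDead(6, KnownZero, KnownOne)); // 0b110 is still feasible
}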
- GlobalVariable *Array; + GlobalVariable *Array = nullptr; }; } // end anonymous namespace @@ -4835,9 +4900,7 @@ private: SwitchLookupTable::SwitchLookupTable( Module &M, uint64_t TableSize, ConstantInt *Offset, const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, - Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) - : SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr), - LinearOffset(nullptr), LinearMultiplier(nullptr), Array(nullptr) { + Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) { assert(Values.size() && "Can't build lookup table without values!"); assert(TableSize >= Values.size() && "Can't fit values in table!"); @@ -5083,7 +5146,6 @@ static void reuseTableCompare( User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) { - ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser); if (!CmpInst) return; @@ -5112,7 +5174,7 @@ static void reuseTableCompare( for (auto ValuePair : Values) { Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(), ValuePair.second, CmpOp1, true); - if (!CaseConst || CaseConst == DefaultConst) + if (!CaseConst || CaseConst == DefaultConst || isa<UndefValue>(CaseConst)) return; assert((CaseConst == TrueConst || CaseConst == FalseConst) && "Expect true or false as compare result."); @@ -5151,8 +5213,11 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, const TargetTransformInfo &TTI) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); - // Only build lookup table when we have a target that supports it. - if (!TTI.shouldBuildLookupTables()) + Function *Fn = SI->getParent()->getParent(); + // Only build lookup table when we have a target that supports it or the + // attribute is not set. + if (!TTI.shouldBuildLookupTables() || + (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")) return false; // FIXME: If the switch is too sparse for a lookup table, perhaps we could @@ -5163,8 +5228,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // string and lookup indices into that. // Ignore switches with less than three cases. Lookup tables will not make - // them - // faster, so we don't analyze them. + // them faster, so we don't analyze them. if (SI->getNumCases() < 3) return false; @@ -5176,8 +5240,10 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, ConstantInt *MaxCaseVal = CI->getCaseValue(); BasicBlock *CommonDest = nullptr; - typedef SmallVector<std::pair<ConstantInt *, Constant *>, 4> ResultListTy; + + using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>; SmallDenseMap<PHINode *, ResultListTy> ResultLists; + SmallDenseMap<PHINode *, Constant *> DefaultResults; SmallDenseMap<PHINode *, Type *> ResultTypes; SmallVector<PHINode *, 4> PHIs; @@ -5190,7 +5256,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, MaxCaseVal = CaseVal; // Resulting value at phi nodes for this case value. - typedef SmallVector<std::pair<PHINode *, Constant *>, 4> ResultsTy; + using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>; ResultsTy Results; if (!GetCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest, Results, DL, TTI)) @@ -5248,8 +5314,12 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Compute the table index value. 
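What SwitchToLookupTable ultimately emits, written by hand at the source level: a dense switch over small case values becomes a bounds check plus an array load (the ArrayKind of SwitchLookupTable; the single-value, bitmap, and linear-map kinds avoid the array when the results allow it):

#include <cstdio>

static int lookup(unsigned I) {
  static const int Table[4] = {10, 20, 30, 40}; // results for cases 0..3
  if (I < 4)
    return Table[I];
  return -1; // the switch's default result
}

int main() {
  std::printf("%d %d\n", lookup(2), lookup(9)); // 30 -1
}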
Builder.SetInsertPoint(SI); - Value *TableIndex = - Builder.CreateSub(SI->getCondition(), MinCaseVal, "switch.tableidx"); + Value *TableIndex; + if (MinCaseVal->isNullValue()) + TableIndex = SI->getCondition(); + else + TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, + "switch.tableidx"); // Compute the maximum table size representable by the integer type we are // switching upon. @@ -5282,15 +5352,14 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, Builder.SetInsertPoint(LookupBB); if (NeedMask) { - // Before doing the lookup we do the hole check. - // The LookupBB is therefore re-purposed to do the hole check - // and we create a new LookupBB. + // Before doing the lookup, we do the hole check. The LookupBB is therefore + // re-purposed to do the hole check, and we create a new LookupBB. BasicBlock *MaskBB = LookupBB; MaskBB->setName("switch.hole_check"); LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest); - // Make the mask's bitwidth at least 8bit and a power-of-2 to avoid + // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid // unnecessary illegal types. uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL)); APInt MaskInt(TableSizePowOf2, 0); @@ -5320,7 +5389,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, } if (!DefaultIsReachable || GeneratingCoveredLookupTable) { - // We cached PHINodes in PHIs, to avoid accessing deleted PHINodes later, + // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later, // do not delete PHINodes here. SI->getDefaultDest()->removePredecessor(SI->getParent(), /*DontDeleteUselessPHIs=*/true); @@ -5333,7 +5402,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // If using a bitmask, use any value to fill the lookup table holes. Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI]; - StringRef FuncName = SI->getParent()->getParent()->getName(); + StringRef FuncName = Fn->getName(); SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL, FuncName); @@ -5391,14 +5460,14 @@ static bool isSwitchDense(ArrayRef<int64_t> Values) { return NumCases * 100 >= Range * MinDensity; } -// Try and transform a switch that has "holes" in it to a contiguous sequence -// of cases. -// -// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be -// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}. -// -// This converts a sparse switch into a dense switch which allows better -// lowering and could also allow transforming into a lookup table. +/// Try to transform a switch that has "holes" in it to a contiguous sequence +/// of cases. +/// +/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be +/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}. +/// +/// This converts a sparse switch into a dense switch which allows better +/// lowering and could also allow transforming into a lookup table. static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI) { @@ -5427,7 +5496,7 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, // First, transform the values such that they start at zero and ascend. int64_t Base = Values[0]; for (auto &V : Values) - V -= Base; + V -= (uint64_t)(Base); // Now we have signed numbers that have been shifted so that, given enough // precision, there are no negative values. 
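The "hole check" built above when NeedMask is set, reduced to plain integers: bit I of the mask is set iff case I has a real result, and a shift-and-test routes holes to the default destination before the table is ever read (the pass rounds the mask width up to a power of two of at least 8 bits; this sketch just uses uint64_t):

#include <cassert>
#include <cstdint>

static bool coveredByTable(uint64_t Mask, unsigned Index) {
  return ((Mask >> Index) & 1) != 0;
}

int main() {
  // Cases 0, 1 and 3 exist; case 2 is a hole filled with a dummy entry.
  uint64_t Mask = 0xB; // binary 1011
  assert(coveredByTable(Mask, 1));
  assert(!coveredByTable(Mask, 2)); // falls through to the default
}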
Since the rest of the transform @@ -5492,12 +5561,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // see if that predecessor totally determines the outcome of this switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; Value *Cond = SI->getCondition(); if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) if (SimplifySwitchOnSelect(SI, Select)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; // If the block only contains the switch, see if we can fold the block // away into any preds. @@ -5507,33 +5576,34 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { ++BBI; if (SI == &*BBI) if (FoldValueComparisonIntoPredecessors(SI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } // Try to transform the switch into an icmp and a branch. if (TurnSwitchRangeIntoICmp(SI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; // Remove unreachable cases. - if (EliminateDeadSwitchCases(SI, AC, DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + if (eliminateDeadSwitchCases(SI, Options.AC, DL)) + return simplifyCFG(BB, TTI, Options) | true; - if (SwitchToSelect(SI, Builder, AC, DL, TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + if (switchToSelect(SI, Builder, DL, TTI)) + return simplifyCFG(BB, TTI, Options) | true; - if (ForwardSwitchConditionToPHI(SI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI)) + return simplifyCFG(BB, TTI, Options) | true; - // The conversion from switch to lookup tables results in difficult - // to analyze code and makes pruning branches much harder. - // This is a problem of the switch expression itself can still be - // restricted as a result of inlining or CVP. There only apply this - // transformation during late steps of the optimisation chain. - if (LateSimplifyCFG && SwitchToLookupTable(SI, Builder, DL, TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + // The conversion from switch to lookup tables results in difficult-to-analyze + // code and makes pruning branches much harder. This is a problem if the + // switch expression itself can still be restricted as a result of inlining or + // CVP. Therefore, only apply this transformation during late stages of the + // optimisation pipeline. 
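To make the lookup-table conversion concrete before the guarded call below, here is a minimal source-level sketch; the function names and constants are hypothetical, not code from this change.

  // Hypothetical input: every case yields a constant, so the switch can
  // become a bounds check plus a load from a constant table.
  int beforeLookup(unsigned X) {
    switch (X) {
    case 0: return 13;
    case 1: return 42;
    case 2: return 7;
    default: return 0;
    }
  }

  // Roughly the shape SwitchToLookupTable produces, expressed in source.
  int afterLookup(unsigned X) {
    static const int Table[3] = {13, 42, 7};
    return X < 3 ? Table[X] : 0;
  }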
+ if (Options.ConvertSwitchToLookupTable && + SwitchToLookupTable(SI, Builder, DL, TTI)) + return simplifyCFG(BB, TTI, Options) | true; if (ReduceSwitchRange(SI, Builder, DL, TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; return false; } @@ -5571,7 +5641,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } return Changed; } @@ -5613,8 +5683,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I); if (!LPad2 || !LPad2->isIdenticalTo(LPad)) continue; - for (++I; isa<DbgInfoIntrinsic>(I); ++I) { - } + for (++I; isa<DbgInfoIntrinsic>(I); ++I) + ; BranchInst *BI2 = dyn_cast<BranchInst>(I); if (!BI2 || !BI2->isIdenticalTo(BI)) continue; @@ -5658,39 +5728,38 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, BasicBlock *BB = BI->getParent(); BasicBlock *Succ = BI->getSuccessor(0); - if (SinkCommon && SinkThenElseCodeToEnd(BI)) + if (SinkCommon && Options.SinkCommonInsts && SinkThenElseCodeToEnd(BI)) return true; // If the Terminator is the only non-phi instruction, simplify the block. - // if LoopHeader is provided, check if the block or its successor is a loop - // header (This is for early invocations before loop simplify and + // If LoopHeader is provided, check if the block or its successor is a loop + // header. (This is for early invocations before loop simplify and // vectorization to keep canonical loop forms for nested loops. These blocks // can be eliminated when the pass is invoked later in the back-end.) bool NeedCanonicalLoop = - !LateSimplifyCFG && + Options.NeedCanonicalLoop && (LoopHeaders && (LoopHeaders->count(BB) || LoopHeaders->count(Succ))); BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; - // If the only instruction in the block is a seteq/setne comparison - // against a constant, try to simplify the block. + // If the only instruction in the block is a seteq/setne comparison against a + // constant, try to simplify the block. if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) { for (++I; isa<DbgInfoIntrinsic>(I); ++I) ; if (I->isTerminator() && - TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI, - BonusInstThreshold, AC)) + tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI, Options)) return true; } // See if we can merge an empty landing pad block with another which is // equivalent. if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) { - for (++I; isa<DbgInfoIntrinsic>(I); ++I) { - } + for (++I; isa<DbgInfoIntrinsic>(I); ++I) + ; if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB)) return true; } @@ -5699,8 +5768,8 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. 
- if (FoldBranchToCommonDest(BI, BonusInstThreshold)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold)) + return simplifyCFG(BB, TTI, Options) | true; return false; } @@ -5725,7 +5794,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics. @@ -5735,14 +5804,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { ++I; if (&*I == BI) { if (FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } else if (&*I == cast<Instruction>(BI->getCondition())) { ++I; // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(I)) ++I; if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } } @@ -5758,9 +5827,9 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (PBI && PBI->isConditional() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { assert(PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB); - bool CondIsFalse = PBI->getSuccessor(1) == BB; + bool CondIsTrue = PBI->getSuccessor(0) == BB; Optional<bool> Implication = isImpliedCondition( - PBI->getCondition(), BI->getCondition(), DL, CondIsFalse); + PBI->getCondition(), BI->getCondition(), DL, CondIsTrue); if (Implication) { // Turn this into a branch on constant. auto *OldCond = BI->getCondition(); @@ -5769,7 +5838,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { : ConstantInt::getFalse(BB->getContext()); BI->setCondition(CI); RecursivelyDeleteTriviallyDeadInstructions(OldCond); - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } } } @@ -5777,8 +5846,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI, BonusInstThreshold)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold)) + return simplifyCFG(BB, TTI, Options) | true; // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if @@ -5787,7 +5856,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { if (HoistThenElseCodeToIf(BI, TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to Successor #1. 
@@ -5795,7 +5864,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } } else if (BI->getSuccessor(1)->getSinglePredecessor()) { // If Successor #0 has multiple preds, we may be able to conditionally @@ -5804,30 +5873,30 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; } // If this is a branch on a phi node in the current block, thread control // through this block if any PHI node entries are constants. if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) if (PN->getParent() == BI->getParent()) - if (FoldCondBranchOnPHI(BI, DL, AC)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + if (FoldCondBranchOnPHI(BI, DL, Options.AC)) + return simplifyCFG(BB, TTI, Options) | true; // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) if (SimplifyCondBranchToCondBranch(PBI, BI, DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return simplifyCFG(BB, TTI, Options) | true; // Look for diamond patterns. if (MergeCondStores) if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (mergeConditionalStores(PBI, BI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + if (mergeConditionalStores(PBI, BI, DL)) + return simplifyCFG(BB, TTI, Options) | true; return false; } @@ -5936,7 +6005,6 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. - // if (MergeBlockIntoPredecessor(BB)) return true; @@ -5944,12 +6012,12 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // If there is a trivial two-entry PHI node in this basic block, and we can // eliminate it, do so now. 
- if (PHINode *PN = dyn_cast<PHINode>(BB->begin())) + if (auto *PN = dyn_cast<PHINode>(BB->begin())) if (PN->getNumIncomingValues() == 2) Changed |= FoldTwoEntryPHINode(PN, TTI, DL); Builder.SetInsertPoint(BB->getTerminator()); - if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { + if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator())) { if (BI->isUnconditional()) { if (SimplifyUncondBranch(BI, Builder)) return true; @@ -5957,25 +6025,22 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { if (SimplifyCondBranch(BI, Builder)) return true; } - } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + } else if (auto *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { if (SimplifyReturn(RI, Builder)) return true; - } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { + } else if (auto *RI = dyn_cast<ResumeInst>(BB->getTerminator())) { if (SimplifyResume(RI, Builder)) return true; - } else if (CleanupReturnInst *RI = - dyn_cast<CleanupReturnInst>(BB->getTerminator())) { + } else if (auto *RI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) { if (SimplifyCleanupReturn(RI)) return true; - } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { + } else if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { if (SimplifySwitch(SI, Builder)) return true; - } else if (UnreachableInst *UI = - dyn_cast<UnreachableInst>(BB->getTerminator())) { + } else if (auto *UI = dyn_cast<UnreachableInst>(BB->getTerminator())) { if (SimplifyUnreachable(UI)) return true; - } else if (IndirectBrInst *IBI = - dyn_cast<IndirectBrInst>(BB->getTerminator())) { + } else if (auto *IBI = dyn_cast<IndirectBrInst>(BB->getTerminator())) { if (SimplifyIndirectBr(IBI)) return true; } @@ -5983,16 +6048,10 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { return Changed; } -/// This function is used to do simplification of a CFG. -/// For example, it adjusts branches to branches to eliminate the extra hop, -/// eliminates unreachable basic blocks, and does other "peephole" optimization -/// of the CFG. It returns true if a modification was made. 
-/// -bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - unsigned BonusInstThreshold, AssumptionCache *AC, - SmallPtrSetImpl<BasicBlock *> *LoopHeaders, - bool LateSimplifyCFG) { - return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), - BonusInstThreshold, AC, LoopHeaders, LateSimplifyCFG) +bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, + const SimplifyCFGOptions &Options, + SmallPtrSetImpl<BasicBlock *> *LoopHeaders) { + return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), LoopHeaders, + Options) .run(BB); } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index 6d90e6b48358..ad1faea0a7ae 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -18,13 +18,11 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -35,10 +33,14 @@ using namespace llvm; STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); STATISTIC(NumElimOperand, "Number of IV operands folded into a use"); +STATISTIC(NumFoldedUser, "Number of IV users folded into a constant"); STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); STATISTIC( NumSimplifiedSDiv, "Number of IV signed division operations converted to unsigned division"); +STATISTIC( + NumSimplifiedSRem, + "Number of IV signed remainder operations converted to unsigned remainder"); STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); namespace { @@ -51,15 +53,17 @@ namespace { LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; - + SCEVExpander &Rewriter; SmallVectorImpl<WeakTrackingVH> &DeadInsts; bool Changed; public: SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) - : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) { + LoopInfo *LI, SCEVExpander &Rewriter, + SmallVectorImpl<WeakTrackingVH> &Dead) + : L(Loop), LI(LI), SE(SE), DT(DT), Rewriter(Rewriter), DeadInsts(Dead), + Changed(false) { assert(LI && "IV simplification requires LoopInfo"); } @@ -73,12 +77,17 @@ namespace { Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand); + bool replaceIVUserWithLoopInvariant(Instruction *UseInst); bool eliminateOverflowIntrinsic(CallInst *CI); bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); + bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand); void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); - void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, - bool IsSigned); + void simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand, + bool IsSigned); + void replaceRemWithNumerator(BinaryOperator *Rem); + void replaceRemWithNumeratorOrZero(BinaryOperator *Rem); + void replaceSRemWithURem(BinaryOperator *Rem); bool eliminateSDiv(BinaryOperator *SDiv); bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand); bool strengthenRightShift(BinaryOperator *BO, Value *IVOperand); @@ -151,6 
+160,74 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) return IVSrc; } +bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp, + Value *IVOperand) { + unsigned IVOperIdx = 0; + ICmpInst::Predicate Pred = ICmp->getPredicate(); + if (IVOperand != ICmp->getOperand(0)) { + // Swapped + assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); + IVOperIdx = 1; + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + // Get the SCEVs for the ICmp operands (in the specific context of the + // current loop) + const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); + const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop); + const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop); + + ICmpInst::Predicate InvariantPredicate; + const SCEV *InvariantLHS, *InvariantRHS; + + auto *PN = dyn_cast<PHINode>(IVOperand); + if (!PN) + return false; + if (!SE->isLoopInvariantPredicate(Pred, S, X, L, InvariantPredicate, + InvariantLHS, InvariantRHS)) + return false; + + // Rewrite the comparison to a loop invariant comparison if it can be done + // cheaply, where cheaply means "we don't need to emit any new + // instructions". + + SmallDenseMap<const SCEV*, Value*> CheapExpansions; + CheapExpansions[S] = ICmp->getOperand(IVOperIdx); + CheapExpansions[X] = ICmp->getOperand(1 - IVOperIdx); + + // TODO: Support multiple entry loops? (We currently bail out of these in + // the IndVarSimplify pass) + if (auto *BB = L->getLoopPredecessor()) { + const int Idx = PN->getBasicBlockIndex(BB); + if (Idx >= 0) { + Value *Incoming = PN->getIncomingValue(Idx); + const SCEV *IncomingS = SE->getSCEV(Incoming); + CheapExpansions[IncomingS] = Incoming; + } + } + Value *NewLHS = CheapExpansions[InvariantLHS]; + Value *NewRHS = CheapExpansions[InvariantRHS]; + + if (!NewLHS) + if (auto *ConstLHS = dyn_cast<SCEVConstant>(InvariantLHS)) + NewLHS = ConstLHS->getValue(); + if (!NewRHS) + if (auto *ConstRHS = dyn_cast<SCEVConstant>(InvariantRHS)) + NewRHS = ConstRHS->getValue(); + + if (!NewLHS || !NewRHS) + // We could not find an existing value to replace either LHS or RHS. + // Generating new instructions has subtler tradeoffs, so avoid doing that + // for now. + return false; + + DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n'); + ICmp->setPredicate(InvariantPredicate); + ICmp->setOperand(0, NewLHS); + ICmp->setOperand(1, NewRHS); + return true; +} + /// SimplifyIVUsers helper for eliminating useless /// comparisons against an induction variable. void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { @@ -164,17 +241,11 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { Pred = ICmpInst::getSwappedPredicate(Pred); } - // Get the SCEVs for the ICmp operands. - const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx)); - const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx)); - - // Simplify unnecessary loops away. + // Get the SCEVs for the ICmp operands (in the specific context of the + // current loop) const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); - S = SE->getSCEVAtScope(S, ICmpLoop); - X = SE->getSCEVAtScope(X, ICmpLoop); - - ICmpInst::Predicate InvariantPredicate; - const SCEV *InvariantLHS, *InvariantRHS; + const SCEV *S = SE->getSCEVAtScope(ICmp->getOperand(IVOperIdx), ICmpLoop); + const SCEV *X = SE->getSCEVAtScope(ICmp->getOperand(1 - IVOperIdx), ICmpLoop); // If the condition is always true or always false, replace it with // a constant value. 
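To illustrate the always-true case mentioned at the end of the hunk above, a hedged sketch with assumed loop bounds:

  // SCEV knows I ranges over [0, 16), so the guard is provably true on
  // every iteration; eliminateIVComparison replaces it with a constant and
  // the branch folds away.
  int sumFirst16(const int *A) {
    int Sum = 0;
    for (unsigned I = 0; I != 16; ++I)
      if (I < 100) // always true for this IV
        Sum += A[I];
    return Sum;
  }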
@@ -186,85 +257,8 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); DeadInsts.emplace_back(ICmp); DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); - } else if (isa<PHINode>(IVOperand) && - SE->isLoopInvariantPredicate(Pred, S, X, L, InvariantPredicate, - InvariantLHS, InvariantRHS)) { - - // Rewrite the comparison to a loop invariant comparison if it can be done - // cheaply, where cheaply means "we don't need to emit any new - // instructions". - - Value *NewLHS = nullptr, *NewRHS = nullptr; - - if (S == InvariantLHS || X == InvariantLHS) - NewLHS = - ICmp->getOperand(S == InvariantLHS ? IVOperIdx : (1 - IVOperIdx)); - - if (S == InvariantRHS || X == InvariantRHS) - NewRHS = - ICmp->getOperand(S == InvariantRHS ? IVOperIdx : (1 - IVOperIdx)); - - auto *PN = cast<PHINode>(IVOperand); - for (unsigned i = 0, e = PN->getNumIncomingValues(); - i != e && (!NewLHS || !NewRHS); - ++i) { - - // If this is a value incoming from the backedge, then it cannot be a loop - // invariant value (since we know that IVOperand is an induction variable). - if (L->contains(PN->getIncomingBlock(i))) - continue; - - // NB! This following assert does not fundamentally have to be true, but - // it is true today given how SCEV analyzes induction variables. - // Specifically, today SCEV will *not* recognize %iv as an induction - // variable in the following case: - // - // define void @f(i32 %k) { - // entry: - // br i1 undef, label %r, label %l - // - // l: - // %k.inc.l = add i32 %k, 1 - // br label %loop - // - // r: - // %k.inc.r = add i32 %k, 1 - // br label %loop - // - // loop: - // %iv = phi i32 [ %k.inc.l, %l ], [ %k.inc.r, %r ], [ %iv.inc, %loop ] - // %iv.inc = add i32 %iv, 1 - // br label %loop - // } - // - // but if it starts to, at some point, then the assertion below will have - // to be changed to a runtime check. - - Value *Incoming = PN->getIncomingValue(i); - -#ifndef NDEBUG - if (auto *I = dyn_cast<Instruction>(Incoming)) - assert(DT->dominates(I, ICmp) && "Should be a unique loop dominating value!"); -#endif - - const SCEV *IncomingS = SE->getSCEV(Incoming); - - if (!NewLHS && IncomingS == InvariantLHS) - NewLHS = Incoming; - if (!NewRHS && IncomingS == InvariantRHS) - NewRHS = Incoming; - } - - if (!NewLHS || !NewRHS) - // We could not find an existing value to replace either LHS or RHS. - // Generating new instructions has subtler tradeoffs, so avoid doing that - // for now. - return; - - DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n'); - ICmp->setPredicate(InvariantPredicate); - ICmp->setOperand(0, NewLHS); - ICmp->setOperand(1, NewRHS); + } else if (makeIVComparisonInvariant(ICmp, IVOperand)) { + // fallthrough to end of function } else if (ICmpInst::isSigned(OriginalPred) && SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) { // If we were unable to make anything above, all we can is to canonicalize @@ -309,54 +303,90 @@ bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) { return false; } -/// SimplifyIVUsers helper for eliminating useless -/// remainder operations operating on an induction variable. 
-void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem, - Value *IVOperand, - bool IsSigned) { +// i %s n -> i %u n if i >= 0 and n >= 0 +void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) { + auto *N = Rem->getOperand(0), *D = Rem->getOperand(1); + auto *URem = BinaryOperator::Create(BinaryOperator::URem, N, D, + Rem->getName() + ".urem", Rem); + Rem->replaceAllUsesWith(URem); + DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n'); + ++NumSimplifiedSRem; + Changed = true; + DeadInsts.emplace_back(Rem); +} + +// i % n --> i if i is in [0,n). +void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator *Rem) { + Rem->replaceAllUsesWith(Rem->getOperand(0)); + DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); + ++NumElimRem; + Changed = true; + DeadInsts.emplace_back(Rem); +} + +// (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). +void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) { + auto *T = Rem->getType(); + auto *N = Rem->getOperand(0), *D = Rem->getOperand(1); + ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, N, D); + SelectInst *Sel = + SelectInst::Create(ICmp, ConstantInt::get(T, 0), N, "iv.rem", Rem); + Rem->replaceAllUsesWith(Sel); + DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); + ++NumElimRem; + Changed = true; + DeadInsts.emplace_back(Rem); +} + +/// SimplifyIVUsers helper for eliminating useless remainder operations +/// operating on an induction variable or replacing srem by urem. +void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand, + bool IsSigned) { + auto *NValue = Rem->getOperand(0); + auto *DValue = Rem->getOperand(1); // We're only interested in the case where we know something about - // the numerator. - if (IVOperand != Rem->getOperand(0)) + // the numerator, unless it is a srem, because we want to replace srem by urem + // in general. + bool UsedAsNumerator = IVOperand == NValue; + if (!UsedAsNumerator && !IsSigned) return; - // Get the SCEVs for the ICmp operands. - const SCEV *S = SE->getSCEV(Rem->getOperand(0)); - const SCEV *X = SE->getSCEV(Rem->getOperand(1)); + const SCEV *N = SE->getSCEV(NValue); // Simplify unnecessary loops away. const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); - S = SE->getSCEVAtScope(S, ICmpLoop); - X = SE->getSCEVAtScope(X, ICmpLoop); - - // i % n --> i if i is in [0,n). - if ((!IsSigned || SE->isKnownNonNegative(S)) && - SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - S, X)) - Rem->replaceAllUsesWith(Rem->getOperand(0)); - else { - // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). - const SCEV *LessOne = SE->getMinusSCEV(S, SE->getOne(S->getType())); - if (IsSigned && !SE->isKnownNonNegative(LessOne)) - return; + N = SE->getSCEVAtScope(N, ICmpLoop); + + bool IsNumeratorNonNegative = !IsSigned || SE->isKnownNonNegative(N); + + // Do not proceed if the Numerator may be negative + if (!IsNumeratorNonNegative) + return; - if (!SE->isKnownPredicate(IsSigned ? - ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - LessOne, X)) + const SCEV *D = SE->getSCEV(DValue); + D = SE->getSCEVAtScope(D, ICmpLoop); + + if (UsedAsNumerator) { + auto LT = IsSigned ? 
ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; + if (SE->isKnownPredicate(LT, N, D)) { + replaceRemWithNumerator(Rem); return; + } - ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, - Rem->getOperand(0), Rem->getOperand(1)); - SelectInst *Sel = - SelectInst::Create(ICmp, - ConstantInt::get(Rem->getType(), 0), - Rem->getOperand(0), "tmp", Rem); - Rem->replaceAllUsesWith(Sel); + auto *T = Rem->getType(); + const auto *NLessOne = SE->getMinusSCEV(N, SE->getOne(T)); + if (SE->isKnownPredicate(LT, NLessOne, D)) { + replaceRemWithNumeratorOrZero(Rem); + return; + } } - DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); - ++NumElimRem; - Changed = true; - DeadInsts.emplace_back(Rem); + // Try to replace SRem with URem, if both N and D are known non-negative. + // Since we have already checked N, we only need to check D now. + if (!IsSigned || !SE->isKnownNonNegative(D)) + return; + + replaceSRemWithURem(Rem); } bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) { @@ -474,7 +504,7 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, if (BinaryOperator *Bin = dyn_cast<BinaryOperator>(UseInst)) { bool IsSRem = Bin->getOpcode() == Instruction::SRem; if (IsSRem || Bin->getOpcode() == Instruction::URem) { - eliminateIVRemainder(Bin, IVOperand, IsSRem); + simplifyIVRemainder(Bin, IVOperand, IsSRem); return true; } @@ -492,6 +522,40 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, return false; } +static Instruction *GetLoopInvariantInsertPosition(Loop *L, Instruction *Hint) { + if (auto *BB = L->getLoopPreheader()) + return BB->getTerminator(); + + return Hint; +} + +/// Replace the UseInst with a constant if possible. +bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) { + if (!SE->isSCEVable(I->getType())) + return false; + + // Get the symbolic expression for this instruction. + const SCEV *S = SE->getSCEV(I); + + if (!SE->isLoopInvariant(S, L)) + return false; + + // Do not generate something ridiculous even if S is loop invariant. + if (Rewriter.isHighCostExpansion(S, L, I)) + return false; + + auto *IP = GetLoopInvariantInsertPosition(L, I); + auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP); + + I->replaceAllUsesWith(Invariant); + DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I + << " with loop invariant: " << *S << '\n'); + ++NumFoldedUser; + Changed = true; + DeadInsts.emplace_back(I); + return true; +} + /// Eliminate any operation that SCEV can prove is an identity function. bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand) { @@ -627,7 +691,7 @@ bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO, /// Add all uses of Def to the current IV's worklist. static void pushIVUsers( - Instruction *Def, + Instruction *Def, Loop *L, SmallPtrSet<Instruction*,16> &Simplified, SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) { @@ -638,8 +702,19 @@ static void pushIVUsers( // Also ensure unique worklist users. // If Def is a LoopPhi, it may not be in the Simplified set, so check for // self edges first. - if (UI != Def && Simplified.insert(UI).second) - SimpleIVUsers.push_back(std::make_pair(UI, Def)); + if (UI == Def) + continue; + + // Only change the current Loop; do not change the other parts (e.g. other + // Loops). + if (!L->contains(UI)) + continue; + + // Do not push the same instruction more than once.
+ if (!Simplified.insert(UI).second) + continue; + + SimpleIVUsers.push_back(std::make_pair(UI, Def)); } } @@ -689,7 +764,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { // Push users of the current LoopPhi. In rare cases, pushIVUsers may be // called multiple times for the same LoopPhi. This is the proper thing to // do for loop header phis that use each other. - pushIVUsers(CurrIV, Simplified, SimpleIVUsers); + pushIVUsers(CurrIV, L, Simplified, SimpleIVUsers); while (!SimpleIVUsers.empty()) { std::pair<Instruction*, Instruction*> UseOper = @@ -699,6 +774,11 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { // Bypass back edges to avoid extra work. if (UseInst == CurrIV) continue; + // Try to replace UseInst with a loop invariant before any other + // simplifications. + if (replaceIVUserWithLoopInvariant(UseInst)) + continue; + Instruction *IVOperand = UseOper.second; for (unsigned N = 0; IVOperand; ++N) { assert(N <= Simplified.size() && "runaway iteration"); @@ -712,7 +792,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { continue; if (eliminateIVUser(UseOper.first, IVOperand)) { - pushIVUsers(IVOperand, Simplified, SimpleIVUsers); + pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers); continue; } @@ -722,7 +802,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) { // re-queue uses of the now modified binary operator and fall // through to the checks that remain. - pushIVUsers(IVOperand, Simplified, SimpleIVUsers); + pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers); } } @@ -732,7 +812,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { continue; } if (isSimpleIVUser(UseOper.first, L, SE)) { - pushIVUsers(UseOper.first, Simplified, SimpleIVUsers); + pushIVUsers(UseOper.first, L, Simplified, SimpleIVUsers); } } } @@ -745,8 +825,9 @@ void IVVisitor::anchor() { } /// by using ScalarEvolution to analyze the IV's recurrence. bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead, - IVVisitor *V) { - SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead); + SCEVExpander &Rewriter, IVVisitor *V) { + SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Rewriter, + Dead); SIV.simplifyUsers(CurrIV, V); return SIV.hasChanged(); } @@ -755,9 +836,13 @@ bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, /// loop. This does not actually change or add IVs. 
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) { + SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars"); +#ifndef NDEBUG + Rewriter.setDebugType(DEBUG_TYPE); +#endif bool Changed = false; for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { - Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead); + Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead, Rewriter); } return Changed; } diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 2ea15f65cef9..f3d4f2ef38d7 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -20,7 +20,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 77c0a41929ac..03a1d55ddc30 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -18,10 +18,11 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" @@ -484,10 +485,10 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B, uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize); uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize); if (LenTrue && LenFalse) { - Function *Caller = CI->getParent()->getParent(); - emitOptimizationRemark(CI->getContext(), "simplify-libcalls", *Caller, - SI->getDebugLoc(), - "folded strlen(select) to select of constants"); + ORE.emit([&]() { + return OptimizationRemark("instcombine", "simplify-libcalls", CI) + << "folded strlen(select) to select of constants"; + }); return B.CreateSelect(SI->getCondition(), ConstantInt::get(CI->getType(), LenTrue - 1), ConstantInt::get(CI->getType(), LenFalse - 1)); @@ -509,6 +510,9 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) { Module &M = *CI->getParent()->getParent()->getParent(); unsigned WCharSize = TLI->getWCharSize(M) * 8; + // We cannot perform this optimization without wchar_size metadata. + if (WCharSize == 0) + return nullptr; return optimizeStringLength(CI, B, WCharSize); } @@ -753,29 +757,44 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { } // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0 + // TODO: The case where both inputs are constants does not need to be limited + // to legal integers or equality comparison. See block below this. 
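For the memcmp fold sketched in the comment above, a small self-contained example; the types and the length are assumptions chosen to match an 8-byte legal integer:

  #include <cstdint>
  #include <cstring>

  // The zero-equality memcmp on aligned 8-byte data ...
  bool eqMemcmp(const std::uint64_t *A, const std::uint64_t *B) {
    return std::memcmp(A, B, 8) == 0;
  }

  // ... is folded to one wide load-and-compare, the form built below.
  bool eqWide(const std::uint64_t *A, const std::uint64_t *B) {
    return *A == *B;
  }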
if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) { - IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8); unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType); - if (getKnownAlignment(LHS, DL, CI) >= PrefAlignment && - getKnownAlignment(RHS, DL, CI) >= PrefAlignment) { - - Type *LHSPtrTy = - IntType->getPointerTo(LHS->getType()->getPointerAddressSpace()); - Type *RHSPtrTy = - IntType->getPointerTo(RHS->getType()->getPointerAddressSpace()); - - Value *LHSV = - B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv"); - Value *RHSV = - B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv"); + // First, see if we can fold either argument to a constant. + Value *LHSV = nullptr; + if (auto *LHSC = dyn_cast<Constant>(LHS)) { + LHSC = ConstantExpr::getBitCast(LHSC, IntType->getPointerTo()); + LHSV = ConstantFoldLoadFromConstPtr(LHSC, IntType, DL); + } + Value *RHSV = nullptr; + if (auto *RHSC = dyn_cast<Constant>(RHS)) { + RHSC = ConstantExpr::getBitCast(RHSC, IntType->getPointerTo()); + RHSV = ConstantFoldLoadFromConstPtr(RHSC, IntType, DL); + } + // Don't generate unaligned loads. If either source is constant data, + // alignment doesn't matter for that source because there is no load. + if ((LHSV || getKnownAlignment(LHS, DL, CI) >= PrefAlignment) && + (RHSV || getKnownAlignment(RHS, DL, CI) >= PrefAlignment)) { + if (!LHSV) { + Type *LHSPtrTy = + IntType->getPointerTo(LHS->getType()->getPointerAddressSpace()); + LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy), "lhsv"); + } + if (!RHSV) { + Type *RHSPtrTy = + IntType->getPointerTo(RHS->getType()->getPointerAddressSpace()); + RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy), "rhsv"); + } return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp"); } } - // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) + // Constant folding: memcmp(x, y, Len) -> constant (all arguments are const). + // TODO: This is limited to i8 arrays. StringRef LHSStr, RHSStr; if (getConstantStringInfo(LHS, LHSStr) && getConstantStringInfo(RHS, RHSStr)) { @@ -1014,6 +1033,35 @@ static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { return B.CreateFPExt(V, B.getDoubleTy()); } +// cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z))) +Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilder<> &B) { + if (!CI->isFast()) + return nullptr; + + // Propagate fast-math flags from the existing call to new instructions. 
+ IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + + Value *Real, *Imag; + if (CI->getNumArgOperands() == 1) { + Value *Op = CI->getArgOperand(0); + assert(Op->getType()->isArrayTy() && "Unexpected signature for cabs!"); + Real = B.CreateExtractValue(Op, 0, "real"); + Imag = B.CreateExtractValue(Op, 1, "imag"); + } else { + assert(CI->getNumArgOperands() == 2 && "Unexpected signature for cabs!"); + Real = CI->getArgOperand(0); + Imag = CI->getArgOperand(1); + } + + Value *RealReal = B.CreateFMul(Real, Real); + Value *ImagImag = B.CreateFMul(Imag, Imag); + + Function *FSqrt = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::sqrt, + CI->getType()); + return B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs"); +} + Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Value *Ret = nullptr; @@ -1055,6 +1103,51 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) { return InnerChain[Exp]; } +/// Use square root in place of pow(x, +/-0.5). +Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) { + // TODO: There is some subset of 'fast' under which these transforms should + // be allowed. + if (!Pow->isFast()) + return nullptr; + + const APFloat *Arg1C; + if (!match(Pow->getArgOperand(1), m_APFloat(Arg1C))) + return nullptr; + if (!Arg1C->isExactlyValue(0.5) && !Arg1C->isExactlyValue(-0.5)) + return nullptr; + + // Fast-math flags from the pow() are propagated to all replacement ops. + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(Pow->getFastMathFlags()); + Type *Ty = Pow->getType(); + Value *Sqrt; + if (Pow->hasFnAttr(Attribute::ReadNone)) { + // We know that errno is never set, so replace with an intrinsic: + // pow(x, 0.5) --> llvm.sqrt(x) + // llvm.pow(x, 0.5) --> llvm.sqrt(x) + auto *F = Intrinsic::getDeclaration(Pow->getModule(), Intrinsic::sqrt, Ty); + Sqrt = B.CreateCall(F, Pow->getArgOperand(0)); + } else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, + LibFunc_sqrtl)) { + // Errno could be set, so we must use a sqrt libcall. + // TODO: We also should check that the target can in fact lower the sqrt + // libcall. We currently have no way to ask this question, so we ask + // whether the target has a sqrt libcall which is not exactly the same. + Sqrt = emitUnaryFloatFnCall(Pow->getArgOperand(0), + TLI->getName(LibFunc_sqrt), B, + Pow->getCalledFunction()->getAttributes()); + } else { + // We can't replace with an intrinsic or a libcall. + return nullptr; + } + + // If this is pow(x, -0.5), get the reciprocal. + if (Arg1C->isExactlyValue(-0.5)) + Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt); + + return Sqrt; +} + Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Value *Ret = nullptr; @@ -1092,7 +1185,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { // Example: x = 1000, y = 0.001. // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1). 
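The overflow hazard in the comment above is easy to reproduce; a standalone check using the same example values (x = 1000, y = 0.001):

  #include <cmath>
  #include <cstdio>

  // exp(1000) overflows double to +inf, so pow(exp(x), y) stays +inf, while
  // the rewritten exp(x * y) is exp(1). The two disagree, which is why the
  // transform is restricted to fast-math.
  int main() {
    double X = 1000.0, Y = 0.001;
    std::printf("pow(exp(x), y) = %g\n", std::pow(std::exp(X), Y)); // inf
    std::printf("exp(x * y)     = %g\n", std::exp(X * Y));          // ~2.71828
  }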
auto *OpC = dyn_cast<CallInst>(Op1); - if (OpC && OpC->hasUnsafeAlgebra() && CI->hasUnsafeAlgebra()) { + if (OpC && OpC->isFast() && CI->isFast()) { LibFunc Func; Function *OpCCallee = OpC->getCalledFunction(); if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && @@ -1105,6 +1198,9 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { } } + if (Value *Sqrt = replacePowWithSqrt(CI, B)) + return Sqrt; + ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); if (!Op2C) return Ret; @@ -1112,42 +1208,10 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 return ConstantFP::get(CI->getType(), 1.0); - if (Op2C->isExactlyValue(-0.5) && - hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf, - LibFunc_sqrtl)) { - // If -ffast-math: - // pow(x, -0.5) -> 1.0 / sqrt(x) - if (CI->hasUnsafeAlgebra()) { - IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(CI->getFastMathFlags()); - - // TODO: If the pow call is an intrinsic, we should lower to the sqrt - // intrinsic, so we match errno semantics. We also should check that the - // target can in fact lower the sqrt intrinsic -- we currently have no way - // to ask this question other than asking whether the target has a sqrt - // libcall, which is a sufficient but not necessary condition. - Value *Sqrt = emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc_sqrt), B, - Callee->getAttributes()); - - return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Sqrt, "sqrtrecip"); - } - } - + // FIXME: Correct the transforms and pull this into replacePowWithSqrt(). if (Op2C->isExactlyValue(0.5) && hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) { - - // In -ffast-math, pow(x, 0.5) -> sqrt(x). - if (CI->hasUnsafeAlgebra()) { - IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(CI->getFastMathFlags()); - - // TODO: As above, we should lower to the sqrt intrinsic if the pow is an - // intrinsic, to match errno semantics. - return emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc_sqrt), B, - Callee->getAttributes()); - } - // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). // This is faster than calling pow, and still handles negative zero // and negative infinity correctly. @@ -1169,15 +1233,21 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { return Sel; } - if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x + // Propagate fast-math-flags from the call to any created instructions. + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + // pow(x, 1.0) --> x + if (Op2C->isExactlyValue(1.0)) return Op1; - if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x + // pow(x, 2.0) --> x * x + if (Op2C->isExactlyValue(2.0)) return B.CreateFMul(Op1, Op1, "pow2"); - if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x + // pow(x, -1.0) --> 1.0 / x + if (Op2C->isExactlyValue(-1.0)) return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); // In -ffast-math, generate repeated fmul instead of generating pow(x, n). - if (CI->hasUnsafeAlgebra()) { + if (CI->isFast()) { APFloat V = abs(Op2C->getValueAPF()); // We limit to a max of 7 fmul(s). Thus max exponent is 32. // This transformation applies to integer exponents only. @@ -1185,10 +1255,6 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { !V.isInteger()) return nullptr; - // Propagate fast math flags. 
- IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(CI->getFastMathFlags()); - // We will memoize intermediate products of the Addition Chain. Value *InnerChain[33] = {nullptr}; InnerChain[1] = Op1; @@ -1196,8 +1262,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { // We cannot readily convert a non-double type (like float) to a double. // So we first convert V to something which could be converted to double. - bool ignored; - V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &ignored); + bool Ignored; + V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored); Value *FMul = getPow(InnerChain, V.convertToDouble(), B); // For negative exponents simply compute the reciprocal. @@ -1265,9 +1331,9 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { IRBuilder<>::FastMathFlagGuard Guard(B); FastMathFlags FMF; - if (CI->hasUnsafeAlgebra()) { - // Unsafe algebra sets all fast-math-flags to true. - FMF.setUnsafeAlgebra(); + if (CI->isFast()) { + // If the call is 'fast', then anything we create here will also be 'fast'. + FMF.setFast(); } else { // At a minimum, no-nans-fp-math must be true. if (!CI->hasNoNaNs()) @@ -1298,13 +1364,13 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { if (UnsafeFPShrink && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); - if (!CI->hasUnsafeAlgebra()) + if (!CI->isFast()) return Ret; Value *Op1 = CI->getArgOperand(0); auto *OpC = dyn_cast<CallInst>(Op1); - // The earlier call must also be unsafe in order to do these transforms. - if (!OpC || !OpC->hasUnsafeAlgebra()) + // The earlier call must also be 'fast' in order to do these transforms. + if (!OpC || !OpC->isFast()) return Ret; // log(pow(x,y)) -> y*log(x) @@ -1314,7 +1380,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { IRBuilder<>::FastMathFlagGuard Guard(B); FastMathFlags FMF; - FMF.setUnsafeAlgebra(); + FMF.setFast(); B.setFastMathFlags(FMF); LibFunc Func; @@ -1346,11 +1412,11 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { Callee->getIntrinsicID() == Intrinsic::sqrt)) Ret = optimizeUnaryDoubleFP(CI, B, true); - if (!CI->hasUnsafeAlgebra()) + if (!CI->isFast()) return Ret; Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0)); - if (!I || I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra()) + if (!I || I->getOpcode() != Instruction::FMul || !I->isFast()) return Ret; // We're looking for a repeated factor in a multiplication tree, @@ -1372,8 +1438,7 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { Value *OtherMul0, *OtherMul1; if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) { // Pattern: sqrt((x * y) * z) - if (OtherMul0 == OtherMul1 && - cast<Instruction>(Op0)->hasUnsafeAlgebra()) { + if (OtherMul0 == OtherMul1 && cast<Instruction>(Op0)->isFast()) { // Matched: sqrt((x * x) * z) RepeatOp = OtherMul0; OtherOp = Op1; @@ -1418,8 +1483,8 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { if (!OpC) return Ret; - // Both calls must allow unsafe optimizations in order to remove them. - if (!CI->hasUnsafeAlgebra() || !OpC->hasUnsafeAlgebra()) + // Both calls must be 'fast' in order to remove them. 
+ if (!CI->isFast() || !OpC->isFast()) return Ret; // tan(atan(x)) -> x @@ -2043,13 +2108,107 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return nullptr; } +Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI, + LibFunc Func, + IRBuilder<> &Builder) { + // Don't optimize calls that require strict floating point semantics. + if (CI->isStrictFP()) + return nullptr; + + switch (Func) { + case LibFunc_cosf: + case LibFunc_cos: + case LibFunc_cosl: + return optimizeCos(CI, Builder); + case LibFunc_sinpif: + case LibFunc_sinpi: + case LibFunc_cospif: + case LibFunc_cospi: + return optimizeSinCosPi(CI, Builder); + case LibFunc_powf: + case LibFunc_pow: + case LibFunc_powl: + return optimizePow(CI, Builder); + case LibFunc_exp2l: + case LibFunc_exp2: + case LibFunc_exp2f: + return optimizeExp2(CI, Builder); + case LibFunc_fabsf: + case LibFunc_fabs: + case LibFunc_fabsl: + return replaceUnaryCall(CI, Builder, Intrinsic::fabs); + case LibFunc_sqrtf: + case LibFunc_sqrt: + case LibFunc_sqrtl: + return optimizeSqrt(CI, Builder); + case LibFunc_log: + case LibFunc_log10: + case LibFunc_log1p: + case LibFunc_log2: + case LibFunc_logb: + return optimizeLog(CI, Builder); + case LibFunc_tan: + case LibFunc_tanf: + case LibFunc_tanl: + return optimizeTan(CI, Builder); + case LibFunc_ceil: + return replaceUnaryCall(CI, Builder, Intrinsic::ceil); + case LibFunc_floor: + return replaceUnaryCall(CI, Builder, Intrinsic::floor); + case LibFunc_round: + return replaceUnaryCall(CI, Builder, Intrinsic::round); + case LibFunc_nearbyint: + return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint); + case LibFunc_rint: + return replaceUnaryCall(CI, Builder, Intrinsic::rint); + case LibFunc_trunc: + return replaceUnaryCall(CI, Builder, Intrinsic::trunc); + case LibFunc_acos: + case LibFunc_acosh: + case LibFunc_asin: + case LibFunc_asinh: + case LibFunc_atan: + case LibFunc_atanh: + case LibFunc_cbrt: + case LibFunc_cosh: + case LibFunc_exp: + case LibFunc_exp10: + case LibFunc_expm1: + case LibFunc_sin: + case LibFunc_sinh: + case LibFunc_tanh: + if (UnsafeFPShrink && hasFloatVersion(CI->getCalledFunction()->getName())) + return optimizeUnaryDoubleFP(CI, Builder, true); + return nullptr; + case LibFunc_copysign: + if (hasFloatVersion(CI->getCalledFunction()->getName())) + return optimizeBinaryDoubleFP(CI, Builder); + return nullptr; + case LibFunc_fminf: + case LibFunc_fmin: + case LibFunc_fminl: + case LibFunc_fmaxf: + case LibFunc_fmax: + case LibFunc_fmaxl: + return optimizeFMinFMax(CI, Builder); + case LibFunc_cabs: + case LibFunc_cabsf: + case LibFunc_cabsl: + return optimizeCAbs(CI, Builder); + default: + return nullptr; + } +} + Value *LibCallSimplifier::optimizeCall(CallInst *CI) { + // TODO: Split out the code below that operates on FP calls so that + // we can allow all non-FP calls with the StrictFP attribute to be + // optimized. if (CI->isNoBuiltin()) return nullptr; LibFunc Func; Function *Callee = CI->getCalledFunction(); - StringRef FuncName = Callee->getName(); SmallVector<OperandBundleDef, 2> OpBundles; CI->getOperandBundlesAsDefs(OpBundles); @@ -2057,15 +2216,19 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { bool isCallingConvC = isCallingConvCCompatible(CI); // Command-line parameter overrides instruction attribute. + // This can't be moved to optimizeFloatingPointLibCall() because it may be + // used by the intrinsic optimizations.
if (EnableUnsafeFPShrink.getNumOccurrences() > 0) UnsafeFPShrink = EnableUnsafeFPShrink; - else if (isa<FPMathOperator>(CI) && CI->hasUnsafeAlgebra()) + else if (isa<FPMathOperator>(CI) && CI->isFast()) UnsafeFPShrink = true; // First, check for intrinsics. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { if (!isCallingConvC) return nullptr; + // The FP intrinsics have corresponding constrained versions so we don't + // need to check for the StrictFP attribute here. switch (II->getIntrinsicID()) { case Intrinsic::pow: return optimizePow(CI, Builder); @@ -2106,32 +2269,9 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return nullptr; if (Value *V = optimizeStringMemoryLibCall(CI, Builder)) return V; + if (Value *V = optimizeFloatingPointLibCall(CI, Func, Builder)) + return V; switch (Func) { - case LibFunc_cosf: - case LibFunc_cos: - case LibFunc_cosl: - return optimizeCos(CI, Builder); - case LibFunc_sinpif: - case LibFunc_sinpi: - case LibFunc_cospif: - case LibFunc_cospi: - return optimizeSinCosPi(CI, Builder); - case LibFunc_powf: - case LibFunc_pow: - case LibFunc_powl: - return optimizePow(CI, Builder); - case LibFunc_exp2l: - case LibFunc_exp2: - case LibFunc_exp2f: - return optimizeExp2(CI, Builder); - case LibFunc_fabsf: - case LibFunc_fabs: - case LibFunc_fabsl: - return replaceUnaryCall(CI, Builder, Intrinsic::fabs); - case LibFunc_sqrtf: - case LibFunc_sqrt: - case LibFunc_sqrtl: - return optimizeSqrt(CI, Builder); case LibFunc_ffs: case LibFunc_ffsl: case LibFunc_ffsll: @@ -2160,18 +2300,8 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return optimizeFWrite(CI, Builder); case LibFunc_fputs: return optimizeFPuts(CI, Builder); - case LibFunc_log: - case LibFunc_log10: - case LibFunc_log1p: - case LibFunc_log2: - case LibFunc_logb: - return optimizeLog(CI, Builder); case LibFunc_puts: return optimizePuts(CI, Builder); - case LibFunc_tan: - case LibFunc_tanf: - case LibFunc_tanl: - return optimizeTan(CI, Builder); case LibFunc_perror: return optimizeErrorReporting(CI, Builder); case LibFunc_vfprintf: @@ -2179,46 +2309,6 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return optimizeErrorReporting(CI, Builder, 0); case LibFunc_fputc: return optimizeErrorReporting(CI, Builder, 1); - case LibFunc_ceil: - return replaceUnaryCall(CI, Builder, Intrinsic::ceil); - case LibFunc_floor: - return replaceUnaryCall(CI, Builder, Intrinsic::floor); - case LibFunc_round: - return replaceUnaryCall(CI, Builder, Intrinsic::round); - case LibFunc_nearbyint: - return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint); - case LibFunc_rint: - return replaceUnaryCall(CI, Builder, Intrinsic::rint); - case LibFunc_trunc: - return replaceUnaryCall(CI, Builder, Intrinsic::trunc); - case LibFunc_acos: - case LibFunc_acosh: - case LibFunc_asin: - case LibFunc_asinh: - case LibFunc_atan: - case LibFunc_atanh: - case LibFunc_cbrt: - case LibFunc_cosh: - case LibFunc_exp: - case LibFunc_exp10: - case LibFunc_expm1: - case LibFunc_sin: - case LibFunc_sinh: - case LibFunc_tanh: - if (UnsafeFPShrink && hasFloatVersion(FuncName)) - return optimizeUnaryDoubleFP(CI, Builder, true); - return nullptr; - case LibFunc_copysign: - if (hasFloatVersion(FuncName)) - return optimizeBinaryDoubleFP(CI, Builder); - return nullptr; - case LibFunc_fminf: - case LibFunc_fmin: - case LibFunc_fminl: - case LibFunc_fmaxf: - case LibFunc_fmax: - case LibFunc_fmaxl: - return optimizeFMinFMax(CI, Builder); default: return nullptr; } @@ -2228,9 +2318,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst 
*CI) { LibCallSimplifier::LibCallSimplifier( const DataLayout &DL, const TargetLibraryInfo *TLI, + OptimizationRemarkEmitter &ORE, function_ref<void(Instruction *, Value *)> Replacer) - : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), UnsafeFPShrink(false), - Replacer(Replacer) {} + : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), + UnsafeFPShrink(false), Replacer(Replacer) {} void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { // Indirect through the replacer used in this instance. diff --git a/lib/Transforms/Utils/SplitModule.cpp b/lib/Transforms/Utils/SplitModule.cpp index e9a368f4faa4..968eb0208f43 100644 --- a/lib/Transforms/Utils/SplitModule.cpp +++ b/lib/Transforms/Utils/SplitModule.cpp @@ -13,32 +13,51 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "split-module" - #include "llvm/Transforms/Utils/SplitModule.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/EquivalenceClasses.h" -#include "llvm/ADT/Hashing.h" -#include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Comdat.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalIndirectSymbol.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Module.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include <algorithm> +#include <cassert> +#include <iterator> +#include <memory> #include <queue> +#include <utility> +#include <vector> using namespace llvm; +#define DEBUG_TYPE "split-module" + namespace { -typedef EquivalenceClasses<const GlobalValue *> ClusterMapType; -typedef DenseMap<const Comdat *, const GlobalValue *> ComdatMembersType; -typedef DenseMap<const GlobalValue *, unsigned> ClusterIDMapType; -} + +using ClusterMapType = EquivalenceClasses<const GlobalValue *>; +using ComdatMembersType = DenseMap<const Comdat *, const GlobalValue *>; +using ClusterIDMapType = DenseMap<const GlobalValue *, unsigned>; + +} // end anonymous namespace static void addNonConstUser(ClusterMapType &GVtoClusterMap, const GlobalValue *GV, const User *U) { @@ -125,9 +144,9 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap, addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV); }; - std::for_each(M->begin(), M->end(), recordGVSet); - std::for_each(M->global_begin(), M->global_end(), recordGVSet); - std::for_each(M->alias_begin(), M->alias_end(), recordGVSet); + llvm::for_each(M->functions(), recordGVSet); + llvm::for_each(M->globals(), recordGVSet); + llvm::for_each(M->aliases(), recordGVSet); // Assigned all GVs to merged clusters while balancing number of objects in // each. 
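The change just above moves from iterator-pair std::for_each to range-based llvm::for_each over the module's functions, globals, and aliases. A minimal sketch of that style, using a hypothetical helper that is not part of the patch:

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/IR/Module.h"

  // Walks every function, global variable, and alias in a module with the
  // same range-based llvm::for_each calls adopted by findPartitions.
  static unsigned countGlobalValues(const llvm::Module &M) {
    unsigned N = 0;
    auto Count = [&N](const llvm::GlobalValue &) { ++N; };
    llvm::for_each(M.functions(), Count);
    llvm::for_each(M.globals(), Count);
    llvm::for_each(M.aliases(), Count);
    return N;
  }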
@@ -147,7 +166,8 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
   for (unsigned i = 0; i < N; ++i)
     BalancinQueue.push(std::make_pair(i, 0));
 
-  typedef std::pair<unsigned, ClusterMapType::iterator> SortType;
+  using SortType = std::pair<unsigned, ClusterMapType::iterator>;
+
   SmallVector<SortType, 64> Sets;
   SmallPtrSet<const GlobalValue *, 32> Visited;
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index 20107553665f..3640541e63cc 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -1,4 +1,4 @@
-//===- SymbolRewriter.cpp - Symbol Rewriter ---------------------*- C++ -*-===//
+//===- SymbolRewriter.cpp - Symbol Rewriter -------------------------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -57,25 +57,41 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "symbol-rewriter"
 #include "llvm/Transforms/Utils/SymbolRewriter.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/YAMLParser.h"
-#include "llvm/Support/raw_ostream.h"
+#include <memory>
+#include <string>
+#include <vector>
 
 using namespace llvm;
 using namespace SymbolRewriter;
 
+#define DEBUG_TYPE "symbol-rewriter"
+
 static cl::list<std::string> RewriteMapFiles("rewrite-map-file",
                                              cl::desc("Symbol Rewrite Map"),
-                                             cl::value_desc("filename"));
+                                             cl::value_desc("filename"),
+                                             cl::Hidden);
 
 static void rewriteComdat(Module &M, GlobalObject *GO,
                           const std::string &Source,
@@ -92,8 +108,9 @@ static void rewriteComdat(Module &M, GlobalObject *GO,
 }
 
 namespace {
+
 template <RewriteDescriptor::Type DT, typename ValueType,
-          ValueType *(llvm::Module::*Get)(StringRef) const>
+          ValueType *(Module::*Get)(StringRef) const>
 class ExplicitRewriteDescriptor : public RewriteDescriptor {
 public:
   const std::string Source;
@@ -110,8 +127,10 @@ public:
   }
 };
 
+} // end anonymous namespace
+
 template <RewriteDescriptor::Type DT, typename ValueType,
-          ValueType *(llvm::Module::*Get)(StringRef) const>
+          ValueType *(Module::*Get)(StringRef) const>
 bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
   bool Changed = false;
   if (ValueType *S = (M.*Get)(Source)) {
@@ -128,10 +147,12 @@ bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
   return Changed;
 }
 
+namespace {
+
 template <RewriteDescriptor::Type DT, typename ValueType,
-          ValueType *(llvm::Module::*Get)(StringRef) const,
+          ValueType *(Module::*Get)(StringRef) const,
           iterator_range<typename iplist<ValueType>::iterator>
-          (llvm::Module::*Iterator)()>
+          (Module::*Iterator)()>
 class PatternRewriteDescriptor : public RewriteDescriptor {
 public:
   const std::string Pattern;
@@ -147,10 +168,12 @@ public:
   }
 };
 
+} // end anonymous namespace
+
 template <RewriteDescriptor::Type DT, typename ValueType,
-          ValueType *(llvm::Module::*Get)(StringRef) const,
+          ValueType *(Module::*Get)(StringRef) const,
           iterator_range<typename iplist<ValueType>::iterator>
-          (llvm::Module::*Iterator)()>
+          (Module::*Iterator)()>
 bool PatternRewriteDescriptor<DT, ValueType, Get, Iterator>::
 performOnModule(Module &M) {
   bool Changed = false;
@@ -178,55 +201,52 @@ performOnModule(Module &M) {
   return Changed;
 }
 
+namespace {
+
 /// Represents a rewrite for an explicitly named (function) symbol. Both the
 /// source function name and target function name of the transformation are
 /// explicitly spelt out.
-typedef ExplicitRewriteDescriptor<RewriteDescriptor::Type::Function,
-                                  llvm::Function, &llvm::Module::getFunction>
-    ExplicitRewriteFunctionDescriptor;
+using ExplicitRewriteFunctionDescriptor =
+    ExplicitRewriteDescriptor<RewriteDescriptor::Type::Function, Function,
+                              &Module::getFunction>;
 
 /// Represents a rewrite for an explicitly named (global variable) symbol. Both
 /// the source variable name and target variable name are spelt out. This
 /// applies only to module level variables.
-typedef ExplicitRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
-                                  llvm::GlobalVariable,
-                                  &llvm::Module::getGlobalVariable>
-    ExplicitRewriteGlobalVariableDescriptor;
+using ExplicitRewriteGlobalVariableDescriptor =
+    ExplicitRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
+                              GlobalVariable, &Module::getGlobalVariable>;
 
 /// Represents a rewrite for an explicitly named global alias. Both the source
 /// and target name are explicitly spelt out.
-typedef ExplicitRewriteDescriptor<RewriteDescriptor::Type::NamedAlias,
-                                  llvm::GlobalAlias,
-                                  &llvm::Module::getNamedAlias>
-    ExplicitRewriteNamedAliasDescriptor;
+using ExplicitRewriteNamedAliasDescriptor =
+    ExplicitRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias,
+                              &Module::getNamedAlias>;
 
 /// Represents a rewrite for a regular expression based pattern for functions.
 /// A pattern for the function name is provided and a transformation for that
 /// pattern to determine the target function name create the rewrite rule.
-typedef PatternRewriteDescriptor<RewriteDescriptor::Type::Function,
-                                 llvm::Function, &llvm::Module::getFunction,
-                                 &llvm::Module::functions>
-    PatternRewriteFunctionDescriptor;
+using PatternRewriteFunctionDescriptor =
+    PatternRewriteDescriptor<RewriteDescriptor::Type::Function, Function,
+                             &Module::getFunction, &Module::functions>;
 
 /// Represents a rewrite for a global variable based upon a matching pattern.
 /// Each global variable matching the provided pattern will be transformed as
 /// described in the transformation pattern for the target. Applies only to
 /// module level variables.
-typedef PatternRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
-                                 llvm::GlobalVariable,
-                                 &llvm::Module::getGlobalVariable,
-                                 &llvm::Module::globals>
-    PatternRewriteGlobalVariableDescriptor;
+using PatternRewriteGlobalVariableDescriptor =
+    PatternRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
+                             GlobalVariable, &Module::getGlobalVariable,
+                             &Module::globals>;
 
 /// PatternRewriteNamedAliasDescriptor - represents a rewrite for global
 /// aliases which match a given pattern. The provided transformation will be
 /// applied to each of the matching names.
-typedef PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias,
-                                 llvm::GlobalAlias,
-                                 &llvm::Module::getNamedAlias,
-                                 &llvm::Module::aliases>
-    PatternRewriteNamedAliasDescriptor;
-} // namespace
+using PatternRewriteNamedAliasDescriptor =
+    PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias,
+                             &Module::getNamedAlias, &Module::aliases>;
+
+} // end anonymous namespace
 
 bool RewriteMapParser::parse(const std::string &MapFile,
                              RewriteDescriptorList *DL) {
@@ -497,6 +517,7 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
 }
 
 namespace {
+
 class RewriteSymbolsLegacyPass : public ModulePass {
 public:
   static char ID; // Pass identification, replacement for typeid
@@ -510,9 +531,11 @@ private:
   RewriteSymbolPass Impl;
 };
 
+} // end anonymous namespace
+
 char RewriteSymbolsLegacyPass::ID = 0;
 
-RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID), Impl() {
+RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID) {
   initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry());
 }
 
@@ -523,9 +546,7 @@ RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass(
 bool RewriteSymbolsLegacyPass::runOnModule(Module &M) {
   return Impl.runImpl(M);
 }
-}
 
-namespace llvm {
 PreservedAnalyses RewriteSymbolPass::run(Module &M, ModuleAnalysisManager &AM) {
   if (!runImpl(M))
     return PreservedAnalyses::all();
@@ -550,7 +571,6 @@ void RewriteSymbolPass::loadAndParseMapFiles() {
   for (const auto &MapFile : MapFiles)
     Parser.parse(MapFile, &Descriptors);
 }
-}
 
 INITIALIZE_PASS(RewriteSymbolsLegacyPass, "rewrite-symbols",
                 "Rewrite Symbols", false, false)
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 9385f825523c..ed444e4cf43c 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -15,7 +15,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 930972924c3c..8c9ecbc3503e 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -13,17 +13,36 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <limits>
+#include <memory>
+#include <utility>
+
 using namespace llvm;
 
 // Out of line method to get vtable etc for class.
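The descriptor aliases in SymbolRewriter.cpp above are converted from typedef to C++11 alias declarations. The using form reads left to right, with the new name first, and unlike typedef it also extends to alias templates. A small self-contained sketch of both points; all names below are illustrative, none come from the LLVM tree:

#include <map>
#include <string>
#include <vector>

// Old style: the alias name trails the full declarator.
typedef std::map<std::string, unsigned> OldCountMap;

// New style: the alias name leads, mirroring the descriptor aliases above.
using CountMap = std::map<std::string, unsigned>;

// Alias templates are only expressible with "using"; typedef has no
// equivalent spelling for this.
template <typename T>
using StringKeyed = std::map<std::string, T>;

int main() {
  CountMap Counts;
  Counts["rewrites"] = 3;
  StringKeyed<std::vector<int>> Table;
  Table["ids"] = {1, 2, 3};
  return (Counts.size() == 1 && Table.size() == 1) ? 0 : 1;
}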
@@ -85,7 +104,6 @@ struct MappingContext {
       : VM(&VM), Materializer(Materializer) {}
 };
 
-class MDNodeMapper;
 class Mapper {
   friend class MDNodeMapper;
 
@@ -175,7 +193,7 @@ class MDNodeMapper {
   /// Data about a node in \a UniquedGraph.
   struct Data {
     bool HasChanged = false;
-    unsigned ID = ~0u;
+    unsigned ID = std::numeric_limits<unsigned>::max();
     TempMDNode Placeholder;
   };
 
@@ -316,7 +334,7 @@ private:
   void remapOperands(MDNode &N, OperandMapper mapOperand);
 };
 
-} // end namespace
+} // end anonymous namespace
 
 Value *Mapper::mapValue(const Value *V) {
   ValueToValueMapTy::iterator I = getVM().find(V);
@@ -579,6 +597,7 @@ void MDNodeMapper::remapOperands(MDNode &N, OperandMapper mapOperand) {
 }
 
 namespace {
+
 /// An entry in the worklist for the post-order traversal.
 struct POTWorklistEntry {
   MDNode *N;              ///< Current node.
@@ -590,7 +609,8 @@ struct POTWorklistEntry {
 
   POTWorklistEntry(MDNode &N) : N(&N), Op(N.op_begin()) {}
 };
-} // end namespace
+
+} // end anonymous namespace
 
 bool MDNodeMapper::createPOT(UniquedGraph &G, const MDNode &FirstN) {
   assert(G.Info.empty() && "Expected a fresh traversal");
@@ -653,7 +673,7 @@ void MDNodeMapper::UniquedGraph::propagateChanges() {
       if (D.HasChanged)
         continue;
 
-      if (none_of(N->operands(), [&](const Metadata *Op) {
+      if (llvm::none_of(N->operands(), [&](const Metadata *Op) {
             auto Where = Info.find(Op);
             return Where != Info.end() && Where->second.HasChanged;
           }))
@@ -752,10 +772,11 @@ struct MapMetadataDisabler {
   MapMetadataDisabler(ValueToValueMapTy &VM) : VM(VM) {
     VM.disableMapMetadata();
   }
+
   ~MapMetadataDisabler() { VM.enableMapMetadata(); }
 };
 
-} // end namespace
+} // end anonymous namespace
 
 Optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) {
   // If the value already exists in the map, use it.
@@ -1037,11 +1058,13 @@ public:
   explicit FlushingMapper(void *pImpl) : M(*getAsMapper(pImpl)) {
     assert(!M.hasWorkToDo() && "Expected to be flushed");
   }
+
   ~FlushingMapper() { M.flush(); }
+
   Mapper *operator->() const { return &M; }
 };
 
-} // end namespace
+} // end anonymous namespace
 
 ValueMapper::ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags,
                          ValueMapTypeRemapper *TypeMapper,
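MapMetadataDisabler and FlushingMapper in the ValueMapper.cpp hunks above are small RAII guards: the constructor flips a piece of mapper state, and the destructor restores or flushes it on every path out of the scope, including early returns and exceptions. A minimal sketch of the same pattern under hypothetical names, not the LLVM classes themselves:

#include <cassert>

// Stand-in for the state a guard manipulates; ValueToValueMapTy plays
// this role for the real MapMetadataDisabler.
struct MappingState {
  bool MetadataEnabled = true;
};

class MetadataDisabler {
  MappingState &S;

public:
  explicit MetadataDisabler(MappingState &S) : S(S) {
    S.MetadataEnabled = false; // disable on entry to the scope
  }
  ~MetadataDisabler() { S.MetadataEnabled = true; } // restore on any exit
};

int main() {
  MappingState S;
  {
    MetadataDisabler D(S);
    assert(!S.MetadataEnabled);
    // ... work that must not map metadata ...
  } // destructor runs here, even on early return
  assert(S.MetadataEnabled);
  return 0;
}

Pairing the disable/enable calls in a constructor/destructor is what lets the real code take fast paths that return early without leaking the disabled state.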