40 files changed, 1538 insertions, 226 deletions
diff --git a/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
index ee0973002c47..0908b361a4d4 100644
--- a/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -63,6 +63,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
@@ -232,7 +233,7 @@ static bool addDiscriminators(Function &F) {
     LocationSet CallLocations;
     for (auto &I : B.getInstList()) {
       // We bypass intrinsic calls for the following two reasons:
-      //  1) We want to avoid a non-deterministic assigment of
+      //  1) We want to avoid a non-deterministic assignment of
       //     discriminators.
       //  2) We want to minimize the number of base discriminators used.
       if (!isa<InvokeInst>(I) && (!isa<CallInst>(I) || isa<IntrinsicInst>(I)))  
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index d85cc40c372a..c9eb4abfa21a 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -247,7 +247,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
   Instruction *STI = BB->getTerminator();
   Instruction *Start = &*BB->begin();
   // If there's nothing to move, mark the starting instruction as the last
-  // instruction in the block.
+  // instruction in the block. Terminator instruction is handled separately.
   if (Start == STI)
     Start = PTI;
 
@@ -274,24 +274,20 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
 
     // Move terminator instruction.
     PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+
+    // Terminator may be a memory accessing instruction too.
+    if (MSSAU)
+      if (MemoryUseOrDef *MUD = cast_or_null<MemoryUseOrDef>(
+              MSSAU->getMemorySSA()->getMemoryAccess(PredBB->getTerminator())))
+        MSSAU->moveToPlace(MUD, PredBB, MemorySSA::End);
   }
   // Add unreachable to now empty BB.
   new UnreachableInst(BB->getContext(), BB);
 
-  // Eliminate duplicate dbg.values describing the entry PHI node post-splice.
-  for (auto Incoming : IncomingValues) {
-    if (isa<Instruction>(*Incoming)) {
-      SmallVector<DbgValueInst *, 2> DbgValues;
-      SmallDenseSet<std::pair<DILocalVariable *, DIExpression *>, 2>
-          DbgValueSet;
-      llvm::findDbgValues(DbgValues, Incoming);
-      for (auto &DVI : DbgValues) {
-        auto R = DbgValueSet.insert({DVI->getVariable(), DVI->getExpression()});
-        if (!R.second)
-          DVI->eraseFromParent();
-      }
-    }
-  }
+  // Eliminate duplicate/redundant dbg.values. This seems to be a good place to
+  // do that since we might end up with redundant dbg.values describing the
+  // entry PHI node post-splice.
+  RemoveRedundantDbgInstrs(PredBB);
 
   // Inherit predecessors name if it exists.
   if (!PredBB->hasName())
@@ -318,6 +314,124 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
   return true;
 }
 
+/// Remove redundant instructions within sequences of consecutive dbg.value
+/// instructions. This is done using a backward scan to keep the last dbg.value
+/// describing a specific variable/fragment.
+///
+/// BackwardScan strategy:
+/// ----------------------
+/// Given a sequence of consecutive DbgValueInst like this
+///
+///   dbg.value ..., "x", FragmentX1  (*)
+///   dbg.value ..., "y", FragmentY1
+///   dbg.value ..., "x", FragmentX2
+///   dbg.value ..., "x", FragmentX1  (**)
+///
+/// then the instruction marked with (*) can be removed (it is guaranteed to be
+/// obsoleted by the instruction marked with (**) as the latter instruction is
+/// describing the same variable using the same fragment info).
+///
+/// Possible improvements:
+/// - Check fully overlapping fragments and not only identical fragments.
+/// - Support dbg.addr, dbg.declare. dbg.label, and possibly other meta
+///   instructions being part of the sequence of consecutive instructions.
+static bool removeRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) {
+  SmallVector<DbgValueInst *, 8> ToBeRemoved;
+  SmallDenseSet<DebugVariable> VariableSet;
+  for (auto &I : reverse(*BB)) {
+    if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(&I)) {
+      DebugVariable Key(DVI->getVariable(),
+                        DVI->getExpression(),
+                        DVI->getDebugLoc()->getInlinedAt());
+      auto R = VariableSet.insert(Key);
+      // If the same variable fragment is described more than once it is enough
+      // to keep the last one (i.e. the first found since we for reverse
+      // iteration).
+      if (!R.second)
+        ToBeRemoved.push_back(DVI);
+      continue;
+    }
+    // Sequence with consecutive dbg.value instrs ended. Clear the map to
+    // restart identifying redundant instructions if case we find another
+    // dbg.value sequence.
+    VariableSet.clear();
+  }
+
+  for (auto &Instr : ToBeRemoved)
+    Instr->eraseFromParent();
+
+  return !ToBeRemoved.empty();
+}
+
+/// Remove redundant dbg.value instructions using a forward scan. This can
+/// remove a dbg.value instruction that is redundant due to indicating that a
+/// variable has the same value as already being indicated by an earlier
+/// dbg.value.
+///
+/// ForwardScan strategy:
+/// ---------------------
+/// Given two identical dbg.value instructions, separated by a block of
+/// instructions that isn't describing the same variable, like this
+///
+///   dbg.value X1, "x", FragmentX1  (**)
+///   <block of instructions, none being "dbg.value ..., "x", ...">
+///   dbg.value X1, "x", FragmentX1  (*)
+///
+/// then the instruction marked with (*) can be removed. Variable "x" is already
+/// described as being mapped to the SSA value X1.
+///
+/// Possible improvements:
+/// - Keep track of non-overlapping fragments.
+static bool removeRedundantDbgInstrsUsingForwardScan(BasicBlock *BB) {
+  SmallVector<DbgValueInst *, 8> ToBeRemoved;
+  DenseMap<DebugVariable, std::pair<Value *, DIExpression *> > VariableMap;
+  for (auto &I : *BB) {
+    if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(&I)) {
+      DebugVariable Key(DVI->getVariable(),
+                        NoneType(),
+                        DVI->getDebugLoc()->getInlinedAt());
+      auto VMI = VariableMap.find(Key);
+      // Update the map if we found a new value/expression describing the
+      // variable, or if the variable wasn't mapped already.
+      if (VMI == VariableMap.end() ||
+          VMI->second.first != DVI->getValue() ||
+          VMI->second.second != DVI->getExpression()) {
+        VariableMap[Key] = { DVI->getValue(), DVI->getExpression() };
+        continue;
+      }
+      // Found an identical mapping. Remember the instruction for later removal.
+      ToBeRemoved.push_back(DVI);
+    }
+  }
+
+  for (auto &Instr : ToBeRemoved)
+    Instr->eraseFromParent();
+
+  return !ToBeRemoved.empty();
+}
+
+bool llvm::RemoveRedundantDbgInstrs(BasicBlock *BB) {
+  bool MadeChanges = false;
+  // By using the "backward scan" strategy before the "forward scan" strategy we
+  // can remove both dbg.value (2) and (3) in a situation like this:
+  //
+  //   (1) dbg.value V1, "x", DIExpression()
+  //       ...
+  //   (2) dbg.value V2, "x", DIExpression()
+  //   (3) dbg.value V1, "x", DIExpression()
+  //
+  // The backward scan will remove (2), it is made obsolete by (3). After
+  // getting (2) out of the way, the foward scan will remove (3) since "x"
+  // already is described as having the value V1 at (1).
+  MadeChanges |= removeRedundantDbgInstrsUsingBackwardScan(BB);
+  MadeChanges |= removeRedundantDbgInstrsUsingForwardScan(BB);
+
+  if (MadeChanges)
+    LLVM_DEBUG(dbgs() << "Removed redundant dbg instrs from: "
+                      << BB->getName() << "\n");
+  return MadeChanges;
+}
+
 void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL,
                                 BasicBlock::iterator &BI, Value *V) {
   Instruction &I = *BI;
diff --git a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index f5e4b53f6d97..008cea333e6b 100644
--- a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -28,6 +28,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp b/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
index 3c7c8d872595..6b01c0c71d00 100644
--- a/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
+++ b/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
@@ -30,9 +30,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
-
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 
 using namespace llvm;
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 0298ff9a395f..682af4a88d3e 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -805,7 +805,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
     dbgs() << ")\n";
   });
 
-  StructType *StructTy;
+  StructType *StructTy = nullptr;
   if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
     StructTy = StructType::get(M->getContext(), paramTy);
     paramTy.clear();
diff --git a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
new file mode 100644
index 000000000000..93395ac761ab
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
@@ -0,0 +1,189 @@
+//===- CodeMoverUtils.cpp - CodeMover Utilities ----------------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform movements on basic blocks, and instructions
+// contained within a function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CodeMoverUtils.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "codemover-utils"
+
+STATISTIC(HasDependences,
+          "Cannot move across instructions that has memory dependences");
+STATISTIC(MayThrowException, "Cannot move across instructions that may throw");
+STATISTIC(NotControlFlowEquivalent,
+          "Instructions are not control flow equivalent");
+STATISTIC(NotMovedPHINode, "Movement of PHINodes are not supported");
+STATISTIC(NotMovedTerminator, "Movement of Terminator are not supported");
+
+bool llvm::isControlFlowEquivalent(const Instruction &I0, const Instruction &I1,
+                                   const DominatorTree &DT,
+                                   const PostDominatorTree &PDT) {
+  return isControlFlowEquivalent(*I0.getParent(), *I1.getParent(), DT, PDT);
+}
+
+bool llvm::isControlFlowEquivalent(const BasicBlock &BB0, const BasicBlock &BB1,
+                                   const DominatorTree &DT,
+                                   const PostDominatorTree &PDT) {
+  if (&BB0 == &BB1)
+    return true;
+
+  return ((DT.dominates(&BB0, &BB1) && PDT.dominates(&BB1, &BB0)) ||
+          (PDT.dominates(&BB0, &BB1) && DT.dominates(&BB1, &BB0)));
+}
+
+static bool reportInvalidCandidate(const Instruction &I,
+                                   llvm::Statistic &Stat) {
+  ++Stat;
+  LLVM_DEBUG(dbgs() << "Unable to move instruction: " << I << ". "
+                    << Stat.getDesc());
+  return false;
+}
+
+/// Collect all instructions in between \p StartInst and \p EndInst, and store
+/// them in \p InBetweenInsts.
+static void
+collectInstructionsInBetween(Instruction &StartInst, const Instruction &EndInst,
+                             SmallPtrSetImpl<Instruction *> &InBetweenInsts) {
+  assert(InBetweenInsts.empty() && "Expecting InBetweenInsts to be empty");
+
+  /// Get the next instructions of \p I, and push them to \p WorkList.
+  auto getNextInsts = [](Instruction &I,
+                         SmallPtrSetImpl<Instruction *> &WorkList) {
+    if (Instruction *NextInst = I.getNextNode())
+      WorkList.insert(NextInst);
+    else {
+      assert(I.isTerminator() && "Expecting a terminator instruction");
+      for (BasicBlock *Succ : successors(&I))
+        WorkList.insert(&Succ->front());
+    }
+  };
+
+  SmallPtrSet<Instruction *, 10> WorkList;
+  getNextInsts(StartInst, WorkList);
+  while (!WorkList.empty()) {
+    Instruction *CurInst = *WorkList.begin();
+    WorkList.erase(CurInst);
+
+    if (CurInst == &EndInst)
+      continue;
+
+    if (!InBetweenInsts.insert(CurInst).second)
+      continue;
+
+    getNextInsts(*CurInst, WorkList);
+  }
+}
+
+bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
+                              const DominatorTree &DT,
+                              const PostDominatorTree &PDT,
+                              DependenceInfo &DI) {
+  // Cannot move itself before itself.
+  if (&I == &InsertPoint)
+    return false;
+
+  // Not moved.
+  if (I.getNextNode() == &InsertPoint)
+    return true;
+
+  if (isa<PHINode>(I) || isa<PHINode>(InsertPoint))
+    return reportInvalidCandidate(I, NotMovedPHINode);
+
+  if (I.isTerminator())
+    return reportInvalidCandidate(I, NotMovedTerminator);
+
+  // TODO remove this limitation.
+  if (!isControlFlowEquivalent(I, InsertPoint, DT, PDT))
+    return reportInvalidCandidate(I, NotControlFlowEquivalent);
+
+  // As I and InsertPoint are control flow equivalent, if I dominates
+  // InsertPoint, then I comes before InsertPoint.
+  const bool MoveForward = DT.dominates(&I, &InsertPoint);
+  if (MoveForward) {
+    // When I is being moved forward, we need to make sure the InsertPoint
+    // dominates every users. Or else, a user may be using an undefined I.
+    for (const Use &U : I.uses())
+      if (auto *UserInst = dyn_cast<Instruction>(U.getUser()))
+        if (UserInst != &InsertPoint && !DT.dominates(&InsertPoint, U))
+          return false;
+  } else {
+    // When I is being moved backward, we need to make sure all its opernads
+    // dominates the InsertPoint. Or else, an operand may be undefined for I.
+    for (const Value *Op : I.operands())
+      if (auto *OpInst = dyn_cast<Instruction>(Op))
+        if (&InsertPoint == OpInst || !DT.dominates(OpInst, &InsertPoint))
+          return false;
+  }
+
+  Instruction &StartInst = (MoveForward ? I : InsertPoint);
+  Instruction &EndInst = (MoveForward ? InsertPoint : I);
+  SmallPtrSet<Instruction *, 10> InstsToCheck;
+  collectInstructionsInBetween(StartInst, EndInst, InstsToCheck);
+  if (!MoveForward)
+    InstsToCheck.insert(&InsertPoint);
+
+  // Check if there exists instructions which may throw, may synchonize, or may
+  // never return, from I to InsertPoint.
+  if (!isSafeToSpeculativelyExecute(&I))
+    if (std::any_of(InstsToCheck.begin(), InstsToCheck.end(),
+                    [](Instruction *I) {
+                      if (I->mayThrow())
+                        return true;
+
+                      const CallBase *CB = dyn_cast<CallBase>(I);
+                      if (!CB)
+                        return false;
+                      if (!CB->hasFnAttr(Attribute::WillReturn))
+                        return true;
+                      if (!CB->hasFnAttr(Attribute::NoSync))
+                        return true;
+
+                      return false;
+                    })) {
+      return reportInvalidCandidate(I, MayThrowException);
+    }
+
+  // Check if I has any output/flow/anti dependences with instructions from \p
+  // StartInst to \p EndInst.
+  if (std::any_of(InstsToCheck.begin(), InstsToCheck.end(),
+                  [&DI, &I](Instruction *CurInst) {
+                    auto DepResult = DI.depends(&I, CurInst, true);
+                    if (DepResult &&
+                        (DepResult->isOutput() || DepResult->isFlow() ||
+                         DepResult->isAnti()))
+                      return true;
+                    return false;
+                  }))
+    return reportInvalidCandidate(I, HasDependences);
+
+  return true;
+}
+
+void llvm::moveInstsBottomUp(BasicBlock &FromBB, BasicBlock &ToBB,
+                             const DominatorTree &DT,
+                             const PostDominatorTree &PDT, DependenceInfo &DI) {
+  for (auto It = ++FromBB.rbegin(); It != FromBB.rend();) {
+    Instruction *MovePos = ToBB.getFirstNonPHIOrDbg();
+    Instruction &I = *It;
+    // Increment the iterator before modifying FromBB.
+    ++It;
+
+    if (isSafeToMoveBefore(I, *MovePos, DT, PDT, DI))
+      I.moveBefore(MovePos);
+  }
+}
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
new file mode 100644
index 000000000000..b7b4bfa3734d
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -0,0 +1,435 @@
+//===- Debugify.cpp - Attach synthetic debug info to everything -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This pass attaches synthetic debug info to everything. It can be used
+/// to create targeted tests for debug info preservation.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Debugify.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+namespace {
+
+cl::opt<bool> Quiet("debugify-quiet",
+                    cl::desc("Suppress verbose debugify output"));
+
+raw_ostream &dbg() { return Quiet ? nulls() : errs(); }
+
+uint64_t getAllocSizeInBits(Module &M, Type *Ty) {
+  return Ty->isSized() ? M.getDataLayout().getTypeAllocSizeInBits(Ty) : 0;
+}
+
+bool isFunctionSkipped(Function &F) {
+  return F.isDeclaration() || !F.hasExactDefinition();
+}
+
+/// Find the basic block's terminating instruction.
+///
+/// Special care is needed to handle musttail and deopt calls, as these behave
+/// like (but are in fact not) terminators.
+Instruction *findTerminatingInstruction(BasicBlock &BB) {
+  if (auto *I = BB.getTerminatingMustTailCall())
+    return I;
+  if (auto *I = BB.getTerminatingDeoptimizeCall())
+    return I;
+  return BB.getTerminator();
+}
+
+bool applyDebugifyMetadata(Module &M,
+                           iterator_range<Module::iterator> Functions,
+                           StringRef Banner) {
+  // Skip modules with debug info.
+  if (M.getNamedMetadata("llvm.dbg.cu")) {
+    dbg() << Banner << "Skipping module with debug info\n";
+    return false;
+  }
+
+  DIBuilder DIB(M);
+  LLVMContext &Ctx = M.getContext();
+
+  // Get a DIType which corresponds to Ty.
+  DenseMap<uint64_t, DIType *> TypeCache;
+  auto getCachedDIType = [&](Type *Ty) -> DIType * {
+    uint64_t Size = getAllocSizeInBits(M, Ty);
+    DIType *&DTy = TypeCache[Size];
+    if (!DTy) {
+      std::string Name = "ty" + utostr(Size);
+      DTy = DIB.createBasicType(Name, Size, dwarf::DW_ATE_unsigned);
+    }
+    return DTy;
+  };
+
+  unsigned NextLine = 1;
+  unsigned NextVar = 1;
+  auto File = DIB.createFile(M.getName(), "/");
+  auto CU = DIB.createCompileUnit(dwarf::DW_LANG_C, File, "debugify",
+                                  /*isOptimized=*/true, "", 0);
+
+  // Visit each instruction.
+  for (Function &F : Functions) {
+    if (isFunctionSkipped(F))
+      continue;
+
+    auto SPType = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None));
+    DISubprogram::DISPFlags SPFlags =
+        DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized;
+    if (F.hasPrivateLinkage() || F.hasInternalLinkage())
+      SPFlags |= DISubprogram::SPFlagLocalToUnit;
+    auto SP = DIB.createFunction(CU, F.getName(), F.getName(), File, NextLine,
+                                 SPType, NextLine, DINode::FlagZero, SPFlags);
+    F.setSubprogram(SP);
+    for (BasicBlock &BB : F) {
+      // Attach debug locations.
+      for (Instruction &I : BB)
+        I.setDebugLoc(DILocation::get(Ctx, NextLine++, 1, SP));
+
+      // Inserting debug values into EH pads can break IR invariants.
+      if (BB.isEHPad())
+        continue;
+
+      // Find the terminating instruction, after which no debug values are
+      // attached.
+      Instruction *LastInst = findTerminatingInstruction(BB);
+      assert(LastInst && "Expected basic block with a terminator");
+
+      // Maintain an insertion point which can't be invalidated when updates
+      // are made.
+      BasicBlock::iterator InsertPt = BB.getFirstInsertionPt();
+      assert(InsertPt != BB.end() && "Expected to find an insertion point");
+      Instruction *InsertBefore = &*InsertPt;
+
+      // Attach debug values.
+      for (Instruction *I = &*BB.begin(); I != LastInst; I = I->getNextNode()) {
+        // Skip void-valued instructions.
+        if (I->getType()->isVoidTy())
+          continue;
+
+        // Phis and EH pads must be grouped at the beginning of the block.
+        // Only advance the insertion point when we finish visiting these.
+        if (!isa<PHINode>(I) && !I->isEHPad())
+          InsertBefore = I->getNextNode();
+
+        std::string Name = utostr(NextVar++);
+        const DILocation *Loc = I->getDebugLoc().get();
+        auto LocalVar = DIB.createAutoVariable(SP, Name, File, Loc->getLine(),
+                                               getCachedDIType(I->getType()),
+                                               /*AlwaysPreserve=*/true);
+        DIB.insertDbgValueIntrinsic(I, LocalVar, DIB.createExpression(), Loc,
+                                    InsertBefore);
+      }
+    }
+    DIB.finalizeSubprogram(SP);
+  }
+  DIB.finalize();
+
+  // Track the number of distinct lines and variables.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.debugify");
+  auto *IntTy = Type::getInt32Ty(Ctx);
+  auto addDebugifyOperand = [&](unsigned N) {
+    NMD->addOperand(MDNode::get(
+        Ctx, ValueAsMetadata::getConstant(ConstantInt::get(IntTy, N))));
+  };
+  addDebugifyOperand(NextLine - 1); // Original number of lines.
+  addDebugifyOperand(NextVar - 1);  // Original number of variables.
+  assert(NMD->getNumOperands() == 2 &&
+         "llvm.debugify should have exactly 2 operands!");
+
+  // Claim that this synthetic debug info is valid.
+  StringRef DIVersionKey = "Debug Info Version";
+  if (!M.getModuleFlag(DIVersionKey))
+    M.addModuleFlag(Module::Warning, DIVersionKey, DEBUG_METADATA_VERSION);
+
+  return true;
+}
+
+/// Return true if a mis-sized diagnostic is issued for \p DVI.
+bool diagnoseMisSizedDbgValue(Module &M, DbgValueInst *DVI) {
+  // The size of a dbg.value's value operand should match the size of the
+  // variable it corresponds to.
+  //
+  // TODO: This, along with a check for non-null value operands, should be
+  // promoted to verifier failures.
+  Value *V = DVI->getValue();
+  if (!V)
+    return false;
+
+  // For now, don't try to interpret anything more complicated than an empty
+  // DIExpression. Eventually we should try to handle OP_deref and fragments.
+  if (DVI->getExpression()->getNumElements())
+    return false;
+
+  Type *Ty = V->getType();
+  uint64_t ValueOperandSize = getAllocSizeInBits(M, Ty);
+  Optional<uint64_t> DbgVarSize = DVI->getFragmentSizeInBits();
+  if (!ValueOperandSize || !DbgVarSize)
+    return false;
+
+  bool HasBadSize = false;
+  if (Ty->isIntegerTy()) {
+    auto Signedness = DVI->getVariable()->getSignedness();
+    if (Signedness && *Signedness == DIBasicType::Signedness::Signed)
+      HasBadSize = ValueOperandSize < *DbgVarSize;
+  } else {
+    HasBadSize = ValueOperandSize != *DbgVarSize;
+  }
+
+  if (HasBadSize) {
+    dbg() << "ERROR: dbg.value operand has size " << ValueOperandSize
+          << ", but its variable has size " << *DbgVarSize << ": ";
+    DVI->print(dbg());
+    dbg() << "\n";
+  }
+  return HasBadSize;
+}
+
+bool checkDebugifyMetadata(Module &M,
+                           iterator_range<Module::iterator> Functions,
+                           StringRef NameOfWrappedPass, StringRef Banner,
+                           bool Strip, DebugifyStatsMap *StatsMap) {
+  // Skip modules without debugify metadata.
+  NamedMDNode *NMD = M.getNamedMetadata("llvm.debugify");
+  if (!NMD) {
+    dbg() << Banner << "Skipping module without debugify metadata\n";
+    return false;
+  }
+
+  auto getDebugifyOperand = [&](unsigned Idx) -> unsigned {
+    return mdconst::extract<ConstantInt>(NMD->getOperand(Idx)->getOperand(0))
+        ->getZExtValue();
+  };
+  assert(NMD->getNumOperands() == 2 &&
+         "llvm.debugify should have exactly 2 operands!");
+  unsigned OriginalNumLines = getDebugifyOperand(0);
+  unsigned OriginalNumVars = getDebugifyOperand(1);
+  bool HasErrors = false;
+
+  // Track debug info loss statistics if able.
+  DebugifyStatistics *Stats = nullptr;
+  if (StatsMap && !NameOfWrappedPass.empty())
+    Stats = &StatsMap->operator[](NameOfWrappedPass);
+
+  BitVector MissingLines{OriginalNumLines, true};
+  BitVector MissingVars{OriginalNumVars, true};
+  for (Function &F : Functions) {
+    if (isFunctionSkipped(F))
+      continue;
+
+    // Find missing lines.
+    for (Instruction &I : instructions(F)) {
+      if (isa<DbgValueInst>(&I))
+        continue;
+
+      auto DL = I.getDebugLoc();
+      if (DL && DL.getLine() != 0) {
+        MissingLines.reset(DL.getLine() - 1);
+        continue;
+      }
+
+      if (!DL) {
+        dbg() << "ERROR: Instruction with empty DebugLoc in function ";
+        dbg() << F.getName() << " --";
+        I.print(dbg());
+        dbg() << "\n";
+        HasErrors = true;
+      }
+    }
+
+    // Find missing variables and mis-sized debug values.
+    for (Instruction &I : instructions(F)) {
+      auto *DVI = dyn_cast<DbgValueInst>(&I);
+      if (!DVI)
+        continue;
+
+      unsigned Var = ~0U;
+      (void)to_integer(DVI->getVariable()->getName(), Var, 10);
+      assert(Var <= OriginalNumVars && "Unexpected name for DILocalVariable");
+      bool HasBadSize = diagnoseMisSizedDbgValue(M, DVI);
+      if (!HasBadSize)
+        MissingVars.reset(Var - 1);
+      HasErrors |= HasBadSize;
+    }
+  }
+
+  // Print the results.
+  for (unsigned Idx : MissingLines.set_bits())
+    dbg() << "WARNING: Missing line " << Idx + 1 << "\n";
+
+  for (unsigned Idx : MissingVars.set_bits())
+    dbg() << "WARNING: Missing variable " << Idx + 1 << "\n";
+
+  // Update DI loss statistics.
+  if (Stats) {
+    Stats->NumDbgLocsExpected += OriginalNumLines;
+    Stats->NumDbgLocsMissing += MissingLines.count();
+    Stats->NumDbgValuesExpected += OriginalNumVars;
+    Stats->NumDbgValuesMissing += MissingVars.count();
+  }
+
+  dbg() << Banner;
+  if (!NameOfWrappedPass.empty())
+    dbg() << " [" << NameOfWrappedPass << "]";
+  dbg() << ": " << (HasErrors ? "FAIL" : "PASS") << '\n';
+
+  // Strip the Debugify Metadata if required.
+  if (Strip) {
+    StripDebugInfo(M);
+    M.eraseNamedMetadata(NMD);
+    return true;
+  }
+
+  return false;
+}
+
+/// ModulePass for attaching synthetic debug info to everything, used with the
+/// legacy module pass manager.
+struct DebugifyModulePass : public ModulePass {
+  bool runOnModule(Module &M) override {
+    return applyDebugifyMetadata(M, M.functions(), "ModuleDebugify: ");
+  }
+
+  DebugifyModulePass() : ModulePass(ID) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+  }
+
+  static char ID; // Pass identification.
+};
+
+/// FunctionPass for attaching synthetic debug info to instructions within a
+/// single function, used with the legacy module pass manager.
+struct DebugifyFunctionPass : public FunctionPass {
+  bool runOnFunction(Function &F) override {
+    Module &M = *F.getParent();
+    auto FuncIt = F.getIterator();
+    return applyDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
+                                 "FunctionDebugify: ");
+  }
+
+  DebugifyFunctionPass() : FunctionPass(ID) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+  }
+
+  static char ID; // Pass identification.
+};
+
+/// ModulePass for checking debug info inserted by -debugify, used with the
+/// legacy module pass manager.
+struct CheckDebugifyModulePass : public ModulePass {
+  bool runOnModule(Module &M) override {
+    return checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass,
+                                 "CheckModuleDebugify", Strip, StatsMap);
+  }
+
+  CheckDebugifyModulePass(bool Strip = false, StringRef NameOfWrappedPass = "",
+                          DebugifyStatsMap *StatsMap = nullptr)
+      : ModulePass(ID), Strip(Strip), NameOfWrappedPass(NameOfWrappedPass),
+        StatsMap(StatsMap) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+  }
+
+  static char ID; // Pass identification.
+
+private:
+  bool Strip;
+  StringRef NameOfWrappedPass;
+  DebugifyStatsMap *StatsMap;
+};
+
+/// FunctionPass for checking debug info inserted by -debugify-function, used
+/// with the legacy module pass manager.
+struct CheckDebugifyFunctionPass : public FunctionPass {
+  bool runOnFunction(Function &F) override {
+    Module &M = *F.getParent();
+    auto FuncIt = F.getIterator();
+    return checkDebugifyMetadata(M, make_range(FuncIt, std::next(FuncIt)),
+                                 NameOfWrappedPass, "CheckFunctionDebugify",
+                                 Strip, StatsMap);
+  }
+
+  CheckDebugifyFunctionPass(bool Strip = false,
+                            StringRef NameOfWrappedPass = "",
+                            DebugifyStatsMap *StatsMap = nullptr)
+      : FunctionPass(ID), Strip(Strip), NameOfWrappedPass(NameOfWrappedPass),
+        StatsMap(StatsMap) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+  }
+
+  static char ID; // Pass identification.
+
+private:
+  bool Strip;
+  StringRef NameOfWrappedPass;
+  DebugifyStatsMap *StatsMap;
+};
+
+} // end anonymous namespace
+
+ModulePass *createDebugifyModulePass() { return new DebugifyModulePass(); }
+
+FunctionPass *createDebugifyFunctionPass() {
+  return new DebugifyFunctionPass();
+}
+
+PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) {
+  applyDebugifyMetadata(M, M.functions(), "ModuleDebugify: ");
+  return PreservedAnalyses::all();
+}
+
+ModulePass *createCheckDebugifyModulePass(bool Strip,
+                                          StringRef NameOfWrappedPass,
+                                          DebugifyStatsMap *StatsMap) {
+  return new CheckDebugifyModulePass(Strip, NameOfWrappedPass, StatsMap);
+}
+
+FunctionPass *createCheckDebugifyFunctionPass(bool Strip,
+                                              StringRef NameOfWrappedPass,
+                                              DebugifyStatsMap *StatsMap) {
+  return new CheckDebugifyFunctionPass(Strip, NameOfWrappedPass, StatsMap);
+}
+
+PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M,
+                                              ModuleAnalysisManager &) {
+  checkDebugifyMetadata(M, M.functions(), "", "CheckModuleDebugify", false,
+                        nullptr);
+  return PreservedAnalyses::all();
+}
+
+char DebugifyModulePass::ID = 0;
+static RegisterPass<DebugifyModulePass> DM("debugify",
+                                           "Attach debug info to everything");
+
+char CheckDebugifyModulePass::ID = 0;
+static RegisterPass<CheckDebugifyModulePass>
+    CDM("check-debugify", "Check debug info from -debugify");
+
+char DebugifyFunctionPass::ID = 0;
+static RegisterPass<DebugifyFunctionPass> DF("debugify-function",
+                                             "Attach debug info to a function");
+
+char CheckDebugifyFunctionPass::ID = 0;
+static RegisterPass<CheckDebugifyFunctionPass>
+    CDF("check-debugify-function", "Check debug info from -debugify-function");
diff --git a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index 57e2ff0251a9..651f776a4915 100644
--- a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -13,6 +13,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/Utils.h"
 using namespace llvm;
diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 76b4635ad501..26d48ee0d23f 100644
--- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -12,13 +12,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/InstIterator.h"
 using namespace llvm;
 
 /// Checks if we should import SGV as a definition, otherwise import as a
 /// declaration.
 bool FunctionImportGlobalProcessing::doImportAsDefinition(
-    const GlobalValue *SGV, SetVector<GlobalValue *> *GlobalsToImport) {
+    const GlobalValue *SGV) {
+  if (!isPerformingImport())
+    return false;
 
   // Only import the globals requested for importing.
   if (!GlobalsToImport->count(const_cast<GlobalValue *>(SGV)))
@@ -31,16 +34,8 @@ bool FunctionImportGlobalProcessing::doImportAsDefinition(
   return true;
 }
 
-bool FunctionImportGlobalProcessing::doImportAsDefinition(
-    const GlobalValue *SGV) {
-  if (!isPerformingImport())
-    return false;
-  return FunctionImportGlobalProcessing::doImportAsDefinition(SGV,
-                                                              GlobalsToImport);
-}
-
 bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
-    const GlobalValue *SGV) {
+    const GlobalValue *SGV, ValueInfo VI) {
   assert(SGV->hasLocalLinkage());
   // Both the imported references and the original local variable must
   // be promoted.
@@ -65,7 +60,7 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
   // (so the source file name and resulting GUID is the same). Find the one
   // in this module.
   auto Summary = ImportIndex.findSummaryInModule(
-      SGV->getGUID(), SGV->getParent()->getModuleIdentifier());
+      VI, SGV->getParent()->getModuleIdentifier());
   assert(Summary && "Missing summary for global value when exporting");
   auto Linkage = Summary->linkage();
   if (!GlobalValue::isLocalLinkage(Linkage)) {
@@ -91,18 +86,15 @@ bool FunctionImportGlobalProcessing::isNonRenamableLocal(
 }
 #endif
 
-std::string FunctionImportGlobalProcessing::getName(const GlobalValue *SGV,
-                                                    bool DoPromote) {
+std::string
+FunctionImportGlobalProcessing::getPromotedName(const GlobalValue *SGV) {
+  assert(SGV->hasLocalLinkage());
   // For locals that must be promoted to global scope, ensure that
   // the promoted name uniquely identifies the copy in the original module,
-  // using the ID assigned during combined index creation. When importing,
-  // we rename all locals (not just those that are promoted) in order to
-  // avoid naming conflicts between locals imported from different modules.
-  if (SGV->hasLocalLinkage() && (DoPromote || isPerformingImport()))
-    return ModuleSummaryIndex::getGlobalNameForLocal(
-        SGV->getName(),
-        ImportIndex.getModuleHash(SGV->getParent()->getModuleIdentifier()));
-  return SGV->getName();
+  // using the ID assigned during combined index creation.
+  return ModuleSummaryIndex::getGlobalNameForLocal(
+      SGV->getName(),
+      ImportIndex.getModuleHash(SGV->getParent()->getModuleIdentifier()));
 }
 
 GlobalValue::LinkageTypes
@@ -229,6 +221,11 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
     }
   }
 
+  // We should always have a ValueInfo (i.e. GV in index) for definitions when
+  // we are exporting, and also when importing that value.
+  assert(VI || GV.isDeclaration() ||
+         (isPerformingImport() && !doImportAsDefinition(&GV)));
+
   // Mark read/write-only variables which can be imported with specific
   // attribute. We can't internalize them now because IRMover will fail
   // to link variable definitions to their external declarations during
@@ -238,27 +235,42 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
   // If global value dead stripping is not enabled in summary then
   // propagateConstants hasn't been run. We can't internalize GV
   // in such case.
-  if (!GV.isDeclaration() && VI && ImportIndex.withGlobalValueDeadStripping()) {
-    const auto &SL = VI.getSummaryList();
-    auto *GVS = SL.empty() ? nullptr : dyn_cast<GlobalVarSummary>(SL[0].get());
-    // At this stage "maybe" is "definitely"
-    if (GVS && (GVS->maybeReadOnly() || GVS->maybeWriteOnly()))
-      cast<GlobalVariable>(&GV)->addAttribute("thinlto-internalize");
+  if (!GV.isDeclaration() && VI && ImportIndex.withAttributePropagation()) {
+    if (GlobalVariable *V = dyn_cast<GlobalVariable>(&GV)) {
+      // We can have more than one local with the same GUID, in the case of
+      // same-named locals in different but same-named source files that were
+      // compiled in their respective directories (so the source file name
+      // and resulting GUID is the same). Find the one in this module.
+      // Handle the case where there is no summary found in this module. That
+      // can happen in the distributed ThinLTO backend, because the index only
+      // contains summaries from the source modules if they are being imported.
+      // We might have a non-null VI and get here even in that case if the name
+      // matches one in this module (e.g. weak or appending linkage).
+      auto *GVS = dyn_cast_or_null<GlobalVarSummary>(
+          ImportIndex.findSummaryInModule(VI, M.getModuleIdentifier()));
+      if (GVS &&
+          (ImportIndex.isReadOnly(GVS) || ImportIndex.isWriteOnly(GVS))) {
+        V->addAttribute("thinlto-internalize");
+        // Objects referenced by writeonly GV initializer should not be
+        // promoted, because there is no any kind of read access to them
+        // on behalf of this writeonly GV. To avoid promotion we convert
+        // GV initializer to 'zeroinitializer'. This effectively drops
+        // references in IR module (not in combined index), so we can
+        // ignore them when computing import. We do not export references
+        // of writeonly object. See computeImportForReferencedGlobals
+        if (ImportIndex.isWriteOnly(GVS))
+          V->setInitializer(Constant::getNullValue(V->getValueType()));
+      }
+    }
   }
 
-  bool DoPromote = false;
-  if (GV.hasLocalLinkage() &&
-      ((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) {
+  if (GV.hasLocalLinkage() && shouldPromoteLocalToGlobal(&GV, VI)) {
     // Save the original name string before we rename GV below.
     auto Name = GV.getName().str();
-    // Once we change the name or linkage it is difficult to determine
-    // again whether we should promote since shouldPromoteLocalToGlobal needs
-    // to locate the summary (based on GUID from name and linkage). Therefore,
-    // use DoPromote result saved above.
-    GV.setName(getName(&GV, DoPromote));
-    GV.setLinkage(getLinkage(&GV, DoPromote));
-    if (!GV.hasLocalLinkage())
-      GV.setVisibility(GlobalValue::HiddenVisibility);
+    GV.setName(getPromotedName(&GV));
+    GV.setLinkage(getLinkage(&GV, /* DoPromote */ true));
+    assert(!GV.hasLocalLinkage());
+    GV.setVisibility(GlobalValue::HiddenVisibility);
 
     // If we are renaming a COMDAT leader, ensure that we record the COMDAT
     // for later renaming as well. This is required for COFF.
diff --git a/llvm/lib/Transforms/Utils/GuardUtils.cpp b/llvm/lib/Transforms/Utils/GuardUtils.cpp
index 34c32d9c0c98..4cfc9358499a 100644
--- a/llvm/lib/Transforms/Utils/GuardUtils.cpp
+++ b/llvm/lib/Transforms/Utils/GuardUtils.cpp
@@ -10,13 +10,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/GuardUtils.h"
+#include "llvm/Analysis/GuardUtils.h"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 static cl::opt<uint32_t> PredicatePassBranchWeight(
     "guards-predicate-pass-branch-weight", cl::Hidden, cl::init(1 << 20),
@@ -24,7 +28,7 @@ static cl::opt<uint32_t> PredicatePassBranchWeight(
              "reciprocal of this value (default = 1 << 20)"));
 
 void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic,
-                                        CallInst *Guard) {
+                                        CallInst *Guard, bool UseWC) {
   OperandBundleDef DeoptOB(*Guard->getOperandBundle(LLVMContext::OB_deopt));
   SmallVector<Value *, 4> Args(std::next(Guard->arg_begin()), Guard->arg_end());
 
@@ -60,4 +64,63 @@ void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic,
 
   DeoptCall->setCallingConv(Guard->getCallingConv());
   DeoptBlockTerm->eraseFromParent();
+
+  if (UseWC) {
+    // We want the guard to be expressed as explicit control flow, but still be
+    // widenable. For that, we add Widenable Condition intrinsic call to the
+    // guard's condition.
+    IRBuilder<> B(CheckBI);
+    auto *WC = B.CreateIntrinsic(Intrinsic::experimental_widenable_condition,
+                                 {}, {}, nullptr, "widenable_cond");
+    CheckBI->setCondition(B.CreateAnd(CheckBI->getCondition(), WC,
+                                      "exiplicit_guard_cond"));
+    assert(isWidenableBranch(CheckBI) && "sanity check");
+  }
+}
+
+
+void llvm::widenWidenableBranch(BranchInst *WidenableBR, Value *NewCond) {
+  assert(isWidenableBranch(WidenableBR) && "precondition");
+
+  // The tempting trivially option is to produce something like this:
+  // br (and oldcond, newcond) where oldcond is assumed to contain a widenable
+  // condition, but that doesn't match the pattern parseWidenableBranch expects
+  // so we have to be more sophisticated.
+
+  Use *C, *WC;
+  BasicBlock *IfTrueBB, *IfFalseBB;
+  parseWidenableBranch(WidenableBR, C, WC, IfTrueBB, IfFalseBB);
+  if (!C) {
+    // br (wc()), ... form
+    IRBuilder<> B(WidenableBR);
+    WidenableBR->setCondition(B.CreateAnd(NewCond, WC->get()));
+  } else {
+    // br (wc & C), ... form
+    IRBuilder<> B(WidenableBR);
+    C->set(B.CreateAnd(NewCond, C->get()));
+    Instruction *WCAnd = cast<Instruction>(WidenableBR->getCondition());
+    // Condition is only guaranteed to dominate branch
+    WCAnd->moveBefore(WidenableBR);    
+  }
+  assert(isWidenableBranch(WidenableBR) && "preserve widenabiliy");
+}
+
+void llvm::setWidenableBranchCond(BranchInst *WidenableBR, Value *NewCond) {
+  assert(isWidenableBranch(WidenableBR) && "precondition");
+
+  Use *C, *WC;
+  BasicBlock *IfTrueBB, *IfFalseBB;
+  parseWidenableBranch(WidenableBR, C, WC, IfTrueBB, IfFalseBB);
+  if (!C) {
+    // br (wc()), ... form
+    IRBuilder<> B(WidenableBR);
+    WidenableBR->setCondition(B.CreateAnd(NewCond, WC->get()));
+  } else {
+    // br (wc & C), ... form
+    Instruction *WCAnd = cast<Instruction>(WidenableBR->getCondition());
+    // Condition is only guaranteed to dominate branch
+    WCAnd->moveBefore(WidenableBR);
+    C->set(NewCond);
+  }
+  assert(isWidenableBranch(WidenableBR) && "preserve widenabiliy");
 }
diff --git a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
new file mode 100644
index 000000000000..9192e74b9ace
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
@@ -0,0 +1,186 @@
+//===- InjectTLIMAppings.cpp - TLI to VFABI attribute injection  ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Populates the VFABI attribute with the scalar-to-vector mappings
+// from the TargetLibraryInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/InjectTLIMappings.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "inject-tli-mappings"
+
+STATISTIC(NumCallInjected,
+          "Number of calls in which the mappings have been injected.");
+
+STATISTIC(NumVFDeclAdded,
+          "Number of function declarations that have been added.");
+STATISTIC(NumCompUsedAdded,
+          "Number of `@llvm.compiler.used` operands that have been added.");
+
+/// Helper function to map the TLI name to a strings that holds
+/// scalar-to-vector mapping.
+///
+///    _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>)
+///
+/// where:
+///
+/// <isa> = "_LLVM_"
+/// <mask> = "N". Note: TLI does not support masked interfaces.
+/// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
+///          field of the `VecDesc` struct.
+/// <vparams> = "v", as many as are the number of parameters of CI.
+/// <scalarname> = the name of the scalar function called by CI.
+/// <vectorname> = the name of the vector function mapped by the TLI.
+static std::string mangleTLIName(StringRef VectorName, const CallInst &CI,
+                                 unsigned VF) {
+  SmallString<256> Buffer;
+  llvm::raw_svector_ostream Out(Buffer);
+  Out << "_ZGV" << VFABI::_LLVM_ << "N" << VF;
+  for (unsigned I = 0; I < CI.getNumArgOperands(); ++I)
+    Out << "v";
+  Out << "_" << CI.getCalledFunction()->getName() << "(" << VectorName << ")";
+  return Out.str();
+}
+
+/// A helper function for converting Scalar types to vector types.
+/// If the incoming type is void, we return void. If the VF is 1, we return
+/// the scalar type.
+static Type *ToVectorTy(Type *Scalar, unsigned VF, bool isScalable = false) {
+  if (Scalar->isVoidTy() || VF == 1)
+    return Scalar;
+  return VectorType::get(Scalar, {VF, isScalable});
+}
+
+/// A helper function that adds the vector function declaration that
+/// vectorizes the CallInst CI with a vectorization factor of VF
+/// lanes. The TLI assumes that all parameters and the return type of
+/// CI (other than void) need to be widened to a VectorType of VF
+/// lanes.
+static void addVariantDeclaration(CallInst &CI, const unsigned VF,
+                                  const StringRef VFName) {
+  Module *M = CI.getModule();
+
+  // Add function declaration.
+  Type *RetTy = ToVectorTy(CI.getType(), VF);
+  SmallVector<Type *, 4> Tys;
+  for (Value *ArgOperand : CI.arg_operands())
+    Tys.push_back(ToVectorTy(ArgOperand->getType(), VF));
+  assert(!CI.getFunctionType()->isVarArg() &&
+         "VarArg functions are not supported.");
+  FunctionType *FTy = FunctionType::get(RetTy, Tys, /*isVarArg=*/false);
+  Function *VectorF =
+      Function::Create(FTy, Function::ExternalLinkage, VFName, M);
+  VectorF->copyAttributesFrom(CI.getCalledFunction());
+  ++NumVFDeclAdded;
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added to the module: `" << VFName
+                    << "` of type " << *(VectorF->getType()) << "\n");
+
+  // Make function declaration (without a body) "sticky" in the IR by
+  // listing it in the @llvm.compiler.used intrinsic.
+  assert(!VectorF->size() && "VFABI attribute requires `@llvm.compiler.used` "
+                             "only on declarations.");
+  appendToCompilerUsed(*M, {VectorF});
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << VFName
+                    << "` to `@llvm.compiler.used`.\n");
+  ++NumCompUsedAdded;
+}
+
+static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
+  // This is needed to make sure we don't query the TLI for calls to
+  // bitcast of function pointers, like `%call = call i32 (i32*, ...)
+  // bitcast (i32 (...)* @goo to i32 (i32*, ...)*)(i32* nonnull %i)`,
+  // as such calls make the `isFunctionVectorizable` raise an
+  // exception.
+  if (CI.isNoBuiltin() || !CI.getCalledFunction())
+    return;
+
+  const std::string ScalarName = CI.getCalledFunction()->getName();
+  // Nothing to be done if the TLI thinks the function is not
+  // vectorizable.
+  if (!TLI.isFunctionVectorizable(ScalarName))
+    return;
+  SmallVector<std::string, 8> Mappings;
+  VFABI::getVectorVariantNames(CI, Mappings);
+  Module *M = CI.getModule();
+  const SetVector<StringRef> OriginalSetOfMappings(Mappings.begin(),
+                                                   Mappings.end());
+  //  All VFs in the TLI are powers of 2.
+  for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName); VF <= WidestVF;
+       VF *= 2) {
+    const std::string TLIName = TLI.getVectorizedFunction(ScalarName, VF);
+    if (!TLIName.empty()) {
+      std::string MangledName = mangleTLIName(TLIName, CI, VF);
+      if (!OriginalSetOfMappings.count(MangledName)) {
+        Mappings.push_back(MangledName);
+        ++NumCallInjected;
+      }
+      Function *VariantF = M->getFunction(TLIName);
+      if (!VariantF)
+        addVariantDeclaration(CI, VF, TLIName);
+    }
+  }
+
+  VFABI::setVectorVariantNames(&CI, Mappings);
+}
+
+static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
+  for (auto &I : instructions(F))
+    if (auto CI = dyn_cast<CallInst>(&I))
+      addMappingsFromTLI(TLI, *CI);
+  // Even if the pass adds IR attributes, the analyses are preserved.
+  return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// New pass manager implementation.
+////////////////////////////////////////////////////////////////////////////////
+PreservedAnalyses InjectTLIMappings::run(Function &F,
+                                         FunctionAnalysisManager &AM) {
+  const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+  runImpl(TLI, F);
+  // Even if the pass adds IR attributes, the analyses are preserved.
+  return PreservedAnalyses::all();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Legacy PM Implementation.
+////////////////////////////////////////////////////////////////////////////////
+bool InjectTLIMappingsLegacy::runOnFunction(Function &F) {
+  const TargetLibraryInfo &TLI =
+      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+  return runImpl(TLI, F);
+}
+
+void InjectTLIMappingsLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
+  AU.addPreserved<TargetLibraryInfoWrapperPass>();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Legacy Pass manager initialization
+////////////////////////////////////////////////////////////////////////////////
+char InjectTLIMappingsLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(InjectTLIMappingsLegacy, DEBUG_TYPE,
+                      "Inject TLI Mappings", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(InjectTLIMappingsLegacy, DEBUG_TYPE, "Inject TLI Mappings",
+                    false, false)
+
+FunctionPass *llvm::createInjectTLIMappingsLegacyPass() {
+  return new InjectTLIMappingsLegacy();
+}
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index a7f0f7ac5d61..6da612eb4e65 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1254,7 +1254,8 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
   // Always generate a memcpy of alignment 1 here because we don't know
   // the alignment of the src pointer.  Other optimizations can infer
   // better alignment.
-  Builder.CreateMemCpy(Dst, /*DstAlign*/1, Src, /*SrcAlign*/1, Size);
+  Builder.CreateMemCpy(Dst, /*DstAlign*/ Align::None(), Src,
+                       /*SrcAlign*/ Align::None(), Size);
 }
 
 /// When inlining a call site that has a byval argument,
@@ -1293,16 +1294,16 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
   }
 
   // Create the alloca.  If we have DataLayout, use nice alignment.
-  unsigned Align = DL.getPrefTypeAlignment(AggTy);
+  Align Alignment(DL.getPrefTypeAlignment(AggTy));
 
   // If the byval had an alignment specified, we *must* use at least that
   // alignment, as it is required by the byval argument (and uses of the
   // pointer inside the callee).
-  Align = std::max(Align, ByValAlignment);
+  Alignment = max(Alignment, MaybeAlign(ByValAlignment));
 
-  Value *NewAlloca = new AllocaInst(AggTy, DL.getAllocaAddrSpace(),
-                                    nullptr, Align, Arg->getName(),
-                                    &*Caller->begin()->begin());
+  Value *NewAlloca =
+      new AllocaInst(AggTy, DL.getAllocaAddrSpace(), nullptr, Alignment,
+                     Arg->getName(), &*Caller->begin()->begin());
   IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
 
   // Uses of the argument in the function should use our new alloca
@@ -1405,6 +1406,10 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
   // other.
   DenseMap<const MDNode *, MDNode *> IANodes;
 
+  // Check if we are not generating inline line tables and want to use
+  // the call site location instead.
+  bool NoInlineLineTables = Fn->hasFnAttribute("no-inline-line-tables");
+
   for (; FI != Fn->end(); ++FI) {
     for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
          BI != BE; ++BI) {
@@ -1416,20 +1421,22 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
         BI->setMetadata(LLVMContext::MD_loop, NewLoopID);
       }
 
-      if (DebugLoc DL = BI->getDebugLoc()) {
-        DebugLoc IDL =
-            inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes);
-        BI->setDebugLoc(IDL);
-        continue;
-      }
+      if (!NoInlineLineTables)
+        if (DebugLoc DL = BI->getDebugLoc()) {
+          DebugLoc IDL =
+              inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes);
+          BI->setDebugLoc(IDL);
+          continue;
+        }
 
-      if (CalleeHasDebugInfo)
+      if (CalleeHasDebugInfo && !NoInlineLineTables)
         continue;
 
-      // If the inlined instruction has no line number, make it look as if it
-      // originates from the call location. This is important for
-      // ((__always_inline__, __nodebug__)) functions which must use caller
-      // location for all instructions in their function body.
+      // If the inlined instruction has no line number, or if inline info
+      // is not being generated, make it look as if it originates from the call
+      // location. This is important for ((__always_inline, __nodebug__))
+      // functions which must use caller location for all instructions in their
+      // function body.
 
       // Don't update static allocas, as they may get moved later.
       if (auto *AI = dyn_cast<AllocaInst>(BI))
@@ -1438,6 +1445,19 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
 
       BI->setDebugLoc(TheCallDL);
     }
+
+    // Remove debug info intrinsics if we're not keeping inline info.
+    if (NoInlineLineTables) {
+      BasicBlock::iterator BI = FI->begin();
+      while (BI != FI->end()) {
+        if (isa<DbgInfoIntrinsic>(BI)) {
+          BI = BI->eraseFromParent();
+          continue;
+        }
+        ++BI;
+      }
+    }
+
   }
 }
 
@@ -1453,7 +1473,7 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock,
                             BlockFrequencyInfo *CalleeBFI,
                             const BasicBlock &CalleeEntryBlock) {
   SmallPtrSet<BasicBlock *, 16> ClonedBBs;
-  for (auto const &Entry : VMap) {
+  for (auto Entry : VMap) {
     if (!isa<BasicBlock>(Entry.first) || !Entry.second)
       continue;
     auto *OrigBB = cast<BasicBlock>(Entry.first);
@@ -1508,22 +1528,25 @@ void llvm::updateProfileCallee(
   else
     newEntryCount = priorEntryCount + entryDelta;
 
-  Callee->setEntryCount(newEntryCount);
-
   // During inlining ?
   if (VMap) {
     uint64_t cloneEntryCount = priorEntryCount - newEntryCount;
-    for (auto const &Entry : *VMap)
+    for (auto Entry : *VMap)
       if (isa<CallInst>(Entry.first))
         if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
           CI->updateProfWeight(cloneEntryCount, priorEntryCount);
   }
-  for (BasicBlock &BB : *Callee)
-    // No need to update the callsite if it is pruned during inlining.
-    if (!VMap || VMap->count(&BB))
-      for (Instruction &I : BB)
-        if (CallInst *CI = dyn_cast<CallInst>(&I))
-          CI->updateProfWeight(newEntryCount, priorEntryCount);
+
+  if (entryDelta) {
+    Callee->setEntryCount(newEntryCount);
+
+    for (BasicBlock &BB : *Callee)
+      // No need to update the callsite if it is pruned during inlining.
+      if (!VMap || VMap->count(&BB))
+        for (Instruction &I : BB)
+          if (CallInst *CI = dyn_cast<CallInst>(&I))
+            CI->updateProfWeight(newEntryCount, priorEntryCount);
+  }
 }
 
 /// This function inlines the called function into the basic block of the
@@ -1842,6 +1865,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
       // Scan for the block of allocas that we can move over, and move them
       // all at once.
       while (isa<AllocaInst>(I) &&
+             !cast<AllocaInst>(I)->use_empty() &&
              allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) {
         IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
         ++I;
diff --git a/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/llvm/lib/Transforms/Utils/InstructionNamer.cpp
index 6c4fc1ceb991..aac0b55801c4 100644
--- a/llvm/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -15,6 +15,7 @@
 
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/Utils.h"
 using namespace llvm;
diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp
index 29e7c5260f46..5746d69260d5 100644
--- a/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -43,7 +43,9 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PredIteratorCache.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
@@ -74,7 +76,8 @@ static bool isExitBlock(BasicBlock *BB,
 /// that are outside the current loop.  If so, insert LCSSA PHI nodes and
 /// rewrite the uses.
 bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
-                                    DominatorTree &DT, LoopInfo &LI) {
+                                    DominatorTree &DT, LoopInfo &LI,
+                                    ScalarEvolution *SE) {
   SmallVector<Use *, 16> UsesToRewrite;
   SmallSetVector<PHINode *, 16> PHIsToRemove;
   PredIteratorCache PredCache;
@@ -134,6 +137,11 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
     SSAUpdater SSAUpdate(&InsertedPHIs);
     SSAUpdate.Initialize(I->getType(), I->getName());
 
+    // Force re-computation of I, as some users now need to use the new PHI
+    // node.
+    if (SE)
+      SE->forgetValue(I);
+
     // Insert the LCSSA phi's into all of the exit blocks dominated by the
     // value, and add them to the Phi's map.
     for (BasicBlock *ExitBB : ExitBlocks) {
@@ -192,9 +200,6 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
         UserBB = PN->getIncomingBlock(*UseToRewrite);
 
       if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
-        // Tell the VHs that the uses changed. This updates SCEV's caches.
-        if (UseToRewrite->get()->hasValueHandle())
-          ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front());
         UseToRewrite->set(&UserBB->front());
         continue;
       }
@@ -202,10 +207,6 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
       // If we added a single PHI, it must dominate all uses and we can directly
       // rename it.
       if (AddedPHIs.size() == 1) {
-        // Tell the VHs that the uses changed. This updates SCEV's caches.
-        // We might call ValueIsRAUWd multiple times for the same value.
-        if (UseToRewrite->get()->hasValueHandle())
-          ValueHandleBase::ValueIsRAUWd(*UseToRewrite, AddedPHIs[0]);
         UseToRewrite->set(AddedPHIs[0]);
         continue;
       }
@@ -368,7 +369,7 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
       Worklist.push_back(&I);
     }
   }
-  Changed = formLCSSAForInstructions(Worklist, DT, *LI);
+  Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE);
 
   // If we modified the code, remove any caches about the loop from SCEV to
   // avoid dangling entries.
diff --git a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index ed28fffc22b5..4c52fac6f7cb 100644
--- a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -39,6 +39,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 using namespace llvm;
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 5bcd05757ec1..b2d511c7c9a9 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -1421,22 +1421,32 @@ bool llvm::LowerDbgDeclare(Function &F) {
         }))
       continue;
 
-    for (auto &AIUse : AI->uses()) {
-      User *U = AIUse.getUser();
-      if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
-        if (AIUse.getOperandNo() == 1)
-          ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
-      } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
-        ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
-      } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
-        // This is a call by-value or some other instruction that takes a
-        // pointer to the variable. Insert a *value* intrinsic that describes
-        // the variable by dereferencing the alloca.
-        DebugLoc NewLoc = getDebugValueLoc(DDI, nullptr);
-        auto *DerefExpr =
-            DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref);
-        DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr, NewLoc,
-                                    CI);
+    SmallVector<const Value *, 8> WorkList;
+    WorkList.push_back(AI);
+    while (!WorkList.empty()) {
+      const Value *V = WorkList.pop_back_val();
+      for (auto &AIUse : V->uses()) {
+        User *U = AIUse.getUser();
+        if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+          if (AIUse.getOperandNo() == 1)
+            ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
+        } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+          ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
+        } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
+          // This is a call by-value or some other instruction that takes a
+          // pointer to the variable. Insert a *value* intrinsic that describes
+          // the variable by dereferencing the alloca.
+          if (!CI->isLifetimeStartOrEnd()) {
+            DebugLoc NewLoc = getDebugValueLoc(DDI, nullptr);
+            auto *DerefExpr =
+                DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref);
+            DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr,
+                                        NewLoc, CI);
+          }
+        } else if (BitCastInst *BI = dyn_cast<BitCastInst>(U)) {
+          if (BI->getType()->isPointerTy())
+            WorkList.push_back(BI);
+        }
       }
     }
     DDI->eraseFromParent();
@@ -1611,6 +1621,11 @@ bool llvm::salvageDebugInfo(Instruction &I) {
   return salvageDebugInfoForDbgValues(I, DbgUsers);
 }
 
+void llvm::salvageDebugInfoOrMarkUndef(Instruction &I) {
+  if (!salvageDebugInfo(I))
+    replaceDbgUsesWithUndef(&I);
+}
+
 bool llvm::salvageDebugInfoForDbgValues(
     Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers) {
   auto &Ctx = I.getContext();
@@ -1661,9 +1676,8 @@ DIExpression *llvm::salvageDebugInfoImpl(Instruction &I,
   };
 
   // initializer-list helper for applying operators to the source DIExpression.
-  auto applyOps =
-      [&](std::initializer_list<uint64_t> Opcodes) -> DIExpression * {
-    SmallVector<uint64_t, 8> Ops(Opcodes);
+  auto applyOps = [&](ArrayRef<uint64_t> Opcodes) -> DIExpression * {
+    SmallVector<uint64_t, 8> Ops(Opcodes.begin(), Opcodes.end());
     return doSalvage(Ops);
   };
 
@@ -1671,8 +1685,21 @@ DIExpression *llvm::salvageDebugInfoImpl(Instruction &I,
     // No-op casts and zexts are irrelevant for debug info.
     if (CI->isNoopCast(DL) || isa<ZExtInst>(&I))
       return SrcDIExpr;
-    return nullptr;
-  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+
+    Type *Type = CI->getType();
+    // Casts other than Trunc or SExt to scalar types cannot be salvaged.
+    if (Type->isVectorTy() || (!isa<TruncInst>(&I) && !isa<SExtInst>(&I)))
+      return nullptr;
+
+    Value *FromValue = CI->getOperand(0);
+    unsigned FromTypeBitSize = FromValue->getType()->getScalarSizeInBits();
+    unsigned ToTypeBitSize = Type->getScalarSizeInBits();
+
+    return applyOps(DIExpression::getExtOps(FromTypeBitSize, ToTypeBitSize,
+                                            isa<SExtInst>(&I)));
+  }
+
+  if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
     unsigned BitWidth =
         M.getDataLayout().getIndexSizeInBits(GEP->getPointerAddressSpace());
     // Rewrite a constant GEP into a DIExpression.
@@ -1727,7 +1754,7 @@ DIExpression *llvm::salvageDebugInfoImpl(Instruction &I,
 using DbgValReplacement = Optional<DIExpression *>;
 
 /// Point debug users of \p From to \p To using exprs given by \p RewriteExpr,
-/// possibly moving/deleting users to prevent use-before-def. Returns true if
+/// possibly moving/undefing users to prevent use-before-def. Returns true if
 /// changes are made.
 static bool rewriteDebugUsers(
     Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT,
@@ -1740,7 +1767,7 @@ static bool rewriteDebugUsers(
 
   // Prevent use-before-def of To.
   bool Changed = false;
-  SmallPtrSet<DbgVariableIntrinsic *, 1> DeleteOrSalvage;
+  SmallPtrSet<DbgVariableIntrinsic *, 1> UndefOrSalvage;
   if (isa<Instruction>(&To)) {
     bool DomPointAfterFrom = From.getNextNonDebugInstruction() == &DomPoint;
 
@@ -1755,14 +1782,14 @@ static bool rewriteDebugUsers(
       // Users which otherwise aren't dominated by the replacement value must
       // be salvaged or deleted.
       } else if (!DT.dominates(&DomPoint, DII)) {
-        DeleteOrSalvage.insert(DII);
+        UndefOrSalvage.insert(DII);
       }
     }
   }
 
   // Update debug users without use-before-def risk.
   for (auto *DII : Users) {
-    if (DeleteOrSalvage.count(DII))
+    if (UndefOrSalvage.count(DII))
       continue;
 
     LLVMContext &Ctx = DII->getContext();
@@ -1776,18 +1803,10 @@ static bool rewriteDebugUsers(
     Changed = true;
   }
 
-  if (!DeleteOrSalvage.empty()) {
+  if (!UndefOrSalvage.empty()) {
     // Try to salvage the remaining debug users.
-    Changed |= salvageDebugInfo(From);
-
-    // Delete the debug users which weren't salvaged.
-    for (auto *DII : DeleteOrSalvage) {
-      if (DII->getVariableLocation() == &From) {
-        LLVM_DEBUG(dbgs() << "Erased UseBeforeDef:  " << *DII << '\n');
-        DII->eraseFromParent();
-        Changed = true;
-      }
-    }
+    salvageDebugInfoOrMarkUndef(From);
+    Changed = true;
   }
 
   return Changed;
@@ -1862,10 +1881,8 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
         return None;
 
       bool Signed = *Signedness == DIBasicType::Signedness::Signed;
-      dwarf::TypeKind TK = Signed ? dwarf::DW_ATE_signed : dwarf::DW_ATE_unsigned;
-      SmallVector<uint64_t, 8> Ops({dwarf::DW_OP_LLVM_convert, ToBits, TK,
-                                   dwarf::DW_OP_LLVM_convert, FromBits, TK});
-      return DIExpression::appendToStack(DII.getExpression(), Ops);
+      return DIExpression::appendExt(DII.getExpression(), ToBits, FromBits,
+                                     Signed);
     };
     return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt);
   }
@@ -2574,7 +2591,7 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
   if (!NewTy->isPointerTy())
     return;
 
-  unsigned BitWidth = DL.getIndexTypeSizeInBits(NewTy);
+  unsigned BitWidth = DL.getPointerTypeSizeInBits(NewTy);
   if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
     MDNode *NN = MDNode::get(OldLI.getContext(), None);
     NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 889ea5ca9970..c065e0269c64 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -76,6 +76,13 @@ private:
 };
 } // end anonymous namespace
 
+/// Insert (K, V) pair into the ValueToValueMap, and verify the key did not
+/// previously exist in the map, and the value was inserted.
+static void InsertNewValueIntoMap(ValueToValueMapTy &VM, Value *K, Value *V) {
+  bool Inserted = VM.insert({K, V}).second;
+  assert(Inserted);
+  (void)Inserted;
+}
 /// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
 /// old header into the preheader.  If there were uses of the values produced by
 /// these instruction that were outside of the loop, we have to insert PHI nodes
@@ -300,7 +307,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
   // For PHI nodes, the value available in OldPreHeader is just the
   // incoming value from OldPreHeader.
   for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
-    ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
+    InsertNewValueIntoMap(ValueMap, PN,
+                          PN->getIncomingValueForBlock(OrigPreheader));
 
   // For the rest of the instructions, either hoist to the OrigPreheader if
   // possible or create a clone in the OldPreHeader if not.
@@ -358,13 +366,13 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
     if (V && LI->replacementPreservesLCSSAForm(C, V)) {
       // If so, then delete the temporary instruction and stick the folded value
       // in the map.
-      ValueMap[Inst] = V;
+      InsertNewValueIntoMap(ValueMap, Inst, V);
       if (!C->mayHaveSideEffects()) {
         C->deleteValue();
         C = nullptr;
       }
     } else {
-      ValueMap[Inst] = C;
+      InsertNewValueIntoMap(ValueMap, Inst, C);
     }
     if (C) {
       // Otherwise, stick the new instruction into the new block!
@@ -376,7 +384,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
           AC->registerAssumption(II);
       // MemorySSA cares whether the cloned instruction was inserted or not, and
       // not whether it can be remapped to a simplified value.
-      ValueMapMSSA[Inst] = C;
+      if (MSSAU)
+        InsertNewValueIntoMap(ValueMapMSSA, Inst, C);
     }
   }
 
@@ -396,7 +405,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
   // Update MemorySSA before the rewrite call below changes the 1:1
   // instruction:cloned_instruction_or_value mapping.
   if (MSSAU) {
-    ValueMapMSSA[OrigHeader] = OrigPreheader;
+    InsertNewValueIntoMap(ValueMapMSSA, OrigHeader, OrigPreheader);
     MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
                                         ValueMapMSSA);
   }
diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index d0f89dc54bfb..28f88f39a712 100644
--- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -67,6 +67,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils.h"
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index a7590fc32545..4b94b371e70a 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -22,17 +22,18 @@
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopSimplify.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index bf2e87b0d49f..f1965934b2d7 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -21,7 +21,6 @@
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/Utils/Local.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DataLayout.h"
@@ -177,6 +176,7 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
 
   // When we enter here we should have already checked that it is safe
   BasicBlock *Header = L->getHeader();
+  assert(Header && "No header.");
   assert(L->getSubLoops().size() == 1);
   Loop *SubLoop = *L->begin();
 
@@ -247,8 +247,9 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
 
   BasicBlock *Preheader = L->getLoopPreheader();
   BasicBlock *LatchBlock = L->getLoopLatch();
+  assert(Preheader && "No preheader");
+  assert(LatchBlock && "No latch block");
   BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
-  assert(Preheader && LatchBlock && Header);
   assert(BI && !BI->isUnconditional());
   bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
   BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 58e42074f963..7a168ff6f32b 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -212,14 +212,11 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
     const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
 
     // Avoid huge SCEV computations in the loop below, make sure we only
-    // consider AddRecs of the loop we are trying to peel and avoid
-    // non-monotonic predicates, as we will not be able to simplify the loop
-    // body.
-    // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can
-    //        simplify the loop, if we peel 1 additional iteration, if there
-    //        is no wrapping.
+    // consider AddRecs of the loop we are trying to peel.
+    if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
+      continue;
     bool Increasing;
-    if (!LeftAR->isAffine() || LeftAR->getLoop() != &L ||
+    if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
         !SE.isMonotonicPredicate(LeftAR, Pred, Increasing))
       continue;
     (void)Increasing;
@@ -238,18 +235,43 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
       Pred = ICmpInst::getInversePredicate(Pred);
 
     const SCEV *Step = LeftAR->getStepRecurrence(SE);
-    while (NewPeelCount < MaxPeelCount &&
-           SE.isKnownPredicate(Pred, IterVal, RightSCEV)) {
-      IterVal = SE.getAddExpr(IterVal, Step);
+    const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
+    auto PeelOneMoreIteration = [&IterVal, &NextIterVal, &SE, Step,
+                                 &NewPeelCount]() {
+      IterVal = NextIterVal;
+      NextIterVal = SE.getAddExpr(IterVal, Step);
       NewPeelCount++;
+    };
+
+    auto CanPeelOneMoreIteration = [&NewPeelCount, &MaxPeelCount]() {
+      return NewPeelCount < MaxPeelCount;
+    };
+
+    while (CanPeelOneMoreIteration() &&
+           SE.isKnownPredicate(Pred, IterVal, RightSCEV))
+      PeelOneMoreIteration();
+
+    // With *that* peel count, does the predicate !Pred become known in the
+    // first iteration of the loop body after peeling?
+    if (!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
+                             RightSCEV))
+      continue; // If not, give up.
+
+    // However, for equality comparisons, that isn't always sufficient to
+    // eliminate the comparsion in loop body, we may need to peel one more
+    // iteration. See if that makes !Pred become unknown again.
+    if (ICmpInst::isEquality(Pred) &&
+        !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
+                             RightSCEV)) {
+      assert(!SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
+             SE.isKnownPredicate(Pred, NextIterVal, RightSCEV) &&
+             "Expected Pred to go from known to unknown.");
+      if (!CanPeelOneMoreIteration())
+        continue; // Need to peel one more iteration, but can't. Give up.
+      PeelOneMoreIteration(); // Great!
     }
 
-    // Only peel the loop if the monotonic predicate !Pred becomes known in the
-    // first iteration of the loop body after peeling.
-    if (NewPeelCount > DesiredPeelCount &&
-        SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
-                            RightSCEV))
-      DesiredPeelCount = NewPeelCount;
+    DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
   }
 
   return DesiredPeelCount;
@@ -562,7 +584,7 @@ static void cloneLoopBlocks(
 
   // LastValueMap is updated with the values for the current loop
   // which are used the next time this function is called.
-  for (const auto &KV : VMap)
+  for (auto KV : VMap)
     LVMap[KV.first] = KV.second;
 }
 
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index d22fdb4d52dc..ddb7479924bd 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils.h"
@@ -395,9 +396,9 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
     }
   }
   if (CreateRemainderLoop) {
-    Loop *NewLoop = NewLoops[L];
-    MDNode *LoopID = NewLoop->getLoopID();
+    Loop *NewLoop = NewLoops[L];  
     assert(NewLoop && "L should have been cloned");
+    MDNode *LoopID = NewLoop->getLoopID();
 
     // Only add loop metadata if the loop is not going to be completely
     // unrolled.
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index b4d7f35d2d9a..c4c40189fda4 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -24,7 +24,6 @@
 #include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -35,6 +34,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/KnownBits.h"
@@ -672,7 +672,19 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
       LI->removeBlock(BB);
 
     // The last step is to update LoopInfo now that we've eliminated this loop.
-    LI->erase(L);
+    // Note: LoopInfo::erase remove the given loop and relink its subloops with
+    // its parent. While removeLoop/removeChildLoop remove the given loop but
+    // not relink its subloops, which is what we want.
+    if (Loop *ParentLoop = L->getParentLoop()) {
+      Loop::iterator I = find(ParentLoop->begin(), ParentLoop->end(), L);
+      assert(I != ParentLoop->end() && "Couldn't find loop");
+      ParentLoop->removeChildLoop(I);
+    } else {
+      Loop::iterator I = find(LI->begin(), LI->end(), L);
+      assert(I != LI->end() && "Couldn't find loop");
+      LI->removeLoop(I);
+    }
+    LI->destroy(L);
   }
 }
 
@@ -702,19 +714,19 @@ Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
   // To estimate the number of times the loop body was executed, we want to
   // know the number of times the backedge was taken, vs. the number of times
   // we exited the loop.
-  uint64_t TrueVal, FalseVal;
-  if (!LatchBR->extractProfMetadata(TrueVal, FalseVal))
+  uint64_t BackedgeTakenWeight, LatchExitWeight;
+  if (!LatchBR->extractProfMetadata(BackedgeTakenWeight, LatchExitWeight))
     return None;
 
-  if (!TrueVal || !FalseVal)
+  if (LatchBR->getSuccessor(0) != L->getHeader())
+    std::swap(BackedgeTakenWeight, LatchExitWeight);
+
+  if (!BackedgeTakenWeight || !LatchExitWeight)
     return 0;
 
   // Divide the count of the backedge by the count of the edge exiting the loop,
   // rounding to nearest.
-  if (LatchBR->getSuccessor(0) == L->getHeader())
-    return (TrueVal + (FalseVal / 2)) / FalseVal;
-  else
-    return (FalseVal + (TrueVal / 2)) / TrueVal;
+  return llvm::divideNearest(BackedgeTakenWeight, LatchExitWeight);
 }
 
 bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 5d7759056c7d..50752bd78a65 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -18,6 +18,8 @@
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 
diff --git a/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/llvm/lib/Transforms/Utils/LowerInvoke.cpp
index fe67e191dc62..1af0ce3d86cc 100644
--- a/llvm/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -19,6 +19,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/Utils.h"
 using namespace llvm;
diff --git a/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 8256e3b5f5af..4b9d0dadfc17 100644
--- a/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -27,6 +27,7 @@
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
diff --git a/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/llvm/lib/Transforms/Utils/Mem2Reg.cpp
index cd2c81b6abc8..5ad7aeb463ec 100644
--- a/llvm/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/llvm/lib/Transforms/Utils/Mem2Reg.cpp
@@ -19,6 +19,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Transforms/Utils.h"
diff --git a/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/llvm/lib/Transforms/Utils/MetaRenamer.cpp
index 60bb2775a194..7f961dbaf4b4 100644
--- a/llvm/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -27,6 +27,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/TypeFinder.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/Utils.h"
 
diff --git a/llvm/lib/Transforms/Utils/MisExpect.cpp b/llvm/lib/Transforms/Utils/MisExpect.cpp
index 26d3402bd279..a16ca1fb8efa 100644
--- a/llvm/lib/Transforms/Utils/MisExpect.cpp
+++ b/llvm/lib/Transforms/Utils/MisExpect.cpp
@@ -25,6 +25,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FormatVariadic.h"
 #include <cstdint>
diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 1ef3757017a8..b94f57e4dc2c 100644
--- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
@@ -280,3 +281,31 @@ std::string llvm::getUniqueModuleId(Module *M) {
   MD5::stringifyResult(R, Str);
   return ("$" + Str).str();
 }
+
+void VFABI::setVectorVariantNames(
+    CallInst *CI, const SmallVector<std::string, 8> &VariantMappings) {
+  if (VariantMappings.empty())
+    return;
+
+  SmallString<256> Buffer;
+  llvm::raw_svector_ostream Out(Buffer);
+  for (const std::string &VariantMapping : VariantMappings)
+    Out << VariantMapping << ",";
+  // Get rid of the trailing ','.
+  assert(!Buffer.str().empty() && "Must have at least one char.");
+  Buffer.pop_back();
+
+  Module *M = CI->getModule();
+#ifndef NDEBUG
+  for (const std::string &VariantMapping : VariantMappings) {
+    Optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping);
+    assert(VI.hasValue() && "Canno add an invalid VFABI name.");
+    assert(M->getNamedValue(VI.getValue().VectorName) &&
+           "Cannot add variant to attribute: "
+           "vector function declaration is missing.");
+  }
+#endif
+  CI->addAttribute(
+      AttributeList::FunctionIndex,
+      Attribute::get(M->getContext(), MappingsAttrName, Buffer.str()));
+}
diff --git a/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
index ac8991e9d475..1c5c41abc682 100644
--- a/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
+++ b/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
@@ -12,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/NameAnonGlobals.h"
-
 #include "llvm/ADT/SmallString.h"
 #include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Support/MD5.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 44859eafb9c1..dda2867f44b2 100644
--- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/DebugCounter.h"
 #include "llvm/Support/FormattedStream.h"
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 3a5e3293ed4f..d93ca4f04cdb 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/GuardUtils.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
@@ -1403,10 +1404,16 @@ HoistTerminator:
       // These values do not agree.  Insert a select instruction before NT
       // that determines the right value.
       SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
-      if (!SI)
+      if (!SI) {
+        // Propagate fast-math-flags from phi node to its replacement select.
+        IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+        if (isa<FPMathOperator>(PN))
+          Builder.setFastMathFlags(PN.getFastMathFlags());
+
         SI = cast<SelectInst>(
             Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
                                  BB1V->getName() + "." + BB2V->getName(), BI));
+      }
 
       // Make the PHI node use the select for all incoming values for BB1/BB2
       for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
@@ -2262,14 +2269,14 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
         if (!BBI->use_empty())
           TranslateMap[&*BBI] = N;
       }
-      // Insert the new instruction into its new home.
-      if (N)
+      if (N) {
+        // Insert the new instruction into its new home.
         EdgeBB->getInstList().insert(InsertPt, N);
 
-      // Register the new instruction with the assumption cache if necessary.
-      if (auto *II = dyn_cast_or_null<IntrinsicInst>(N))
-        if (II->getIntrinsicID() == Intrinsic::assume)
-          AC->registerAssumption(II);
+        // Register the new instruction with the assumption cache if necessary.
+        if (AC && match(N, m_Intrinsic<Intrinsic::assume>()))
+          AC->registerAssumption(cast<IntrinsicInst>(N));
+      }
     }
 
     // Loop over all of the edges from PredBB to BB, changing them to branch
@@ -2418,7 +2425,12 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
   if (IfBlock2)
     hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock2);
 
+  // Propagate fast-math-flags from phi nodes to replacement selects.
+  IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
   while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+    if (isa<FPMathOperator>(PN))
+      Builder.setFastMathFlags(PN->getFastMathFlags());
+
     // Change the PHI node into a select instruction.
     Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
     Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
@@ -3212,6 +3224,47 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
   return Changed;
 }
 
+
+/// If the previous block ended with a widenable branch, determine if reusing
+/// the target block is profitable and legal.  This will have the effect of
+/// "widening" PBI, but doesn't require us to reason about hosting safety.
+static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
+  // TODO: This can be generalized in two important ways:
+  // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
+  //    values from the PBI edge.
+  // 2) We can sink side effecting instructions into BI's fallthrough
+  //    successor provided they doesn't contribute to computation of
+  //    BI's condition.
+  Value *CondWB, *WC;
+  BasicBlock *IfTrueBB, *IfFalseBB;
+  if (!parseWidenableBranch(PBI, CondWB, WC, IfTrueBB, IfFalseBB) ||
+      IfTrueBB != BI->getParent() || !BI->getParent()->getSinglePredecessor())
+    return false;
+  if (!IfFalseBB->phis().empty())
+    return false; // TODO
+  // Use lambda to lazily compute expensive condition after cheap ones.
+  auto NoSideEffects = [](BasicBlock &BB) {
+    return !llvm::any_of(BB, [](const Instruction &I) {
+        return I.mayWriteToMemory() || I.mayHaveSideEffects();
+      });
+  };
+  if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
+      BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
+      NoSideEffects(*BI->getParent())) {
+    BI->getSuccessor(1)->removePredecessor(BI->getParent());
+    BI->setSuccessor(1, IfFalseBB);
+    return true;
+  }
+  if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
+      BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
+      NoSideEffects(*BI->getParent())) {
+    BI->getSuccessor(0)->removePredecessor(BI->getParent());
+    BI->setSuccessor(0, IfFalseBB);
+    return true;
+  }
+  return false;
+}
+
 /// If we have a conditional branch as a predecessor of another block,
 /// this function tries to simplify it.  We know
 /// that PBI and BI are both conditional branches, and BI is in one of the
@@ -3267,6 +3320,12 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
     }
   }
 
+  // If the previous block ended with a widenable branch, determine if reusing
+  // the target block is profitable and legal.  This will have the effect of
+  // "widening" PBI, but doesn't require us to reason about hosting safety.
+  if (tryWidenCondBranchToCondBranch(PBI, BI))
+    return true;
+
   if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
     if (CE->canTrap())
       return false;
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 0324993a8203..fa3a9d21f3df 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -288,8 +288,9 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
 
   // We have enough information to now generate the memcpy call to do the
   // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
-  B.CreateMemCpy(CpyDst, 1, Src, 1,
-                 ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1));
+  B.CreateMemCpy(
+      CpyDst, Align::None(), Src, Align::None(),
+      ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1));
   return Dst;
 }
 
@@ -364,8 +365,8 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
   StringRef Str;
   if (!getConstantStringInfo(SrcStr, Str)) {
     if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
-      return B.CreateGEP(B.getInt8Ty(), SrcStr, emitStrLen(SrcStr, B, DL, TLI),
-                         "strchr");
+      if (Value *StrLen = emitStrLen(SrcStr, B, DL, TLI))
+        return B.CreateGEP(B.getInt8Ty(), SrcStr, StrLen, "strchr");
     return nullptr;
   }
 
@@ -561,7 +562,7 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
   // We have enough information to now generate the memcpy call to do the
   // copy for us.  Make a memcpy to copy the nul byte with align = 1.
   CallInst *NewCI =
-      B.CreateMemCpy(Dst, 1, Src, 1,
+      B.CreateMemCpy(Dst, Align::None(), Src, Align::None(),
                      ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
   NewCI->setAttributes(CI->getAttributes());
   return Dst;
@@ -589,7 +590,8 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
 
   // We have enough information to now generate the memcpy call to do the
   // copy for us.  Make a memcpy to copy the nul byte with align = 1.
-  CallInst *NewCI = B.CreateMemCpy(Dst, 1, Src, 1, LenV);
+  CallInst *NewCI =
+      B.CreateMemCpy(Dst, Align::None(), Src, Align::None(), LenV);
   NewCI->setAttributes(CI->getAttributes());
   return DstEnd;
 }
@@ -624,7 +626,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
 
   if (SrcLen == 0) {
     // strncpy(x, "", y) -> memset(align 1 x, '\0', y)
-    CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, 1);
+    CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, Align::None());
     AttrBuilder ArgAttrs(CI->getAttributes().getParamAttributes(0));
     NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
         CI->getContext(), 0, ArgAttrs));
@@ -637,7 +639,8 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
 
   Type *PT = Callee->getFunctionType()->getParamType(0);
   // strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant]
-  CallInst *NewCI = B.CreateMemCpy(Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len));
+  CallInst *NewCI = B.CreateMemCpy(Dst, Align::None(), Src, Align::None(),
+                                   ConstantInt::get(DL.getIntPtrType(PT), Len));
   NewCI->setAttributes(CI->getAttributes());
   return Dst;
 }
@@ -1113,17 +1116,58 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
     return nullptr;
 
   // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
-  CallInst *NewCI =
-      B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, Size);
+  CallInst *NewCI = B.CreateMemCpy(CI->getArgOperand(0), Align::None(),
+                                   CI->getArgOperand(1), Align::None(), Size);
   NewCI->setAttributes(CI->getAttributes());
   return CI->getArgOperand(0);
 }
 
+Value *LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilder<> &B) {
+  Value *Dst = CI->getArgOperand(0);
+  Value *Src = CI->getArgOperand(1);
+  ConstantInt *StopChar = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+  ConstantInt *N = dyn_cast<ConstantInt>(CI->getArgOperand(3));
+  StringRef SrcStr;
+  if (CI->use_empty() && Dst == Src)
+    return Dst;
+  // memccpy(d, s, c, 0) -> nullptr
+  if (N) {
+    if (N->isNullValue())
+      return Constant::getNullValue(CI->getType());
+    if (!getConstantStringInfo(Src, SrcStr, /*Offset=*/0,
+                               /*TrimAtNul=*/false) ||
+        !StopChar)
+      return nullptr;
+  } else {
+    return nullptr;
+  }
+
+  // Wrap arg 'c' of type int to char
+  size_t Pos = SrcStr.find(StopChar->getSExtValue() & 0xFF);
+  if (Pos == StringRef::npos) {
+    if (N->getZExtValue() <= SrcStr.size()) {
+      B.CreateMemCpy(Dst, Align::None(), Src, Align::None(),
+                     CI->getArgOperand(3));
+      return Constant::getNullValue(CI->getType());
+    }
+    return nullptr;
+  }
+
+  Value *NewN =
+      ConstantInt::get(N->getType(), std::min(uint64_t(Pos + 1), N->getZExtValue()));
+  // memccpy -> llvm.memcpy
+  B.CreateMemCpy(Dst, Align::None(), Src, Align::None(), NewN);
+  return Pos + 1 <= N->getZExtValue()
+             ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, NewN)
+             : Constant::getNullValue(CI->getType());
+}
+
 Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilder<> &B) {
   Value *Dst = CI->getArgOperand(0);
   Value *N = CI->getArgOperand(2);
   // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n
-  CallInst *NewCI = B.CreateMemCpy(Dst, 1, CI->getArgOperand(1), 1, N);
+  CallInst *NewCI = B.CreateMemCpy(Dst, Align::None(), CI->getArgOperand(1),
+                                   Align::None(), N);
   NewCI->setAttributes(CI->getAttributes());
   return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
 }
@@ -1135,8 +1179,8 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
     return nullptr;
 
   // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n)
-  CallInst *NewCI =
-      B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, Size);
+  CallInst *NewCI = B.CreateMemMove(CI->getArgOperand(0), Align::None(),
+                                    CI->getArgOperand(1), Align::None(), Size);
   NewCI->setAttributes(CI->getAttributes());
   return CI->getArgOperand(0);
 }
@@ -1196,7 +1240,8 @@ Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
 
   // memset(p, v, n) -> llvm.memset(align 1 p, v, n)
   Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
-  CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, 1);
+  CallInst *NewCI =
+      B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align::None());
   NewCI->setAttributes(CI->getAttributes());
   return CI->getArgOperand(0);
 }
@@ -1599,6 +1644,11 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
       (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))
     return nullptr;
 
+  // Converting pow(X, -0.5) to 1/sqrt(X) may introduce an extra rounding step,
+  // so that requires fast-math-flags (afn or reassoc).
+  if (ExpoF->isNegative() && (!Pow->hasApproxFunc() && !Pow->hasAllowReassoc()))
+    return nullptr;
+
   Sqrt = getSqrtCall(Base, Attrs, Pow->doesNotAccessMemory(), Mod, B, TLI);
   if (!Sqrt)
     return nullptr;
@@ -1696,7 +1746,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
     // TODO: This whole transformation should be backend specific (e.g. some
     //       backends might prefer libcalls or the limit for the exponent might
     //       be different) and it should also consider optimizing for size.
-    APFloat LimF(ExpoF->getSemantics(), 33.0),
+    APFloat LimF(ExpoF->getSemantics(), 33),
             ExpoA(abs(*ExpoF));
     if (ExpoA.compare(LimF) == APFloat::cmpLessThan) {
       // This transformation applies to integer or integer+0.5 exponents only.
@@ -2426,9 +2476,11 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
       return nullptr; // we found a format specifier, bail out.
 
     // sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1)
-    B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
-                   ConstantInt::get(DL.getIntPtrType(CI->getContext()),
-                                    FormatStr.size() + 1)); // Copy the null byte.
+    B.CreateMemCpy(
+        CI->getArgOperand(0), Align::None(), CI->getArgOperand(1),
+        Align::None(),
+        ConstantInt::get(DL.getIntPtrType(CI->getContext()),
+                         FormatStr.size() + 1)); // Copy the null byte.
     return ConstantInt::get(CI->getType(), FormatStr.size());
   }
 
@@ -2463,7 +2515,8 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
       return nullptr;
     Value *IncLen =
         B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc");
-    B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(2), 1, IncLen);
+    B.CreateMemCpy(CI->getArgOperand(0), Align::None(), CI->getArgOperand(2),
+                   Align::None(), IncLen);
 
     // The sprintf result is the unincremented number of bytes in the string.
     return B.CreateIntCast(Len, CI->getType(), false);
@@ -2534,7 +2587,8 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) {
     // snprintf(dst, size, fmt) -> llvm.memcpy(align 1 dst, align 1 fmt,
     // strlen(fmt)+1)
     B.CreateMemCpy(
-        CI->getArgOperand(0), 1, CI->getArgOperand(2), 1,
+        CI->getArgOperand(0), Align::None(), CI->getArgOperand(2),
+        Align::None(),
         ConstantInt::get(DL.getIntPtrType(CI->getContext()),
                          FormatStr.size() + 1)); // Copy the null byte.
     return ConstantInt::get(CI->getType(), FormatStr.size());
@@ -2575,7 +2629,8 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) {
       else if (N < Str.size() + 1)
         return nullptr;
 
-      B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(3), 1,
+      B.CreateMemCpy(CI->getArgOperand(0), Align::None(), CI->getArgOperand(3),
+                     Align::None(),
                      ConstantInt::get(CI->getType(), Str.size() + 1));
 
       // The snprintf result is the unincremented number of bytes in the string.
@@ -2716,7 +2771,8 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
   // Don't rewrite fputs to fwrite when optimising for size because fwrite
   // requires more arguments and thus extra MOVs are required.
   bool OptForSize = CI->getFunction()->hasOptSize() ||
-                    llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
+                    llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
+                                                PGSOQueryType::IRPass);
   if (OptForSize)
     return nullptr;
 
@@ -2792,7 +2848,8 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
 
 Value *LibCallSimplifier::optimizeBCopy(CallInst *CI, IRBuilder<> &B) {
   // bcopy(src, dst, n) -> llvm.memmove(dst, src, n)
-  return B.CreateMemMove(CI->getArgOperand(1), 1, CI->getArgOperand(0), 1,
+  return B.CreateMemMove(CI->getArgOperand(1), Align::None(),
+                         CI->getArgOperand(0), Align::None(),
                          CI->getArgOperand(2));
 }
 
@@ -2864,6 +2921,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
       return optimizeMemCmp(CI, Builder);
     case LibFunc_memcpy:
       return optimizeMemCpy(CI, Builder);
+    case LibFunc_memccpy:
+      return optimizeMemCCpy(CI, Builder);
     case LibFunc_mempcpy:
       return optimizeMemPCpy(CI, Builder);
     case LibFunc_memmove:
@@ -3223,8 +3282,9 @@ FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
 Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
                                                      IRBuilder<> &B) {
   if (isFortifiedCallFoldable(CI, 3, 2)) {
-    CallInst *NewCI = B.CreateMemCpy(
-        CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, CI->getArgOperand(2));
+    CallInst *NewCI = B.CreateMemCpy(CI->getArgOperand(0), Align::None(),
+                                     CI->getArgOperand(1), Align::None(),
+                                     CI->getArgOperand(2));
     NewCI->setAttributes(CI->getAttributes());
     return CI->getArgOperand(0);
   }
@@ -3234,8 +3294,9 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
 Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
                                                       IRBuilder<> &B) {
   if (isFortifiedCallFoldable(CI, 3, 2)) {
-    CallInst *NewCI = B.CreateMemMove(
-        CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, CI->getArgOperand(2));
+    CallInst *NewCI = B.CreateMemMove(CI->getArgOperand(0), Align::None(),
+                                      CI->getArgOperand(1), Align::None(),
+                                      CI->getArgOperand(2));
     NewCI->setAttributes(CI->getAttributes());
     return CI->getArgOperand(0);
   }
@@ -3248,8 +3309,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
 
   if (isFortifiedCallFoldable(CI, 3, 2)) {
     Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
-    CallInst *NewCI =
-        B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+    CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val,
+                                     CI->getArgOperand(2), Align::None());
     NewCI->setAttributes(CI->getAttributes());
     return CI->getArgOperand(0);
   }
diff --git a/llvm/lib/Transforms/Utils/SizeOpts.cpp b/llvm/lib/Transforms/Utils/SizeOpts.cpp
index 1519751197d2..d2a400027d4b 100644
--- a/llvm/lib/Transforms/Utils/SizeOpts.cpp
+++ b/llvm/lib/Transforms/Utils/SizeOpts.cpp
@@ -10,28 +10,80 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Utils/SizeOpts.h"
+
 using namespace llvm;
 
-static cl::opt<bool> ProfileGuidedSizeOpt(
+cl::opt<bool> EnablePGSO(
     "pgso", cl::Hidden, cl::init(true),
-    cl::desc("Enable the profile guided size optimization. "));
-
-bool llvm::shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
-                                 BlockFrequencyInfo *BFI) {
-  assert(F);
-  if (!PSI || !BFI || !PSI->hasProfileSummary())
-    return false;
-  return ProfileGuidedSizeOpt && PSI->isFunctionColdInCallGraph(F, *BFI);
+    cl::desc("Enable the profile guided size optimizations. "));
+
+cl::opt<bool> PGSOLargeWorkingSetSizeOnly(
+    "pgso-lwss-only", cl::Hidden, cl::init(true),
+    cl::desc("Apply the profile guided size optimizations only "
+             "if the working set size is large (except for cold code.)"));
+
+cl::opt<bool> PGSOColdCodeOnly(
+    "pgso-cold-code-only", cl::Hidden, cl::init(true),
+    cl::desc("Apply the profile guided size optimizations only "
+             "to cold code."));
+
+cl::opt<bool> PGSOIRPassOrTestOnly(
+    "pgso-ir-pass-or-test-only", cl::Hidden, cl::init(false),
+    cl::desc("Apply the profile guided size optimizations only"
+             "to the IR passes or tests."));
+
+cl::opt<bool> ForcePGSO(
+    "force-pgso", cl::Hidden, cl::init(false),
+    cl::desc("Force the (profiled-guided) size optimizations. "));
+
+cl::opt<int> PgsoCutoffInstrProf(
+    "pgso-cutoff-instr-prof", cl::Hidden, cl::init(250000), cl::ZeroOrMore,
+    cl::desc("The profile guided size optimization profile summary cutoff "
+             "for instrumentation profile."));
+
+cl::opt<int> PgsoCutoffSampleProf(
+    "pgso-cutoff-sample-prof", cl::Hidden, cl::init(800000), cl::ZeroOrMore,
+    cl::desc("The profile guided size optimization profile summary cutoff "
+             "for sample profile."));
+
+namespace {
+struct BasicBlockBFIAdapter {
+  static bool isFunctionColdInCallGraph(const Function *F,
+                                        ProfileSummaryInfo *PSI,
+                                        BlockFrequencyInfo &BFI) {
+    return PSI->isFunctionColdInCallGraph(F, BFI);
+  }
+  static bool isFunctionHotInCallGraphNthPercentile(int CutOff,
+                                                    const Function *F,
+                                                    ProfileSummaryInfo *PSI,
+                                                    BlockFrequencyInfo &BFI) {
+    return PSI->isFunctionHotInCallGraphNthPercentile(CutOff, F, BFI);
+  }
+  static bool isColdBlock(const BasicBlock *BB,
+                          ProfileSummaryInfo *PSI,
+                          BlockFrequencyInfo *BFI) {
+    return PSI->isColdBlock(BB, BFI);
+  }
+  static bool isHotBlockNthPercentile(int CutOff,
+                                      const BasicBlock *BB,
+                                      ProfileSummaryInfo *PSI,
+                                      BlockFrequencyInfo *BFI) {
+    return PSI->isHotBlockNthPercentile(CutOff, BB, BFI);
+  }
+};
+} // end anonymous namespace
+
+bool llvm::shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI,
+                                 BlockFrequencyInfo *BFI,
+                                 PGSOQueryType QueryType) {
+  return shouldFuncOptimizeForSizeImpl<BasicBlockBFIAdapter>(F, PSI, BFI,
+                                                             QueryType);
 }
 
-bool llvm::shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
-                                 BlockFrequencyInfo *BFI) {
-  assert(BB);
-  if (!PSI || !BFI || !PSI->hasProfileSummary())
-    return false;
-  return ProfileGuidedSizeOpt && PSI->isColdBlock(BB, BFI);
+bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI,
+                                 BlockFrequencyInfo *BFI,
+                                 PGSOQueryType QueryType) {
+  return shouldOptimizeForSizeImpl<BasicBlockBFIAdapter>(BB, PSI, BFI,
+                                                         QueryType);
 }
diff --git a/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
index 50844cf9d1c5..7880ea1c6c47 100644
--- a/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
+++ b/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
@@ -18,6 +18,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Statepoint.h"
 #include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/raw_ostream.h"
 
diff --git a/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
index 97a4533fabe5..21cbbfb140b6 100644
--- a/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
+++ b/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/IR/DebugInfo.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/Utils.h"
 using namespace llvm;
diff --git a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
index 5d380dcf231c..aacf81d83519 100644
--- a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -69,6 +69,7 @@
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
diff --git a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 7f7bdf8a3d6d..9af39d9a0dd1 100644
--- a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -18,10 +18,16 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Transforms/Utils.h"
 using namespace llvm;
 
 char UnifyFunctionExitNodes::ID = 0;
+
+UnifyFunctionExitNodes::UnifyFunctionExitNodes() : FunctionPass(ID) {
+  initializeUnifyFunctionExitNodesPass(*PassRegistry::getPassRegistry());
+}
+
 INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn",
                 "Unify function exit nodes", false, false)
 
diff --git a/llvm/lib/Transforms/Utils/Utils.cpp b/llvm/lib/Transforms/Utils/Utils.cpp
index 5272ab6e95d5..7769c7493cda 100644
--- a/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/llvm/lib/Transforms/Utils/Utils.cpp
@@ -39,6 +39,7 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {
   initializeMetaRenamerPass(Registry);
   initializeStripGCRelocatesPass(Registry);
   initializePredicateInfoPrinterLegacyPassPass(Registry);
+  initializeInjectTLIMappingsLegacyPass(Registry);
 }
 
 /// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses.