| author | Dimitry Andric <dim@FreeBSD.org> | 2018-08-02 17:32:43 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2018-08-02 17:32:43 +0000 |
| commit | b7eb8e35e481a74962664b63dfb09483b200209a (patch) | |
| tree | 1937fb4a348458ce2d02ade03ac3bb0aa18d2fcd /lib/Transforms | |
| parent | eb11fae6d08f479c0799db45860a98af528fa6e7 (diff) | |
Diffstat (limited to 'lib/Transforms')
58 files changed, 702 insertions, 429 deletions
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 31e771da3bd3..cd2bd734eb26 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -56,7 +56,7 @@ using namespace llvm;
 
 STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
 STATISTIC(NumRetValsEliminated  , "Number of unused return values removed");
-STATISTIC(NumArgumentsReplacedWithUndef, 
+STATISTIC(NumArgumentsReplacedWithUndef,
           "Number of unread args replaced with undef");
 
 namespace {
@@ -109,7 +109,7 @@ namespace {
 
 char DAH::ID = 0;
 
-INITIALIZE_PASS(DAH, "deadarghaX0r", 
+INITIALIZE_PASS(DAH, "deadarghaX0r",
                 "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
                 false, false)
@@ -256,7 +256,7 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
   return true;
 }
 
-/// RemoveDeadArgumentsFromCallers - Checks if the given function has any 
+/// RemoveDeadArgumentsFromCallers - Checks if the given function has any
 /// arguments that are unused, and changes the caller parameters to be undefined
 /// instead.
 bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
@@ -640,7 +640,7 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
       Result = Live;
     } else {
       // See what the effect of this use is (recording any uses that cause
-      // MaybeLive in MaybeLiveArgUses). 
+      // MaybeLive in MaybeLiveArgUses).
       Result = SurveyUses(&*AI, MaybeLiveArgUses);
     }
 
@@ -777,7 +777,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
   //    argument.
   // 2) Retain the 'returned' attribute and treat the return value (but not the
   //    entire function) as live so that it is not eliminated.
-  // 
+  //
   // It's not clear in the general case which option is more profitable because,
   // even in the absence of explicit uses of the return value, code generation
   // is free to use the 'returned' attribute to do things like eliding
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 2797da6c0abd..010b0a29807d 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -617,7 +617,7 @@ static bool addArgumentAttrsFromCallsites(Function &F) {
     if (!isGuaranteedToTransferExecutionToSuccessor(&I))
       break;
   }
-  
+
   return Changed;
 }
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 1af7e6894777..1761d7faff57 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -357,6 +357,41 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
   return Changed;
 }
 
+static bool isSafeSROAElementUse(Value *V);
+
+/// Return true if the specified GEP is a safe user of a derived
+/// expression from a global that we want to SROA.
+static bool isSafeSROAGEP(User *U) {
+  // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
+  // don't like < 3 operand CE's, and we don't like non-constant integer
+  // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
+  // value of C.
+  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
+      !cast<Constant>(U->getOperand(1))->isNullValue())
+    return false;
+
+  gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
+  ++GEPI; // Skip over the pointer index.
+
+  // For all other level we require that the indices are constant and inrange.
+  // In particular, consider: A[0][i].  We cannot know that the user isn't doing
+  // invalid things like allowing i to index an out-of-range subscript that
+  // accesses A[1]. This can also happen between different members of a struct
+  // in llvm IR.
+  for (; GEPI != E; ++GEPI) {
+    if (GEPI.isStruct())
+      continue;
+
+    ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
+    if (!IdxVal || (GEPI.isBoundedSequential() &&
+                    IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
+      return false;
+  }
+
+  return llvm::all_of(U->users(),
+                      [](User *UU) { return isSafeSROAElementUse(UU); });
+}
+
 /// Return true if the specified instruction is a safe user of a derived
 /// expression from a global that we want to SROA.
 static bool isSafeSROAElementUse(Value *V) {
@@ -374,84 +409,25 @@ static bool isSafeSROAElementUse(Value *V) {
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return SI->getOperand(0) != V;
 
-  // Otherwise, it must be a GEP.
-  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
-  if (!GEPI) return false;
-
-  if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
-      !cast<Constant>(GEPI->getOperand(1))->isNullValue())
-    return false;
-
-  for (User *U : GEPI->users())
-    if (!isSafeSROAElementUse(U))
-      return false;
-  return true;
-}
-
-/// U is a direct user of the specified global value.  Look at it and its uses
-/// and decide whether it is safe to SROA this global.
-static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
-  // The user of the global must be a GEP Inst or a ConstantExpr GEP.
-  if (!isa<GetElementPtrInst>(U) &&
-      (!isa<ConstantExpr>(U) ||
-       cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
-    return false;
-
-  // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
-  // don't like < 3 operand CE's, and we don't like non-constant integer
-  // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
-  // value of C.
-  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
-      !cast<Constant>(U->getOperand(1))->isNullValue() ||
-      !isa<ConstantInt>(U->getOperand(2)))
-    return false;
-
-  gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
-  ++GEPI;  // Skip over the pointer index.
-
-  // If this is a use of an array allocation, do a bit more checking for sanity.
-  if (GEPI.isSequential()) {
-    ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
-
-    // Check to make sure that index falls within the array.  If not,
-    // something funny is going on, so we won't do the optimization.
-    //
-    if (GEPI.isBoundedSequential() &&
-        Idx->getZExtValue() >= GEPI.getSequentialNumElements())
-      return false;
-
-    // We cannot scalar repl this level of the array unless any array
-    // sub-indices are in-range constants.  In particular, consider:
-    // A[0][i].  We cannot know that the user isn't doing invalid things like
-    // allowing i to index an out-of-range subscript that accesses A[1].
-    //
-    // Scalar replacing *just* the outer index of the array is probably not
-    // going to be a win anyway, so just give up.
-    for (++GEPI; // Skip array index.
-         GEPI != E;
-         ++GEPI) {
-      if (GEPI.isStruct())
-        continue;
-
-      ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
-      if (!IdxVal ||
-          (GEPI.isBoundedSequential() &&
-           IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
-        return false;
-    }
-  }
-
-  return llvm::all_of(U->users(),
-                      [](User *UU) { return isSafeSROAElementUse(UU); });
+  // Otherwise, it must be a GEP. Check it and its users are safe to SRA.
+  return isa<GetElementPtrInst>(I) && isSafeSROAGEP(I);
 }
 
 /// Look at all uses of the global and decide whether it is safe for us to
 /// perform this transformation.
 static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
-  for (User *U : GV->users())
-    if (!IsUserOfGlobalSafeForSRA(U, GV))
+  for (User *U : GV->users()) {
+    // The user of the global must be a GEP Inst or a ConstantExpr GEP.
+    if (!isa<GetElementPtrInst>(U) &&
+        (!isa<ConstantExpr>(U) ||
+        cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
       return false;
+    // Check the gep and it's users are safe to SRA
+    if (!isSafeSROAGEP(U))
+      return false;
+  }
+
   return true;
 }
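The in-range requirement that `isSafeSROAGEP` enforces is what keeps splitting a global (SROA) sound: in the flat memory layout, an out-of-range inner index can alias a neighbouring element. A standalone C++ illustration of that aliasing (names hypothetical, the out-of-range access is deliberate; not part of the commit):

```cpp
#include <cstdio>

// Hypothetical analogue of a global 'A' that GlobalOpt might want to split
// into independent per-row globals.
static int A[2][3] = {{1, 2, 3}, {4, 5, 6}};

int main() {
  int i = 3; // not a compile-time constant in general, out of range for A[0]
  // In the flat layout &A[0][3] == &A[1][0] (undefined behavior in strict
  // C++, shown only to illustrate the aliasing): "A[0][i]" can land inside
  // A[1], so replacing A with separate per-row globals would change what
  // this access reads. Hence non-constant inner indices are rejected.
  std::printf("%d\n", A[0][i]); // prints 4 on typical flat-layout targets
  return 0;
}
```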
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index f79b61037f1d..7d55ebecbf92 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -61,12 +61,12 @@ static bool PropagateConstantsIntoArguments(Function &F) {
     User *UR = U.getUser();
     // Ignore blockaddress uses.
     if (isa<BlockAddress>(UR)) continue;
-    
+
     // Used by a non-instruction, or not the callee of a function, do not
     // transform.
     if (!isa<CallInst>(UR) && !isa<InvokeInst>(UR))
       return false;
-    
+
     CallSite CS(cast<Instruction>(UR));
     if (!CS.isCallee(&U))
       return false;
@@ -77,11 +77,11 @@ static bool PropagateConstantsIntoArguments(Function &F) {
     Function::arg_iterator Arg = F.arg_begin();
     for (unsigned i = 0, e = ArgumentConstants.size(); i != e;
          ++i, ++AI, ++Arg) {
-      
+
       // If this argument is known non-constant, ignore it.
       if (ArgumentConstants[i].second)
         continue;
-      
+
       Constant *C = dyn_cast<Constant>(*AI);
       if (C && ArgumentConstants[i].first == nullptr) {
         ArgumentConstants[i].first = C;   // First constant seen.
@@ -108,7 +108,7 @@ static bool PropagateConstantsIntoArguments(Function &F) {
     if (ArgumentConstants[i].second || AI->use_empty() ||
         AI->hasInAllocaAttr() || (AI->hasByValAttr() && !F.onlyReadsMemory()))
       continue;
-  
+
     Value *V = ArgumentConstants[i].first;
     if (!V) V = UndefValue::get(AI->getType());
     AI->replaceAllUsesWith(V);
@@ -147,7 +147,7 @@ static bool PropagateConstantReturn(Function &F) {
   SmallVector<Value *,4> RetVals;
   StructType *STy = dyn_cast<StructType>(F.getReturnType());
   if (STy)
-    for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i) 
+    for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i)
       RetVals.push_back(UndefValue::get(STy->getElementType(i)));
   else
     RetVals.push_back(UndefValue::get(F.getReturnType()));
@@ -172,7 +172,7 @@ static bool PropagateConstantReturn(Function &F) {
           // Ignore undefs, we can change them into anything
           if (isa<UndefValue>(V))
             continue;
-          
+
           // Try to see if all the rets return the same constant or argument.
           if (isa<Constant>(V) || isa<Argument>(V)) {
             if (isa<UndefValue>(RV)) {
@@ -206,7 +206,7 @@
     // directly?
     if (!Call || !CS.isCallee(&U))
       continue;
-    
+
     // Call result not used?
     if (Call->use_empty())
       continue;
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 139941127dee..3bebb96c6d35 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -27,7 +27,7 @@
 // -- We define Function* container class with custom "operator<" (FunctionPtr).
 // -- "FunctionPtr" instances are stored in std::set collection, so every
 //    std::set::insert operation will give you result in log(N) time.
-// 
+//
 // As an optimization, a hash of the function structure is calculated first, and
 // two functions are only compared if they have the same hash. This hash is
 // cheap to compute, and has the property that if function F == G according to
@@ -383,7 +383,7 @@ bool MergeFunctions::runOnModule(Module &M) {
   for (Function &Func : M) {
     if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) {
       HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
-    } 
+    }
   }
 
   std::stable_sort(
@@ -402,7 +402,7 @@ bool MergeFunctions::runOnModule(Module &M) {
       Deferred.push_back(WeakTrackingVH(I->second));
     }
   }
-  
+
   do {
     std::vector<WeakTrackingVH> Worklist;
     Deferred.swap(Worklist);
@@ -802,11 +802,11 @@ void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN,
   Function *F = FN.getFunc();
   assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 &&
          "The two functions must be equal");
-  
+
   auto I = FNodesInTree.find(F);
   assert(I != FNodesInTree.end() && "F should be in FNodesInTree");
   assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G");
-  
+
   FnTreeType::iterator IterToFNInFnTree = I->second;
   assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree.");
   // Remove F -> FN and insert G -> FN
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 27d791857314..2be654258aa8 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -77,13 +77,13 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) {
 
   // Next, check to see if any callees might throw or if there are any external
   // functions in this SCC: if so, we cannot prune any functions in this SCC.
-  // Definitions that are weak and not declared non-throwing might be 
+  // Definitions that are weak and not declared non-throwing might be
   // overridden at linktime with something that throws, so assume that.
   // If this SCC includes the unwind instruction, we KNOW it throws, so
   // obviously the SCC might throw.
   //
   bool SCCMightUnwind = false, SCCMightReturn = false;
-  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); 
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
        (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
     Function *F = (*I)->getFunction();
     if (!F) {
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index aa31e0d850dd..83054588a9aa 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -926,7 +926,13 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
   if (Instruction *NV = foldBinOpIntoSelectOrPhi(Add))
     return NV;
 
-  Value *X;
+  Value *X, *Y;
+
+  // add (sub X, Y), -1 --> add (not Y), X
+  if (match(Op0, m_OneUse(m_Sub(m_Value(X), m_Value(Y)))) &&
+      match(Op1, m_AllOnes()))
+    return BinaryOperator::CreateAdd(Builder.CreateNot(Y), X);
+
   // zext(bool) + C -> bool ? C + 1 : C
   if (match(Op0, m_ZExt(m_Value(X))) &&
       X->getType()->getScalarSizeInBits() == 1)
@@ -1608,6 +1614,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
   if (match(Op0, m_Not(m_Value(X))) && match(Op1, m_Not(m_Value(Y))))
     return BinaryOperator::CreateSub(Y, X);
 
+  // (X + -1) - Y --> ~Y + X
+  if (match(Op0, m_OneUse(m_Add(m_Value(X), m_AllOnes()))))
+    return BinaryOperator::CreateAdd(Builder.CreateNot(Op1), X);
+
+  // Y - (X + 1) --> ~X + Y
+  if (match(Op1, m_OneUse(m_Add(m_Value(X), m_One()))))
+    return BinaryOperator::CreateAdd(Builder.CreateNot(X), Op0);
+
   if (Constant *C = dyn_cast<Constant>(Op0)) {
     bool IsNegate = match(C, m_ZeroInt());
     Value *X;
@@ -1858,7 +1872,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
   Constant *C;
   if (match(Op1, m_Constant(C)) && !isa<ConstantExpr>(Op1))
     return BinaryOperator::CreateFAddFMF(Op0, ConstantExpr::getFNeg(C), &I);
-  
+
   // X - (-Y) --> X + Y
   if (match(Op1, m_FNeg(m_Value(Y))))
     return BinaryOperator::CreateFAddFMF(Op0, Y, &I);
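All three new InstCombineAddSub folds rest on the two's-complement identity `~v == -v - 1`, so for example `~y + x == x - y - 1`. A quick standalone sanity check of the rewrites (plain C++ using unsigned arithmetic so wraparound is well defined; illustrative only, not part of the commit):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t vals[] = {0u, 1u, 7u, 0x80000000u, 0xFFFFFFFFu};
  for (uint32_t x : vals)
    for (uint32_t y : vals) {
      // add (sub X, Y), -1 --> add (not Y), X
      assert((x - y) + 0xFFFFFFFFu == ~y + x);
      // (X + -1) - Y --> ~Y + X
      assert((x + 0xFFFFFFFFu) - y == ~y + x);
      // Y - (X + 1) --> ~X + Y
      assert(y - (x + 1u) == ~x + y);
    }
  return 0;
}
```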
-        if (A == tmpOp0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B
-          return BinaryOperator::CreateAnd(A, Builder.CreateNot(B));
-      }
-    }
+    Value *A, *B, *C;
+    // A & (A ^ B) --> A & ~B
+    if (match(Op1, m_OneUse(m_c_Xor(m_Specific(Op0), m_Value(B)))))
+      return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(B));
+    // (A ^ B) & A --> A & ~B
+    if (match(Op0, m_OneUse(m_c_Xor(m_Specific(Op1), m_Value(B)))))
+      return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(B));
 
     // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
     if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
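The rewritten fold depends only on the bitwise identity `A & (A ^ B) == A & ~B`: where a bit of A is 1, `A ^ B` equals `~B` on that bit; where it is 0, both sides are 0. An exhaustive standalone check over 8-bit values (plain C++, not part of the commit):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b) {
      const uint8_t na = uint8_t(a), nb = uint8_t(b);
      assert((na & (na ^ nb)) == (na & uint8_t(~nb))); // A & (A ^ B)
      assert(((na ^ nb) & na) == (na & uint8_t(~nb))); // (A ^ B) & A
    }
  return 0;
}
```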
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index e8ea7396a96a..fd59c3a7c0c3 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2243,6 +2243,12 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
     Type *DstElTy = DstPTy->getElementType();
     Type *SrcElTy = SrcPTy->getElementType();
 
+    // Casting pointers between the same type, but with different address spaces
+    // is an addrspace cast rather than a bitcast.
+    if ((DstElTy == SrcElTy) &&
+        (DstPTy->getAddressSpace() != SrcPTy->getAddressSpace()))
+      return new AddrSpaceCastInst(Src, DestTy);
+
     // If we are casting a alloca to a pointer to a type of the same
     // size, rewrite the allocation instruction to allocate the "right" type.
     // There is no need to modify malloc calls because it is their bitcast that
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 742caf649007..62769f077b47 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -518,7 +518,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
 static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
   assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
          "can't fold an atomic store of requested type");
-  
+
   Value *Ptr = SI.getPointerOperand();
   unsigned AS = SI.getPointerAddressSpace();
   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 4867808478a3..796b4021d273 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -54,6 +54,36 @@ static Value *createMinMax(InstCombiner::BuilderTy &Builder,
   return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
 }
 
+/// Fold
+///   %A = icmp eq/ne i8 %x, 0
+///   %B = op i8 %x, %z
+///   %C = select i1 %A, i8 %B, i8 %y
+/// To
+///   %C = select i1 %A, i8 %z, i8 %y
+/// OP: binop with an identity constant
+/// TODO: support for non-commutative and FP opcodes
+static Instruction *foldSelectBinOpIdentity(SelectInst &Sel) {
+
+  Value *Cond = Sel.getCondition();
+  Value *X, *Z;
+  Constant *C;
+  CmpInst::Predicate Pred;
+  if (!match(Cond, m_ICmp(Pred, m_Value(X), m_Constant(C))) ||
+      !ICmpInst::isEquality(Pred))
+    return nullptr;
+
+  bool IsEq = Pred == ICmpInst::ICMP_EQ;
+  auto *BO =
+      dyn_cast<BinaryOperator>(IsEq ? Sel.getTrueValue() : Sel.getFalseValue());
+  // TODO: support for undefs
+  if (BO && match(BO, m_c_BinOp(m_Specific(X), m_Value(Z))) &&
+      ConstantExpr::getBinOpIdentity(BO->getOpcode(), X->getType()) == C) {
+    Sel.setOperand(IsEq ? 1 : 2, Z);
+    return &Sel;
+  }
+  return nullptr;
+}
+
 /// This folds:
 ///  select (icmp eq (and X, C1)), TC, FC
 ///    iff C1 is a power 2 and the difference between TC and FC is a power-of-2.
@@ -1961,5 +1991,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   if (Instruction *Select = foldSelectCmpXchg(SI))
     return Select;
 
+  if (Instruction *Select = foldSelectBinOpIdentity(SI))
+    return Select;
+
   return nullptr;
 }
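In source-level terms, the new fold exploits that in the select arm chosen when the equality holds, `%x` is known to equal the identity constant of the bound operation, so the operation is a no-op there. A C-level analogue (not from the commit; the helper names are hypothetical), exhaustively checked for the `eq` case with `add`:

```cpp
#include <cassert>
#include <cstdint>

// Before: the true arm computes x + z, but it is only selected when x == 0,
// the identity constant of 'add'.
static uint8_t before(uint8_t x, uint8_t z, uint8_t y) {
  return x == 0 ? uint8_t(x + z) : y;
}

// After: with x pinned to 0 in that arm, x + z == z.
static uint8_t after(uint8_t x, uint8_t z, uint8_t y) {
  return x == 0 ? z : y;
}

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned z = 0; z < 256; ++z)
      assert(before(uint8_t(x), uint8_t(z), 42) ==
             after(uint8_t(x), uint8_t(z), 42));
  return 0;
}
```

For the `ne` predicate the same reasoning applies to the false arm, which is why the code indexes the operand with `IsEq ? 1 : 2`.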
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 34f8037e519f..1ca75f3989d4 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -570,7 +570,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
                             m_OneUse(m_BinOp(FBO))))) {
       const APInt *C;
       if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal &&
-          match(FBO->getOperand(1), m_APInt(C)) && 
+          match(FBO->getOperand(1), m_APInt(C)) &&
           canShiftBinOpWithConstantRHS(I, FBO, *C)) {
         Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
                                        cast<Constant>(FBO->getOperand(1)), Op1);
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 2560feb37d66..1c2de6352fa5 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -605,7 +605,7 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
     return nullptr;
 
   Value *SplatVal = InsElt.getOperand(1);
-  InsertElementInst *CurrIE = &InsElt;  
+  InsertElementInst *CurrIE = &InsElt;
   SmallVector<bool, 16> ElementPresent(NumElements, false);
   InsertElementInst *FirstIE = nullptr;
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 12fcc8752ea9..cff0d5447290 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1424,7 +1424,7 @@ Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) {
       bool ConstOp1 = isa<Constant>(Inst.getOperand(1));
       if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
         NewC = getSafeVectorConstantForBinop(Inst.getOpcode(), NewC, ConstOp1);
-      
+
       // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
       // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
       Value *NewLHS = isa<Constant>(LHS) ? NewC : V1;
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index b3f659194558..6af44354225c 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -2464,10 +2464,10 @@ bool AddressSanitizer::runOnFunction(Function &F) {
 
   // If needed, insert __asan_init before checking for SanitizeAddress attr.
   // This function needs to be called even if the function body is not
-  // instrumented.  
+  // instrumented.
   if (maybeInsertAsanInitAtFunctionEntry(F))
     FunctionModified = true;
-  
+
   // Leave if the function doesn't need instrumentation.
   if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return FunctionModified;
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index acd27c2e226f..132e8089fe3b 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -148,7 +148,7 @@ public:
   }
 
   StringRef getPassName() const override { return "GCOV Profiler"; }
-  bool runOnModule(Module &M) override { 
+  bool runOnModule(Module &M) override {
     auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
     return Profiler.runOnModule(M, TLI);
   }
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 22076f04d6ad..4d5dfb0aa66b 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -898,7 +898,7 @@ void InstrProfiling::emitRegistration() {
   IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
   for (Value *Data : UsedVars)
-    if (Data != NamesVar)
+    if (Data != NamesVar && !isa<Function>(Data))
       IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
 
   if (NamesVar) {
diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index fa7bcec677f7..0830ff5dd042 100644
--- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -280,7 +280,7 @@ bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I,
     return false;
 
   // Sign extend the offset to 64 bits (so that it is like all of the other
-  // expressions). 
+  // expressions).
   unsigned OffSCEVBits = OffSCEV->getType()->getPrimitiveSizeInBits();
   if (OffSCEVBits < 64)
     OffSCEV = SE->getSignExtendExpr(OffSCEV, Int64Ty);
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index 3a675b979017..55759e8b1661 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -781,7 +781,7 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
   this->TTI = &TTI;
   this->DT = &DT;
   this->BFI = BFI;
-  this->Entry = &Entry;  
+  this->Entry = &Entry;
 
   // Collect all constant candidates.
   collectConstantCandidates(Fn);
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index ea148b728a10..2f2d7f620a29 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -473,7 +473,7 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
     // relatively expensive analysis for constants which are obviously either
    // null or non-null to start with.
     if (Type && !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
-        !isa<Constant>(V) && 
+        !isa<Constant>(V) &&
         LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
                             ConstantPointerNull::get(Type),
                             CS.getInstruction()) == LazyValueInfo::False)
@@ -670,12 +670,12 @@ static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) {
   Value *Op0 = C->getOperand(0);
   Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
   if (!Op1) return nullptr;
-  
+
   LazyValueInfo::Tristate Result =
     LVI->getPredicateAt(C->getPredicate(), Op0, Op1, At);
   if (Result == LazyValueInfo::Unknown)
     return nullptr;
-  
+
   return (Result == LazyValueInfo::True) ?
     ConstantInt::getTrue(C->getContext()) :
     ConstantInt::getFalse(C->getContext());
@@ -747,7 +747,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
       if (auto *C = getConstantAt(RetVal, RI, LVI)) {
         ++NumReturns;
         RI->replaceUsesOfWith(RetVal, C);
-        BBChanged = true;        
+        BBChanged = true;
       }
     }
     }
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index dd1a2a6adb82..9a7405e98e7d 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -188,7 +188,7 @@ static bool hasAnalyzableMemoryWrite(Instruction *I,
 /// returns true, this function and getLocForRead completely describe the memory
 /// operations for this instruction.
 static MemoryLocation getLocForWrite(Instruction *Inst) {
-  
+
   if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
     return MemoryLocation::get(SI);
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 565745d12e99..533d16e088c8 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -384,7 +384,7 @@ public:
                       LoadMapAllocator>;
 
   LoadHTType AvailableLoads;
-  
+
   // A scoped hash table mapping memory locations (represented as typed
   // addresses) to generation numbers at which that memory location became
   // (henceforth indefinitely) invariant.
@@ -844,7 +844,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
     // start a scope in the current generaton which is true for all future
     // generations.  Also, we dont need to consume the last store since the
     // semantics of invariant.start allow us to perform   DSE of the last
-    // store, if there was a store following invariant.start. Consider: 
+    // store, if there was a store following invariant.start. Consider:
     //
     // store 30, i8* p
     // invariant.start(p)
@@ -852,7 +852,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
     // We can DSE the store to 30, since the store 40 to invariant location p
     // causes undefined behaviour.
     if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
-      // If there are any uses, the scope might end.  
+      // If there are any uses, the scope might end.
       if (!Inst->use_empty())
         continue;
       auto *CI = cast<CallInst>(Inst);
diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp
index 28c5940db1e0..8959038de596 100644
--- a/lib/Transforms/Scalar/GVNSink.cpp
+++ b/lib/Transforms/Scalar/GVNSink.cpp
@@ -568,7 +568,7 @@ public:
     ReversePostOrderTraversal<Function*> RPOT(&F);
     for (auto *N : RPOT)
       NumSunk += sinkBB(N);
-    
+
    return NumSunk > 0;
   }
diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp
index ad1598d7b8bf..055fcbc8436f 100644
--- a/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/lib/Transforms/Scalar/GuardWidening.cpp
@@ -43,6 +43,7 @@
 #include <functional>
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/PostDominators.h"
@@ -61,6 +62,8 @@ using namespace llvm;
 
 #define DEBUG_TYPE "guard-widening"
 
+STATISTIC(GuardsEliminated, "Number of eliminated guards");
+
 namespace {
 
 class GuardWideningImpl {
@@ -75,21 +78,33 @@ class GuardWideningImpl {
 
   /// The set of guards whose conditions have been widened into dominating
   /// guards.
-  SmallVector<IntrinsicInst *, 16> EliminatedGuards;
+  SmallVector<Instruction *, 16> EliminatedGuards;
 
   /// The set of guards which have been widened to include conditions to other
   /// guards.
-  DenseSet<IntrinsicInst *> WidenedGuards;
+  DenseSet<Instruction *> WidenedGuards;
 
   /// Try to eliminate guard \p Guard by widening it into an earlier dominating
   /// guard.  \p DFSI is the DFS iterator on the dominator tree that is
   /// currently visiting the block containing \p Guard, and \p GuardsPerBlock
   /// maps BasicBlocks to the set of guards seen in that block.
   bool eliminateGuardViaWidening(
-      IntrinsicInst *Guard, const df_iterator<DomTreeNode *> &DFSI,
-      const DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> &
+      Instruction *Guard, const df_iterator<DomTreeNode *> &DFSI,
+      const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
           GuardsPerBlock);
 
+  // Get the condition from \p GuardInst.
+  Value *getGuardCondition(Instruction *GuardInst);
+
+  // Set the condition for \p GuardInst.
+  void setGuardCondition(Instruction *GuardInst, Value *NewCond);
+
+  // Whether or not the particular instruction is a guard.
+  bool isGuard(const Instruction *I);
+
+  // Eliminates the guard instruction properly.
+  void eliminateGuard(Instruction *GuardInst);
+
   /// Used to keep track of which widening potential is more effective.
   enum WideningScore {
     /// Don't widen.
@@ -113,9 +128,9 @@ class GuardWideningImpl {
   /// Compute the score for widening the condition in \p DominatedGuard
   /// (contained in \p DominatedGuardLoop) into \p DominatingGuard (contained in
   /// \p DominatingGuardLoop).
-  WideningScore computeWideningScore(IntrinsicInst *DominatedGuard,
+  WideningScore computeWideningScore(Instruction *DominatedGuard,
                                      Loop *DominatedGuardLoop,
-                                     IntrinsicInst *DominatingGuard,
+                                     Instruction *DominatingGuard,
                                      Loop *DominatingGuardLoop);
 
   /// Helper to check if \p V can be hoisted to \p InsertPos.
@@ -206,10 +221,10 @@
 
   /// Widen \p ToWiden to fail if \p NewCondition is false (in addition to
   /// whatever it is already checking).
-  void widenGuard(IntrinsicInst *ToWiden, Value *NewCondition) {
+  void widenGuard(Instruction *ToWiden, Value *NewCondition) {
     Value *Result;
-    widenCondCommon(ToWiden->getArgOperand(0), NewCondition, ToWiden, Result);
-    ToWiden->setArgOperand(0, Result);
+    widenCondCommon(ToWiden->getOperand(0), NewCondition, ToWiden, Result);
+    setGuardCondition(ToWiden, Result);
   }
 
 public:
@@ -225,9 +240,7 @@ public:
 }
 
 bool GuardWideningImpl::run() {
-  using namespace llvm::PatternMatch;
-
-  DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> GuardsInBlock;
+  DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
   bool Changed = false;
 
   for (auto DFI = df_begin(Root), DFE = df_end(Root);
@@ -239,8 +252,8 @@ bool GuardWideningImpl::run() {
     auto &CurrentList = GuardsInBlock[BB];
 
     for (auto &I : *BB)
-      if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()))
-        CurrentList.push_back(cast<IntrinsicInst>(&I));
+      if (isGuard(&I))
+        CurrentList.push_back(cast<Instruction>(&I));
 
     for (auto *II : CurrentList)
       Changed |= eliminateGuardViaWidening(II, DFI, GuardsInBlock);
@@ -249,16 +262,16 @@ bool GuardWideningImpl::run() {
   assert(EliminatedGuards.empty() || Changed);
   for (auto *II : EliminatedGuards)
     if (!WidenedGuards.count(II))
-      II->eraseFromParent();
+      eliminateGuard(II);
 
   return Changed;
 }
 
 bool GuardWideningImpl::eliminateGuardViaWidening(
-    IntrinsicInst *GuardInst, const df_iterator<DomTreeNode *> &DFSI,
-    const DenseMap<BasicBlock *, SmallVector<IntrinsicInst *, 8>> &
+    Instruction *GuardInst, const df_iterator<DomTreeNode *> &DFSI,
+    const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
         GuardsInBlock) {
-  IntrinsicInst *BestSoFar = nullptr;
+  Instruction *BestSoFar = nullptr;
   auto BestScoreSoFar = WS_IllegalOrNegative;
   auto *GuardInstLoop = LI.getLoopFor(GuardInst->getParent());
 
@@ -302,8 +315,8 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
     for (auto *Candidate : make_range(I, E)) {
       auto Score =
           computeWideningScore(GuardInst, GuardInstLoop, Candidate, CurLoop);
-      LLVM_DEBUG(dbgs() << "Score between " << *GuardInst->getArgOperand(0)
-                        << " and " << *Candidate->getArgOperand(0) << " is "
+      LLVM_DEBUG(dbgs() << "Score between " << *getGuardCondition(GuardInst)
+                        << " and " << *getGuardCondition(Candidate) << " is "
                         << scoreTypeToString(Score) << "\n");
       if (Score > BestScoreSoFar) {
         BestScoreSoFar = Score;
@@ -323,16 +336,41 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
   LLVM_DEBUG(dbgs() << "Widening " << *GuardInst << " into " << *BestSoFar
                     << " with score " << scoreTypeToString(BestScoreSoFar)
                     << "\n");
-  widenGuard(BestSoFar, GuardInst->getArgOperand(0));
-  GuardInst->setArgOperand(0, ConstantInt::getTrue(GuardInst->getContext()));
+  widenGuard(BestSoFar, getGuardCondition(GuardInst));
+  setGuardCondition(GuardInst, ConstantInt::getTrue(GuardInst->getContext()));
   EliminatedGuards.push_back(GuardInst);
   WidenedGuards.insert(BestSoFar);
   return true;
 }
 
+Value *GuardWideningImpl::getGuardCondition(Instruction *GuardInst) {
+  IntrinsicInst *GI = cast<IntrinsicInst>(GuardInst);
+  assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
+         "Bad guard intrinsic?");
+  return GI->getArgOperand(0);
+}
+
+void GuardWideningImpl::setGuardCondition(Instruction *GuardInst,
+                                          Value *NewCond) {
+  IntrinsicInst *GI = cast<IntrinsicInst>(GuardInst);
+  assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
+         "Bad guard intrinsic?");
+  GI->setArgOperand(0, NewCond);
+}
+
+bool GuardWideningImpl::isGuard(const Instruction* I) {
+  using namespace llvm::PatternMatch;
+  return match(I, m_Intrinsic<Intrinsic::experimental_guard>());
+}
+
+void GuardWideningImpl::eliminateGuard(Instruction *GuardInst) {
+  GuardInst->eraseFromParent();
+  ++GuardsEliminated;
+}
+
 GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
-    IntrinsicInst *DominatedGuard, Loop *DominatedGuardLoop,
-    IntrinsicInst *DominatingGuard, Loop *DominatingGuardLoop) {
+    Instruction *DominatedGuard, Loop *DominatedGuardLoop,
+    Instruction *DominatingGuard, Loop *DominatingGuardLoop) {
   bool HoistingOutOfLoop = false;
 
   if (DominatingGuardLoop != DominatedGuardLoop) {
@@ -345,7 +383,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
     HoistingOutOfLoop = true;
   }
 
-  if (!isAvailableAt(DominatedGuard->getArgOperand(0), DominatingGuard))
+  if (!isAvailableAt(getGuardCondition(DominatedGuard), DominatingGuard))
     return WS_IllegalOrNegative;
 
   // If the guard was conditional executed, it may never be reached
@@ -355,9 +393,9 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
   // case.  At the moment, we really only consider the second in our heuristic
   // here.  TODO: evaluate cost model for spurious deopt
   // NOTE: As written, this also lets us hoist right over another guard which
-  // is essentially just another spelling for control flow.  
-  if (isWideningCondProfitable(DominatedGuard->getArgOperand(0),
-                               DominatingGuard->getArgOperand(0)))
+  // is essentially just another spelling for control flow.
+  if (isWideningCondProfitable(getGuardCondition(DominatedGuard),
+                               getGuardCondition(DominatingGuard)))
     return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;
 
   if (HoistingOutOfLoop)
@@ -369,7 +407,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
   auto MaybeHoistingOutOfIf = [&]() {
     auto *DominatingBlock = DominatingGuard->getParent();
     auto *DominatedBlock = DominatedGuard->getParent();
-    
+
     // Same Block?
     if (DominatedBlock == DominatingBlock)
       return false;
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index e2f29705f2dd..c5ed6d5c1b87 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -735,7 +735,7 @@ static bool isSafeDecreasingBound(const SCEV *Start,
   assert(LatchBrExitIdx == 0 &&
          "LatchBrExitIdx should be either 0 or 1");
-            
+
   const SCEV *StepPlusOne = SE.getAddExpr(Step, SE.getOne(Step->getType()));
   unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
   APInt Min = IsSigned ? APInt::getSignedMinValue(BitWidth) :
@@ -786,7 +786,7 @@ static bool isSafeIncreasingBound(const SCEV *Start,
   const SCEV *StepMinusOne =
     SE.getMinusSCEV(Step, SE.getOne(Step->getType()));
   unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
-  APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) : 
+  APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) :
     APInt::getMaxValue(BitWidth);
   const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Max), StepMinusOne);
 
@@ -798,7 +798,7 @@ static bool isSafeIncreasingBound(const SCEV *Start,
 static bool CannotBeMinInLoop(const SCEV *BoundSCEV, Loop *L,
                               ScalarEvolution &SE, bool Signed) {
   unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth();
-  APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) : 
+  APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) :
     APInt::getMinValue(BitWidth);
   auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
   return SE.isAvailableAtLoopEntry(BoundSCEV, L) &&
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index ff66632f0391..c4ea43a43249 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -455,7 +455,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
 
     // Keep track of whether the prefix of instructions visited so far are such
     // that the next instruction visited is guaranteed to execute if the loop
-    // is entered.  
+    // is entered.
     bool IsMustExecute = CurLoop->getHeader() == BB;
 
     for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
@@ -1186,9 +1186,9 @@ bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) {
   if (isa<AllocaInst>(Object))
     // Since the alloca goes out of scope, we know the caller can't retain a
     // reference to it and be well defined.  Thus, we don't need to check for
-    // capture. 
+    // capture.
     return true;
-  
+
   // For all other objects we need to know that the caller can't possibly
   // have gotten a reference to the object.  There are two components of
   // that:
@@ -1282,7 +1282,7 @@ bool llvm::promoteLoopAccessesToScalars(
     // That said, we can't actually make the unwind edge explicit. Therefore,
     // we have to prove that the store is dead along the unwind edge.  We do
     // this by proving that the caller can't have a reference to the object
-    // after return and thus can't possibly load from the object.  
+    // after return and thus can't possibly load from the object.
     Value *Object = GetUnderlyingObject(SomePtr, MDL);
     if (!isKnownNonEscaping(Object, TLI))
       return false;
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index d8692198f7a3..653948717fb9 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1573,7 +1573,7 @@ void LoopIdiomRecognize::transformLoopToCountable(
       InitXNext =
           Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1));
     else
-      llvm_unreachable("Unexpected opcode!");      
+      llvm_unreachable("Unexpected opcode!");
   } else
     InitXNext = InitX;
   CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck);
diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp
index 561ceea1d880..cbb6594cf8f4 100644
--- a/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/lib/Transforms/Scalar/LoopPredication.cpp
@@ -74,7 +74,7 @@
 //   }
 //
 // One solution for M is M = forall X . (G(X) && B(X)) => G(X + Step)
-// 
+//
 // Informal proof that the transformation above is correct:
 //
 //   By the definition of guards we can rewrite the guard condition to:
@@ -83,7 +83,7 @@
 //   Let's prove that for each iteration of the loop:
 //     G(0) && M => G(I)
 //   And the condition above can be simplified to G(Start) && M.
-// 
+//
 //   Induction base.
 //     G(0) && M => G(0)
 //
@@ -379,7 +379,7 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander,
                                     ICmpInst::Predicate Pred, const SCEV *LHS,
                                     const SCEV *RHS, Instruction *InsertAt) {
   // TODO: we can check isLoopEntryGuardedByCond before emitting the check
- 
+
   Type *Ty = LHS->getType();
   assert(Ty == RHS->getType() && "expandCheck operands have different types?");
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 634215c9770f..e955821effa0 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -888,7 +888,7 @@ bool llvm::computeUnrollCount(
     UP.Count = 0;
     return false;
   }
-  
+
   // Check if the runtime trip count is too small when profile is available.
   if (L->getHeader()->getParent()->hasProfileData()) {
     if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
@@ -897,7 +897,7 @@ bool llvm::computeUnrollCount(
       else
         UP.AllowExpensiveTripCount = true;
     }
-  }  
+  }
 
   // Reduce count based on the type of unrolling and the threshold values.
   UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index b12586758925..6aad077ff19e 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -708,7 +708,7 @@ bool LoopUnswitch::processCurrentLoop() {
       // Unswitch only those branches that are reachable.
      if (isUnreachableDueToPreviousUnswitching(*I))
         continue;
- 
+
       // If this isn't branching on an invariant condition, we can't unswitch
       // it.
       if (BI->isConditional()) {
@@ -754,7 +754,7 @@ bool LoopUnswitch::processCurrentLoop() {
           // We are unswitching ~0 out.
           UnswitchVal = AllOne;
         } else {
-          assert(OpChain == OC_OpChainNone && 
+          assert(OpChain == OC_OpChainNone &&
                  "Expect to unswitch on trivial chain");
           // Do not process same value again and again.
           // At this point we have some cases already unswitched and
@@ -1440,11 +1440,11 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
         // This in-loop instruction has been simplified w.r.t. its context,
         // i.e. LIC != Val, make sure we propagate its replacement value to
         // all its users.
-        //  
+        //
         // We can not yet delete UI, the LIC user, yet, because that would invalidate
         // the LIC->users() iterator !. However, we can make this instruction
         // dead by replacing all its users and push it onto the worklist so that
-        // it can be properly deleted and its operands simplified. 
+        // it can be properly deleted and its operands simplified.
         UI->replaceAllUsesWith(Replacement);
       }
     }
@@ -1609,7 +1609,7 @@ Value *LoopUnswitch::SimplifyInstructionWithNotEqual(Instruction *Inst,
       LLVMContext &Ctx = Inst->getContext();
       if (CI->getPredicate() == CmpInst::ICMP_EQ)
         return ConstantInt::getFalse(Ctx);
-      else 
+      else
         return ConstantInt::getTrue(Ctx); 
     }
   }
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 2eb887c986be..3e47e9441d15 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -2007,7 +2007,7 @@ NewGVN::performSymbolicEvaluation(Value *V,
     case Instruction::Load:
       E = performSymbolicLoadEvaluation(I);
       break;
-    case Instruction::BitCast: 
+    case Instruction::BitCast:
       E = createExpression(I);
       break;
     case Instruction::ICmp:
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index c81ac70d99e6..1df0a9c49fb1 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -1179,7 +1179,7 @@ static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
 // and both "Res" and "ConstOpnd" remain unchanged.
 bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
                                      APInt &ConstOpnd, Value *&Res) {
-  // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2 
+  // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
   //                       = ((x | c1) ^ c1) ^ (c1 ^ c2)
   //                       = (x & ~c1) ^ (c1 ^ c2)
   // It is useful only when c1 == c2.
@@ -1202,12 +1202,12 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
     RedoInsts.insert(T);
   return true;
 }
-                           
+
 // Helper function of OptimizeXor(). It tries to simplify
 // "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
-// symbolic value. 
-// 
-// If it was successful, true is returned, and the "R" and "C" is returned 
+// symbolic value.
+//
+// If it was successful, true is returned, and the "R" and "C" is returned
 // via "Res" and "ConstOpnd", respectively (If the entire expression is
 // evaluated to a constant, the Res is set to NULL); otherwise, false is
 // returned, and both "Res" and "ConstOpnd" remain unchanged.
@@ -1254,7 +1254,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
     const APInt &C1 = Opnd1->getConstPart();
     const APInt &C2 = Opnd2->getConstPart();
     APInt C3 = C1 ^ C2;
-    
+
     // Do not increase code size
     if (!C3.isNullValue() && !C3.isAllOnesValue()) {
       int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
@@ -1290,7 +1290,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
                                     SmallVectorImpl<ValueEntry> &Ops) {
   if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops))
     return V;
-      
+
   if (Ops.size() == 1)
     return nullptr;
 
@@ -1365,7 +1365,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
     }
 
     // step 3.2: When previous and current operands share the same symbolic
-    //  value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd" 
+    //  value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
     if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
       // Remove previous operand
       PrevOpnd->Invalidate();
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 391e43f79121..0de2bc72b522 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -401,7 +401,7 @@ namespace {
 /// defining value.  The 'base defining value' for 'Def' is the transitive
 /// closure of this relation stopping at the first instruction which has no
 /// immediate base defining value.  The b.d.v. might itself be a base pointer,
-/// but it can also be an arbitrary derived pointer. 
+/// but it can also be an arbitrary derived pointer.
 struct BaseDefiningValueResult {
   /// Contains the value which is the base defining value.
   Value * const BDV;
@@ -427,13 +427,13 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I);
 
 /// Return a base defining value for the 'Index' element of the given vector
 /// instruction 'I'.  If Index is null, returns a BDV for the entire vector
-/// 'I'.  As an optimization, this method will try to determine when the 
+/// 'I'.  As an optimization, this method will try to determine when the
 /// element is known to already be a base pointer.  If this can be established,
 /// the second value in the returned pair will be true.  Note that either a
 /// vector or a pointer typed value can be returned.  For the former, the
 /// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
 /// If the later, the return pointer is a BDV (or possibly a base) for the
-/// particular element in 'I'.  
+/// particular element in 'I'.
 static BaseDefiningValueResult
 findBaseDefiningValueOfVector(Value *I) {
   // Each case parallels findBaseDefiningValue below, see that code for
@@ -444,7 +444,7 @@ findBaseDefiningValueOfVector(Value *I) {
     return BaseDefiningValueResult(I, true);
 
   if (isa<Constant>(I))
-    // Base of constant vector consists only of constant null pointers. 
+    // Base of constant vector consists only of constant null pointers.
     // For reasoning see similar case inside 'findBaseDefiningValue' function.
     return BaseDefiningValueResult(ConstantAggregateZero::get(I->getType()),
                                    true);
@@ -508,11 +508,11 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
   if (isa<Constant>(I)) {
     // We assume that objects with a constant base (e.g. a global) can't move
     // and don't need to be reported to the collector because they are always
-    // live. Besides global references, all kinds of constants (e.g. undef, 
+    // live. Besides global references, all kinds of constants (e.g. undef,
     // constant expressions, null pointers) can be introduced by the inliner or
     // the optimizer, especially on dynamically dead paths.
     // Here we treat all of them as having single null base. By doing this we
-    // trying to avoid problems reporting various conflicts in a form of 
+    // trying to avoid problems reporting various conflicts in a form of
     // "phi (const1, const2)" or "phi (const, regular gc ptr)".
     // See constant.ll file for relevant test cases.
 
@@ -1285,14 +1285,14 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
     return Index;
   };
   Module *M = StatepointToken->getModule();
-  
+
   // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose
   // element type is i8 addrspace(1)*). We originally generated unique
   // declarations for each pointer type, but this proved problematic because
   // the intrinsic mangling code is incomplete and fragile.  Since we're moving
   // towards a single unified pointer type anyways, we can just cast everything
   // to an i8* of the right address space.  A bitcast is added later to convert
-  // gc_relocate to the actual value's type.  
+  // gc_relocate to the actual value's type.
   auto getGCRelocateDecl = [&] (Type *Ty) {
     assert(isHandledGCPointerType(Ty));
     auto AS = Ty->getScalarType()->getPointerAddressSpace();
@@ -1413,7 +1413,7 @@ static StringRef getDeoptLowering(CallSite CS) {
   }
   return "live-through";
 }
-    
+
 static void
 makeStatepointExplicitImpl(const CallSite CS, /* to replace */
                            const SmallVectorImpl<Value *> &BasePtrs,
@@ -2570,7 +2570,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
     }
 
   // Before we start introducing relocations, we want to tweak the IR a bit to
-  // avoid unfortunate code generation effects.  The main example is that we 
+  // avoid unfortunate code generation effects.  The main example is that we
   // want to try to make sure the comparison feeding a branch is after any
   // safepoints.  Otherwise, we end up with a comparison of pre-relocation
   // values feeding a branch after relocation.  This is semantically correct,
@@ -2593,7 +2593,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
     TerminatorInst *TI = BB.getTerminator();
     if (auto *Cond = getConditionInst(TI))
       // TODO: Handle more than just ICmps here.  We should be able to move
-      // most instructions without side effects or memory access.  
+      // most instructions without side effects or memory access.
       if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
         MadeChange = true;
         Cond->moveBefore(TI);
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 6c3f012c6280..de16b608f752 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -3730,7 +3730,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
                          PartPtrTy, BasePtr->getName() + "."),
           getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
           LI->getName());
-      PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); 
+      PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
 
       // Append this load onto the list of split loads so we can find it later
      // to rewrite the stores.
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 34510cb40732..5834b619046b 100644
--- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -459,9 +459,11 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
                                               *ParentBB, *OldPH, FullUnswitch);
 
   // Now we need to update the dominator tree.
-  DT.insertEdge(OldPH, UnswitchedBB);
+  SmallVector<DominatorTree::UpdateType, 2> DTUpdates;
+  DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB});
   if (FullUnswitch)
-    DT.deleteEdge(ParentBB, UnswitchedBB);
+    DTUpdates.push_back({DT.Delete, ParentBB, LoopExitBB});
+  DT.applyUpdates(DTUpdates);
 
   // The constant we can replace all of our invariants with inside the loop
   // body. If any of the invariants have a value other than this the loop won't
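The SimpleLoopUnswitch hunk above moves from eager per-edge dominator-tree maintenance (insertEdge/deleteEdge) to collecting the CFG edge changes and handing them to DT.applyUpdates() in one batch, after the IR has been mutated. A minimal standalone sketch of that API against the LLVM 7-era headers (the tiny function built here is hypothetical; compile and link against LLVM to run):

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  // Hypothetical function: entry initially branches to %old.
  Function *F = Function::Create(
      FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false),
      Function::ExternalLinkage, "f", &M);
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  BasicBlock *Old = BasicBlock::Create(Ctx, "old", F);
  BasicBlock *New = BasicBlock::Create(Ctx, "new", F);
  IRBuilder<> B(Entry);
  B.CreateBr(Old);
  B.SetInsertPoint(Old);
  B.CreateRetVoid();
  B.SetInsertPoint(New);
  B.CreateRetVoid();

  DominatorTree DT(*F);

  // Mutate the CFG first (entry now branches to %new), then report both
  // edge changes to the dominator tree in one batch, as the pass does.
  Entry->getTerminator()->setSuccessor(0, New);
  SmallVector<DominatorTree::UpdateType, 2> DTUpdates;
  DTUpdates.push_back({DT.Insert, Entry, New});
  DTUpdates.push_back({DT.Delete, Entry, Old});
  DT.applyUpdates(DTUpdates);
  return DT.verify() ? 0 : 1;
}
```

Batching lets the incremental updater see the final CFG once instead of rebuilding intermediate states, and it sidesteps ordering hazards between dependent insertions and deletions.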
BasicBlock *NewBB;    BBEntry = NewBB = BasicBlock::Create(BB->getContext()); @@ -363,7 +363,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,          hasDynamicAllocas = true;      }    } -   +    // Finally, clone over the terminator.    const TerminatorInst *OldTI = BB->getTerminator();    bool TerminatorDone = false; @@ -400,7 +400,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,        TerminatorDone = true;      }    } -   +    if (!TerminatorDone) {      Instruction *NewInst = OldTI->clone();      if (OldTI->hasName()) @@ -418,11 +418,11 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,      for (const BasicBlock *Succ : TI->successors())        ToClone.push_back(Succ);    } -   +    if (CodeInfo) {      CodeInfo->ContainsCalls          |= hasCalls;      CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; -    CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&  +    CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&        BB != &BB->getParent()->front();    }  } @@ -468,7 +468,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,      CloneWorklist.pop_back();      PFC.CloneBlock(BB, BB->begin(), CloneWorklist);    } -   +    // Loop over all of the basic blocks in the old function.  If the block was    // reachable, we have cloned it and the old block is now in the value map:    // insert it into the new function in the right order.  If not, ignore it. @@ -500,7 +500,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,                       TypeMapper, Materializer);    } -   +    // Defer PHI resolution until rest of function is resolved, PHI resolution    // requires the CFG to be up-to-date.    for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { @@ -519,7 +519,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,          Value *V = VMap.lookup(PN->getIncomingBlock(pred));          if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {            Value *InVal = MapValue(PN->getIncomingValue(pred), -                                  VMap,  +                                  VMap,                          ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);            assert(InVal && "Unknown input value?");            PN->setIncomingValue(pred, InVal); @@ -529,9 +529,9 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,            --pred;  // Revisit the next entry.            --e;          } -      }  +      }      } -     +      // The loop above has removed PHI entries for those blocks that are dead      // and has updated others.  However, if a block is live (i.e. copied over)      // but its terminator has been changed to not go to this block, then our @@ -546,11 +546,11 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,        for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);             PI != E; ++PI)          --PredCount[*PI]; -       +        // Figure out how many entries to remove from each PHI.        for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)          ++PredCount[PN->getIncomingBlock(i)]; -       +        // At this point, the excess predecessor entries are positive in the        // map.  Loop over all of the PHIs and remove excess predecessor        // entries. 
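The predecessor-count bookkeeping above is compact, so a tiny worked example may help (hypothetical block NewBB with two CFG predecessors P1 and P2, but three PHI entries):

    // PredCount starts at zero for every block.
    // 1) Subtract one per real CFG edge into NewBB:
    //      PredCount[P1] = -1, PredCount[P2] = -1
    // 2) Add one per incoming entry of the first PHI (entries P1, P2, P2):
    //      PredCount[P1] = 0,  PredCount[P2] = +1
    // Blocks left positive have that many excess entries; here one stale
    // P2 entry must be removed from every PHI in NewBB.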
@@ -563,7 +563,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,          }        }      } -     +      // If the loops above have made these phi nodes have 0 or 1 operand,      // replace them with undef or the input value.  We must do this for      // correctness, because 0-operand phis are not valid. @@ -655,7 +655,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,      BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());      if (!BI || BI->isConditional()) { ++I; continue; } -     +      BasicBlock *Dest = BI->getSuccessor(0);      if (!Dest->getSinglePredecessor()) {        ++I; continue; @@ -668,16 +668,16 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,      // We know all single-entry PHI nodes in the inlined function have been      // removed, so we just need to splice the blocks.      BI->eraseFromParent(); -     +      // Make all PHI nodes that referred to Dest now refer to I as their source.      Dest->replaceAllUsesWith(&*I);      // Move all the instructions in the succ to the pred.      I->getInstList().splice(I->end(), Dest->getInstList()); -     +      // Remove the dest block.      Dest->eraseFromParent(); -     +      // Do not increment I, iteratively merge all things this block branches to.    } @@ -703,7 +703,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,                                       ValueToValueMapTy &VMap,                                       bool ModuleLevelChanges,                                       SmallVectorImpl<ReturnInst*> &Returns, -                                     const char *NameSuffix,  +                                     const char *NameSuffix,                                       ClonedCodeInfo *CodeInfo,                                       Instruction *TheCall) {    CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap, @@ -730,7 +730,7 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,                                     const Twine &NameSuffix, LoopInfo *LI,                                     DominatorTree *DT,                                     SmallVectorImpl<BasicBlock *> &Blocks) { -  assert(OrigLoop->getSubLoops().empty() &&  +  assert(OrigLoop->getSubLoops().empty() &&           "Loop to be cloned cannot have inner loop");    Function *F = OrigLoop->getHeader()->getParent();    Loop *ParentLoop = OrigLoop->getParentLoop(); diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 35c7511a24b9..c7d68bab8170 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -61,7 +61,7 @@ std::unique_ptr<Module> llvm::CloneModule(    //    for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();         I != E; ++I) { -    GlobalVariable *GV = new GlobalVariable(*New,  +    GlobalVariable *GV = new GlobalVariable(*New,                                              I->getValueType(),                                              I->isConstant(), I->getLinkage(),                                              (Constant*) nullptr, I->getName(), @@ -110,7 +110,7 @@ std::unique_ptr<Module> llvm::CloneModule(      GA->copyAttributesFrom(&*I);      VMap[&*I] = GA;    } -   +    // Now that all of the things that global variable initializer can refer to    // have been created, loop through and copy the global variable referrers    // over...  
We also set the attributes on the global now. diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index f31dab9f96af..cb349e34606c 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -1020,7 +1020,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,      } else {        // Otherwise we must have code extracted an unwind or something, just        // return whatever we want. -      ReturnInst::Create(Context,  +      ReturnInst::Create(Context,                           Constant::getNullValue(OldFnRetTy), TheSwitch);      } @@ -1158,13 +1158,13 @@ Function *CodeExtractor::extractCodeRegion() {    splitReturnBlocks();    // This takes place of the original loop -  BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),  +  BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),                                                  "codeRepl", oldFunction,                                                  header);    // The new function needs a root node because other nodes can branch to the    // head of the region, but the entry node of a function cannot have preds. -  BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),  +  BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),                                                 "newFuncRoot");    auto *BranchI = BranchInst::Create(header);    // If the original function has debug info, we have to add a debug location diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 0315aac1cf84..ddc6e07e2f59 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -1199,7 +1199,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,      // Only copy the edge if the call was inlined!      if (VMI == VMap.end() || VMI->second == nullptr)        continue; -     +      // If the call was inlined, but then constant folded, there is no edge to      // add.  Check for this case.      Instruction *NewCall = dyn_cast<Instruction>(VMI->second); @@ -1211,7 +1211,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,      CallSite CS = CallSite(NewCall);      if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic())        continue; -     +      // Remember that this call site got inlined for the client of      // InlineFunction.      IFI.InlinedCalls.push_back(NewCall); @@ -1231,7 +1231,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,      CallerNode->addCalledFunction(CallSite(NewCall), I->second);    } -   +    // Update the call graph by deleting the edge from Callee to Caller.  We must    // do this after the loop above in case Caller and Callee are the same.    CallerNode->removeCallEdgeFor(CS); @@ -1380,7 +1380,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,        if (CalleeHasDebugInfo)          continue; -       +        // If the inlined instruction has no line number, make it look as if it        // originates from the call location. This is important for        // ((__always_inline__, __nodebug__)) functions which must use caller @@ -1777,7 +1777,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,           E = FirstNewBlock->end(); I != E; ) {        AllocaInst *AI = dyn_cast<AllocaInst>(I++);        if (!AI) continue; -       +        // If the alloca is now dead, remove it.  This often occurs due to code        // specialization. 
       if (AI->use_empty()) { @@ -1787,10 +1787,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,        if (!allocaWouldBeStaticInEntry(AI))          continue; -       +        // Keep track of the static allocas that we inline into the caller.        IFI.StaticAllocas.push_back(AI); -       +        // Scan for the block of allocas that we can move over, and move them        // all at once.        while (isa<AllocaInst>(I) && diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp index 3fbb3487884b..4a359b99bebd 100644 --- a/lib/Transforms/Utils/IntegerDivision.cpp +++ b/lib/Transforms/Utils/IntegerDivision.cpp @@ -476,10 +476,10 @@ bool llvm::expandDivision(BinaryOperator *Div) {    return true;  } -/// Generate code to compute the remainder of two integers of bitwidth up to  +/// Generate code to compute the remainder of two integers of bitwidth up to  /// 32 bits. Uses the above routines and extends the inputs/truncates the  /// outputs to operate in 32 bits; that is, these routines are good for targets -/// that have no or very little suppport for smaller than 32 bit integer  +/// that have no or very little suppport for smaller than 32 bit integer  /// arithmetic.  ///  /// Replace Rem with emulation code. @@ -527,7 +527,7 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {    return expandRemainder(cast<BinaryOperator>(ExtRem));  } -/// Generate code to compute the remainder of two integers of bitwidth up to  +/// Generate code to compute the remainder of two integers of bitwidth up to  /// 64 bits. Uses the above routines and extends the inputs/truncates the  /// outputs to operate in 64 bits.  /// @@ -613,7 +613,7 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {    } else {      ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty);      ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty); -    ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);   +    ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);    }    Trunc = Builder.CreateTrunc(ExtDiv, DivTy); @@ -662,7 +662,7 @@ bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) {    } else {      ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int64Ty);      ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int64Ty); -    ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);   +    ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);    }    Trunc = Builder.CreateTrunc(ExtDiv, DivTy); diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index 956d0387c7a8..a1f8e7484bcf 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -10,7 +10,7 @@  // This pass transforms loops by placing phi nodes at the end of the loops for  // all values that are live across the loop boundary.  For example, it turns  // the left into the right code: -//  +//  // for (...)                for (...)  //   if (c)                   if (c)  //     X1 = ...                 X1 = ... @@ -21,8 +21,8 @@  //                          ... = X4 + 4  //  // This is still valid LLVM; the extra phi nodes are purely redundant, and will -// be trivially eliminated by InstCombine.  The major benefit of this  -// transformation is that it makes many other loop optimizations, such as  +// be trivially eliminated by InstCombine.  The major benefit of this +// transformation is that it makes many other loop optimizations, such as  // LoopUnswitching, simpler.  
//  //===----------------------------------------------------------------------===// @@ -144,7 +144,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,        PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),                                      I->getName() + ".lcssa", &ExitBB->front()); - +      // Get the debug location from the original instruction. +      PN->setDebugLoc(I->getDebugLoc());        // Add inputs from inside the loop for this PHI.        for (BasicBlock *Pred : PredCache.get(ExitBB)) {          PN->addIncoming(I, Pred); diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index 13794c53f24b..78afe748e596 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -344,7 +344,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,  /// Update the branch weights of the latch of a peeled-off loop  /// iteration.  /// This sets the branch weights for the latch of the recently peeled off loop -/// iteration correctly.  +/// iteration correctly.  /// Our goal is to make sure that:  /// a) The total weight of all the copies of the loop body is preserved.  /// b) The total weight of the loop exit is preserved. @@ -544,7 +544,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,    //    // Each following iteration will split the current bottom anchor in two,    // and put the new copy of the loop body between these two blocks. That is, -  // after peeling another iteration from the example above, we'll split  +  // after peeling another iteration from the example above, we'll split    // InsertBot, and get:    //    // InsertTop: diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp index 323f2552ca80..88d595ee02ab 100644 --- a/lib/Transforms/Utils/MetaRenamer.cpp +++ b/lib/Transforms/Utils/MetaRenamer.cpp @@ -68,7 +68,7 @@ namespace {      PRNG prng;    }; -   +    struct MetaRenamer : public ModulePass {      // Pass identification, replacement for typeid      static char ID; diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index ca184ed7c4e3..4a1fd8d571aa 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -201,13 +201,13 @@ void SSAUpdater::RewriteUse(Use &U) {  void SSAUpdater::RewriteUseAfterInsertions(Use &U) {    Instruction *User = cast<Instruction>(U.getUser()); -   +    Value *V;    if (PHINode *UserPN = dyn_cast<PHINode>(User))      V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));    else      V = GetValueAtEndOfBlock(User->getParent()); -   +    U.set(V);  } @@ -235,7 +235,7 @@ public:      PHI_iterator(PHINode *P, bool) // end iterator        : PHI(P), idx(PHI->getNumIncomingValues()) {} -    PHI_iterator &operator++() { ++idx; return *this; }  +    PHI_iterator &operator++() { ++idx; return *this; }      bool operator==(const PHI_iterator& x) const { return idx == x.idx; }      bool operator!=(const PHI_iterator& x) const { return !operator==(x); } @@ -333,7 +333,7 @@ LoadAndStorePromoter::  LoadAndStorePromoter(ArrayRef<const Instruction *> Insts,                       SSAUpdater &S, StringRef BaseName) : SSA(S) {    if (Insts.empty()) return; -   +    const Value *SomeVal;    if (const LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))      SomeVal = LI; @@ -354,7 +354,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {    for (Instruction *User : Insts)      
UsesByBlock[User->getParent()].push_back(User); -   +    // Okay, now we can iterate over all the blocks in the function with uses,    // processing them.  Keep track of which loads are loading a live-in value.    // Walk the uses in the use-list order to be determinstic. @@ -364,10 +364,10 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {    for (Instruction *User : Insts) {      BasicBlock *BB = User->getParent();      TinyPtrVector<Instruction *> &BlockUses = UsesByBlock[BB]; -     +      // If this block has already been processed, ignore this repeat use.      if (BlockUses.empty()) continue; -     +      // Okay, this is the first use in the block.  If this block just has a      // single user in it, we can rewrite it trivially.      if (BlockUses.size() == 1) { @@ -375,13 +375,13 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {        if (StoreInst *SI = dyn_cast<StoreInst>(User)) {          updateDebugInfo(SI);          SSA.AddAvailableValue(BB, SI->getOperand(0)); -      } else  +      } else          // Otherwise it is a load, queue it to rewrite as a live-in load.          LiveInLoads.push_back(cast<LoadInst>(User));        BlockUses.clear();        continue;      } -     +      // Otherwise, check to see if this block is all loads.      bool HasStore = false;      for (Instruction *I : BlockUses) { @@ -390,7 +390,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {          break;        }      } -     +      // If so, we can queue them all as live in loads.  We don't have an      // efficient way to tell which on is first in the block and don't want to      // scan large blocks, so just add all loads as live ins. @@ -400,7 +400,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {        BlockUses.clear();        continue;      } -     +      // Otherwise, we have mixed loads and stores (or just a bunch of stores).      // Since SSAUpdater is purely for cross-block values, we need to determine      // the order of these instructions in the block.  If the first use in the @@ -411,7 +411,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {        if (LoadInst *L = dyn_cast<LoadInst>(&I)) {          // If this is a load from an unrelated pointer, ignore it.          if (!isInstInList(L, Insts)) continue; -         +          // If we haven't seen a store yet, this is a live in use, otherwise          // use the stored value.          if (StoredValue) { @@ -433,13 +433,13 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {          StoredValue = SI->getOperand(0);        }      } -     +      // The last stored value that happened is the live-out for the block.      assert(StoredValue && "Already checked that there is a store in block");      SSA.AddAvailableValue(BB, StoredValue);      BlockUses.clear();    } -   +    // Okay, now we rewrite all loads that use live-in values in the loop,    // inserting PHI nodes as necessary.    for (LoadInst *ALoad : LiveInLoads) { @@ -451,10 +451,10 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {      ALoad->replaceAllUsesWith(NewVal);      ReplacedLoads[ALoad] = NewVal;    } -   +    // Allow the client to do stuff before we start nuking things.    doExtraRewritesBeforeFinalDeletion(); -   +    // Now that everything is rewritten, delete the old instructions from the    // function.  They should all be dead now.    
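The walk above ultimately drives SSAUpdater: each block's last store becomes that block's available value, and the queued live-in loads are resolved afterwards. A self-contained sketch of that core API (the containers here are illustrative, not from this patch):

    SSAUpdater SSA;
    SSA.Initialize(Ty, "promoted");
    // Register each block's live-out definition of the promoted location.
    for (const auto &BlockAndValue : LiveOutDefs)
      SSA.AddAvailableValue(BlockAndValue.first, BlockAndValue.second);
    // Resolve every load of the live-in value; SSAUpdater inserts PHI
    // nodes across blocks as needed.
    for (LoadInst *L : LiveInLoads) {
      Value *V = SSA.GetValueInMiddleOfBlock(L->getParent());
      L->replaceAllUsesWith(V);
    }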
for (Instruction *User : Insts) { @@ -465,7 +465,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {      if (!User->use_empty()) {        Value *NewVal = ReplacedLoads[User];        assert(NewVal && "not a replaced load?"); -       +        // Propagate down to the ultimate replacee.  The intermediately loads        // could theoretically already have been deleted, so we don't want to        // dereference the Value*'s. @@ -474,11 +474,11 @@ run(const SmallVectorImpl<Instruction *> &Insts) const {          NewVal = RLI->second;          RLI = ReplacedLoads.find(NewVal);        } -       +        replaceLoadWithValue(cast<LoadInst>(User), NewVal);        User->replaceAllUsesWith(NewVal);      } -     +      instructionDeleted(User);      User->eraseFromParent();    } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index e381fbc34ab4..65b23f4d94a1 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -196,7 +196,7 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,    SmallDenseMap<const SCEV*, Value*> CheapExpansions;    CheapExpansions[S] = ICmp->getOperand(IVOperIdx);    CheapExpansions[X] = ICmp->getOperand(1 - IVOperIdx); -   +    // TODO: Support multiple entry loops?  (We currently bail out of these in    // the IndVarSimplify pass)    if (auto *BB = L->getLoopPredecessor()) { diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 8c48597fc2e4..15e035874002 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -890,7 +890,7 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,      return nullptr;    // Replace the malloc with a calloc. We need the data layout to know what the -  // actual size of a 'size_t' parameter is.  +  // actual size of a 'size_t' parameter is.    B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator());    const DataLayout &DL = Malloc->getModule()->getDataLayout();    IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext()); @@ -970,7 +970,7 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,    Value *V = valueHasFloatPrecision(CI->getArgOperand(0));    if (V == nullptr)      return nullptr; -   +    // If call isn't an intrinsic, check that it isn't within a function with the    // same name as the float version of this call.    // @@ -1126,165 +1126,164 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {    if (!Pow->isFast())      return nullptr; -  const APFloat *Arg1C; -  if (!match(Pow->getArgOperand(1), m_APFloat(Arg1C))) -    return nullptr; -  if (!Arg1C->isExactlyValue(0.5) && !Arg1C->isExactlyValue(-0.5)) +  Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); +  Type *Ty = Pow->getType(); + +  const APFloat *ExpoF; +  if (!match(Expo, m_APFloat(ExpoF)) || +      (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))      return nullptr; -  // Fast-math flags from the pow() are propagated to all replacement ops. -  IRBuilder<>::FastMathFlagGuard Guard(B); -  B.setFastMathFlags(Pow->getFastMathFlags()); -  Type *Ty = Pow->getType(); -  Value *Sqrt; +  // If errno is never set, then use the intrinsic for sqrt().    
if (Pow->hasFnAttr(Attribute::ReadNone)) { -    // We know that errno is never set, so replace with an intrinsic: -    // pow(x, 0.5) --> llvm.sqrt(x) -    // llvm.pow(x, 0.5) --> llvm.sqrt(x) -    auto *F = Intrinsic::getDeclaration(Pow->getModule(), Intrinsic::sqrt, Ty); -    Sqrt = B.CreateCall(F, Pow->getArgOperand(0)); -  } else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, -                             LibFunc_sqrtl)) { -    // Errno could be set, so we must use a sqrt libcall. -    // TODO: We also should check that the target can in fact lower the sqrt -    // libcall. We currently have no way to ask this question, so we ask -    // whether the target has a sqrt libcall which is not exactly the same. -    Sqrt = emitUnaryFloatFnCall(Pow->getArgOperand(0), -                                TLI->getName(LibFunc_sqrt), B, +    Function *SqrtFn = Intrinsic::getDeclaration(Pow->getModule(), +                                                 Intrinsic::sqrt, Ty); +    Sqrt = B.CreateCall(SqrtFn, Base); +  } +  // Otherwise, use the libcall for sqrt(). +  else if (hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) +    // TODO: We also should check that the target can in fact lower the sqrt() +    // libcall. We currently have no way to ask this question, so we ask if +    // the target has a sqrt() libcall, which is not exactly the same. +    Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt), B,                                  Pow->getCalledFunction()->getAttributes()); -  } else { -    // We can't replace with an intrinsic or a libcall. +  else      return nullptr; -  } -  // If this is pow(x, -0.5), get the reciprocal. -  if (Arg1C->isExactlyValue(-0.5)) -    Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt); +  // If the exponent is negative, then get the reciprocal. +  if (ExpoF->isNegative()) +    Sqrt = B.CreateFDiv(ConstantFP::get(Ty, 1.0), Sqrt, "reciprocal");    return Sqrt;  } -Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { -  Function *Callee = CI->getCalledFunction(); -  Value *Ret = nullptr; +Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) { +  Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); +  Function *Callee = Pow->getCalledFunction(); +  AttributeList Attrs = Callee->getAttributes();    StringRef Name = Callee->getName(); -  if (UnsafeFPShrink && Name == "pow" && hasFloatVersion(Name)) -    Ret = optimizeUnaryDoubleFP(CI, B, true); +  Module *Module = Pow->getModule(); +  Type *Ty = Pow->getType(); +  Value *Shrunk = nullptr; +  bool Ignored; + +  if (UnsafeFPShrink && +      Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name)) +    Shrunk = optimizeUnaryDoubleFP(Pow, B, true); + +  // Propagate the math semantics from the call to any created instructions. +  IRBuilder<>::FastMathFlagGuard Guard(B); +  B.setFastMathFlags(Pow->getFastMathFlags()); -  Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); +  // Evaluate special cases related to the base.    // pow(1.0, x) -> 1.0 -  if (match(Op1, m_SpecificFP(1.0))) -    return Op1; -  // pow(2.0, x) -> llvm.exp2(x) -  if (match(Op1, m_SpecificFP(2.0))) { -    Value *Exp2 = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::exp2, -                                            CI->getType()); -    return B.CreateCall(Exp2, Op2, "exp2"); -  } - -  // There's no llvm.exp10 intrinsic yet, but, maybe, some day there will -  // be one. 
-  if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { -    // pow(10.0, x) -> exp10(x) -    if (Op1C->isExactlyValue(10.0) && -        hasUnaryFloatFn(TLI, Op1->getType(), LibFunc_exp10, LibFunc_exp10f, -                        LibFunc_exp10l)) -      return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc_exp10), B, -                                  Callee->getAttributes()); +  if (match(Base, m_SpecificFP(1.0))) +    return Base; + +  // pow(2.0, x) -> exp2(x) +  if (match(Base, m_SpecificFP(2.0))) { +    Value *Exp2 = Intrinsic::getDeclaration(Module, Intrinsic::exp2, Ty); +    return B.CreateCall(Exp2, Expo, "exp2");    } +  // pow(10.0, x) -> exp10(x) +  if (ConstantFP *BaseC = dyn_cast<ConstantFP>(Base)) +    // There's no exp10 intrinsic yet, but, maybe, some day there shall be one. +    if (BaseC->isExactlyValue(10.0) && +        hasUnaryFloatFn(TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) +      return emitUnaryFloatFnCall(Expo, TLI->getName(LibFunc_exp10), B, Attrs); +    // pow(exp(x), y) -> exp(x * y)    // pow(exp2(x), y) -> exp2(x * y)    // We enable these only with fast-math. Besides rounding differences, the    // transformation changes overflow and underflow behavior quite dramatically.    // Example: x = 1000, y = 0.001.    // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1). -  auto *OpC = dyn_cast<CallInst>(Op1); -  if (OpC && OpC->isFast() && CI->isFast()) { -    LibFunc Func; -    Function *OpCCallee = OpC->getCalledFunction(); -    if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && -        TLI->has(Func) && (Func == LibFunc_exp || Func == LibFunc_exp2)) { +  auto *BaseFn = dyn_cast<CallInst>(Base); +  if (BaseFn && BaseFn->isFast() && Pow->isFast()) { +    LibFunc LibFn; +    Function *CalleeFn = BaseFn->getCalledFunction(); +    if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) && +        (LibFn == LibFunc_exp || LibFn == LibFunc_exp2) && TLI->has(LibFn)) {        IRBuilder<>::FastMathFlagGuard Guard(B); -      B.setFastMathFlags(CI->getFastMathFlags()); -      Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"); -      return emitUnaryFloatFnCall(FMul, OpCCallee->getName(), B, -                                  OpCCallee->getAttributes()); +      B.setFastMathFlags(Pow->getFastMathFlags()); + +      Value *FMul = B.CreateFMul(BaseFn->getArgOperand(0), Expo, "mul"); +      return emitUnaryFloatFnCall(FMul, CalleeFn->getName(), B, +                                  CalleeFn->getAttributes());      }    } -  if (Value *Sqrt = replacePowWithSqrt(CI, B)) +  // Evaluate special cases related to the exponent. + +  if (Value *Sqrt = replacePowWithSqrt(Pow, B))      return Sqrt; -  ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); -  if (!Op2C) -    return Ret; +  ConstantFP *ExpoC = dyn_cast<ConstantFP>(Expo); +  if (!ExpoC) +    return Shrunk; -  if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 -    return ConstantFP::get(CI->getType(), 1.0); +  // pow(x, -1.0) -> 1.0 / x +  if (ExpoC->isExactlyValue(-1.0)) +    return B.CreateFDiv(ConstantFP::get(Ty, 1.0), Base, "reciprocal"); -  // FIXME: Correct the transforms and pull this into replacePowWithSqrt(). -  if (Op2C->isExactlyValue(0.5) && -      hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf, -                      LibFunc_sqrtl)) { -    // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). -    // This is faster than calling pow, and still handles negative zero -    // and negative infinity correctly. 
-    // TODO: In finite-only mode, this could be just fabs(sqrt(x)). -    Value *Inf = ConstantFP::getInfinity(CI->getType()); -    Value *NegInf = ConstantFP::getInfinity(CI->getType(), true); +  // pow(x, 0.0) -> 1.0 +  if (ExpoC->getValueAPF().isZero()) +    return ConstantFP::get(Ty, 1.0); -    // TODO: As above, we should lower to the sqrt intrinsic if the pow is an -    // intrinsic, to match errno semantics. -    Value *Sqrt = emitUnaryFloatFnCall(Op1, "sqrt", B, Callee->getAttributes()); +  // pow(x, 1.0) -> x +  if (ExpoC->isExactlyValue(1.0)) +    return Base; -    Module *M = Callee->getParent(); -    Function *FabsF = Intrinsic::getDeclaration(M, Intrinsic::fabs, -                                                CI->getType()); -    Value *FAbs = B.CreateCall(FabsF, Sqrt); +  // pow(x, 2.0) -> x * x +  if (ExpoC->isExactlyValue(2.0)) +    return B.CreateFMul(Base, Base, "square"); -    Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf); -    Value *Sel = B.CreateSelect(FCmp, Inf, FAbs); -    return Sel; +  // FIXME: Correct the transforms and pull this into replacePowWithSqrt(). +  if (ExpoC->isExactlyValue(0.5) && +      hasUnaryFloatFn(TLI, Ty, LibFunc_sqrt, LibFunc_sqrtf, LibFunc_sqrtl)) { +    // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). +    // This is faster than calling pow(), and still handles -0.0 and +    // negative infinity correctly. +    // TODO: In finite-only mode, this could be just fabs(sqrt(x)). +    Value *PosInf = ConstantFP::getInfinity(Ty); +    Value *NegInf = ConstantFP::getInfinity(Ty, true); + +    // TODO: As above, we should lower to the sqrt() intrinsic if the pow() is +    // an intrinsic, to match errno semantics. +    Value *Sqrt = emitUnaryFloatFnCall(Base, TLI->getName(LibFunc_sqrt), +                                       B, Attrs); +    Function *FAbsFn = Intrinsic::getDeclaration(Module, Intrinsic::fabs, Ty); +    Value *FAbs = B.CreateCall(FAbsFn, Sqrt, "abs"); +    Value *FCmp = B.CreateFCmpOEQ(Base, NegInf, "isinf"); +    Sqrt = B.CreateSelect(FCmp, PosInf, FAbs); +    return Sqrt;    } -  // Propagate fast-math-flags from the call to any created instructions. -  IRBuilder<>::FastMathFlagGuard Guard(B); -  B.setFastMathFlags(CI->getFastMathFlags()); -  // pow(x, 1.0) --> x -  if (Op2C->isExactlyValue(1.0)) -    return Op1; -  // pow(x, 2.0) --> x * x -  if (Op2C->isExactlyValue(2.0)) -    return B.CreateFMul(Op1, Op1, "pow2"); -  // pow(x, -1.0) --> 1.0 / x -  if (Op2C->isExactlyValue(-1.0)) -    return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); - -  // In -ffast-math, generate repeated fmul instead of generating pow(x, n). -  if (CI->isFast()) { -    APFloat V = abs(Op2C->getValueAPF()); -    // We limit to a max of 7 fmul(s). Thus max exponent is 32. +  // pow(x, n) -> x * x * x * .... +  if (Pow->isFast()) { +    APFloat ExpoA = abs(ExpoC->getValueAPF()); +    // We limit to a max of 7 fmul(s). Thus the maximum exponent is 32.      // This transformation applies to integer exponents only. -    if (V.compare(APFloat(V.getSemantics(), 32.0)) == APFloat::cmpGreaterThan || -        !V.isInteger()) +    if (!ExpoA.isInteger() || +        ExpoA.compare +            (APFloat(ExpoA.getSemantics(), 32.0)) == APFloat::cmpGreaterThan)        return nullptr;      // We will memoize intermediate products of the Addition Chain.      
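The getPow() helper used just below is not part of this diff; the real one is table-driven and, after the APFloat conversion shown further down, takes the exponent as a double. As a rough sketch of the memoized square-and-multiply expansion it performs (assuming a small integral exponent, and that the caller has seeded InnerChain[1] and InnerChain[2] as the code below does):

    // Expand Base^Exp into fmuls, memoizing every computed power in
    // InnerChain so shared subproducts are emitted only once.
    static Value *getPowSketch(Value *InnerChain[33], unsigned Exp,
                               IRBuilder<> &B) {
      if (InnerChain[Exp])
        return InnerChain[Exp];
      unsigned Half = Exp / 2;
      InnerChain[Exp] = B.CreateFMul(getPowSketch(InnerChain, Half, B),
                                     getPowSketch(InnerChain, Exp - Half, B));
      return InnerChain[Exp];
    }
    // For example x^7: x2 = x*x, x3 = x2*x, x4 = x2*x2, x7 = x3*x4,
    // four fmuls in total, within the 7-fmul cap that limits Exp to 32.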
Value *InnerChain[33] = {nullptr}; -    InnerChain[1] = Op1; -    InnerChain[2] = B.CreateFMul(Op1, Op1); +    InnerChain[1] = Base; +    InnerChain[2] = B.CreateFMul(Base, Base, "square");      // We cannot readily convert a non-double type (like float) to a double. -    // So we first convert V to something which could be converted to double. -    bool Ignored; -    V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored); -     -    Value *FMul = getPow(InnerChain, V.convertToDouble(), B); -    // For negative exponents simply compute the reciprocal. -    if (Op2C->isNegative()) -      FMul = B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), FMul); +    // So we first convert it to something which could be converted to double. +    ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored); +    Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B); + +    // If the exponent is negative, then get the reciprocal. +    if (ExpoC->isNegative()) +      FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal");      return FMul;    } diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp index 3640541e63cc..fd0da79487f1 100644 --- a/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/lib/Transforms/Utils/SymbolRewriter.cpp @@ -536,7 +536,7 @@ private:  char RewriteSymbolsLegacyPass::ID = 0;  RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID) { -  initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry());   +  initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry());  }  RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass( diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index e633ac0c874d..d49b26472548 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -61,7 +61,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {    } else if (UnreachableBlocks.size() == 1) {      UnreachableBlock = UnreachableBlocks.front();    } else { -    UnreachableBlock = BasicBlock::Create(F.getContext(),  +    UnreachableBlock = BasicBlock::Create(F.getContext(),                                            "UnifiedUnreachableBlock", &F);      new UnreachableInst(F.getContext(), UnreachableBlock); diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 3c693f5d5ee0..859d0c92ca5a 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -535,13 +535,13 @@ protected:    /// Returns true if we should generate a scalar version of \p IV.    bool needsScalarInduction(Instruction *IV) const; -  /// If there is a cast involved in the induction variable \p ID, which should  -  /// be ignored in the vectorized loop body, this function records the  -  /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the  -  /// cast. We had already proved that the casted Phi is equal to the uncasted  -  /// Phi in the vectorized loop (under a runtime guard), and therefore  -  /// there is no need to vectorize the cast - the same value can be used in the  -  /// vector loop for both the Phi and the cast.  +  /// If there is a cast involved in the induction variable \p ID, which should +  /// be ignored in the vectorized loop body, this function records the +  /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the +  /// cast. 
We had already proved that the casted Phi is equal to the uncasted +  /// Phi in the vectorized loop (under a runtime guard), and therefore +  /// there is no need to vectorize the cast - the same value can be used in the +  /// vector loop for both the Phi and the cast.    /// If \p VectorLoopValue is a scalarized value, \p Lane is also specified,    /// Otherwise, \p VectorLoopValue is a widened/vectorized value.    /// @@ -5443,7 +5443,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I){    // high enough value to practically disable vectorization with such    // operations, except where previously deployed legality hack allowed    // using very low cost values. This is to avoid regressions coming simply -  // from moving "masked load/store" check from legality to cost model.  +  // from moving "masked load/store" check from legality to cost model.    // Masked Load/Gather emulation was previously never allowed.    // Limited number of Masked Store/Scatter emulation was allowed.    assert(isScalarWithPredication(I) && @@ -6412,12 +6412,12 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions(          }))        DeadInstructions.insert(IndUpdate); -    // We record as "Dead" also the type-casting instructions we had identified  +    // We record as "Dead" also the type-casting instructions we had identified      // during induction analysis. We don't need any handling for them in the -    // vectorized loop because we have proven that, under a proper runtime  -    // test guarding the vectorized loop, the value of the phi, and the casted  +    // vectorized loop because we have proven that, under a proper runtime +    // test guarding the vectorized loop, the value of the phi, and the casted      // value of the phi, are the same. The last instruction in this casting chain -    // will get its scalar/vector/widened def from the scalar/vector/widened def  +    // will get its scalar/vector/widened def from the scalar/vector/widened def      // of the respective phi node. Any other casts in the induction def-use chain      // have no other uses outside the phi update chain, and will be ignored.      InductionDescriptor &IndDes = Induction.second; @@ -7060,8 +7060,8 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range) {    auto Plan = llvm::make_unique<VPlan>();    // Build hierarchical CFG -  VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI); -  HCFGBuilder.buildHierarchicalCFG(*Plan.get()); +  VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); +  HCFGBuilder.buildHierarchicalCFG();    return Plan;  } diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index ac8c4f046c6f..5c2efe885e22 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -345,7 +345,7 @@ static Value *isOneOf(const InstructionsState &S, Value *Op) {  }  /// \returns analysis of the Instructions in \p VL described in -/// InstructionsState, the Opcode that we suppose the whole list  +/// InstructionsState, the Opcode that we suppose the whole list  /// could be vectorized even if its structure is diverse.  static InstructionsState getSameOpcode(ArrayRef<Value *> VL,                                         unsigned BaseIndex = 0) { @@ -3111,6 +3111,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {            // TODO: Merge this shuffle with the ReorderShuffleMask.            
if (!E->ReorderIndices.empty())              Builder.SetInsertPoint(VL0); +          else if (auto *I = dyn_cast<Instruction>(V)) +            Builder.SetInsertPoint(I->getParent(), +                                   std::next(I->getIterator())); +          else +            Builder.SetInsertPoint(&F->getEntryBlock(), +                                   F->getEntryBlock().getFirstInsertionPt());            V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),                                            E->ReuseShuffleIndices, "shuffle");          } diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp index f7b07b722bb1..0780e70809d0 100644 --- a/lib/Transforms/Vectorize/VPlan.cpp +++ b/lib/Transforms/Vectorize/VPlan.cpp @@ -18,6 +18,7 @@  //===----------------------------------------------------------------------===//  #include "VPlan.h" +#include "VPlanDominatorTree.h"  #include "llvm/ADT/DepthFirstIterator.h"  #include "llvm/ADT/PostOrderIterator.h"  #include "llvm/ADT/SmallVector.h" @@ -25,7 +26,6 @@  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/CFG.h" -#include "llvm/IR/Dominators.h"  #include "llvm/IR/InstrTypes.h"  #include "llvm/IR/Instruction.h"  #include "llvm/IR/Instructions.h" @@ -34,6 +34,7 @@  #include "llvm/Support/Casting.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GenericDomTreeConstruction.h"  #include "llvm/Support/GraphWriter.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -576,3 +577,5 @@ void VPWidenMemoryInstructionRecipe::print(raw_ostream &O,    }    O << "\\l\"";  } + +template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT); diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h index 866951cb79a4..883e6f52369a 100644 --- a/lib/Transforms/Vectorize/VPlan.h +++ b/lib/Transforms/Vectorize/VPlan.h @@ -26,8 +26,10 @@  #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H  #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H +#include "VPlanLoopInfo.h"  #include "VPlanValue.h"  #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h"  #include "llvm/ADT/GraphTraits.h"  #include "llvm/ADT/Optional.h"  #include "llvm/ADT/SmallPtrSet.h" @@ -51,7 +53,6 @@ class BasicBlock;  class DominatorTree;  class InnerLoopVectorizer;  class InterleaveGroup; -class LoopInfo;  class raw_ostream;  class Value;  class VPBasicBlock; @@ -516,6 +517,23 @@ public:    /// Delete all blocks reachable from a given VPBlockBase, inclusive.    static void deleteCFG(VPBlockBase *Entry); + +  void printAsOperand(raw_ostream &OS, bool PrintType) const { +    OS << getName(); +  } + +  void print(raw_ostream &OS) const { +    // TODO: Only printing VPBB name for now since we only have dot printing +    // support for VPInstructions/Recipes. +    printAsOperand(OS, false); +  } + +  /// Return true if it is legal to hoist instructions into this block. +  bool isLegalToHoistInto() { +    // There are currently no constraints that prevent an instruction to be +    // hoisted into a VPBlockBase. +    return true; +  }  };  /// VPRecipeBase is a base class modeling a sequence of one or more output IR @@ -1037,6 +1055,12 @@ public:      EntryBlock->setParent(this);    } +  // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a +  // specific interface of llvm::Function, instead of using +  // GraphTraints::getEntryNode. 
We should add a new template parameter to +  // DominatorTreeBase representing the Graph type. +  VPBlockBase &front() const { return *Entry; } +    const VPBlockBase *getExit() const { return Exit; }    VPBlockBase *getExit() { return Exit; } @@ -1087,6 +1111,9 @@ private:    /// VPlan.    Value2VPValueTy Value2VPValue; +  /// Holds the VPLoopInfo analysis for this VPlan. +  VPLoopInfo VPLInfo; +  public:    VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {} @@ -1133,6 +1160,10 @@ public:      return Value2VPValue[V];    } +  /// Return the VPLoopInfo analysis for this VPlan. +  VPLoopInfo &getVPLoopInfo() { return VPLInfo; } +  const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; } +  private:    /// Add to the given dominator tree the header block and every new basic block    /// that was created between it and the latch block, inclusive. @@ -1210,12 +1241,15 @@ inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan) {    return OS;  } -//===--------------------------------------------------------------------===// -// GraphTraits specializations for VPlan/VPRegionBlock Control-Flow Graphs  // -//===--------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs     // +//===----------------------------------------------------------------------===// -// Provide specializations of GraphTraits to be able to treat a VPBlockBase as a -// graph of VPBlockBase nodes... +// The following set of template specializations implement GraphTraits to treat +// any VPBlockBase as a node in a graph of VPBlockBases. It's important to note +// that VPBlockBase traits don't recurse into VPRegioBlocks, i.e., if the +// VPBlockBase is a VPRegionBlock, this specialization provides access to its +// successors/predecessors but not to the blocks inside the region.  template <> struct GraphTraits<VPBlockBase *> {    using NodeRef = VPBlockBase *; @@ -1247,17 +1281,13 @@ template <> struct GraphTraits<const VPBlockBase *> {    }  }; -// Provide specializations of GraphTraits to be able to treat a VPBlockBase as a -// graph of VPBlockBase nodes... and to walk it in inverse order. Inverse order -// for a VPBlockBase is considered to be when traversing the predecessors of a -// VPBlockBase instead of its successors. +// Inverse order specialization for VPBasicBlocks. Predecessors are used instead +// of successors for the inverse traversal.  template <> struct GraphTraits<Inverse<VPBlockBase *>> {    using NodeRef = VPBlockBase *;    using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator; -  static Inverse<VPBlockBase *> getEntryNode(Inverse<VPBlockBase *> B) { -    return B; -  } +  static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; }    static inline ChildIteratorType child_begin(NodeRef N) {      return N->getPredecessors().begin(); @@ -1268,6 +1298,71 @@ template <> struct GraphTraits<Inverse<VPBlockBase *>> {    }  }; +// The following set of template specializations implement GraphTraits to +// treat VPRegionBlock as a graph and recurse inside its nodes. It's important +// to note that the blocks inside the VPRegionBlock are treated as VPBlockBases +// (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so +// there won't be automatic recursion into other VPBlockBases that turn to be +// VPRegionBlocks. 
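The specializations that follow make a VPRegionBlock usable with LLVM's generic graph utilities. A hedged usage sketch (Region is a hypothetical VPRegionBlock *; depth_first comes from llvm/ADT/DepthFirstIterator.h and dbgs() from llvm/Support/Debug.h):

    // Visits every VPBlockBase immediately inside Region; nested
    // VPRegionBlocks appear as opaque single nodes, with no recursion
    // into their contents, exactly as described above.
    for (VPBlockBase *Block : depth_first(Region))
      dbgs() << Block->getName() << "\n";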
+ +template <> +struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> { +  using GraphRef = VPRegionBlock *; +  using nodes_iterator = df_iterator<NodeRef>; + +  static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } + +  static nodes_iterator nodes_begin(GraphRef N) { +    return nodes_iterator::begin(N->getEntry()); +  } + +  static nodes_iterator nodes_end(GraphRef N) { +    // df_iterator::end() returns an empty iterator so the node used doesn't +    // matter. +    return nodes_iterator::end(N); +  } +}; + +template <> +struct GraphTraits<const VPRegionBlock *> +    : public GraphTraits<const VPBlockBase *> { +  using GraphRef = const VPRegionBlock *; +  using nodes_iterator = df_iterator<NodeRef>; + +  static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } + +  static nodes_iterator nodes_begin(GraphRef N) { +    return nodes_iterator::begin(N->getEntry()); +  } + +  static nodes_iterator nodes_end(GraphRef N) { +    // df_iterator::end() returns an empty iterator so the node used doesn't +    // matter. +    return nodes_iterator::end(N); +  } +}; + +template <> +struct GraphTraits<Inverse<VPRegionBlock *>> +    : public GraphTraits<Inverse<VPBlockBase *>> { +  using GraphRef = VPRegionBlock *; +  using nodes_iterator = df_iterator<NodeRef>; + +  static NodeRef getEntryNode(Inverse<GraphRef> N) { +    return N.Graph->getExit(); +  } + +  static nodes_iterator nodes_begin(GraphRef N) { +    return nodes_iterator::begin(N->getExit()); +  } + +  static nodes_iterator nodes_end(GraphRef N) { +    // df_iterator::end() returns an empty iterator so the node used doesn't +    // matter. +    return nodes_iterator::end(N); +  } +}; +  //===----------------------------------------------------------------------===//  // VPlan Utilities  //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/Vectorize/VPlanDominatorTree.h b/lib/Transforms/Vectorize/VPlanDominatorTree.h new file mode 100644 index 000000000000..1b81097b6d31 --- /dev/null +++ b/lib/Transforms/Vectorize/VPlanDominatorTree.h @@ -0,0 +1,41 @@ +//===-- VPlanDominatorTree.h ------------------------------------*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements dominator tree analysis for a single level of a VPlan's +/// H-CFG. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H +#define LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H + +#include "VPlan.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/IR/Dominators.h" + +namespace llvm { + +/// Template specialization of the standard LLVM dominator tree utility for +/// VPBlockBases. +using VPDominatorTree = DomTreeBase<VPBlockBase>; + +using VPDomTreeNode = DomTreeNodeBase<VPBlockBase>; + +/// Template specializations of GraphTraits for VPDomTreeNode. 
+template <> +struct GraphTraits<VPDomTreeNode *> +    : public DomTreeGraphTraitsBase<VPDomTreeNode, VPDomTreeNode::iterator> {}; + +template <> +struct GraphTraits<const VPDomTreeNode *> +    : public DomTreeGraphTraitsBase<const VPDomTreeNode, +                                    VPDomTreeNode::const_iterator> {}; +} // namespace llvm +#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANDOMINATORTREE_H diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index 08129b74cddf..b6307acb9474 100644 --- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -324,13 +324,28 @@ VPRegionBlock *PlainCFGBuilder::buildPlainCFG() {    return TopRegion;  } +VPRegionBlock *VPlanHCFGBuilder::buildPlainCFG() { +  PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); +  return PCFGBuilder.buildPlainCFG(); +} +  // Public interface to build a H-CFG. -void VPlanHCFGBuilder::buildHierarchicalCFG(VPlan &Plan) { +void VPlanHCFGBuilder::buildHierarchicalCFG() {    // Build Top Region enclosing the plain CFG and set it as VPlan entry. -  PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); -  VPRegionBlock *TopRegion = PCFGBuilder.buildPlainCFG(); +  VPRegionBlock *TopRegion = buildPlainCFG();    Plan.setEntry(TopRegion);    LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);    Verifier.verifyHierarchicalCFG(TopRegion); + +  // Compute plain CFG dom tree for VPLInfo. +  VPDomTree.recalculate(*TopRegion); +  LLVM_DEBUG(dbgs() << "Dominator Tree after building the plain CFG.\n"; +             VPDomTree.print(dbgs())); + +  // Compute VPLInfo and keep it in Plan. +  VPLoopInfo &VPLInfo = Plan.getVPLoopInfo(); +  VPLInfo.analyze(VPDomTree); +  LLVM_DEBUG(dbgs() << "VPLoop Info After buildPlainCFG:\n"; +             VPLInfo.print(dbgs()));  } diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h index c4e69843615a..3f11dcb5164d 100644 --- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h +++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h @@ -26,14 +26,18 @@  #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H  #include "VPlan.h" +#include "VPlanDominatorTree.h"  #include "VPlanVerifier.h"  namespace llvm {  class Loop; +class VPlanTestBase;  /// Main class to build the VPlan H-CFG for an incoming IR.  class VPlanHCFGBuilder { +  friend VPlanTestBase; +  private:    // The outermost loop of the input loop nest considered for vectorization.    Loop *TheLoop; @@ -41,14 +45,27 @@ private:    // Loop Info analysis.    LoopInfo *LI; +  // The VPlan that will contain the H-CFG we are building. +  VPlan &Plan; +    // VPlan verifier utility.    VPlanVerifier Verifier; +  // Dominator analysis for VPlan plain CFG to be used in the +  // construction of the H-CFG. This analysis is no longer valid once regions +  // are introduced. +  VPDominatorTree VPDomTree; + +  /// Build plain CFG for TheLoop. Return a new VPRegionBlock (TopRegion) +  /// enclosing the plain CFG. +  VPRegionBlock *buildPlainCFG(); +  public: -  VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI) : TheLoop(Lp), LI(LI) {} +  VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P) +      : TheLoop(Lp), LI(LI), Plan(P) {} -  /// Build H-CFG for TheLoop and update \p Plan accordingly. -  void buildHierarchicalCFG(VPlan &Plan); +  /// Build H-CFG for TheLoop and update Plan accordingly. 
+  void buildHierarchicalCFG();
 };
 
 } // namespace llvm
diff --git a/lib/Transforms/Vectorize/VPlanLoopInfo.h b/lib/Transforms/Vectorize/VPlanLoopInfo.h
new file mode 100644
index 000000000000..5c2485fc2145
--- /dev/null
+++ b/lib/Transforms/Vectorize/VPlanLoopInfo.h
@@ -0,0 +1,45 @@
+//===-- VPLoopInfo.h --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines VPLoopInfo analysis and VPLoop class. VPLoopInfo is a
+/// specialization of LoopInfoBase for VPBlockBase. VPLoops is a specialization
+/// of LoopBase that is used to hold loop metadata from VPLoopInfo. Further
+/// information can be found in VectorizationPlanner.rst.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
+
+#include "llvm/Analysis/LoopInfoImpl.h"
+
+namespace llvm {
+class VPBlockBase;
+
+/// Hold analysis information for every loop detected by VPLoopInfo. It is an
+/// instantiation of LoopBase.
+class VPLoop : public LoopBase<VPBlockBase, VPLoop> {
+private:
+  friend class LoopInfoBase<VPBlockBase, VPLoop>;
+  explicit VPLoop(VPBlockBase *VPB) : LoopBase<VPBlockBase, VPLoop>(VPB) {}
+};
+
+/// VPLoopInfo provides analysis of natural loop for VPBlockBase-based
+/// Hierarchical CFG. It is a specialization of LoopInfoBase class.
+// TODO: VPLoopInfo is initially computed on top of the VPlan plain CFG, which
+// is the same as the incoming IR CFG. If it's more efficient than running the
+// whole loop detection algorithm, we may want to create a mechanism to
+// translate LoopInfo into VPLoopInfo. However, that would require significant
+// changes in LoopInfoBase class.
+typedef LoopInfoBase<VPBlockBase, VPLoop> VPLoopInfo;
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLOOPINFO_H
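Taken together, the new pieces compose as in VPlanHCFGBuilder::buildHierarchicalCFG() above; a condensed sketch, assuming TopRegion is the VPRegionBlock enclosing the plain CFG:

    // Both analyses are the generic templates instantiated for
    // VPBlockBase, so the familiar DomTree/LoopInfo APIs carry over.
    VPDominatorTree VPDomTree;
    VPDomTree.recalculate(*TopRegion);

    VPLoopInfo VPLInfo;
    VPLInfo.analyze(VPDomTree);
    for (VPLoop *TopLevelLoop : VPLInfo)
      dbgs() << "VPLoop header: " << TopLevelLoop->getHeader()->getName()
             << "\n";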
