Diffstat (limited to 'lib/Transforms')
59 files changed, 1892 insertions, 1376 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index d8cf8d3f5da21..53223ab443161 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -124,6 +124,10 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) ++NumArgumentsDead; + + // There may be remaining metadata uses of the argument for things like + // llvm.dbg.value. Replace them with undef. + I->replaceAllUsesWith(UndefValue::get(I->getType())); } else { // Okay, this is being promoted. This means that the only uses are loads // or GEPs which are only used by loads diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index 6d34ab8b0d960..233a36d2bc543 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -64,6 +64,12 @@ static cl::opt<float> ImportHotMultiplier( "import-hot-multiplier", cl::init(3.0), cl::Hidden, cl::value_desc("x"), cl::desc("Multiply the `import-instr-limit` threshold for hot callsites")); +static cl::opt<float> ImportCriticalMultiplier( + "import-critical-multiplier", cl::init(100.0), cl::Hidden, + cl::value_desc("x"), + cl::desc( + "Multiply the `import-instr-limit` threshold for critical callsites")); + // FIXME: This multiplier was not really tuned up. static cl::opt<float> ImportColdMultiplier( "import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"), @@ -207,6 +213,8 @@ static void computeImportForFunction( return ImportHotMultiplier; if (Hotness == CalleeInfo::HotnessType::Cold) return ImportColdMultiplier; + if (Hotness == CalleeInfo::HotnessType::Critical) + return ImportCriticalMultiplier; return 1.0; }; @@ -537,8 +545,6 @@ void llvm::thinLTOResolveWeakForLinkerModule( }; auto updateLinkage = [&](GlobalValue &GV) { - if (!GlobalValue::isWeakForLinker(GV.getLinkage())) - return; // See if the global summary analysis computed a new resolved linkage. const auto &GS = DefinedGlobals.find(GV.getGUID()); if (GS == DefinedGlobals.end()) @@ -546,6 +552,21 @@ void llvm::thinLTOResolveWeakForLinkerModule( auto NewLinkage = GS->second->linkage(); if (NewLinkage == GV.getLinkage()) return; + + // Switch the linkage to weakany if asked for, e.g. we do this for + // linker redefined symbols (via --wrap or --defsym). + // We record that the visibility should be changed here in `addThinLTO` + // as we need access to the resolution vectors for each input file in + // order to find which symbols have been redefined. + // We may consider reorganizing this code and moving the linkage recording + // somewhere else, e.g. in thinLTOResolveWeakForLinkerInIndex. + if (NewLinkage == GlobalValue::WeakAnyLinkage) { + GV.setLinkage(NewLinkage); + return; + } + + if (!GlobalValue::isWeakForLinker(GV.getLinkage())) + return; // Check for a non-prevailing def that has interposable linkage // (e.g. non-odr weak or linkonce). In that case we can't simply // convert to available_externally, since it would lose the diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index f277a51ae659a..3d57acf06e746 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -837,7 +837,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) { // The global is initialized when the store to it occurs. 
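// [Editorial note -- not part of the patch] The GlobalOpt hunks below belong
// to an IR-wide rename: getSynchScope() becomes getSyncScopeID(), with atomic
// loads and stores now carrying a numeric SyncScope::ID rather than the old
// SynchronizationScope enum. A minimal sketch of cloning a load while
// preserving its atomic information under the new API (NewPtr is a
// hypothetical replacement pointer):
//
//   LoadInst *NLI = new LoadInst(NewPtr, LI->getName() + ".val",
//                                LI->isVolatile(), LI->getAlignment(),
//                                LI->getOrdering(), LI->getSyncScopeID(),
//                                /*InsertBefore=*/LI);
//
// Ordering and scope are copied verbatim so the rewritten access keeps the
// original atomic semantics.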
new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0, - SI->getOrdering(), SI->getSynchScope(), SI); + SI->getOrdering(), SI->getSyncScopeID(), SI); SI->eraseFromParent(); continue; } @@ -854,7 +854,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, // Replace the cmp X, 0 with a use of the bool value. // Sink the load to where the compare was, if atomic rules allow us to. Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0, - LI->getOrdering(), LI->getSynchScope(), + LI->getOrdering(), LI->getSyncScopeID(), LI->isUnordered() ? (Instruction*)ICI : LI); InitBoolUsed = true; switch (ICI->getPredicate()) { @@ -1605,7 +1605,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { assert(LI->getOperand(0) == GV && "Not a copy!"); // Insert a new load, to preserve the saved value. StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0, - LI->getOrdering(), LI->getSynchScope(), LI); + LI->getOrdering(), LI->getSyncScopeID(), LI); } else { assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) && "This is not a form that we understand!"); @@ -1614,12 +1614,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { } } new StoreInst(StoreVal, NewGV, false, 0, - SI->getOrdering(), SI->getSynchScope(), SI); + SI->getOrdering(), SI->getSyncScopeID(), SI); } else { // Change the load into a load of bool then a select. LoadInst *LI = cast<LoadInst>(UI); LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0, - LI->getOrdering(), LI->getSynchScope(), LI); + LI->getOrdering(), LI->getSyncScopeID(), LI); Value *NSI; if (IsOneZero) NSI = new ZExtInst(NLI, LI->getType(), "", LI); diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index ad89e40661c67..00ddb93df830a 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -989,5 +989,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // And delete the actual function from the module. M.getFunctionList().erase(DeadF); } - return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); + + if (!Changed) + return PreservedAnalyses::all(); + + // Even if we change the IR, we update the core CGSCC data structures and so + // can preserve the proxy to the function analysis manager. + PreservedAnalyses PA; + PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); + return PA; } diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp index b406c22c69d7a..693df5e7ba925 100644 --- a/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/lib/Transforms/IPO/LowerTypeTests.cpp @@ -855,15 +855,20 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) { FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, Name + ".cfi_jt", &M); FDecl->setVisibility(GlobalValue::HiddenVisibility); - } else { - // Definition. - assert(isDefinition); + } else if (isDefinition) { F->setName(Name + ".cfi"); F->setLinkage(GlobalValue::ExternalLinkage); F->setVisibility(GlobalValue::HiddenVisibility); FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, Name, &M); FDecl->setVisibility(Visibility); + } else { + // Function definition without type metadata, where some other translation + // unit contained a declaration with type metadata. This normally happens + // during mixed CFI + non-CFI compilation. 
We do nothing with the function + // so that it is treated the same way as a function defined outside of the + // LTO unit. + return; } if (F->isWeakForLinker()) diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 5b1b58b89c32e..0b319f6a488b3 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -188,6 +188,13 @@ PassManagerBuilder::~PassManagerBuilder() { static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy, PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions; +/// Check if GlobalExtensions is constructed and not empty. +/// Since GlobalExtensions is a managed static, calling 'empty()' will trigger +/// the construction of the object. +static bool GlobalExtensionsNotEmpty() { + return GlobalExtensions.isConstructed() && !GlobalExtensions->empty(); +} + void PassManagerBuilder::addGlobalExtension( PassManagerBuilder::ExtensionPointTy Ty, PassManagerBuilder::ExtensionFn Fn) { @@ -200,9 +207,12 @@ void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) { void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy, legacy::PassManagerBase &PM) const { - for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i) - if ((*GlobalExtensions)[i].first == ETy) - (*GlobalExtensions)[i].second(*this, PM); + if (GlobalExtensionsNotEmpty()) { + for (auto &Ext : *GlobalExtensions) { + if (Ext.first == ETy) + Ext.second(*this, PM); + } + } for (unsigned i = 0, e = Extensions.size(); i != e; ++i) if (Extensions[i].first == ETy) Extensions[i].second(*this, PM); @@ -415,7 +425,7 @@ void PassManagerBuilder::populateModulePassManager( // builds. The function merging pass is if (MergeFunctions) MPM.add(createMergeFunctionsPass()); - else if (!GlobalExtensions->empty() || !Extensions.empty()) + else if (GlobalExtensionsNotEmpty() || !Extensions.empty()) MPM.add(createBarrierNoopPass()); addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 8d494fe9cde28..8ef6bb6523093 100644 --- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -271,7 +271,8 @@ void splitAndWriteThinLTOBitcode( if (!ArgT || ArgT->getBitWidth() > 64) return; } - if (computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone) + if (!F->isDeclaration() && + computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone) EligibleVirtualFns.insert(F); }); } diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index d5f0dd1914157..809471cfd74f0 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -164,7 +164,7 @@ namespace { /// class FAddCombine { public: - FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(nullptr) {} + FAddCombine(InstCombiner::BuilderTy &B) : Builder(B), Instr(nullptr) {} Value *simplify(Instruction *FAdd); private: @@ -187,7 +187,7 @@ namespace { Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); void createInstPostProc(Instruction *NewInst, bool NoNumber = false); - InstCombiner::BuilderTy *Builder; + InstCombiner::BuilderTy &Builder; Instruction *Instr; // Debugging stuff are clustered here. 
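Most of the InstCombine churn in this commit is a single mechanical API
migration: InstCombiner::BuilderTy is now passed and held by reference instead
of by pointer, so every Builder->Create... call becomes Builder.Create... . A
minimal sketch of the new shape (foldExample is a hypothetical helper, not
from the patch):

    // Before: static Value *foldExample(BinaryOperator &I,
    //                                   InstCombiner::BuilderTy *Builder) {
    //   return Builder->CreateAnd(I.getOperand(0), I.getOperand(1));
    // }
    // After: the reference makes the always-non-null invariant explicit.
    static Value *foldExample(BinaryOperator &I,
                              InstCombiner::BuilderTy &Builder) {
      return Builder.CreateAnd(I.getOperand(0), I.getOperand(1));
    }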
@@ -735,7 +735,7 @@ Value *FAddCombine::createNaryFAdd } Value *FAddCombine::createFSub(Value *Opnd0, Value *Opnd1) { - Value *V = Builder->CreateFSub(Opnd0, Opnd1); + Value *V = Builder.CreateFSub(Opnd0, Opnd1); if (Instruction *I = dyn_cast<Instruction>(V)) createInstPostProc(I); return V; @@ -750,21 +750,21 @@ Value *FAddCombine::createFNeg(Value *V) { } Value *FAddCombine::createFAdd(Value *Opnd0, Value *Opnd1) { - Value *V = Builder->CreateFAdd(Opnd0, Opnd1); + Value *V = Builder.CreateFAdd(Opnd0, Opnd1); if (Instruction *I = dyn_cast<Instruction>(V)) createInstPostProc(I); return V; } Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { - Value *V = Builder->CreateFMul(Opnd0, Opnd1); + Value *V = Builder.CreateFMul(Opnd0, Opnd1); if (Instruction *I = dyn_cast<Instruction>(V)) createInstPostProc(I); return V; } Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { - Value *V = Builder->CreateFDiv(Opnd0, Opnd1); + Value *V = Builder.CreateFDiv(Opnd0, Opnd1); if (Instruction *I = dyn_cast<Instruction>(V)) createInstPostProc(I); return V; @@ -895,7 +895,7 @@ bool InstCombiner::willNotOverflowUnsignedSub(const Value *LHS, // ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, ~C)) // XOR(AND(Z, C), (C + 1)) == NEG(OR(Z, ~C)) if C is even static Value *checkForNegativeOperand(BinaryOperator &I, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); // This function creates 2 instructions to replace ADD, we need at least one @@ -919,13 +919,13 @@ static Value *checkForNegativeOperand(BinaryOperator &I, // X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1)) // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1)) if (match(Y, m_Or(m_Value(Z), m_APInt(C2))) && (*C2 == ~(*C1))) { - Value *NewAnd = Builder->CreateAnd(Z, *C1); - return Builder->CreateSub(RHS, NewAnd, "sub"); + Value *NewAnd = Builder.CreateAnd(Z, *C1); + return Builder.CreateSub(RHS, NewAnd, "sub"); } else if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && (*C1 == *C2)) { // X = XOR(Y, C1), Y = AND(Z, C2), C2 == C1 ==> X == NOT(OR(Z, ~C1)) // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, OR(Z, ~C1)) - Value *NewOr = Builder->CreateOr(Z, ~(*C1)); - return Builder->CreateSub(RHS, NewOr, "sub"); + Value *NewOr = Builder.CreateOr(Z, ~(*C1)); + return Builder.CreateSub(RHS, NewOr, "sub"); } } } @@ -944,8 +944,8 @@ static Value *checkForNegativeOperand(BinaryOperator &I, if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1)))) if (C1->countTrailingZeros() == 0) if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && *C1 == (*C2 + 1)) { - Value *NewOr = Builder->CreateOr(Z, ~(*C2)); - return Builder->CreateSub(RHS, NewOr, "sub"); + Value *NewOr = Builder.CreateOr(Z, ~(*C2)); + return Builder.CreateSub(RHS, NewOr, "sub"); } return nullptr; } @@ -1027,7 +1027,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - if (Instruction *X = foldAddWithConstant(I, *Builder)) + if (Instruction *X = foldAddWithConstant(I, Builder)) return X; // FIXME: This should be moved into the above helper function to allow these @@ -1060,7 +1060,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (ExtendAmt) { Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt); - Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext"); + Value *NewShl = Builder.CreateShl(XorLHS, ShAmt, "sext"); return BinaryOperator::CreateAShr(NewShl, ShAmt); } @@ -1084,7 +1084,7 @@ 
Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Instruction *NV = foldOpWithConstantIntoOperand(I)) return NV; - if (I.getType()->getScalarType()->isIntegerTy(1)) + if (I.getType()->isIntOrIntVectorTy(1)) return BinaryOperator::CreateXor(LHS, RHS); // X + X --> X << 1 @@ -1101,7 +1101,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *LHSV = dyn_castNegVal(LHS)) { if (!isa<Constant>(RHS)) if (Value *RHSV = dyn_castNegVal(RHS)) { - Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); + Value *NewAdd = Builder.CreateAdd(LHSV, RHSV, "sum"); return BinaryOperator::CreateNeg(NewAdd); } @@ -1148,7 +1148,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (AddRHSHighBits == AddRHSHighBitsAnd) { // Okay, the xform is safe. Insert the new add pronto. - Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); + Value *NewAdd = Builder.CreateAdd(X, CRHS, LHS->getName()); return BinaryOperator::CreateAnd(NewAdd, C2); } } @@ -1191,7 +1191,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { willNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) { // Insert the new, smaller add. Value *NewAdd = - Builder->CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv"); + Builder.CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv"); return new SExtInst(NewAdd, I.getType()); } } @@ -1208,7 +1208,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { willNotOverflowSignedAdd(LHSConv->getOperand(0), RHSConv->getOperand(0), I)) { // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + Value *NewAdd = Builder.CreateNSWAdd(LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv"); return new SExtInst(NewAdd, I.getType()); } @@ -1227,7 +1227,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { willNotOverflowUnsignedAdd(LHSConv->getOperand(0), CI, I)) { // Insert the new, smaller add. Value *NewAdd = - Builder->CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv"); + Builder.CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv"); return new ZExtInst(NewAdd, I.getType()); } } @@ -1244,7 +1244,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { willNotOverflowUnsignedAdd(LHSConv->getOperand(0), RHSConv->getOperand(0), I)) { // Insert the new integer add. - Value *NewAdd = Builder->CreateNUWAdd( + Value *NewAdd = Builder.CreateNUWAdd( LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv"); return new ZExtInst(NewAdd, I.getType()); } @@ -1362,8 +1362,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { ConstantExpr::getSIToFP(CI, I.getType()) == CFP && willNotOverflowSignedAdd(LHSIntVal, CI, I)) { // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, - CI, "addconv"); + Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, CI, "addconv"); return new SIToFPInst(NewAdd, I.getType()); } } @@ -1381,8 +1380,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && willNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) { // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, - RHSIntVal, "addconv"); + Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, RHSIntVal, "addconv"); return new SIToFPInst(NewAdd, I.getType()); } } @@ -1480,14 +1478,14 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, // pointer, subtract it from the offset we have. 
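// [Editorial aside -- not from the patch] The new isIntOrIntVectorTy(1) form
// above is a direct replacement for getScalarType()->isIntegerTy(1); the
// folds it guards rely on mod-2 arithmetic, where add, sub and xor coincide
// for i1 values. Quick check in plain C:
//
//   for (int x = 0; x <= 1; ++x)
//     for (int y = 0; y <= 1; ++y)
//       assert(((x + y) & 1) == (x ^ y) && ((x - y) & 1) == (x ^ y));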
if (GEP2) { Value *Offset = EmitGEPOffset(GEP2); - Result = Builder->CreateSub(Result, Offset); + Result = Builder.CreateSub(Result, Offset); } // If we have p - gep(p, ...) then we have to negate the result. if (Swapped) - Result = Builder->CreateNeg(Result, "diff.neg"); + Result = Builder.CreateNeg(Result, "diff.neg"); - return Builder->CreateIntCast(Result, Ty, true); + return Builder.CreateIntCast(Result, Ty, true); } Instruction *InstCombiner::visitSub(BinaryOperator &I) { @@ -1522,7 +1520,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return Res; } - if (I.getType()->getScalarType()->isIntegerTy(1)) + if (I.getType()->isIntOrIntVectorTy(1)) return BinaryOperator::CreateXor(Op0, Op1); // Replace (-1 - A) with (~A). @@ -1552,12 +1550,12 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // Fold (sub 0, (zext bool to B)) --> (sext bool to B) if (C->isNullValue() && match(Op1, m_ZExt(m_Value(X)))) - if (X->getType()->getScalarType()->isIntegerTy(1)) + if (X->getType()->isIntOrIntVectorTy(1)) return CastInst::CreateSExtOrBitCast(X, Op1->getType()); // Fold (sub 0, (sext bool to B)) --> (zext bool to B) if (C->isNullValue() && match(Op1, m_SExt(m_Value(X)))) - if (X->getType()->getScalarType()->isIntegerTy(1)) + if (X->getType()->isIntOrIntVectorTy(1)) return CastInst::CreateZExtOrBitCast(X, Op1->getType()); } @@ -1615,7 +1613,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // ((X | Y) - X) --> (~X & Y) if (match(Op0, m_OneUse(m_c_Or(m_Value(Y), m_Specific(Op1))))) return BinaryOperator::CreateAnd( - Y, Builder->CreateNot(Op1, Op1->getName() + ".not")); + Y, Builder.CreateNot(Op1, Op1->getName() + ".not")); } if (Op1->hasOneUse()) { @@ -1625,13 +1623,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // (X - (Y - Z)) --> (X + (Z - Y)). if (match(Op1, m_Sub(m_Value(Y), m_Value(Z)))) return BinaryOperator::CreateAdd(Op0, - Builder->CreateSub(Z, Y, Op1->getName())); + Builder.CreateSub(Z, Y, Op1->getName())); // (X - (X & Y)) --> (X & ~Y) // if (match(Op1, m_c_And(m_Value(Y), m_Specific(Op0)))) return BinaryOperator::CreateAnd(Op0, - Builder->CreateNot(Y, Y->getName() + ".not")); + Builder.CreateNot(Y, Y->getName() + ".not")); // 0 - (X sdiv C) -> (X sdiv -C) provided the negation doesn't overflow. if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && match(Op0, m_Zero()) && @@ -1648,7 +1646,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // 'nuw' is dropped in favor of the canonical form. if (match(Op1, m_SExt(m_Value(Y))) && Y->getType()->getScalarSizeInBits() == 1) { - Value *Zext = Builder->CreateZExt(Y, I.getType()); + Value *Zext = Builder.CreateZExt(Y, I.getType()); BinaryOperator *Add = BinaryOperator::CreateAdd(Op0, Zext); Add->setHasNoSignedWrap(I.hasNoSignedWrap()); return Add; @@ -1659,13 +1657,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { Value *A, *B; Constant *CI; if (match(Op1, m_c_Mul(m_Value(A), m_Neg(m_Value(B))))) - return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B)); + return BinaryOperator::CreateAdd(Op0, Builder.CreateMul(A, B)); // X - A*CI -> X + A*-CI // No need to handle commuted multiply because multiply handling will // ensure constant will be move to the right hand side. 
if (match(Op1, m_Mul(m_Value(A), m_Constant(CI)))) { - Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI)); + Value *NewMul = Builder.CreateMul(A, ConstantExpr::getNeg(CI)); return BinaryOperator::CreateAdd(Op0, NewMul); } } @@ -1729,14 +1727,14 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { } if (FPTruncInst *FPTI = dyn_cast<FPTruncInst>(Op1)) { if (Value *V = dyn_castFNegVal(FPTI->getOperand(0))) { - Value *NewTrunc = Builder->CreateFPTrunc(V, I.getType()); + Value *NewTrunc = Builder.CreateFPTrunc(V, I.getType()); Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewTrunc); NewI->copyFastMathFlags(&I); return NewI; } } else if (FPExtInst *FPEI = dyn_cast<FPExtInst>(Op1)) { if (Value *V = dyn_castFNegVal(FPEI->getOperand(0))) { - Value *NewExt = Builder->CreateFPExt(V, I.getType()); + Value *NewExt = Builder.CreateFPExt(V, I.getType()); Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewExt); NewI->copyFastMathFlags(&I); return NewI; diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index db98be2c98f51..773c86e23707f 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -54,17 +54,17 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC) { /// instruction. The sign is passed in to determine which kind of predicate to /// use in the new icmp instruction. static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { ICmpInst::Predicate NewPred; if (Value *NewConstant = getICmpValue(Sign, Code, LHS, RHS, NewPred)) return NewConstant; - return Builder->CreateICmp(NewPred, LHS, RHS); + return Builder.CreateICmp(NewPred, LHS, RHS); } /// This is the complement of getFCmpCode, which turns an opcode and two /// operands into either a FCmp instruction, or a true/false constant. static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { const auto Pred = static_cast<FCmpInst::Predicate>(Code); assert(FCmpInst::FCMP_FALSE <= Pred && Pred <= FCmpInst::FCMP_TRUE && "Unexpected FCmp predicate!"); @@ -72,53 +72,45 @@ static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS, return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); if (Pred == FCmpInst::FCMP_TRUE) return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); - return Builder->CreateFCmp(Pred, LHS, RHS); + return Builder.CreateFCmp(Pred, LHS, RHS); } -/// \brief Transform BITWISE_OP(BSWAP(A),BSWAP(B)) to BSWAP(BITWISE_OP(A, B)) +/// \brief Transform BITWISE_OP(BSWAP(A),BSWAP(B)) or +/// BITWISE_OP(BSWAP(A), Constant) to BSWAP(BITWISE_OP(A, B)) /// \param I Binary operator to transform. /// \return Pointer to node that must replace the original binary operator, or /// null pointer if no transformation was made. -Value *InstCombiner::SimplifyBSwap(BinaryOperator &I) { - IntegerType *ITy = dyn_cast<IntegerType>(I.getType()); - - // Can't do vectors. - if (I.getType()->isVectorTy()) - return nullptr; - - // Can only do bitwise ops. 
- if (!I.isBitwiseLogicOp()) - return nullptr; +static Value *SimplifyBSwap(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + assert(I.isBitwiseLogicOp() && "Unexpected opcode for bswap simplifying"); Value *OldLHS = I.getOperand(0); Value *OldRHS = I.getOperand(1); - ConstantInt *ConstLHS = dyn_cast<ConstantInt>(OldLHS); - ConstantInt *ConstRHS = dyn_cast<ConstantInt>(OldRHS); - IntrinsicInst *IntrLHS = dyn_cast<IntrinsicInst>(OldLHS); - IntrinsicInst *IntrRHS = dyn_cast<IntrinsicInst>(OldRHS); - bool IsBswapLHS = (IntrLHS && IntrLHS->getIntrinsicID() == Intrinsic::bswap); - bool IsBswapRHS = (IntrRHS && IntrRHS->getIntrinsicID() == Intrinsic::bswap); - - if (!IsBswapLHS && !IsBswapRHS) - return nullptr; - - if (!IsBswapLHS && !ConstLHS) - return nullptr; - if (!IsBswapRHS && !ConstRHS) + Value *NewLHS; + if (!match(OldLHS, m_BSwap(m_Value(NewLHS)))) return nullptr; - /// OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) ) - /// OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) ) - Value *NewLHS = IsBswapLHS ? IntrLHS->getOperand(0) : - Builder->getInt(ConstLHS->getValue().byteSwap()); + Value *NewRHS; + const APInt *C; - Value *NewRHS = IsBswapRHS ? IntrRHS->getOperand(0) : - Builder->getInt(ConstRHS->getValue().byteSwap()); + if (match(OldRHS, m_BSwap(m_Value(NewRHS)))) { + // OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) ) + if (!OldLHS->hasOneUse() && !OldRHS->hasOneUse()) + return nullptr; + // NewRHS initialized by the matcher. + } else if (match(OldRHS, m_APInt(C))) { + // OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) ) + if (!OldLHS->hasOneUse()) + return nullptr; + NewRHS = ConstantInt::get(I.getType(), C->byteSwap()); + } else + return nullptr; - Value *BinOp = Builder->CreateBinOp(I.getOpcode(), NewLHS, NewRHS); - Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap, ITy); - return Builder->CreateCall(F, BinOp); + Value *BinOp = Builder.CreateBinOp(I.getOpcode(), NewLHS, NewRHS); + Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap, + I.getType()); + return Builder.CreateCall(F, BinOp); } /// This handles expressions of the form ((val OP C1) & C2). Where @@ -137,7 +129,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, case Instruction::Xor: if (Op->hasOneUse()) { // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) - Value *And = Builder->CreateAnd(X, AndRHS); + Value *And = Builder.CreateAnd(X, AndRHS); And->takeName(Op); return BinaryOperator::CreateXor(And, Together); } @@ -150,7 +142,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, // NOTE: This reduces the number of bits set in the & mask, which // can expose opportunities for store narrowing. Together = ConstantExpr::getXor(AndRHS, Together); - Value *And = Builder->CreateAnd(X, Together); + Value *And = Builder.CreateAnd(X, Together); And->takeName(Op); return BinaryOperator::CreateOr(And, OpRHS); } @@ -182,7 +174,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, return &TheAnd; } else { // Pull the XOR out of the AND. 
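// [Editorial aside -- not from the patch] The SimplifyBSwap rewrite above
// keys on byte-swapping being a pure byte permutation, while &, |, ^ act
// bit-by-bit, so the permutation commutes with the operation:
//   bswap(a) OP bswap(b) == bswap(a OP b)
// and, substituting b = bswap(C) for a constant C:
//   bswap(a) OP C == bswap(a OP bswap(C))
// For example, with a = 0x11223344 and C = 0x000000FF:
//   bswap(a) & C == 0x00000011 == bswap(a & 0xFF000000).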
- Value *NewAnd = Builder->CreateAnd(X, AndRHS); + Value *NewAnd = Builder.CreateAnd(X, AndRHS); NewAnd->takeName(Op); return BinaryOperator::CreateXor(NewAnd, AndRHS); } @@ -198,7 +190,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, uint32_t BitWidth = AndRHS->getType()->getBitWidth(); uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal)); - ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShlMask); + ConstantInt *CI = Builder.getInt(AndRHS->getValue() & ShlMask); if (CI->getValue() == ShlMask) // Masking out bits that the shift already masks. @@ -218,7 +210,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, uint32_t BitWidth = AndRHS->getType()->getBitWidth(); uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); - ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShrMask); + ConstantInt *CI = Builder.getInt(AndRHS->getValue() & ShrMask); if (CI->getValue() == ShrMask) // Masking out bits that the shift already masks. @@ -238,12 +230,12 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, uint32_t BitWidth = AndRHS->getType()->getBitWidth(); uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); - Constant *C = Builder->getInt(AndRHS->getValue() & ShrMask); + Constant *C = Builder.getInt(AndRHS->getValue() & ShrMask); if (C == AndRHS) { // Masking out bits shifted in. // (Val ashr C1) & C2 -> (Val lshr C1) & C2 // Make the argument unsigned. Value *ShVal = Op->getOperand(0); - ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName()); + ShVal = Builder.CreateLShr(ShVal, OpRHS, Op->getName()); return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName()); } } @@ -269,15 +261,15 @@ Value *InstCombiner::insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi, ICmpInst::Predicate Pred = Inside ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE; if (isSigned ? Lo.isMinSignedValue() : Lo.isMinValue()) { Pred = isSigned ? ICmpInst::getSignedPredicate(Pred) : Pred; - return Builder->CreateICmp(Pred, V, ConstantInt::get(Ty, Hi)); + return Builder.CreateICmp(Pred, V, ConstantInt::get(Ty, Hi)); } // V >= Lo && V < Hi --> V - Lo u< Hi - Lo // V < Lo || V >= Hi --> V - Lo u>= Hi - Lo Value *VMinusLo = - Builder->CreateSub(V, ConstantInt::get(Ty, Lo), V->getName() + ".off"); + Builder.CreateSub(V, ConstantInt::get(Ty, Lo), V->getName() + ".off"); Constant *HiMinusLo = ConstantInt::get(Ty, Hi - Lo); - return Builder->CreateICmp(Pred, VMinusLo, HiMinusLo); + return Builder.CreateICmp(Pred, VMinusLo, HiMinusLo); } /// Classify (icmp eq (A & B), C) and (icmp ne (A & B), C) as matching patterns @@ -523,7 +515,7 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C, /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) /// into a single (icmp(A & X) ==/!= Y). 
static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, - llvm::InstCombiner::BuilderTy *Builder) { + llvm::InstCombiner::BuilderTy &Builder) { Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr; ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); unsigned Mask = @@ -556,27 +548,27 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, if (Mask & Mask_AllZeros) { // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) // -> (icmp eq (A & (B|D)), 0) - Value *NewOr = Builder->CreateOr(B, D); - Value *NewAnd = Builder->CreateAnd(A, NewOr); + Value *NewOr = Builder.CreateOr(B, D); + Value *NewAnd = Builder.CreateAnd(A, NewOr); // We can't use C as zero because we might actually handle // (icmp ne (A & B), B) & (icmp ne (A & D), D) // with B and D, having a single bit set. Value *Zero = Constant::getNullValue(A->getType()); - return Builder->CreateICmp(NewCC, NewAnd, Zero); + return Builder.CreateICmp(NewCC, NewAnd, Zero); } if (Mask & BMask_AllOnes) { // (icmp eq (A & B), B) & (icmp eq (A & D), D) // -> (icmp eq (A & (B|D)), (B|D)) - Value *NewOr = Builder->CreateOr(B, D); - Value *NewAnd = Builder->CreateAnd(A, NewOr); - return Builder->CreateICmp(NewCC, NewAnd, NewOr); + Value *NewOr = Builder.CreateOr(B, D); + Value *NewAnd = Builder.CreateAnd(A, NewOr); + return Builder.CreateICmp(NewCC, NewAnd, NewOr); } if (Mask & AMask_AllOnes) { // (icmp eq (A & B), A) & (icmp eq (A & D), A) // -> (icmp eq (A & (B&D)), A) - Value *NewAnd1 = Builder->CreateAnd(B, D); - Value *NewAnd2 = Builder->CreateAnd(A, NewAnd1); - return Builder->CreateICmp(NewCC, NewAnd2, A); + Value *NewAnd1 = Builder.CreateAnd(B, D); + Value *NewAnd2 = Builder.CreateAnd(A, NewAnd1); + return Builder.CreateICmp(NewCC, NewAnd2, A); } // Remaining cases assume at least that B and D are constant, and depend on @@ -644,10 +636,10 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, (CCst->getValue() ^ ECst->getValue())).getBoolValue()) return ConstantInt::get(LHS->getType(), !IsAnd); - Value *NewOr1 = Builder->CreateOr(B, D); + Value *NewOr1 = Builder.CreateOr(B, D); Value *NewOr2 = ConstantExpr::getOr(CCst, ECst); - Value *NewAnd = Builder->CreateAnd(A, NewOr1); - return Builder->CreateICmp(NewCC, NewAnd, NewOr2); + Value *NewAnd = Builder.CreateAnd(A, NewOr1); + return Builder.CreateICmp(NewCC, NewAnd, NewOr2); } return nullptr; @@ -705,13 +697,13 @@ Value *InstCombiner::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, if (Inverted) NewPred = ICmpInst::getInversePredicate(NewPred); - return Builder->CreateICmp(NewPred, Input, RangeEnd); + return Builder.CreateICmp(NewPred, Input, RangeEnd); } static Value * foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS, bool JoinedByAnd, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { Value *X = LHS->getOperand(0); if (X != RHS->getOperand(0)) return nullptr; @@ -742,8 +734,8 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS, // (X != C1 && X != C2) --> (X | (C1 ^ C2)) != C2 // We choose an 'or' with a Pow2 constant rather than the inverse mask with // 'and' because that may lead to smaller codegen from a smaller constant. 
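// [Editorial check -- hypothetical constants, not from the patch] For the
// pow2-xor fold above, take C1 = 8 and C2 = 9, so C1 ^ C2 == 1 is a power
// of two: (X | 1) == 9 holds exactly for X == 8 or X == 9, collapsing the
// two equality tests into a single compare. The (X != C1 && X != C2) form
// is its De Morgan dual.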
- Value *Or = Builder->CreateOr(X, ConstantInt::get(X->getType(), Xor)); - return Builder->CreateICmp(Pred, Or, ConstantInt::get(X->getType(), *C2)); + Value *Or = Builder.CreateOr(X, ConstantInt::get(X->getType(), Xor)); + return Builder.CreateICmp(Pred, Or, ConstantInt::get(X->getType(), *C2)); } // Special case: get the ordering right when the values wrap around zero. @@ -755,9 +747,9 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS, // (X == 13 || X == 14) --> X - 13 <=u 1 // (X != 13 && X != 14) --> X - 13 >u 1 // An 'add' is the canonical IR form, so favor that over a 'sub'. - Value *Add = Builder->CreateAdd(X, ConstantInt::get(X->getType(), -(*C1))); + Value *Add = Builder.CreateAdd(X, ConstantInt::get(X->getType(), -(*C1))); auto NewPred = JoinedByAnd ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULE; - return Builder->CreateICmp(NewPred, Add, ConstantInt::get(X->getType(), 1)); + return Builder.CreateICmp(NewPred, Add, ConstantInt::get(X->getType(), 1)); } return nullptr; @@ -793,10 +785,10 @@ Value *InstCombiner::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS, if (A == C && isKnownToBeAPowerOfTwo(B, false, 0, &CxtI) && isKnownToBeAPowerOfTwo(D, false, 0, &CxtI)) { - Value *Mask = Builder->CreateOr(B, D); - Value *Masked = Builder->CreateAnd(A, Mask); + Value *Mask = Builder.CreateOr(B, D); + Value *Masked = Builder.CreateAnd(A, Mask); auto NewPred = JoinedByAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; - return Builder->CreateICmp(NewPred, Masked, Mask); + return Builder.CreateICmp(NewPred, Masked, Mask); } } @@ -855,8 +847,8 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) if ((PredL == ICmpInst::ICMP_ULT && LHSC->getValue().isPowerOf2()) || (PredL == ICmpInst::ICMP_EQ && LHSC->isZero())) { - Value *NewOr = Builder->CreateOr(LHS0, RHS0); - return Builder->CreateICmp(PredL, NewOr, LHSC); + Value *NewOr = Builder.CreateOr(LHS0, RHS0); + return Builder.CreateICmp(PredL, NewOr, LHSC); } } @@ -888,10 +880,10 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize); if ((Low & AndC->getValue()).isNullValue() && (Low & BigC->getValue()).isNullValue()) { - Value *NewAnd = Builder->CreateAnd(V, Low | AndC->getValue()); + Value *NewAnd = Builder.CreateAnd(V, Low | AndC->getValue()); APInt N = SmallC->getValue().zext(BigBitSize) | BigC->getValue(); Value *NewVal = ConstantInt::get(AndC->getType()->getContext(), N); - return Builder->CreateICmp(PredL, NewAnd, NewVal); + return Builder.CreateICmp(PredL, NewAnd, NewVal); } } } @@ -943,14 +935,14 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_ULT: if (LHSC == SubOne(RHSC)) // (X != 13 & X u< 14) -> X < 13 - return Builder->CreateICmpULT(LHS0, LHSC); - if (LHSC->isNullValue()) // (X != 0 & X u< 14) -> X-1 u< 13 + return Builder.CreateICmpULT(LHS0, LHSC); + if (LHSC->isZero()) // (X != 0 & X u< 14) -> X-1 u< 13 return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), false, true); break; // (X != 13 & X u< 15) -> no change case ICmpInst::ICMP_SLT: if (LHSC == SubOne(RHSC)) // (X != 13 & X s< 14) -> X < 13 - return Builder->CreateICmpSLT(LHS0, LHSC); + return Builder.CreateICmpSLT(LHS0, LHSC); break; // (X != 13 & X s< 15) -> no change case ICmpInst::ICMP_NE: // Potential folds for this case should already be handled. 
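insertRangeTest, used by several of the foldAndOfICmps cases above, turns a
two-compare range check into one unsigned compare by rebasing the range at
zero: V >= Lo && V < Hi becomes (V - Lo) u< (Hi - Lo), with wrap-around
handled for free. A standalone check of the identity (plain C++, not from the
patch):

    #include <cassert>
    #include <cstdint>

    // Unsigned rebase: maps [Lo, Hi) onto [0, Hi - Lo) modulo 2^32.
    static bool inRange(uint32_t V, uint32_t Lo, uint32_t Hi) {
      return V - Lo < Hi - Lo;
    }

    int main() {
      // (X u> 13 & X u< 15) -> (X - 14) u< 1, i.e. X == 14.
      assert(!inRange(13, 14, 15) && inRange(14, 14, 15) &&
             !inRange(15, 14, 15));
      // Wrap-around: (X == 0xFFFFFFFF || X == 0) is (X - 0xFFFFFFFF) u< 2.
      assert(inRange(UINT32_MAX, UINT32_MAX, 1) && inRange(0, UINT32_MAX, 1) &&
             !inRange(1, UINT32_MAX, 1));
      return 0;
    }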
@@ -963,7 +955,7 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_NE: if (RHSC == AddOne(LHSC)) // (X u> 13 & X != 14) -> X u> 14 - return Builder->CreateICmp(PredL, LHS0, RHSC); + return Builder.CreateICmp(PredL, LHS0, RHSC); break; // (X u> 13 & X != 15) -> no change case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1 return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), @@ -976,7 +968,7 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_NE: if (RHSC == AddOne(LHSC)) // (X s> 13 & X != 14) -> X s> 14 - return Builder->CreateICmp(PredL, LHS0, RHSC); + return Builder.CreateICmp(PredL, LHS0, RHSC); break; // (X s> 13 & X != 15) -> no change case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), true, @@ -1025,15 +1017,15 @@ Value *InstCombiner::foldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { // If either of the constants are nans, then the whole thing returns // false. if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return Builder->getFalse(); - return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); + return Builder.getFalse(); + return Builder.CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); } // Handle vector zeros. This occurs because the canonical form of // "fcmp ord x,x" is "fcmp ord x, 0". if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && isa<ConstantAggregateZero>(RHS->getOperand(1))) - return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); + return Builder.CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); return nullptr; } @@ -1088,7 +1080,7 @@ bool InstCombiner::shouldOptimizeCast(CastInst *CI) { /// Fold {and,or,xor} (cast X), C. 
static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { Constant *C; if (!match(Logic.getOperand(1), m_Constant(C))) return nullptr; @@ -1107,7 +1099,7 @@ static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast, Constant *ZextTruncC = ConstantExpr::getZExt(TruncC, DestTy); if (ZextTruncC == C) { // LogicOpc (zext X), C --> zext (LogicOpc X, C) - Value *NewOp = Builder->CreateBinOp(LogicOpc, X, TruncC); + Value *NewOp = Builder.CreateBinOp(LogicOpc, X, TruncC); return new ZExtInst(NewOp, DestTy); } } @@ -1150,7 +1142,7 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) { // fold logic(cast(A), cast(B)) -> cast(logic(A, B)) if (shouldOptimizeCast(Cast0) && shouldOptimizeCast(Cast1)) { - Value *NewOp = Builder->CreateBinOp(LogicOpc, Cast0Src, Cast1Src, + Value *NewOp = Builder.CreateBinOp(LogicOpc, Cast0Src, Cast1Src, I.getName()); return CastInst::Create(CastOpcode, NewOp, DestTy); } @@ -1196,15 +1188,14 @@ static Instruction *foldBoolSextMaskToSelect(BinaryOperator &I) { // Fold (and (sext bool to A), B) --> (select bool, B, 0) Value *X = nullptr; - if (match(Op0, m_SExt(m_Value(X))) && - X->getType()->getScalarType()->isIntegerTy(1)) { + if (match(Op0, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) { Value *Zero = Constant::getNullValue(Op1->getType()); return SelectInst::Create(X, Op1, Zero); } // Fold (and ~(sext bool to A), B) --> (select bool, 0, B) if (match(Op0, m_Not(m_SExt(m_Value(X)))) && - X->getType()->getScalarType()->isIntegerTy(1)) { + X->getType()->isIntOrIntVectorTy(1)) { Value *Zero = Constant::getNullValue(Op0->getType()); return SelectInst::Create(X, Zero, Op1); } @@ -1283,14 +1274,14 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return &I; // Do this before using distributive laws to catch simple and/or/not patterns. - if (Instruction *Xor = foldAndToXor(I, *Builder)) + if (Instruction *Xor = foldAndToXor(I, Builder)) return Xor; // (A|B)&(A|C) -> A|(B&C) etc if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyBSwap(I)) + if (Value *V = SimplifyBSwap(I, Builder)) return replaceInstUsesWith(I, V); if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) { @@ -1310,15 +1301,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { APInt NotAndRHS(~AndRHSMask); if (MaskedValueIsZero(Op0LHS, NotAndRHS, 0, &I)) { // Not masking anything out for the LHS, move to RHS. - Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, - Op0RHS->getName()+".masked"); + Value *NewRHS = Builder.CreateAnd(Op0RHS, AndRHS, + Op0RHS->getName()+".masked"); return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); } if (!isa<Constant>(Op0RHS) && MaskedValueIsZero(Op0RHS, NotAndRHS, 0, &I)) { // Not masking anything out for the RHS, move to LHS. 
- Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, - Op0LHS->getName()+".masked"); + Value *NewLHS = Builder.CreateAnd(Op0LHS, AndRHS, + Op0LHS->getName()+".masked"); return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); } @@ -1337,7 +1328,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // (1 >> x) & 1 --> zext(x == 0) if (AndRHSMask.isOneValue() && Op0LHS == AndRHS) { Value *NewICmp = - Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); + Builder.CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); return new ZExtInst(NewICmp, I.getType()); } break; @@ -1360,11 +1351,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { auto *TruncC1 = ConstantExpr::getTrunc(C1, X->getType()); Value *BinOp; if (isa<ZExtInst>(Op0LHS)) - BinOp = Builder->CreateBinOp(Op0I->getOpcode(), X, TruncC1); + BinOp = Builder.CreateBinOp(Op0I->getOpcode(), X, TruncC1); else - BinOp = Builder->CreateBinOp(Op0I->getOpcode(), TruncC1, X); + BinOp = Builder.CreateBinOp(Op0I->getOpcode(), TruncC1, X); auto *TruncC2 = ConstantExpr::getTrunc(AndRHS, X->getType()); - auto *And = Builder->CreateAnd(BinOp, TruncC2); + auto *And = Builder.CreateAnd(BinOp, TruncC2); return new ZExtInst(And, I.getType()); } } @@ -1384,7 +1375,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // into : and (trunc X to T), trunc(YC) & C2 // This will fold the two constants together, which may allow // other simplifications. - Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk"); + Value *NewCast = Builder.CreateTrunc(X, I.getType(), "and.shrunk"); Constant *C3 = ConstantExpr::getTrunc(YC, I.getType()); C3 = ConstantExpr::getAnd(C3, AndRHS); return BinaryOperator::CreateAnd(NewCast, C3); @@ -1396,7 +1387,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I)) return FoldedLogic; - if (Instruction *DeMorgan = matchDeMorgansLaws(I, *Builder)) + if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder)) return DeMorgan; { @@ -1422,7 +1413,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // an endless loop. By checking that A is non-constant we ensure that // we will never get to the loop. 
if (A == tmpOp0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(B)); + return BinaryOperator::CreateAnd(A, Builder.CreateNot(B)); } } @@ -1436,13 +1427,13 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) if (Op1->hasOneUse() || IsFreeToInvert(C, C->hasOneUse())) - return BinaryOperator::CreateAnd(Op0, Builder->CreateNot(C)); + return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(C)); // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B)))) if (match(Op1, m_Xor(m_Specific(B), m_Specific(A)))) if (Op0->hasOneUse() || IsFreeToInvert(C, C->hasOneUse())) - return BinaryOperator::CreateAnd(Op1, Builder->CreateNot(C)); + return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(C)); // (A | B) & ((~A) ^ B) -> (A & B) // (A | B) & (B ^ (~A)) -> (A & B) @@ -1474,18 +1465,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast<ICmpInst>(X)) if (Value *Res = foldAndOfICmps(LHS, Cmp, I)) - return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y)); + return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y)); if (auto *Cmp = dyn_cast<ICmpInst>(Y)) if (Value *Res = foldAndOfICmps(LHS, Cmp, I)) - return replaceInstUsesWith(I, Builder->CreateAnd(Res, X)); + return replaceInstUsesWith(I, Builder.CreateAnd(Res, X)); } if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast<ICmpInst>(X)) if (Value *Res = foldAndOfICmps(Cmp, RHS, I)) - return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y)); + return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y)); if (auto *Cmp = dyn_cast<ICmpInst>(Y)) if (Value *Res = foldAndOfICmps(Cmp, RHS, I)) - return replaceInstUsesWith(I, Builder->CreateAnd(Res, X)); + return replaceInstUsesWith(I, Builder.CreateAnd(Res, X)); } } @@ -1567,14 +1558,14 @@ static Value *getSelectCondition(Value *A, Value *B, InstCombiner::BuilderTy &Builder) { // If these are scalars or vectors of i1, A can be used directly. Type *Ty = A->getType(); - if (match(A, m_Not(m_Specific(B))) && Ty->getScalarType()->isIntegerTy(1)) + if (match(A, m_Not(m_Specific(B))) && Ty->isIntOrIntVectorTy(1)) return A; // If A and B are sign-extended, look through the sexts to find the booleans. Value *Cond; Value *NotB; if (match(A, m_SExt(m_Value(Cond))) && - Cond->getType()->getScalarType()->isIntegerTy(1) && + Cond->getType()->isIntOrIntVectorTy(1) && match(B, m_OneUse(m_Not(m_Value(NotB))))) { NotB = peekThroughBitcast(NotB, true); if (match(NotB, m_SExt(m_Specific(Cond)))) @@ -1596,7 +1587,7 @@ static Value *getSelectCondition(Value *A, Value *B, // operand, see if the constants are inverse bitmasks. 
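// [Editorial aside -- not from the patch] The inverse-bitmask recognition
// above is what lets an and/or pair become a select: when M = sext(Cond) is
// all-ones or all-zeros, (B & M) | (D & ~M) yields B when Cond is true and
// D when Cond is false -- exactly select(Cond, B, D).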
if (match(A, (m_Xor(m_SExt(m_Value(Cond)), m_Constant(AC)))) && match(B, (m_Xor(m_SExt(m_Specific(Cond)), m_Constant(BC)))) && - Cond->getType()->getScalarType()->isIntegerTy(1) && + Cond->getType()->isIntOrIntVectorTy(1) && areInverseVectorBitmasks(AC, BC)) { AC = ConstantExpr::getTrunc(AC, CmpInst::makeCmpResultType(Ty)); return Builder.CreateXor(Cond, AC); @@ -1687,9 +1678,9 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, RangeDiff.ugt(LHSC->getValue())) { Value *MaskC = ConstantInt::get(LAddC->getType(), ~DiffC); - Value *NewAnd = Builder->CreateAnd(LAddOpnd, MaskC); - Value *NewAdd = Builder->CreateAdd(NewAnd, MaxAddC); - return (Builder->CreateICmp(LHS->getPredicate(), NewAdd, LHSC)); + Value *NewAnd = Builder.CreateAnd(LAddOpnd, MaskC); + Value *NewAdd = Builder.CreateAdd(NewAnd, MaxAddC); + return Builder.CreateICmp(LHS->getPredicate(), NewAdd, LHSC); } } } @@ -1736,9 +1727,9 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, A = LHS->getOperand(1); } if (A && B) - return Builder->CreateICmp( + return Builder.CreateICmp( ICmpInst::ICMP_UGE, - Builder->CreateAdd(B, ConstantInt::getSigned(B->getType(), -1)), A); + Builder.CreateAdd(B, ConstantInt::getSigned(B->getType(), -1)), A); } // E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n @@ -1759,8 +1750,8 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, if (LHSC == RHSC && PredL == PredR) { // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) if (PredL == ICmpInst::ICMP_NE && LHSC->isZero()) { - Value *NewOr = Builder->CreateOr(LHS0, RHS0); - return Builder->CreateICmp(PredL, NewOr, LHSC); + Value *NewOr = Builder.CreateOr(LHS0, RHS0); + return Builder.CreateICmp(PredL, NewOr, LHSC); } } @@ -1770,7 +1761,7 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, ConstantInt *AddC; if (match(LHS0, m_Add(m_Specific(RHS0), m_ConstantInt(AddC)))) if (RHSC->getValue() + AddC->getValue() == LHSC->getValue()) - return Builder->CreateICmpULE(LHS0, LHSC); + return Builder.CreateICmpULE(LHS0, LHSC); } // From here on, we only handle: @@ -1886,18 +1877,18 @@ Value *InstCombiner::foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { // If either of the constants are nans, then the whole thing returns // true. if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return Builder->getTrue(); + return Builder.getTrue(); // Otherwise, no need to compare the two constants, compare the // rest. - return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); + return Builder.CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); } // Handle vector zeros. This occurs because the canonical form of // "fcmp uno x,x" is "fcmp uno x, 0". if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && isa<ConstantAggregateZero>(RHS->getOperand(1))) - return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); + return Builder.CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); return nullptr; } @@ -1916,7 +1907,7 @@ Value *InstCombiner::foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { /// when the XOR of the two constants is "all ones" (-1). 
static Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A, Value *B, Value *C, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { ConstantInt *CI1 = dyn_cast<ConstantInt>(C); if (!CI1) return nullptr; @@ -1928,7 +1919,7 @@ static Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, if (!Xor.isAllOnesValue()) return nullptr; if (V1 == A || V1 == B) { - Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1); + Value *NewOp = Builder.CreateAnd((V1 == A) ? B : A, CI1); return BinaryOperator::CreateOr(NewOp, V1); } @@ -1946,7 +1937,7 @@ static Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, /// when the XOR of the two constants is "all ones" (-1). static Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, Value *A, Value *B, Value *C, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { ConstantInt *CI1 = dyn_cast<ConstantInt>(C); if (!CI1) return nullptr; @@ -1961,7 +1952,7 @@ static Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, return nullptr; if (V1 == A || V1 == B) { - Value *NewOp = Builder->CreateAnd(V1 == A ? B : A, CI1); + Value *NewOp = Builder.CreateAnd(V1 == A ? B : A, CI1); return BinaryOperator::CreateXor(NewOp, V1); } @@ -1987,14 +1978,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return &I; // Do this before using distributive laws to catch simple and/or/not patterns. - if (Instruction *Xor = foldOrToXor(I, *Builder)) + if (Instruction *Xor = foldOrToXor(I, Builder)) return Xor; // (A&B)|(A&C) -> A&(B|C) etc if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyBSwap(I)) + if (Value *V = SimplifyBSwap(I, Builder)) return replaceInstUsesWith(I, V); if (isa<Constant>(Op1)) @@ -2011,7 +2002,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // (X^C)|Y -> (X|Y)^C iff Y&C == 0 if (match(Op0, m_OneUse(m_Xor(m_Value(A), m_APInt(C)))) && MaskedValueIsZero(Op1, *C, 0, &I)) { - Value *NOr = Builder->CreateOr(A, Op1); + Value *NOr = Builder.CreateOr(A, Op1); NOr->takeName(Op0); return BinaryOperator::CreateXor(NOr, ConstantInt::get(NOr->getType(), *C)); @@ -2020,7 +2011,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // Y|(X^C) -> (X|Y)^C iff Y&C == 0 if (match(Op1, m_OneUse(m_Xor(m_Value(A), m_APInt(C)))) && MaskedValueIsZero(Op0, *C, 0, &I)) { - Value *NOr = Builder->CreateOr(A, Op0); + Value *NOr = Builder.CreateOr(A, Op0); NOr->takeName(Op0); return BinaryOperator::CreateXor(NOr, ConstantInt::get(NOr->getType(), *C)); @@ -2058,7 +2049,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { (V2 == B && MaskedValueIsZero(V1, ~C1->getValue(), 0, &I)))) // (N|V) return BinaryOperator::CreateAnd(A, - Builder->getInt(C1->getValue()|C2->getValue())); + Builder.getInt(C1->getValue()|C2->getValue())); // Or commutes, try both ways. if (match(B, m_Or(m_Value(V1), m_Value(V2))) && ((V1 == A && @@ -2066,7 +2057,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { (V2 == A && MaskedValueIsZero(V1, ~C2->getValue(), 0, &I)))) // (N|V) return BinaryOperator::CreateAnd(B, - Builder->getInt(C1->getValue()|C2->getValue())); + Builder.getInt(C1->getValue()|C2->getValue())); // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2) // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0. 
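Both FoldOrWithConstants and FoldXorWithConstants above hinge on the two AND
masks being exact complements (their XOR is all-ones), so each result bit is
taken from exactly one side. A concrete check with hypothetical mask values:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t A = 0xAB, B = 0xCD;
      uint8_t C1 = 0xF0, C2 = 0x0F;         // C1 ^ C2 == 0xFF ("all ones")
      assert((uint8_t)(C1 ^ C2) == 0xFF);
      uint8_t Merged = (A & C1) | (B & C2); // high nibble from A, low from B
      assert(Merged == 0xAD);
      return 0;
    }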
@@ -2075,9 +2066,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { (C3->getValue() & ~C1->getValue()).isNullValue() && match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) && (C4->getValue() & ~C2->getValue()).isNullValue()) { - V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield"); + V2 = Builder.CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield"); return BinaryOperator::CreateAnd(V2, - Builder->getInt(C1->getValue()|C2->getValue())); + Builder.getInt(C1->getValue()|C2->getValue())); } } } @@ -2087,21 +2078,21 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // 'or' that it is replacing. if (Op0->hasOneUse() || Op1->hasOneUse()) { // (Cond & C) | (~Cond & D) -> Cond ? C : D, and commuted variants. - if (Value *V = matchSelectFromAndOr(A, C, B, D, *Builder)) + if (Value *V = matchSelectFromAndOr(A, C, B, D, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(A, C, D, B, *Builder)) + if (Value *V = matchSelectFromAndOr(A, C, D, B, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(C, A, B, D, *Builder)) + if (Value *V = matchSelectFromAndOr(C, A, B, D, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(C, A, D, B, *Builder)) + if (Value *V = matchSelectFromAndOr(C, A, D, B, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(B, D, A, C, *Builder)) + if (Value *V = matchSelectFromAndOr(B, D, A, C, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(B, D, C, A, *Builder)) + if (Value *V = matchSelectFromAndOr(B, D, C, A, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(D, B, A, C, *Builder)) + if (Value *V = matchSelectFromAndOr(D, B, A, C, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(D, B, C, A, *Builder)) + if (Value *V = matchSelectFromAndOr(D, B, C, A, Builder)) return replaceInstUsesWith(I, V); } @@ -2139,9 +2130,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // ((B | C) & A) | B -> B | (A & C) if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A)))) - return BinaryOperator::CreateOr(Op1, Builder->CreateAnd(A, C)); + return BinaryOperator::CreateOr(Op1, Builder.CreateAnd(A, C)); - if (Instruction *DeMorgan = matchDeMorgansLaws(I, *Builder)) + if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder)) return DeMorgan; // Canonicalize xor to the RHS. @@ -2163,11 +2154,11 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return BinaryOperator::CreateOr(A, B); if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) { - Value *Not = Builder->CreateNot(B, B->getName()+".not"); + Value *Not = Builder.CreateNot(B, B->getName() + ".not"); return BinaryOperator::CreateOr(Not, Op0); } if (Op1->hasOneUse() && match(B, m_Not(m_Specific(Op0)))) { - Value *Not = Builder->CreateNot(A, A->getName()+".not"); + Value *Not = Builder.CreateNot(A, A->getName() + ".not"); return BinaryOperator::CreateOr(Not, Op0); } } @@ -2181,7 +2172,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { B->getOpcode() == Instruction::Xor)) { Value *NotOp = Op0 == B->getOperand(0) ? B->getOperand(1) : B->getOperand(0); - Value *Not = Builder->CreateNot(NotOp, NotOp->getName()+".not"); + Value *Not = Builder.CreateNot(NotOp, NotOp->getName() + ".not"); return BinaryOperator::CreateOr(Not, Op0); } @@ -2194,7 +2185,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // xor was canonicalized to Op1 above. 
if (match(Op1, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) && match(Op0, m_c_And(m_Specific(A), m_Specific(B)))) - return BinaryOperator::CreateXor(Builder->CreateNot(A), B); + return BinaryOperator::CreateXor(Builder.CreateNot(A), B); if (SwappedForXor) std::swap(Op0, Op1); @@ -2212,18 +2203,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (LHS && match(Op1, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast<ICmpInst>(X)) if (Value *Res = foldOrOfICmps(LHS, Cmp, I)) - return replaceInstUsesWith(I, Builder->CreateOr(Res, Y)); + return replaceInstUsesWith(I, Builder.CreateOr(Res, Y)); if (auto *Cmp = dyn_cast<ICmpInst>(Y)) if (Value *Res = foldOrOfICmps(LHS, Cmp, I)) - return replaceInstUsesWith(I, Builder->CreateOr(Res, X)); + return replaceInstUsesWith(I, Builder.CreateOr(Res, X)); } if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast<ICmpInst>(X)) if (Value *Res = foldOrOfICmps(Cmp, RHS, I)) - return replaceInstUsesWith(I, Builder->CreateOr(Res, Y)); + return replaceInstUsesWith(I, Builder.CreateOr(Res, Y)); if (auto *Cmp = dyn_cast<ICmpInst>(Y)) if (Value *Res = foldOrOfICmps(Cmp, RHS, I)) - return replaceInstUsesWith(I, Builder->CreateOr(Res, X)); + return replaceInstUsesWith(I, Builder.CreateOr(Res, X)); } } @@ -2238,10 +2229,10 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // or(sext(A), B) / or(B, sext(A)) --> A ? -1 : B, where A is i1 or <N x i1>. if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) && - A->getType()->getScalarType()->isIntegerTy(1)) + A->getType()->isIntOrIntVectorTy(1)) return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1); if (match(Op1, m_OneUse(m_SExt(m_Value(A)))) && - A->getType()->getScalarType()->isIntegerTy(1)) + A->getType()->isIntOrIntVectorTy(1)) return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0); // Note: If we've gotten to the point of visiting the outer OR, then the @@ -2252,7 +2243,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { ConstantInt *C1; if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) && match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) { - Value *Inner = Builder->CreateOr(A, Op1); + Value *Inner = Builder.CreateOr(A, Op1); Inner->takeName(Op0); return BinaryOperator::CreateOr(Inner, C1); } @@ -2265,8 +2256,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Op0->hasOneUse() && Op1->hasOneUse() && match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) && match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) { - Value *orTrue = Builder->CreateOr(A, C); - Value *orFalse = Builder->CreateOr(B, D); + Value *orTrue = Builder.CreateOr(A, C); + Value *orFalse = Builder.CreateOr(B, D); return SelectInst::Create(X, orTrue, orFalse); } } @@ -2276,7 +2267,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { /// A ^ B can be specified using other logic ops in a variety of patterns. We /// can fold these early and efficiently by morphing an existing instruction. -static Instruction *foldXorToXor(BinaryOperator &I) { +static Instruction *foldXorToXor(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { assert(I.getOpcode() == Instruction::Xor); Value *Op0 = I.getOperand(0); Value *Op1 = I.getOperand(1); @@ -2323,6 +2315,21 @@ static Instruction *foldXorToXor(BinaryOperator &I) { return &I; } + // For the remaining cases we need to get rid of one of the operands. 
+ if (!Op0->hasOneUse() && !Op1->hasOneUse()) + return nullptr; + + // (A | B) ^ ~(A & B) -> ~(A ^ B) + // (A | B) ^ ~(B & A) -> ~(A ^ B) + // (A & B) ^ ~(A | B) -> ~(A ^ B) + // (A & B) ^ ~(B | A) -> ~(A ^ B) + // Complexity sorting ensures the not will be on the right side. + if ((match(Op0, m_Or(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_c_And(m_Specific(A), m_Specific(B))))) || + (match(Op0, m_And(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))) + return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); + return nullptr; } @@ -2355,12 +2362,12 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) { if (OrICmp == LHS && AndICmp == RHS && RHS->hasOneUse()) { // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS RHS->setPredicate(RHS->getInversePredicate()); - return Builder->CreateAnd(LHS, RHS); + return Builder.CreateAnd(LHS, RHS); } if (OrICmp == RHS && AndICmp == LHS && LHS->hasOneUse()) { // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS LHS->setPredicate(LHS->getInversePredicate()); - return Builder->CreateAnd(LHS, RHS); + return Builder.CreateAnd(LHS, RHS); } } } @@ -2381,7 +2388,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyXorInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); - if (Instruction *NewXor = foldXorToXor(I)) + if (Instruction *NewXor = foldXorToXor(I, Builder)) return NewXor; // (A&B)^(A&C) -> A&(B^C) etc @@ -2393,7 +2400,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (SimplifyDemandedInstructionBits(I)) return &I; - if (Value *V = SimplifyBSwap(I)) + if (Value *V = SimplifyBSwap(I, Builder)) return replaceInstUsesWith(I, V); // Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand. 
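
// A standalone check (plain C++, not the LLVM API) of the identity the new
// foldXorToXor case above relies on: (A | B) ^ ~(A & B) == ~(A ^ B), and
// likewise (A & B) ^ ~(A | B) == ~(A ^ B), for every bit pattern.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b) {
      uint8_t A = a, B = b, R = ~(A ^ B);
      assert((uint8_t)((A | B) ^ (uint8_t)~(A & B)) == R);
      assert((uint8_t)((A & B) ^ (uint8_t)~(A | B)) == R);
    }
  return 0;
}
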
@@ -2404,13 +2411,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // ~(~X & Y) --> (X | ~Y) // ~(Y & ~X) --> (X | ~Y) if (match(&I, m_Not(m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y)))))) { - Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not"); + Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not"); return BinaryOperator::CreateOr(X, NotY); } // ~(~X | Y) --> (X & ~Y) // ~(Y | ~X) --> (X & ~Y) if (match(&I, m_Not(m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y)))))) { - Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not"); + Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not"); return BinaryOperator::CreateAnd(X, NotY); } @@ -2426,8 +2433,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { NotVal->getOperand(0)->hasOneUse()) && IsFreeToInvert(NotVal->getOperand(1), NotVal->getOperand(1)->hasOneUse())) { - Value *NotX = Builder->CreateNot(NotVal->getOperand(0), "notlhs"); - Value *NotY = Builder->CreateNot(NotVal->getOperand(1), "notrhs"); + Value *NotX = Builder.CreateNot(NotVal->getOperand(0), "notlhs"); + Value *NotY = Builder.CreateNot(NotVal->getOperand(1), "notrhs"); if (NotVal->getOpcode() == Instruction::And) return BinaryOperator::CreateOr(NotX, NotY); return BinaryOperator::CreateAnd(NotX, NotY); @@ -2457,7 +2464,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { } // not (cmp A, B) = !cmp A, B - ICmpInst::Predicate Pred; + CmpInst::Predicate Pred; if (match(&I, m_Not(m_OneUse(m_Cmp(Pred, m_Value(), m_Value()))))) { cast<CmpInst>(Op0)->setPredicate(CmpInst::getInversePredicate(Pred)); return replaceInstUsesWith(I, Op0); @@ -2470,8 +2477,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (CI->hasOneUse() && Op0C->hasOneUse()) { Instruction::CastOps Opcode = Op0C->getOpcode(); if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && - (RHSC == ConstantExpr::getCast(Opcode, Builder->getTrue(), - Op0C->getDestTy()))) { + (RHSC == ConstantExpr::getCast(Opcode, Builder.getTrue(), + Op0C->getDestTy()))) { CI->setPredicate(CI->getInversePredicate()); return CastInst::Create(Opcode, CI, Op0C->getType()); } @@ -2481,7 +2488,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { // ~(c-X) == X-c-1 == X+(-c-1) - if (Op0I->getOpcode() == Instruction::Sub && RHSC->isAllOnesValue()) + if (Op0I->getOpcode() == Instruction::Sub && RHSC->isMinusOne()) if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) { Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C); return BinaryOperator::CreateAdd(Op0I->getOperand(1), @@ -2491,13 +2498,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { if (Op0I->getOpcode() == Instruction::Add) { // ~(X-c) --> (-c-1)-X - if (RHSC->isAllOnesValue()) { + if (RHSC->isMinusOne()) { Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); return BinaryOperator::CreateSub(SubOne(NegOp0CI), Op0I->getOperand(0)); } else if (RHSC->getValue().isSignMask()) { // (X + C) ^ signmask -> (X + C + signmask) - Constant *C = Builder->getInt(RHSC->getValue() + Op0CI->getValue()); + Constant *C = Builder.getInt(RHSC->getValue() + Op0CI->getValue()); return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); } @@ -2530,7 +2537,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { APInt FoldConst = C1->getValue().lshr(C2->getValue()); FoldConst ^= C3->getValue(); // Prepare the two operands. 
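
// A standalone check (plain C++, not the LLVM API) of the two's-complement
// identities used by the visitXor folds just above: since ~V == -V - 1,
// ~(C - X) == X + (-C - 1) and ~(X + C) == (-C - 1) - X.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned c = 0; c < 256; ++c)
    for (unsigned x = 0; x < 256; ++x) {
      uint8_t C = c, X = x;
      assert((uint8_t)~(uint8_t)(C - X) == (uint8_t)(X + (uint8_t)(-C - 1)));
      assert((uint8_t)~(uint8_t)(X + C) == (uint8_t)((uint8_t)(-C - 1) - X));
    }
  return 0;
}
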
- Value *Opnd0 = Builder->CreateLShr(E1->getOperand(0), C2); + Value *Opnd0 = Builder.CreateLShr(E1->getOperand(0), C2); Opnd0->takeName(Op0I); cast<Instruction>(Opnd0)->setDebugLoc(I.getDebugLoc()); Value *FoldVal = ConstantInt::get(Opnd0->getType(), FoldConst); @@ -2575,14 +2582,14 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (A == Op1) // (B|A)^B == (A|B)^B std::swap(A, B); if (B == Op1) // (A|B)^B == A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1)); + return BinaryOperator::CreateAnd(A, Builder.CreateNot(Op1)); } else if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B))))) { if (A == Op1) // (A&B)^A -> (B&A)^A std::swap(A, B); const APInt *C; if (B == Op1 && // (B&A)^A == ~B & A !match(Op1, m_APInt(C))) { // Canonical form is (B&C)^C - return BinaryOperator::CreateAnd(Builder->CreateNot(A), Op1); + return BinaryOperator::CreateAnd(Builder.CreateNot(A), Op1); } } } @@ -2594,20 +2601,20 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { match(Op1, m_Or(m_Value(A), m_Value(B)))) { if (D == A) return BinaryOperator::CreateXor( - Builder->CreateAnd(Builder->CreateNot(A), B), C); + Builder.CreateAnd(Builder.CreateNot(A), B), C); if (D == B) return BinaryOperator::CreateXor( - Builder->CreateAnd(Builder->CreateNot(B), A), C); + Builder.CreateAnd(Builder.CreateNot(B), A), C); } // (A | B)^(A ^ C) -> ((~A) & B) ^ C if (match(Op0, m_Or(m_Value(A), m_Value(B))) && match(Op1, m_Xor(m_Value(D), m_Value(C)))) { if (D == A) return BinaryOperator::CreateXor( - Builder->CreateAnd(Builder->CreateNot(A), B), C); + Builder.CreateAnd(Builder.CreateNot(A), B), C); if (D == B) return BinaryOperator::CreateXor( - Builder->CreateAnd(Builder->CreateNot(B), A), C); + Builder.CreateAnd(Builder.CreateNot(B), A), C); } // (A & B) ^ (A ^ B) -> (A | B) if (match(Op0, m_And(m_Value(A), m_Value(B))) && @@ -2624,7 +2631,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { Value *A, *B; if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) && match(Op1, m_Not(m_Specific(A)))) - return BinaryOperator::CreateNot(Builder->CreateAnd(A, B)); + return BinaryOperator::CreateNot(Builder.CreateAnd(A, B)); if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 3770021de1002..391c430dab75d 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -128,23 +128,23 @@ Instruction *InstCombiner::SimplifyElementUnorderedAtomicMemCpy( Type::getIntNPtrTy(AMI->getContext(), ElementSizeInBits, Src->getType()->getPointerAddressSpace()); - Value *SrcCasted = Builder->CreatePointerCast(Src, ElementPointerType, - "memcpy_unfold.src_casted"); - Value *DstCasted = Builder->CreatePointerCast(Dst, ElementPointerType, - "memcpy_unfold.dst_casted"); + Value *SrcCasted = Builder.CreatePointerCast(Src, ElementPointerType, + "memcpy_unfold.src_casted"); + Value *DstCasted = Builder.CreatePointerCast(Dst, ElementPointerType, + "memcpy_unfold.dst_casted"); for (uint64_t i = 0; i < NumElements; ++i) { // Get current element addresses ConstantInt *ElementIdxCI = ConstantInt::get(AMI->getContext(), APInt(64, i)); Value *SrcElementAddr = - Builder->CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr"); + Builder.CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr"); Value *DstElementAddr = - Builder->CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr"); + 
Builder.CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr"); // Load from the source. Transfer alignment information and mark load as // unordered atomic. - LoadInst *Load = Builder->CreateLoad(SrcElementAddr, "memcpy_unfold.val"); + LoadInst *Load = Builder.CreateLoad(SrcElementAddr, "memcpy_unfold.val"); Load->setOrdering(AtomicOrdering::Unordered); // We know alignment of the first element. It is also guaranteed by the // verifier that element size is less or equal than first element @@ -157,7 +157,7 @@ Instruction *InstCombiner::SimplifyElementUnorderedAtomicMemCpy( Load->setDebugLoc(AMI->getDebugLoc()); // Store loaded value via unordered atomic store. - StoreInst *Store = Builder->CreateStore(Load, DstElementAddr); + StoreInst *Store = Builder.CreateStore(Load, DstElementAddr); Store->setOrdering(AtomicOrdering::Unordered); Store->setAlignment(i == 0 ? AMI->getParamAlignment(0) : ElementSizeInBytes); @@ -213,7 +213,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) { if (M->getNumOperands() == 3 && M->getOperand(0) && mdconst::hasa<ConstantInt>(M->getOperand(0)) && - mdconst::extract<ConstantInt>(M->getOperand(0))->isNullValue() && + mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() && M->getOperand(1) && mdconst::hasa<ConstantInt>(M->getOperand(1)) && mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() == @@ -227,9 +227,9 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { SrcAlign = std::max(SrcAlign, CopyAlign); DstAlign = std::max(DstAlign, CopyAlign); - Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); - Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); - LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile()); + Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); + Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); + LoadInst *L = Builder.CreateLoad(Src, MI->isVolatile()); L->setAlignment(SrcAlign); if (CopyMD) L->setMetadata(LLVMContext::MD_tbaa, CopyMD); @@ -238,7 +238,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { if (LoopMemParallelMD) L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD); - StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile()); + StoreInst *S = Builder.CreateStore(L, Dest, MI->isVolatile()); S->setAlignment(DstAlign); if (CopyMD) S->setMetadata(LLVMContext::MD_tbaa, CopyMD); @@ -274,15 +274,15 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { Value *Dest = MI->getDest(); unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace(); Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp); - Dest = Builder->CreateBitCast(Dest, NewDstPtrTy); + Dest = Builder.CreateBitCast(Dest, NewDstPtrTy); // Alignment 0 is identity for alignment 1 for memset, but not store. if (Alignment == 0) Alignment = 1; // Extract the fill value and store. uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; - StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest, - MI->isVolatile()); + StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest, + MI->isVolatile()); S->setAlignment(Alignment); // Set the size of the copy to 0, it will be deleted on the next iteration. 
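
// A standalone illustration (plain C++, not the LLVM API) of the memset
// widening in SimplifyMemSet above: multiplying the fill byte by
// 0x0101010101010101 replicates it into all eight bytes of the 64-bit store.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  for (unsigned b = 0; b < 256; ++b) {
    uint64_t Fill = (uint64_t)b * 0x0101010101010101ULL;
    uint8_t Bytes[8];
    std::memcpy(Bytes, &Fill, 8);
    for (int i = 0; i < 8; ++i)
      assert(Bytes[i] == b);
  }
  return 0;
}
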
@@ -600,8 +600,7 @@ static Value *simplifyX86muldq(const IntrinsicInst &II, return Builder.CreateMul(LHS, RHS); } -static Value *simplifyX86pack(IntrinsicInst &II, InstCombiner &IC, - InstCombiner::BuilderTy &Builder, bool IsSigned) { +static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) { Value *Arg0 = II.getArgOperand(0); Value *Arg1 = II.getArgOperand(1); Type *ResTy = II.getType(); @@ -676,8 +675,7 @@ static Value *simplifyX86pack(IntrinsicInst &II, InstCombiner &IC, return ConstantVector::get(Vals); } -static Value *simplifyX86movmsk(const IntrinsicInst &II, - InstCombiner::BuilderTy &Builder) { +static Value *simplifyX86movmsk(const IntrinsicInst &II) { Value *Arg = II.getArgOperand(0); Type *ResTy = II.getType(); Type *ArgTy = Arg->getType(); @@ -860,7 +858,7 @@ static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0, } // Constant Fold - extraction from zero is always {zero, undef}. - if (CI0 && CI0->equalsInt(0)) + if (CI0 && CI0->isZero()) return LowConstantHighUndef(0); return nullptr; @@ -1404,7 +1402,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II, &IC.getDominatorTree())) { if (!match(II.getArgOperand(1), m_One())) { - II.setOperand(1, IC.Builder->getTrue()); + II.setOperand(1, IC.Builder.getTrue()); return &II; } } @@ -1477,7 +1475,7 @@ static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) { // the LLVM intrinsic definition for the pointer argument. unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace(); PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace); - Value *PtrCast = IC.Builder->CreateBitCast(Ptr, VecPtrTy, "castvec"); + Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec"); // Second, convert the x86 XMM integer vector mask to a vector of bools based // on each element's most significant bit (the sign bit). @@ -1485,7 +1483,7 @@ static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) { // The pass-through vector for an x86 masked load is a zero vector. CallInst *NewMaskedLoad = - IC.Builder->CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec); + IC.Builder.CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec); return IC.replaceInstUsesWith(II, NewMaskedLoad); } @@ -1520,13 +1518,13 @@ static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) { // the LLVM intrinsic definition for the pointer argument. unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace(); PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace); - Value *PtrCast = IC.Builder->CreateBitCast(Ptr, VecPtrTy, "castvec"); + Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec"); // Second, convert the x86 XMM integer vector mask to a vector of bools based // on each element's most significant bit (the sign bit). Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask); - IC.Builder->CreateMaskedStore(Vec, PtrCast, 1, BoolMask); + IC.Builder.CreateMaskedStore(Vec, PtrCast, 1, BoolMask); // 'Replace uses' doesn't work for stores. Erase the original masked store. IC.eraseInstFromFunction(II); @@ -1764,7 +1762,7 @@ static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) { // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but // need special handling. 
// - // We seem to be mising intrinsics for rcp.approx.{ftz.}f32, which is just + // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just // as well. case Intrinsic::nvvm_rcp_rn_d: return {SPC_Reciprocal, FTZ_Any}; @@ -1965,16 +1963,17 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *IIOperand = II->getArgOperand(0); Value *X = nullptr; + // TODO should this be in InstSimplify? // bswap(bswap(x)) -> x if (match(IIOperand, m_BSwap(m_Value(X)))) - return replaceInstUsesWith(CI, X); + return replaceInstUsesWith(CI, X); // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) { unsigned C = X->getType()->getPrimitiveSizeInBits() - IIOperand->getType()->getPrimitiveSizeInBits(); Value *CV = ConstantInt::get(X->getType(), C); - Value *V = Builder->CreateLShr(X, CV); + Value *V = Builder.CreateLShr(X, CV); return new TruncInst(V, IIOperand->getType()); } break; @@ -1984,6 +1983,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *IIOperand = II->getArgOperand(0); Value *X = nullptr; + // TODO should this be in InstSimplify? // bitreverse(bitreverse(x)) -> x if (match(IIOperand, m_BitReverse(m_Value(X)))) return replaceInstUsesWith(CI, X); @@ -1991,7 +1991,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } case Intrinsic::masked_load: - if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, *Builder)) + if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder)) return replaceInstUsesWith(CI, SimplifiedMaskedOp); break; case Intrinsic::masked_store: @@ -2010,7 +2010,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Power->isOne()) return replaceInstUsesWith(CI, II->getArgOperand(0)); // powi(x, -1) -> 1/x - if (Power->isAllOnesValue()) + if (Power->isMinusOne()) return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), II->getArgOperand(0)); } @@ -2073,11 +2073,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::fmuladd: { // Canonicalize fast fmuladd to the separate fmul + fadd. 
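
// A standalone check (plain C++, using the GCC/Clang __builtin_bswap
// builtins) of the bswap folds a little further up in visitCallInst:
// bswap(bswap(x)) == x, and for an i64 -> i32 truncation,
// bswap(trunc(bswap(x))) == trunc(lshr(x, 32)).
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Samples[] = {0x0123456789abcdefULL, 0, ~0ULL, 0x80000000ULL};
  for (uint64_t X : Samples) {
    assert(__builtin_bswap64(__builtin_bswap64(X)) == X);
    uint32_t T = (uint32_t)__builtin_bswap64(X); // trunc(bswap(x))
    assert(__builtin_bswap32(T) == (uint32_t)(X >> 32));
  }
  return 0;
}
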
if (II->hasUnsafeAlgebra()) { - BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->setFastMathFlags(II->getFastMathFlags()); - Value *Mul = Builder->CreateFMul(II->getArgOperand(0), - II->getArgOperand(1)); - Value *Add = Builder->CreateFAdd(Mul, II->getArgOperand(2)); + BuilderTy::FastMathFlagGuard Guard(Builder); + Builder.setFastMathFlags(II->getFastMathFlags()); + Value *Mul = Builder.CreateFMul(II->getArgOperand(0), + II->getArgOperand(1)); + Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2)); Add->takeName(II); return replaceInstUsesWith(*II, Add); } @@ -2128,8 +2128,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Constant *LHS, *RHS; if (match(II->getArgOperand(0), m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) { - CallInst *Call0 = Builder->CreateCall(II->getCalledFunction(), {LHS}); - CallInst *Call1 = Builder->CreateCall(II->getCalledFunction(), {RHS}); + CallInst *Call0 = Builder.CreateCall(II->getCalledFunction(), {LHS}); + CallInst *Call1 = Builder.CreateCall(II->getCalledFunction(), {RHS}); return SelectInst::Create(Cond, Call0, Call1); } @@ -2147,7 +2147,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // fabs (fpext x) -> fpext (fabs x) Value *F = Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(), { ExtSrc->getType() }); - CallInst *NewFabs = Builder->CreateCall(F, ExtSrc); + CallInst *NewFabs = Builder.CreateCall(F, ExtSrc); NewFabs->copyFastMathFlags(II); NewFabs->takeName(II); return new FPExtInst(NewFabs, II->getType()); @@ -2174,7 +2174,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn PPC lvx -> load if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC, &DT) >= 16) { - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); return new LoadInst(Ptr); } @@ -2182,8 +2182,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_vsx_lxvw4x: case Intrinsic::ppc_vsx_lxvd2x: { // Turn PPC VSX loads into normal loads. - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), - PointerType::getUnqual(II->getType())); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0), + PointerType::getUnqual(II->getType())); return new LoadInst(Ptr, Twine(""), false, 1); } case Intrinsic::ppc_altivec_stvx: @@ -2193,7 +2193,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { &DT) >= 16) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(II->getArgOperand(0), Ptr); } break; @@ -2201,18 +2201,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_vsx_stxvd2x: { // Turn PPC VSX stores into normal stores. Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(II->getArgOperand(0), Ptr, false, 1); } case Intrinsic::ppc_qpx_qvlfs: // Turn PPC QPX qvlfs -> load if the pointer is known aligned. 
if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC, &DT) >= 16) { - Type *VTy = VectorType::get(Builder->getFloatTy(), + Type *VTy = VectorType::get(Builder.getFloatTy(), II->getType()->getVectorNumElements()); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(VTy)); - Value *Load = Builder->CreateLoad(Ptr); + Value *Load = Builder.CreateLoad(Ptr); return new FPExtInst(Load, II->getType()); } break; @@ -2220,7 +2220,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn PPC QPX qvlfd -> load if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC, &DT) >= 32) { - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); return new LoadInst(Ptr); } @@ -2229,11 +2229,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn PPC QPX qvstfs -> store if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC, &DT) >= 16) { - Type *VTy = VectorType::get(Builder->getFloatTy(), + Type *VTy = VectorType::get(Builder.getFloatTy(), II->getArgOperand(0)->getType()->getVectorNumElements()); - Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy); + Value *TOp = Builder.CreateFPTrunc(II->getArgOperand(0), VTy); Type *OpPtrTy = PointerType::getUnqual(VTy); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(TOp, Ptr); } break; @@ -2243,7 +2243,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { &DT) >= 32) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(II->getArgOperand(0), Ptr); } break; @@ -2272,15 +2272,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { SmallVector<uint32_t, 8> SubVecMask; for (unsigned i = 0; i != RetWidth; ++i) SubVecMask.push_back((int)i); - VectorHalfAsShorts = Builder->CreateShuffleVector( + VectorHalfAsShorts = Builder.CreateShuffleVector( Arg, UndefValue::get(ArgType), SubVecMask); } auto VectorHalfType = VectorType::get(Type::getHalfTy(II->getContext()), RetWidth); auto VectorHalfs = - Builder->CreateBitCast(VectorHalfAsShorts, VectorHalfType); - auto VectorFloats = Builder->CreateFPExt(VectorHalfs, RetType); + Builder.CreateBitCast(VectorHalfAsShorts, VectorHalfType); + auto VectorFloats = Builder.CreateFPExt(VectorHalfs, RetType); return replaceInstUsesWith(*II, VectorFloats); } @@ -2334,7 +2334,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx_movmsk_pd_256: case Intrinsic::x86_avx_movmsk_ps_256: case Intrinsic::x86_avx2_pmovmskb: { - if (Value *V = simplifyX86movmsk(*II, *Builder)) + if (Value *V = simplifyX86movmsk(*II)) return replaceInstUsesWith(*II, V); break; } @@ -2437,25 +2437,25 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { default: llvm_unreachable("Case stmts out of sync!"); case Intrinsic::x86_avx512_mask_add_ps_512: case Intrinsic::x86_avx512_mask_add_pd_512: - V = Builder->CreateFAdd(Arg0, Arg1); + V = Builder.CreateFAdd(Arg0, Arg1); break; case Intrinsic::x86_avx512_mask_sub_ps_512: case Intrinsic::x86_avx512_mask_sub_pd_512: - V = Builder->CreateFSub(Arg0, Arg1); + V = 
Builder.CreateFSub(Arg0, Arg1); break; case Intrinsic::x86_avx512_mask_mul_ps_512: case Intrinsic::x86_avx512_mask_mul_pd_512: - V = Builder->CreateFMul(Arg0, Arg1); + V = Builder.CreateFMul(Arg0, Arg1); break; case Intrinsic::x86_avx512_mask_div_ps_512: case Intrinsic::x86_avx512_mask_div_pd_512: - V = Builder->CreateFDiv(Arg0, Arg1); + V = Builder.CreateFDiv(Arg0, Arg1); break; } // Create a select for the masking. V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2), - *Builder); + Builder); return replaceInstUsesWith(*II, V); } } @@ -2476,27 +2476,27 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Extract the element as scalars. Value *Arg0 = II->getArgOperand(0); Value *Arg1 = II->getArgOperand(1); - Value *LHS = Builder->CreateExtractElement(Arg0, (uint64_t)0); - Value *RHS = Builder->CreateExtractElement(Arg1, (uint64_t)0); + Value *LHS = Builder.CreateExtractElement(Arg0, (uint64_t)0); + Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0); Value *V; switch (II->getIntrinsicID()) { default: llvm_unreachable("Case stmts out of sync!"); case Intrinsic::x86_avx512_mask_add_ss_round: case Intrinsic::x86_avx512_mask_add_sd_round: - V = Builder->CreateFAdd(LHS, RHS); + V = Builder.CreateFAdd(LHS, RHS); break; case Intrinsic::x86_avx512_mask_sub_ss_round: case Intrinsic::x86_avx512_mask_sub_sd_round: - V = Builder->CreateFSub(LHS, RHS); + V = Builder.CreateFSub(LHS, RHS); break; case Intrinsic::x86_avx512_mask_mul_ss_round: case Intrinsic::x86_avx512_mask_mul_sd_round: - V = Builder->CreateFMul(LHS, RHS); + V = Builder.CreateFMul(LHS, RHS); break; case Intrinsic::x86_avx512_mask_div_ss_round: case Intrinsic::x86_avx512_mask_div_sd_round: - V = Builder->CreateFDiv(LHS, RHS); + V = Builder.CreateFDiv(LHS, RHS); break; } @@ -2506,18 +2506,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // We don't need a select if we know the mask bit is a 1. if (!C || !C->getValue()[0]) { // Cast the mask to an i1 vector and then extract the lowest element. - auto *MaskTy = VectorType::get(Builder->getInt1Ty(), + auto *MaskTy = VectorType::get(Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth()); - Mask = Builder->CreateBitCast(Mask, MaskTy); - Mask = Builder->CreateExtractElement(Mask, (uint64_t)0); + Mask = Builder.CreateBitCast(Mask, MaskTy); + Mask = Builder.CreateExtractElement(Mask, (uint64_t)0); // Extract the lowest element from the passthru operand. - Value *Passthru = Builder->CreateExtractElement(II->getArgOperand(2), + Value *Passthru = Builder.CreateExtractElement(II->getArgOperand(2), (uint64_t)0); - V = Builder->CreateSelect(Mask, V, Passthru); + V = Builder.CreateSelect(Mask, V, Passthru); } // Insert the result back into the original argument 0. 
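
// A sketch (plain C++, no intrinsics) of what the masked scalar-math fold
// above rewrites the AVX-512 *_round intrinsics into: do the math on lane 0,
// select the result or the passthru lane using mask bit 0, and reinsert into
// operand 0. The array values here are arbitrary test data.
#include <cassert>

int main() {
  float A[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  float B[4] = {10.0f, 0.0f, 0.0f, 0.0f};
  float Passthru[4] = {-1.0f, 0.0f, 0.0f, 0.0f};
  for (unsigned Mask = 0; Mask < 2; ++Mask) {
    float V = A[0] + B[0];                      // scalar FAdd on lane 0
    float Lane0 = (Mask & 1) ? V : Passthru[0]; // select on mask bit 0
    float R[4] = {Lane0, A[1], A[2], A[3]};     // insert back into Arg0
    assert(R[0] == ((Mask & 1) ? 11.0f : -1.0f) && R[3] == 4.0f);
  }
  return 0;
}
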
- V = Builder->CreateInsertElement(Arg0, V, (uint64_t)0); + V = Builder.CreateInsertElement(Arg0, V, (uint64_t)0); return replaceInstUsesWith(*II, V); } @@ -2598,7 +2598,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx512_pslli_d_512: case Intrinsic::x86_avx512_pslli_q_512: case Intrinsic::x86_avx512_pslli_w_512: - if (Value *V = simplifyX86immShift(*II, *Builder)) + if (Value *V = simplifyX86immShift(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2629,7 +2629,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx512_psll_d_512: case Intrinsic::x86_avx512_psll_q_512: case Intrinsic::x86_avx512_psll_w_512: { - if (Value *V = simplifyX86immShift(*II, *Builder)) + if (Value *V = simplifyX86immShift(*II, Builder)) return replaceInstUsesWith(*II, V); // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector @@ -2673,7 +2673,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx512_psrlv_w_128: case Intrinsic::x86_avx512_psrlv_w_256: case Intrinsic::x86_avx512_psrlv_w_512: - if (Value *V = simplifyX86varShift(*II, *Builder)) + if (Value *V = simplifyX86varShift(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2683,7 +2683,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx2_pmulu_dq: case Intrinsic::x86_avx512_pmul_dq_512: case Intrinsic::x86_avx512_pmulu_dq_512: { - if (Value *V = simplifyX86muldq(*II, *Builder)) + if (Value *V = simplifyX86muldq(*II, Builder)) return replaceInstUsesWith(*II, V); unsigned VWidth = II->getType()->getVectorNumElements(); @@ -2703,7 +2703,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx2_packsswb: case Intrinsic::x86_avx512_packssdw_512: case Intrinsic::x86_avx512_packsswb_512: - if (Value *V = simplifyX86pack(*II, *this, *Builder, true)) + if (Value *V = simplifyX86pack(*II, true)) return replaceInstUsesWith(*II, V); break; @@ -2713,7 +2713,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx2_packuswb: case Intrinsic::x86_avx512_packusdw_512: case Intrinsic::x86_avx512_packuswb_512: - if (Value *V = simplifyX86pack(*II, *this, *Builder, false)) + if (Value *V = simplifyX86pack(*II, false)) return replaceInstUsesWith(*II, V); break; @@ -2756,7 +2756,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } case Intrinsic::x86_sse41_insertps: - if (Value *V = simplifyX86insertps(*II, *Builder)) + if (Value *V = simplifyX86insertps(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2779,7 +2779,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { : nullptr; // Attempt to simplify to a constant, shuffle vector or EXTRQI call. - if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder)) + if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder)) return replaceInstUsesWith(*II, V); // EXTRQ only uses the lowest 64-bits of the first 128-bit vector @@ -2811,7 +2811,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(2)); // Attempt to simplify to a constant or shuffle vector. 
- if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder)) + if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder)) return replaceInstUsesWith(*II, V); // EXTRQI only uses the lowest 64-bits of the first 128-bit vector @@ -2843,7 +2843,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { const APInt &V11 = CI11->getValue(); APInt Len = V11.zextOrTrunc(6); APInt Idx = V11.lshr(8).zextOrTrunc(6); - if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder)) + if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder)) return replaceInstUsesWith(*II, V); } @@ -2876,7 +2876,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (CILength && CIIndex) { APInt Len = CILength->getValue().zextOrTrunc(6); APInt Idx = CIIndex->getValue().zextOrTrunc(6); - if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder)) + if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder)) return replaceInstUsesWith(*II, V); } @@ -2930,7 +2930,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_ssse3_pshuf_b_128: case Intrinsic::x86_avx2_pshuf_b: case Intrinsic::x86_avx512_pshuf_b_512: - if (Value *V = simplifyX86pshufb(*II, *Builder)) + if (Value *V = simplifyX86pshufb(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2940,13 +2940,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx_vpermilvar_pd: case Intrinsic::x86_avx_vpermilvar_pd_256: case Intrinsic::x86_avx512_vpermilvar_pd_512: - if (Value *V = simplifyX86vpermilvar(*II, *Builder)) + if (Value *V = simplifyX86vpermilvar(*II, Builder)) return replaceInstUsesWith(*II, V); break; case Intrinsic::x86_avx2_permd: case Intrinsic::x86_avx2_permps: - if (Value *V = simplifyX86vpermv(*II, *Builder)) + if (Value *V = simplifyX86vpermv(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2964,10 +2964,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx512_mask_permvar_sf_512: case Intrinsic::x86_avx512_mask_permvar_si_256: case Intrinsic::x86_avx512_mask_permvar_si_512: - if (Value *V = simplifyX86vpermv(*II, *Builder)) { + if (Value *V = simplifyX86vpermv(*II, Builder)) { // We simplified the permuting, now create a select for the masking. 
V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2), - *Builder); + Builder); return replaceInstUsesWith(*II, V); } break; @@ -2976,7 +2976,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx_vperm2f128_ps_256: case Intrinsic::x86_avx_vperm2f128_si_256: case Intrinsic::x86_avx2_vperm2i128: - if (Value *V = simplifyX86vperm2(*II, *Builder)) + if (Value *V = simplifyX86vperm2(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -3009,7 +3009,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_xop_vpcomd: case Intrinsic::x86_xop_vpcomq: case Intrinsic::x86_xop_vpcomw: - if (Value *V = simplifyX86vpcom(*II, *Builder, true)) + if (Value *V = simplifyX86vpcom(*II, Builder, true)) return replaceInstUsesWith(*II, V); break; @@ -3017,7 +3017,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_xop_vpcomud: case Intrinsic::x86_xop_vpcomuq: case Intrinsic::x86_xop_vpcomuw: - if (Value *V = simplifyX86vpcom(*II, *Builder, false)) + if (Value *V = simplifyX86vpcom(*II, Builder, false)) return replaceInstUsesWith(*II, V); break; @@ -3044,10 +3044,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (AllEltsOk) { // Cast the input vectors to byte vectors. - Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), - Mask->getType()); - Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), - Mask->getType()); + Value *Op0 = Builder.CreateBitCast(II->getArgOperand(0), + Mask->getType()); + Value *Op1 = Builder.CreateBitCast(II->getArgOperand(1), + Mask->getType()); Value *Result = UndefValue::get(Op0->getType()); // Only extract each element once. @@ -3067,13 +3067,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0; Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1; ExtractedElts[Idx] = - Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse, - Builder->getInt32(Idx&15)); + Builder.CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse, + Builder.getInt32(Idx&15)); } // Insert this value into the result vector. - Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], - Builder->getInt32(i)); + Result = Builder.CreateInsertElement(Result, ExtractedElts[Idx], + Builder.getInt32(i)); } return CastInst::Create(Instruction::BitCast, Result, CI.getType()); } @@ -3238,7 +3238,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Mask == (S_NAN | Q_NAN)) { // Equivalent of isnan. Replace with standard fcmp. - Value *FCmp = Builder->CreateFCmpUNO(Src0, Src0); + Value *FCmp = Builder.CreateFCmpUNO(Src0, Src0); FCmp->takeName(II); return replaceInstUsesWith(*II, FCmp); } @@ -3250,7 +3250,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Clamp mask to used bits if ((Mask & FullMask) != Mask) { - CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(), + CallInst *NewCall = Builder.CreateCall(II->getCalledFunction(), { Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) } ); @@ -3343,13 +3343,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // TODO: Also emit sub if only width is constant. 
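
// A standalone check (plain C++) of the amdgcn.class fold above: when the
// class mask is exactly S_NAN | Q_NAN the intrinsic is an "is NaN" test, and
// an unordered self-comparison (x != x) computes the same thing.
#include <cassert>
#include <cmath>
#include <limits>

int main() {
  double Vals[] = {0.0, -1.5, std::numeric_limits<double>::infinity(),
                   std::numeric_limits<double>::quiet_NaN()};
  for (double X : Vals)
    assert((X != X) == std::isnan(X));
  return 0;
}
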
if (!CWidth && COffset && Offset == 0) { Constant *KSize = ConstantInt::get(COffset->getType(), IntSize); - Value *ShiftVal = Builder->CreateSub(KSize, II->getArgOperand(2)); - ShiftVal = Builder->CreateZExt(ShiftVal, II->getType()); + Value *ShiftVal = Builder.CreateSub(KSize, II->getArgOperand(2)); + ShiftVal = Builder.CreateZExt(ShiftVal, II->getType()); - Value *Shl = Builder->CreateShl(Src, ShiftVal); - Value *RightShift = Signed ? - Builder->CreateAShr(Shl, ShiftVal) : - Builder->CreateLShr(Shl, ShiftVal); + Value *Shl = Builder.CreateShl(Src, ShiftVal); + Value *RightShift = Signed ? Builder.CreateAShr(Shl, ShiftVal) + : Builder.CreateLShr(Shl, ShiftVal); RightShift->takeName(II); return replaceInstUsesWith(*II, RightShift); } @@ -3360,17 +3359,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // TODO: This allows folding to undef when the hardware has specific // behavior? if (Offset + Width < IntSize) { - Value *Shl = Builder->CreateShl(Src, IntSize - Offset - Width); - Value *RightShift = Signed ? - Builder->CreateAShr(Shl, IntSize - Width) : - Builder->CreateLShr(Shl, IntSize - Width); + Value *Shl = Builder.CreateShl(Src, IntSize - Offset - Width); + Value *RightShift = Signed ? Builder.CreateAShr(Shl, IntSize - Width) + : Builder.CreateLShr(Shl, IntSize - Width); RightShift->takeName(II); return replaceInstUsesWith(*II, RightShift); } - Value *RightShift = Signed ? - Builder->CreateAShr(Src, Offset) : - Builder->CreateLShr(Src, Offset); + Value *RightShift = Signed ? Builder.CreateAShr(Src, Offset) + : Builder.CreateLShr(Src, Offset); RightShift->takeName(II); return replaceInstUsesWith(*II, RightShift); @@ -3439,7 +3436,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } if (match(Src2, m_NaN()) || isa<UndefValue>(Src2)) { - CallInst *NewCall = Builder->CreateMinNum(Src0, Src1); + CallInst *NewCall = Builder.CreateMinNum(Src0, Src1); NewCall->copyFastMathFlags(II); NewCall->takeName(II); return replaceInstUsesWith(*II, NewCall); @@ -3451,7 +3448,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(), C2->getValueAPF()); return replaceInstUsesWith(*II, - ConstantFP::get(Builder->getContext(), Result)); + ConstantFP::get(Builder.getContext(), Result)); } } } @@ -3494,7 +3491,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")}; MDNode *MD = MDNode::get(II->getContext(), MDArgs); Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)}; - CallInst *NewCall = Builder->CreateCall(NewF, Args); + CallInst *NewCall = Builder.CreateCall(NewF, Args); NewCall->addAttribute(AttributeList::FunctionIndex, Attribute::Convergent); NewCall->takeName(II); @@ -3556,7 +3553,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { SrcLHS->getType()); Value *Args[] = { SrcLHS, SrcRHS, ConstantInt::get(CC->getType(), SrcPred) }; - CallInst *NewCall = Builder->CreateCall(NewF, Args); + CallInst *NewCall = Builder.CreateCall(NewF, Args); NewCall->takeName(II); return replaceInstUsesWith(*II, NewCall); } @@ -3633,16 +3630,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // the InstCombineIRInserter object. 
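
// A standalone check (plain C++) of the shift-based bit-field extract the
// amdgcn bfe fold above emits: shift the field up against the sign bit, then
// shift back down (arithmetic for the signed form, logical for the unsigned
// one). Offset/Width are arbitrary sample values; assumes >> of a negative
// int32_t is an arithmetic shift, as on the usual targets.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned Offset = 5, Width = 7, IntSize = 32;
  for (unsigned i = 0; i < 65536; ++i) {
    uint32_t Src = i * 2654435761u; // arbitrary test patterns
    uint32_t Shl = Src << (IntSize - Offset - Width);
    uint32_t Mask = (1u << Width) - 1;
    uint32_t Field = (Src >> Offset) & Mask;
    assert((Shl >> (IntSize - Width)) == Field);   // lshr: zero-extend
    int32_t S = (int32_t)Shl >> (IntSize - Width); // ashr: sign-extend
    int32_t Expected = (Field & (1u << (Width - 1))) ? (int32_t)(Field | ~Mask)
                                                     : (int32_t)Field;
    assert(S == Expected);
  }
  return 0;
}
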
Value *AssumeIntrinsic = II->getCalledValue(), *A, *B; if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) { - Builder->CreateCall(AssumeIntrinsic, A, II->getName()); - Builder->CreateCall(AssumeIntrinsic, B, II->getName()); + Builder.CreateCall(AssumeIntrinsic, A, II->getName()); + Builder.CreateCall(AssumeIntrinsic, B, II->getName()); return eraseInstFromFunction(*II); } // assume(!(a || b)) -> assume(!a); assume(!b); if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) { - Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(A), - II->getName()); - Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(B), - II->getName()); + Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(A), II->getName()); + Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(B), II->getName()); return eraseInstFromFunction(*II); } @@ -3726,7 +3721,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return eraseInstFromFunction(*NextInst); // Otherwise canonicalize guard(a); guard(b) -> guard(a & b). - II->setArgOperand(0, Builder->CreateAnd(CurrCond, NextCond)); + II->setArgOperand(0, Builder.CreateAnd(CurrCond, NextCond)); return eraseInstFromFunction(*NextInst); } break; @@ -4163,7 +4158,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { Value *NewArg = *AI; if ((*AI)->getType() != ParamTy) - NewArg = Builder->CreateBitOrPointerCast(*AI, ParamTy); + NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy); Args.push_back(NewArg); // Add any parameter attributes. @@ -4189,7 +4184,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Must promote to pass through va_arg area! Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false, PTy, false); - NewArg = Builder->CreateCast(opcode, *AI, PTy); + NewArg = Builder.CreateCast(opcode, *AI, PTy); } Args.push_back(NewArg); @@ -4215,10 +4210,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { CallSite NewCS; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { - NewCS = Builder->CreateInvoke(Callee, II->getNormalDest(), - II->getUnwindDest(), Args, OpBundles); + NewCS = Builder.CreateInvoke(Callee, II->getNormalDest(), + II->getUnwindDest(), Args, OpBundles); } else { - NewCS = Builder->CreateCall(Callee, Args, OpBundles); + NewCS = Builder.CreateCall(Callee, Args, OpBundles); cast<CallInst>(NewCS.getInstruction()) ->setTailCallKind(cast<CallInst>(Caller)->getTailCallKind()); } @@ -4328,7 +4323,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Add the chain argument and attributes. Value *NestVal = Tramp->getArgOperand(2); if (NestVal->getType() != NestTy) - NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest"); + NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest"); NewArgs.push_back(NestVal); NewArgAttrs.push_back(NestAttr); } diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index d3049389dfb9f..dfdfd3e9da840 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -84,7 +84,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI) { PointerType *PTy = cast<PointerType>(CI.getType()); - BuilderTy AllocaBuilder(*Builder); + BuilderTy AllocaBuilder(Builder); AllocaBuilder.SetInsertPoint(&AI); // Get the type really allocated and the type casted to. 
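
// The mechanical change running through all of these InstCombine hunks: the
// combiner's Builder member is no longer accessed through a pointer, so
// Builder->CreateFoo(...) becomes Builder.CreateFoo(...) and helpers now take
// InstCombiner::BuilderTy & instead of dereferencing. A minimal sketch of the
// shape of that API change, with invented stand-in names:
struct Builder { // stand-in for InstCombiner::BuilderTy
  int CreateOr(int a, int b) { return a | b; }
};

struct CombinerBefore {
  Builder *B; // old style: pointer member
  int fold(int a, int b) { return B->CreateOr(a, b); }
};

struct CombinerAfter {
  Builder &B; // new style: reference member
  int fold(int a, int b) { return B.CreateOr(a, b); }
};

int main() {
  Builder B;
  CombinerBefore Old{&B};
  CombinerAfter New{B};
  return Old.fold(1, 2) == New.fold(1, 2) ? 0 : 1;
}
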
@@ -406,8 +406,7 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, /// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32 /// ---> /// extractelement <4 x i32> %X, 1 -static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC, - const DataLayout &DL) { +static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC) { Value *TruncOp = Trunc.getOperand(0); Type *DestType = Trunc.getType(); if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType)) @@ -434,14 +433,14 @@ static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC, unsigned NumVecElts = VecWidth / DestWidth; if (VecType->getElementType() != DestType) { VecType = VectorType::get(DestType, NumVecElts); - VecInput = IC.Builder->CreateBitCast(VecInput, VecType, "bc"); + VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc"); } unsigned Elt = ShiftAmount / DestWidth; - if (DL.isBigEndian()) + if (IC.getDataLayout().isBigEndian()) Elt = NumVecElts - 1 - Elt; - return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); + return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt)); } /// Try to narrow the width of bitwise logic instructions with constants. @@ -460,7 +459,7 @@ Instruction *InstCombiner::shrinkBitwiseLogic(TruncInst &Trunc) { // trunc (logic X, C) --> logic (trunc X, C') Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy); - Value *NarrowOp0 = Builder->CreateTrunc(LogicOp->getOperand(0), DestTy); + Value *NarrowOp0 = Builder.CreateTrunc(LogicOp->getOperand(0), DestTy); return BinaryOperator::Create(LogicOp->getOpcode(), NarrowOp0, NarrowC); } @@ -554,7 +553,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector. if (DestTy->getScalarSizeInBits() == 1) { Constant *One = ConstantInt::get(SrcTy, 1); - Src = Builder->CreateAnd(Src, One); + Src = Builder.CreateAnd(Src, One); Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } @@ -580,7 +579,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // Since we're doing an lshr and a zero extend, and know that the shift // amount is smaller than ASize, it is always safe to do the shift in A's // type, then zero extend or truncate to the result. - Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue()); + Value *Shift = Builder.CreateLShr(A, Cst->getZExtValue()); Shift->takeName(Src); return CastInst::CreateIntegerCast(Shift, DestTy, false); } @@ -610,7 +609,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { return BinaryOperator::CreateAShr(A, ConstantInt::get(CI.getType(), std::min(ShiftAmt, ASize - 1))); if (SExt->hasOneUse()) { - Value *Shift = Builder->CreateAShr(A, std::min(ShiftAmt, ASize-1)); + Value *Shift = Builder.CreateAShr(A, std::min(ShiftAmt, ASize - 1)); Shift->takeName(Src); return CastInst::CreateIntegerCast(Shift, CI.getType(), true); } @@ -620,10 +619,10 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { if (Instruction *I = shrinkBitwiseLogic(CI)) return I; - if (Instruction *I = shrinkSplatShuffle(CI, *Builder)) + if (Instruction *I = shrinkSplatShuffle(CI, Builder)) return I; - if (Instruction *I = shrinkInsertElt(CI, *Builder)) + if (Instruction *I = shrinkInsertElt(CI, Builder)) return I; if (Src->hasOneUse() && isa<IntegerType>(SrcTy) && @@ -636,7 +635,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // FoldShiftByConstant and is the extend in reg pattern. 
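
// A standalone illustration (plain C++, assuming a little-endian host) of the
// layout fact behind foldVecTruncToExtElt above: truncating a wide integer
// shifted right by 32 reads the same bytes as extracting element 1 of the
// equivalent <2 x i32> vector; a big-endian target flips the element index.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t Vec[2] = {0x11111111u, 0x22222222u};
  uint64_t Wide;
  std::memcpy(&Wide, Vec, sizeof Wide);     // "bitcast" <2 x i32> to i64
  assert((uint32_t)(Wide >> 32) == Vec[1]); // trunc(lshr(x, 32)) == element 1
  return 0;
}
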
const unsigned DestSize = DestTy->getScalarSizeInBits(); if (Cst->getValue().ult(DestSize)) { - Value *NewTrunc = Builder->CreateTrunc(A, DestTy, A->getName() + ".tr"); + Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr"); return BinaryOperator::Create( Instruction::Shl, NewTrunc, @@ -645,7 +644,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { } } - if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL)) + if (Instruction *I = foldVecTruncToExtElt(CI, *this)) return I; return nullptr; @@ -668,13 +667,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, Value *In = ICI->getOperand(0); Value *Sh = ConstantInt::get(In->getType(), In->getType()->getScalarSizeInBits() - 1); - In = Builder->CreateLShr(In, Sh, In->getName() + ".lobit"); + In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit"); if (In->getType() != CI.getType()) - In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/); + In = Builder.CreateIntCast(In, CI.getType(), false /*ZExt*/); if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { Constant *One = ConstantInt::get(In->getType(), 1); - In = Builder->CreateXor(In, One, In->getName() + ".not"); + In = Builder.CreateXor(In, One, In->getName() + ".not"); } return replaceInstUsesWith(CI, In); @@ -713,19 +712,19 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, if (ShAmt) { // Perform a logical shr by shiftamt. // Insert the shift to put the result in the low bit. - In = Builder->CreateLShr(In, ConstantInt::get(In->getType(), ShAmt), - In->getName() + ".lobit"); + In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt), + In->getName() + ".lobit"); } if (!Op1CV.isNullValue() == isNE) { // Toggle the low bit. Constant *One = ConstantInt::get(In->getType(), 1); - In = Builder->CreateXor(In, One); + In = Builder.CreateXor(In, One); } if (CI.getType() == In->getType()) return replaceInstUsesWith(CI, In); - Value *IntCast = Builder->CreateIntCast(In, CI.getType(), false); + Value *IntCast = Builder.CreateIntCast(In, CI.getType(), false); return replaceInstUsesWith(CI, IntCast); } } @@ -748,19 +747,19 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, if (UnknownBit.countPopulation() == 1) { if (!DoTransform) return ICI; - Value *Result = Builder->CreateXor(LHS, RHS); + Value *Result = Builder.CreateXor(LHS, RHS); // Mask off any bits that are set and won't be shifted away. if (KnownLHS.One.uge(UnknownBit)) - Result = Builder->CreateAnd(Result, + Result = Builder.CreateAnd(Result, ConstantInt::get(ITy, UnknownBit)); // Shift the bit we're testing down to the lsb. 
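
// A standalone check (plain C++) of the sign-bit folds in transformZExtICmp
// above: zext(x < 0) is just a logical shift of the sign bit down into bit 0,
// and zext(x > -1) is that same bit XORed with 1.
#include <cassert>
#include <cstdint>

int main() {
  for (int x = -128; x < 128; ++x) {
    uint8_t Lobit = (uint8_t)x >> 7; // lshr by bitwidth - 1
    assert(Lobit == (uint8_t)(x < 0 ? 1 : 0));
    assert((uint8_t)(Lobit ^ 1) == (uint8_t)(x > -1 ? 1 : 0));
  }
  return 0;
}
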
- Result = Builder->CreateLShr( + Result = Builder.CreateLShr( Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros())); if (ICI->getPredicate() == ICmpInst::ICMP_EQ) - Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1)); + Result = Builder.CreateXor(Result, ConstantInt::get(ITy, 1)); Result->takeName(ICI); return replaceInstUsesWith(CI, Result); } @@ -960,7 +959,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { if (SrcSize < DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); Constant *AndConst = ConstantInt::get(A->getType(), AndValue); - Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask"); + Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask"); return new ZExtInst(And, CI.getType()); } @@ -970,7 +969,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { AndValue)); } if (SrcSize > DstSize) { - Value *Trunc = Builder->CreateTrunc(A, CI.getType()); + Value *Trunc = Builder.CreateTrunc(A, CI.getType()); APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Trunc->getType(), @@ -992,8 +991,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { (transformZExtICmp(LHS, CI, false) || transformZExtICmp(RHS, CI, false))) { // zext (or icmp, icmp) -> or (zext icmp), (zext icmp) - Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName()); - Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName()); + Value *LCast = Builder.CreateZExt(LHS, CI.getType(), LHS->getName()); + Value *RCast = Builder.CreateZExt(RHS, CI.getType(), RHS->getName()); BinaryOperator *Or = BinaryOperator::Create(Instruction::Or, LCast, RCast); // Perform the elimination. @@ -1020,7 +1019,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) && X->getType() == CI.getType()) { Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); - return BinaryOperator::CreateXor(Builder->CreateAnd(X, ZC), ZC); + return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC); } return nullptr; @@ -1043,12 +1042,12 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { Value *Sh = ConstantInt::get(Op0->getType(), Op0->getType()->getScalarSizeInBits()-1); - Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit"); + Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit"); if (In->getType() != CI.getType()) - In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/); + In = Builder.CreateIntCast(In, CI.getType(), true /*SExt*/); if (Pred == ICmpInst::ICMP_SGT) - In = Builder->CreateNot(In, In->getName()+".not"); + In = Builder.CreateNot(In, In->getName() + ".not"); return replaceInstUsesWith(CI, In); } } @@ -1079,26 +1078,26 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { unsigned ShiftAmt = KnownZeroMask.countTrailingZeros(); // Perform a right shift to place the desired bit in the LSB. if (ShiftAmt) - In = Builder->CreateLShr(In, - ConstantInt::get(In->getType(), ShiftAmt)); + In = Builder.CreateLShr(In, + ConstantInt::get(In->getType(), ShiftAmt)); // At this point "In" is either 1 or 0. Subtract 1 to turn // {1, 0} -> {0, -1}. 
- In = Builder->CreateAdd(In, - ConstantInt::getAllOnesValue(In->getType()), - "sext"); + In = Builder.CreateAdd(In, + ConstantInt::getAllOnesValue(In->getType()), + "sext"); } else { // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1 // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1 unsigned ShiftAmt = KnownZeroMask.countLeadingZeros(); // Perform a left shift to place the desired bit in the MSB. if (ShiftAmt) - In = Builder->CreateShl(In, - ConstantInt::get(In->getType(), ShiftAmt)); + In = Builder.CreateShl(In, + ConstantInt::get(In->getType(), ShiftAmt)); // Distribute the bit over the whole bit width. - In = Builder->CreateAShr(In, ConstantInt::get(In->getType(), - KnownZeroMask.getBitWidth() - 1), "sext"); + In = Builder.CreateAShr(In, ConstantInt::get(In->getType(), + KnownZeroMask.getBitWidth() - 1), "sext"); } if (CI.getType() == In->getType()) @@ -1191,7 +1190,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // instead. KnownBits Known = computeKnownBits(Src, 0, &CI); if (Known.isNonNegative()) { - Value *ZExt = Builder->CreateZExt(Src, DestTy); + Value *ZExt = Builder.CreateZExt(Src, DestTy); return replaceInstUsesWith(CI, ZExt); } @@ -1217,7 +1216,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // We need to emit a shl + ashr to do the sign extend. Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); - return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"), + return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"), ShAmt); } @@ -1229,7 +1228,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize); - return BinaryOperator::CreateAShr(Builder->CreateShl(X, ShAmt), ShAmt); + return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt); } if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src)) @@ -1258,7 +1257,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { unsigned SrcDstSize = CI.getType()->getScalarSizeInBits(); unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize; Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt); - A = Builder->CreateShl(A, ShAmtV, CI.getName()); + A = Builder.CreateShl(A, ShAmtV, CI.getName()); return BinaryOperator::CreateAShr(A, ShAmtV); } @@ -1347,9 +1346,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // case of interest here is (float)((double)float + float)). if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) { if (LHSOrig->getType() != CI.getType()) - LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType()); + LHSOrig = Builder.CreateFPExt(LHSOrig, CI.getType()); if (RHSOrig->getType() != CI.getType()) - RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType()); + RHSOrig = Builder.CreateFPExt(RHSOrig, CI.getType()); Instruction *RI = BinaryOperator::Create(OpI->getOpcode(), LHSOrig, RHSOrig); RI->copyFastMathFlags(OpI); @@ -1364,9 +1363,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // in the destination format if it can represent both sources. 
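
// A standalone check (plain C++) of the two transformSExtICmp tricks above:
// adding -1 turns a {1, 0} value into {0, -1}, and a sign-extended bit test
// is a left shift of the tested bit to the MSB followed by an arithmetic
// shift right by bitwidth - 1 (assumes >> of a negative int8_t is an
// arithmetic shift, as on the usual targets).
#include <cassert>
#include <cstdint>

int main() {
  assert((int8_t)(1 + -1) == 0 && (int8_t)(0 + -1) == -1); // {1,0} -> {0,-1}
  for (int x = 0; x < 256; ++x)
    for (unsigned n = 0; n < 8; ++n) {
      int8_t Ext = (int8_t)((uint8_t)x << (7 - n)) >> 7; // shl; ashr w-1
      assert(Ext == (((x >> n) & 1) ? -1 : 0));
    }
  return 0;
}
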
if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) { if (LHSOrig->getType() != CI.getType()) - LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType()); + LHSOrig = Builder.CreateFPExt(LHSOrig, CI.getType()); if (RHSOrig->getType() != CI.getType()) - RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType()); + RHSOrig = Builder.CreateFPExt(RHSOrig, CI.getType()); Instruction *RI = BinaryOperator::CreateFMul(LHSOrig, RHSOrig); RI->copyFastMathFlags(OpI); @@ -1382,9 +1381,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // TODO: Tighten bound via rigorous analysis of the unbalanced case. if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) { if (LHSOrig->getType() != CI.getType()) - LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType()); + LHSOrig = Builder.CreateFPExt(LHSOrig, CI.getType()); if (RHSOrig->getType() != CI.getType()) - RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType()); + RHSOrig = Builder.CreateFPExt(RHSOrig, CI.getType()); Instruction *RI = BinaryOperator::CreateFDiv(LHSOrig, RHSOrig); RI->copyFastMathFlags(OpI); @@ -1399,11 +1398,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { if (SrcWidth == OpWidth) break; if (LHSWidth < SrcWidth) - LHSOrig = Builder->CreateFPExt(LHSOrig, RHSOrig->getType()); + LHSOrig = Builder.CreateFPExt(LHSOrig, RHSOrig->getType()); else if (RHSWidth <= SrcWidth) - RHSOrig = Builder->CreateFPExt(RHSOrig, LHSOrig->getType()); + RHSOrig = Builder.CreateFPExt(RHSOrig, LHSOrig->getType()); if (LHSOrig != OpI->getOperand(0) || RHSOrig != OpI->getOperand(1)) { - Value *ExactResult = Builder->CreateFRem(LHSOrig, RHSOrig); + Value *ExactResult = Builder.CreateFRem(LHSOrig, RHSOrig); if (Instruction *RI = dyn_cast<Instruction>(ExactResult)) RI->copyFastMathFlags(OpI); return CastInst::CreateFPCast(ExactResult, CI.getType()); @@ -1412,8 +1411,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // (fptrunc (fneg x)) -> (fneg (fptrunc x)) if (BinaryOperator::isFNeg(OpI)) { - Value *InnerTrunc = Builder->CreateFPTrunc(OpI->getOperand(1), - CI.getType()); + Value *InnerTrunc = Builder.CreateFPTrunc(OpI->getOperand(1), + CI.getType()); Instruction *RI = BinaryOperator::CreateFNeg(InnerTrunc); RI->copyFastMathFlags(OpI); return RI; @@ -1432,10 +1431,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { (isa<ConstantFP>(SI->getOperand(1)) || isa<ConstantFP>(SI->getOperand(2))) && matchSelectPattern(SI, LHS, RHS).Flavor == SPF_UNKNOWN) { - Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1), - CI.getType()); - Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2), - CI.getType()); + Value *LHSTrunc = Builder.CreateFPTrunc(SI->getOperand(1), CI.getType()); + Value *RHSTrunc = Builder.CreateFPTrunc(SI->getOperand(2), CI.getType()); return SelectInst::Create(SI->getOperand(0), LHSTrunc, RHSTrunc); } @@ -1465,7 +1462,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // Do unary FP operation on smaller type. 
// (fptrunc (fabs x)) -> (fabs (fptrunc x)) - Value *InnerTrunc = Builder->CreateFPTrunc(Src, CI.getType()); + Value *InnerTrunc = Builder.CreateFPTrunc(Src, CI.getType()); Type *IntrinsicType[] = { CI.getType() }; Function *Overload = Intrinsic::getDeclaration( CI.getModule(), II->getIntrinsicID(), IntrinsicType); @@ -1482,7 +1479,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { } } - if (Instruction *I = shrinkInsertElt(CI, *Builder)) + if (Instruction *I = shrinkInsertElt(CI, Builder)) return I; return nullptr; @@ -1577,7 +1574,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { if (CI.getType()->isVectorTy()) // Handle vectors of pointers. Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); - Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty); + Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty); return new IntToPtrInst(P, CI.getType()); } @@ -1627,7 +1624,7 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { if (Ty->isVectorTy()) // Handle vectors of pointers. PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements()); - Value *P = Builder->CreatePtrToInt(CI.getOperand(0), PtrTy); + Value *P = Builder.CreatePtrToInt(CI.getOperand(0), PtrTy); return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false); } @@ -1653,7 +1650,7 @@ static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy, return nullptr; SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements()); - InVal = IC.Builder->CreateBitCast(InVal, SrcTy); + InVal = IC.Builder.CreateBitCast(InVal, SrcTy); } // Now that the element types match, get the shuffle mask and RHS of the @@ -1833,8 +1830,8 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI, for (unsigned i = 0, e = Elements.size(); i != e; ++i) { if (!Elements[i]) continue; // Unset element. - Result = IC.Builder->CreateInsertElement(Result, Elements[i], - IC.Builder->getInt32(i)); + Result = IC.Builder.CreateInsertElement(Result, Elements[i], + IC.Builder.getInt32(i)); } return Result; @@ -1845,8 +1842,7 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI, /// vectors better than bitcasts of scalars because vector registers are /// usually not type-specific like scalar integer or scalar floating-point. static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, - InstCombiner &IC, - const DataLayout &DL) { + InstCombiner &IC) { // TODO: Create and use a pattern matcher for ExtractElementInst. 
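An aside on the canonicalization this helper performs (a bitcast of an extractelement becomes an extractelement of a bitcast): the underlying bit-level claim can be checked without LLVM at all. A minimal standalone C++ sketch; the vector contents and lane choice are hypothetical, only the reinterpretation idiom matters:

#include <cassert>
#include <cstdint>
#include <cstring>

// memcpy is the portable way to "bitcast" in C++.
static uint32_t bitsOf(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof Bits);
  return Bits;
}

int main() {
  float Vec[4] = {1.0f, -2.5f, 0.5f, 3.25f}; // stand-in for <4 x float>
  unsigned Lane = 1;
  // Order 1: extract the float lane, then bitcast the scalar to i32.
  uint32_t ScalarFirst = bitsOf(Vec[Lane]);
  // Order 2: bitcast the whole vector to <4 x i32>, then extract the lane.
  uint32_t IntVec[4];
  std::memcpy(IntVec, Vec, sizeof IntVec);
  uint32_t VectorFirst = IntVec[Lane];
  assert(ScalarFirst == VectorFirst); // same bits either way
  return 0;
}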
auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0)); if (!ExtElt || !ExtElt->hasOneUse()) @@ -1860,8 +1856,8 @@ static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, unsigned NumElts = ExtElt->getVectorOperandType()->getNumElements(); auto *NewVecType = VectorType::get(DestType, NumElts); - auto *NewBC = IC.Builder->CreateBitCast(ExtElt->getVectorOperand(), - NewVecType, "bc"); + auto *NewBC = IC.Builder.CreateBitCast(ExtElt->getVectorOperand(), + NewVecType, "bc"); return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand()); } @@ -1870,7 +1866,7 @@ static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder) { Type *DestTy = BitCast.getType(); BinaryOperator *BO; - if (!DestTy->getScalarType()->isIntegerTy() || + if (!DestTy->isIntOrIntVectorTy() || !match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) || !BO->isBitwiseLogicOp()) return nullptr; @@ -2033,8 +2029,8 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) { // For each old PHI node, create a corresponding new PHI node with a type A. SmallDenseMap<PHINode *, PHINode *> NewPNodes; for (auto *OldPN : OldPhiNodes) { - Builder->SetInsertPoint(OldPN); - PHINode *NewPN = Builder->CreatePHI(DestTy, OldPN->getNumOperands()); + Builder.SetInsertPoint(OldPN); + PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands()); NewPNodes[OldPN] = NewPN; } @@ -2047,8 +2043,8 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) { if (auto *C = dyn_cast<Constant>(V)) { NewV = ConstantExpr::getBitCast(C, DestTy); } else if (auto *LI = dyn_cast<LoadInst>(V)) { - Builder->SetInsertPoint(LI->getNextNode()); - NewV = Builder->CreateBitCast(LI, DestTy); + Builder.SetInsertPoint(LI->getNextNode()); + NewV = Builder.CreateBitCast(LI, DestTy); Worklist.Add(LI); } else if (auto *BCI = dyn_cast<BitCastInst>(V)) { NewV = BCI->getOperand(0); @@ -2064,9 +2060,9 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) { for (User *U : PN->users()) { auto *SI = dyn_cast<StoreInst>(U); if (SI && SI->isSimple() && SI->getOperand(0) == PN) { - Builder->SetInsertPoint(SI); + Builder.SetInsertPoint(SI); auto *NewBC = - cast<BitCastInst>(Builder->CreateBitCast(NewPNodes[PN], SrcTy)); + cast<BitCastInst>(Builder.CreateBitCast(NewPNodes[PN], SrcTy)); SI->setOperand(0, NewBC); Worklist.Add(SI); assert(hasStoreUsersOnly(*NewBC)); @@ -2121,14 +2117,14 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If we found a path from the src to dest, create the getelementptr now. if (SrcElTy == DstElTy) { - SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder->getInt32(0)); + SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0)); return GetElementPtrInst::CreateInBounds(Src, Idxs); } } if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) { - Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType()); + Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType()); return InsertElementInst::Create(UndefValue::get(DestTy), Elem, Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) @@ -2161,7 +2157,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // scalar-scalar cast. 
if (!DestTy->isVectorTy()) { Value *Elem = - Builder->CreateExtractElement(Src, + Builder.CreateExtractElement(Src, Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); return CastInst::Create(Instruction::BitCast, Elem, DestTy); } @@ -2190,8 +2186,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { Tmp->getOperand(0)->getType() == DestTy) || ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) && Tmp->getOperand(0)->getType() == DestTy)) { - Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy); - Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy); + Value *LHS = Builder.CreateBitCast(SVI->getOperand(0), DestTy); + Value *RHS = Builder.CreateBitCast(SVI->getOperand(1), DestTy); // Return a new shuffle vector. Use the same element ID's, as we // know the vector types match #elts. return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2)); @@ -2204,13 +2200,13 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (Instruction *I = optimizeBitCastFromPhi(CI, PN)) return I; - if (Instruction *I = canonicalizeBitCastExtElt(CI, *this, DL)) + if (Instruction *I = canonicalizeBitCastExtElt(CI, *this)) return I; - if (Instruction *I = foldBitCastBitwiseLogic(CI, *Builder)) + if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder)) return I; - if (Instruction *I = foldBitCastSelect(CI, *Builder)) + if (Instruction *I = foldBitCastSelect(CI, Builder)) return I; if (SrcTy->isPointerTy()) @@ -2234,7 +2230,7 @@ Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) { MidTy = VectorType::get(MidTy, VT->getNumElements()); } - Value *NewBitCast = Builder->CreateBitCast(Src, MidTy); + Value *NewBitCast = Builder.CreateBitCast(Src, MidTy); return new AddrSpaceCastInst(NewBitCast, CI.getType()); } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 58b8b2f526299..60d1cde971dd4 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -392,7 +392,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, Type *IntPtrTy = DL.getIntPtrType(GEP->getType()); unsigned PtrSize = IntPtrTy->getIntegerBitWidth(); if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize) - Idx = Builder->CreateTrunc(Idx, IntPtrTy); + Idx = Builder.CreateTrunc(Idx, IntPtrTy); } // If the comparison is only true for one or two elements, emit direct @@ -400,7 +400,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, if (SecondTrueElement != Overdefined) { // None true -> false. if (FirstTrueElement == Undefined) - return replaceInstUsesWith(ICI, Builder->getFalse()); + return replaceInstUsesWith(ICI, Builder.getFalse()); Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement); @@ -409,9 +409,9 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx); // True for two elements -> 'i == 47 | i == 72'. 
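To illustrate the fold described in the comment above (the code right after it builds exactly that or-of-equalities), a load from a constant table compared against a constant can be replaced by direct index tests. A standalone sketch with a made-up five-entry table; the table contents and matching indices are hypothetical:

#include <cassert>

// Hypothetical constant global: only indices 1 and 3 hold the value 9.
static const int Table[5] = {4, 9, 7, 9, 2};

// Before the fold: compare the loaded element, as the gep+load+icmp does.
static bool viaLoad(unsigned Idx) { return Table[Idx] == 9; }

// After the fold: 'i == 1 | i == 3', no memory access left.
static bool viaIndex(unsigned Idx) { return Idx == 1 || Idx == 3; }

int main() {
  for (unsigned Idx = 0; Idx != 5; ++Idx)
    assert(viaLoad(Idx) == viaIndex(Idx));
  return 0;
}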
- Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx); + Value *C1 = Builder.CreateICmpEQ(Idx, FirstTrueIdx); Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement); - Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx); + Value *C2 = Builder.CreateICmpEQ(Idx, SecondTrueIdx); return BinaryOperator::CreateOr(C1, C2); } @@ -420,7 +420,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, if (SecondFalseElement != Overdefined) { // None false -> true. if (FirstFalseElement == Undefined) - return replaceInstUsesWith(ICI, Builder->getTrue()); + return replaceInstUsesWith(ICI, Builder.getTrue()); Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement); @@ -429,9 +429,9 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx); // False for two elements -> 'i != 47 & i != 72'. - Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx); + Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx); Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); - Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx); + Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx); return BinaryOperator::CreateAnd(C1, C2); } @@ -443,7 +443,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1). if (FirstTrueElement) { Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement); - Idx = Builder->CreateAdd(Idx, Offs); + Idx = Builder.CreateAdd(Idx, Offs); } Value *End = ConstantInt::get(Idx->getType(), @@ -457,7 +457,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse). if (FirstFalseElement) { Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); - Idx = Builder->CreateAdd(Idx, Offs); + Idx = Builder.CreateAdd(Idx, Offs); } Value *End = ConstantInt::get(Idx->getType(), @@ -481,9 +481,9 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount); if (Ty) { - Value *V = Builder->CreateIntCast(Idx, Ty, false); - V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); - V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); + Value *V = Builder.CreateIntCast(Idx, Ty, false); + V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); + V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V); return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); } } @@ -566,7 +566,7 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC, // we don't need to bother extending: the extension won't affect where the // computation crosses zero. if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) { - VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy); + VariableIdx = IC.Builder.CreateTrunc(VariableIdx, IntPtrTy); } return VariableIdx; } @@ -588,10 +588,10 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC, // Okay, we can do this evaluation. Start by converting the index to intptr. 
if (VariableIdx->getType() != IntPtrTy) - VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy, + VariableIdx = IC.Builder.CreateIntCast(VariableIdx, IntPtrTy, true /*Signed*/); Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); - return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset"); + return IC.Builder.CreateAdd(VariableIdx, OffsetVal, "offset"); } /// Returns true if we can rewrite Start as a GEP with pointer Base @@ -981,13 +981,13 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, if (LHSIndexTy != RHSIndexTy) { if (LHSIndexTy->getPrimitiveSizeInBits() < RHSIndexTy->getPrimitiveSizeInBits()) { - ROffset = Builder->CreateTrunc(ROffset, LHSIndexTy); + ROffset = Builder.CreateTrunc(ROffset, LHSIndexTy); } else - LOffset = Builder->CreateTrunc(LOffset, RHSIndexTy); + LOffset = Builder.CreateTrunc(LOffset, RHSIndexTy); } - Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond), - LOffset, ROffset); + Value *Cmp = Builder.CreateICmp(ICmpInst::getSignedPredicate(Cond), + LOffset, ROffset); return replaceInstUsesWith(I, Cmp); } @@ -1026,7 +1026,7 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, if (NumDifferences == 0) // SAME GEP? return replaceInstUsesWith(I, // No comparison is needed here. - Builder->getInt1(ICmpInst::isTrueWhenEqual(Cond))); + Builder.getInt1(ICmpInst::isTrueWhenEqual(Cond))); else if (NumDifferences == 1 && GEPsInBounds) { Value *LHSV = GEPLHS->getOperand(DiffOperand); @@ -1174,7 +1174,7 @@ Instruction *InstCombiner::foldICmpAddOpConst(Instruction &ICI, // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128 assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); - Constant *C = Builder->getInt(CI->getValue()-1); + Constant *C = Builder.getInt(CI->getValue() - 1); return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); } @@ -1347,17 +1347,17 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, Value *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::sadd_with_overflow, NewType); - InstCombiner::BuilderTy *Builder = IC.Builder; + InstCombiner::BuilderTy &Builder = IC.Builder; // Put the new code above the original add, in case there are any uses of the // add between the add and the compare. - Builder->SetInsertPoint(OrigAdd); + Builder.SetInsertPoint(OrigAdd); - Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName() + ".trunc"); - Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName() + ".trunc"); - CallInst *Call = Builder->CreateCall(F, {TruncA, TruncB}, "sadd"); - Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result"); - Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType()); + Value *TruncA = Builder.CreateTrunc(A, NewType, A->getName() + ".trunc"); + Value *TruncB = Builder.CreateTrunc(B, NewType, B->getName() + ".trunc"); + CallInst *Call = Builder.CreateCall(F, {TruncA, TruncB}, "sadd"); + Value *Add = Builder.CreateExtractValue(Call, 0, "sadd.result"); + Value *ZExt = Builder.CreateZExt(Add, OrigAdd->getType()); // The inner add was the result of the narrow add, zero extended to the // wider type. Replace it with the result computed by the intrinsic. 
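The hunk above only re-targets the builder calls; the surrounding transform is the narrowing of a sign-extended add plus range check into @llvm.sadd.with.overflow. The equivalence it relies on can be checked exhaustively for i8. A standalone sketch, using the GCC/Clang __builtin_add_overflow builtin as a stand-in for the intrinsic; the i8/i32 widths are illustrative:

#include <cassert>
#include <cstdint>

int main() {
  for (int A = -128; A <= 127; ++A)
    for (int B = -128; B <= 127; ++B) {
      // Wide form: what the original sext/add/icmp pattern computes.
      int Wide = A + B;
      bool WideOverflows = Wide < INT8_MIN || Wide > INT8_MAX;
      // Narrow form: @llvm.sadd.with.overflow.i8 on truncated operands.
      int8_t Sum;
      bool NarrowOverflows =
          __builtin_add_overflow((int8_t)A, (int8_t)B, &Sum);
      assert(WideOverflows == NarrowOverflows);
      if (!NarrowOverflows)
        assert(Sum == Wide); // the narrow result matches the wide add
    }
  return 0;
}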
@@ -1434,9 +1434,9 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) { ConstantRange Intersection = DominatingCR.intersectWith(CR); ConstantRange Difference = DominatingCR.difference(CR); if (Intersection.isEmptySet()) - return replaceInstUsesWith(Cmp, Builder->getFalse()); + return replaceInstUsesWith(Cmp, Builder.getFalse()); if (Difference.isEmptySet()) - return replaceInstUsesWith(Cmp, Builder->getTrue()); + return replaceInstUsesWith(Cmp, Builder.getTrue()); // If this is a normal comparison, it demands all bits. If it is a sign // bit comparison, it only demands the sign bit. @@ -1452,9 +1452,9 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) { return nullptr; if (auto *AI = Intersection.getSingleElement()) - return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder->getInt(*AI)); + return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*AI)); if (auto *AD = Difference.getSingleElement()) - return new ICmpInst(ICmpInst::ICMP_NE, X, Builder->getInt(*AD)); + return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*AD)); } return nullptr; @@ -1628,11 +1628,11 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And, !Shift->isArithmeticShift() && !isa<Constant>(Shift->getOperand(0))) { // Compute C2 << Y. Value *NewShift = - IsShl ? Builder->CreateLShr(And->getOperand(1), Shift->getOperand(1)) - : Builder->CreateShl(And->getOperand(1), Shift->getOperand(1)); + IsShl ? Builder.CreateLShr(And->getOperand(1), Shift->getOperand(1)) + : Builder.CreateShl(And->getOperand(1), Shift->getOperand(1)); // Compute X & (C2 << Y). - Value *NewAnd = Builder->CreateAnd(Shift->getOperand(0), NewShift); + Value *NewAnd = Builder.CreateAnd(Shift->getOperand(0), NewShift); Cmp.setOperand(0, NewAnd); return &Cmp; } @@ -1670,7 +1670,7 @@ Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp, unsigned WideScalarBits = WideType->getScalarSizeInBits(); Constant *ZextC1 = ConstantInt::get(WideType, C1->zext(WideScalarBits)); Constant *ZextC2 = ConstantInt::get(WideType, C2->zext(WideScalarBits)); - Value *NewAnd = Builder->CreateAnd(W, ZextC2, And->getName()); + Value *NewAnd = Builder.CreateAnd(W, ZextC2, And->getName()); return new ICmpInst(Cmp.getPredicate(), NewAnd, ZextC1); } } @@ -1704,12 +1704,12 @@ Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp, NewOr = ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One); } else { if (UsesRemoved >= 3) - NewOr = Builder->CreateOr(Builder->CreateShl(One, B, LShr->getName(), - /*HasNUW=*/true), - One, Or->getName()); + NewOr = Builder.CreateOr(Builder.CreateShl(One, B, LShr->getName(), + /*HasNUW=*/true), + One, Or->getName()); } if (NewOr) { - Value *NewAnd = Builder->CreateAnd(A, NewOr, And->getName()); + Value *NewAnd = Builder.CreateAnd(A, NewOr, And->getName()); Cmp.setOperand(0, NewAnd); return &Cmp; } @@ -1772,7 +1772,7 @@ Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp, Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1); if (And->getType()->isVectorTy()) NTy = VectorType::get(NTy, And->getType()->getVectorNumElements()); - Value *Trunc = Builder->CreateTrunc(X, NTy); + Value *Trunc = Builder.CreateTrunc(X, NTy); auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? 
CmpInst::ICMP_SGE : CmpInst::ICMP_SLT; return new ICmpInst(NewPred, Trunc, Constant::getNullValue(NTy)); @@ -1811,9 +1811,9 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or, // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0 // -> and (icmp eq P, null), (icmp eq Q, null). Value *CmpP = - Builder->CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType())); + Builder.CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType())); Value *CmpQ = - Builder->CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType())); + Builder.CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType())); auto LogicOpc = Pred == ICmpInst::Predicate::ICMP_EQ ? Instruction::And : Instruction::Or; return BinaryOperator::Create(LogicOpc, CmpP, CmpQ); @@ -1993,7 +1993,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, Constant *Mask = ConstantInt::get( ShType, APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue())); - Value *And = Builder->CreateAnd(X, Mask, Shl->getName() + ".mask"); + Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask"); Constant *LShrC = ConstantInt::get(ShType, C->lshr(*ShiftAmt)); return new ICmpInst(Pred, And, LShrC); } @@ -2005,7 +2005,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, Constant *Mask = ConstantInt::get( ShType, APInt::getOneBitSet(TypeBits, TypeBits - ShiftAmt->getZExtValue() - 1)); - Value *And = Builder->CreateAnd(X, Mask, Shl->getName() + ".mask"); + Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask"); return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, And, Constant::getNullValue(ShType)); } @@ -2024,7 +2024,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, TruncTy = VectorType::get(TruncTy, ShType->getVectorNumElements()); Constant *NewC = ConstantInt::get(TruncTy, C->ashr(*ShiftAmt).trunc(TypeBits - Amt)); - return new ICmpInst(Pred, Builder->CreateTrunc(X, TruncTy), NewC); + return new ICmpInst(Pred, Builder.CreateTrunc(X, TruncTy), NewC); } return nullptr; @@ -2076,8 +2076,8 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, Constant *DivCst = ConstantInt::get( Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal)); - Value *Tmp = IsAShr ? Builder->CreateSDiv(X, DivCst, "", Shr->isExact()) - : Builder->CreateUDiv(X, DivCst, "", Shr->isExact()); + Value *Tmp = IsAShr ? Builder.CreateSDiv(X, DivCst, "", Shr->isExact()) + : Builder.CreateUDiv(X, DivCst, "", Shr->isExact()); Cmp.setOperand(0, Tmp); @@ -2115,7 +2115,7 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, // Otherwise strength reduce the shift into an 'and'. APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); Constant *Mask = ConstantInt::get(Shr->getType(), Val); - Value *And = Builder->CreateAnd(X, Mask, Shr->getName() + ".mask"); + Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask"); return new ICmpInst(Pred, And, ShiftedCmpRHS); } @@ -2279,7 +2279,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, default: llvm_unreachable("Unhandled icmp opcode!"); case ICmpInst::ICMP_EQ: if (LoOverflow && HiOverflow) - return replaceInstUsesWith(Cmp, Builder->getFalse()); + return replaceInstUsesWith(Cmp, Builder.getFalse()); if (HiOverflow) return new ICmpInst(DivIsSigned ? 
ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, X, LoBound); @@ -2291,7 +2291,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, HiBound->getUniqueInteger(), DivIsSigned, true)); case ICmpInst::ICMP_NE: if (LoOverflow && HiOverflow) - return replaceInstUsesWith(Cmp, Builder->getTrue()); + return replaceInstUsesWith(Cmp, Builder.getTrue()); if (HiOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, LoBound); @@ -2305,16 +2305,16 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_SLT: if (LoOverflow == +1) // Low bound is greater than input range. - return replaceInstUsesWith(Cmp, Builder->getTrue()); + return replaceInstUsesWith(Cmp, Builder.getTrue()); if (LoOverflow == -1) // Low bound is less than input range. - return replaceInstUsesWith(Cmp, Builder->getFalse()); + return replaceInstUsesWith(Cmp, Builder.getFalse()); return new ICmpInst(Pred, X, LoBound); case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_SGT: if (HiOverflow == +1) // High bound greater than input range. - return replaceInstUsesWith(Cmp, Builder->getFalse()); + return replaceInstUsesWith(Cmp, Builder.getFalse()); if (HiOverflow == -1) // High bound less than input range. - return replaceInstUsesWith(Cmp, Builder->getTrue()); + return replaceInstUsesWith(Cmp, Builder.getTrue()); if (Pred == ICmpInst::ICMP_UGT) return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); @@ -2361,12 +2361,12 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp, // iff (C2 & (C - 1)) == C - 1 and C is a power of 2 if (Pred == ICmpInst::ICMP_ULT && C->isPowerOf2() && (*C2 & (*C - 1)) == (*C - 1)) - return new ICmpInst(ICmpInst::ICMP_EQ, Builder->CreateOr(Y, *C - 1), X); + return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateOr(Y, *C - 1), X); // C2 - Y >u C -> (Y | C) != C2 // iff C2 & C == C and C + 1 is a power of 2 if (Pred == ICmpInst::ICMP_UGT && (*C + 1).isPowerOf2() && (*C2 & *C) == *C) - return new ICmpInst(ICmpInst::ICMP_NE, Builder->CreateOr(Y, *C), X); + return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateOr(Y, *C), X); return nullptr; } @@ -2422,14 +2422,14 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp, // iff C & (C2-1) == 0 // C2 is a power of 2 if (Pred == ICmpInst::ICMP_ULT && C->isPowerOf2() && (*C2 & (*C - 1)) == 0) - return new ICmpInst(ICmpInst::ICMP_EQ, Builder->CreateAnd(X, -(*C)), + return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateAnd(X, -(*C)), ConstantExpr::getNeg(cast<Constant>(Y))); // X+C >u C2 -> (X & ~C2) != C // iff C & C2 == 0 // C2+1 is a power of 2 if (Pred == ICmpInst::ICMP_UGT && (*C + 1).isPowerOf2() && (*C2 & *C) == 0) - return new ICmpInst(ICmpInst::ICMP_NE, Builder->CreateAnd(X, ~(*C)), + return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~(*C)), ConstantExpr::getNeg(cast<Constant>(Y))); return nullptr; @@ -2493,13 +2493,13 @@ Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp, // When none of the three constants satisfy the predicate for the RHS (C), // the entire original Cmp can be simplified to a false. 
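The replacement assembled just below relies on the signed trichotomy: every icmp predicate accepts some union of the 'less than', 'equal', and 'greater than' outcomes, so the select of three constants can be compared by or-ing the accepted cases. A quick standalone check, specialized to SLE as an arbitrary example predicate; the helper names are hypothetical:

#include <cassert>

// SLE accepts the 'less' and 'equal' legs of the trichotomy.
static bool viaPredicate(int A, int B) { return A <= B; }

static bool viaTrichotomy(int A, int B) {
  const bool TrueWhenLessThan = true;
  const bool TrueWhenEqual = true;
  const bool TrueWhenGreaterThan = false;
  bool Cond = false; // mirrors the Builder.getFalse() seed below
  if (TrueWhenLessThan)
    Cond = Cond || (A < B);
  if (TrueWhenEqual)
    Cond = Cond || (A == B);
  if (TrueWhenGreaterThan)
    Cond = Cond || (A > B);
  return Cond;
}

int main() {
  const int Vals[] = {-7, -1, 0, 1, 42};
  for (int A : Vals)
    for (int B : Vals)
      assert(viaPredicate(A, B) == viaTrichotomy(A, B));
  return 0;
}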
- Value *Cond = Builder->getFalse(); + Value *Cond = Builder.getFalse(); if (TrueWhenLessThan) - Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS)); + Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS)); if (TrueWhenEqual) - Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS)); + Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS)); if (TrueWhenGreaterThan) - Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS)); + Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS)); return replaceInstUsesWith(Cmp, Cond); } @@ -2615,7 +2615,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, if (C->isNullValue() && BO->hasOneUse()) { const APInt *BOC; if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) { - Value *NewRem = Builder->CreateURem(BOp0, BOp1, BO->getName()); + Value *NewRem = Builder.CreateURem(BOp0, BOp1, BO->getName()); return new ICmpInst(Pred, NewRem, Constant::getNullValue(BO->getType())); } @@ -2637,7 +2637,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, if (Value *NegVal = dyn_castNegVal(BOp0)) return new ICmpInst(Pred, NegVal, BOp1); if (BO->hasOneUse()) { - Value *Neg = Builder->CreateNeg(BOp1); + Value *Neg = Builder.CreateNeg(BOp1); Neg->takeName(BO); return new ICmpInst(Pred, BOp0, Neg); } @@ -2676,7 +2676,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, // Replace (X | C) == -1 with (X & ~C) == ~C. // This removes the -1 constant. Constant *NotBOC = ConstantExpr::getNot(cast<Constant>(BOp1)); - Value *And = Builder->CreateAnd(BOp0, NotBOC); + Value *And = Builder.CreateAnd(BOp0, NotBOC); return new ICmpInst(Pred, And, NotBOC); } break; @@ -2740,23 +2740,26 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, if (!II || !Cmp.isEquality()) return nullptr; - // Handle icmp {eq|ne} <intrinsic>, intcst. + // Handle icmp {eq|ne} <intrinsic>, Constant. + Type *Ty = II->getType(); switch (II->getIntrinsicID()) { case Intrinsic::bswap: Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); - Cmp.setOperand(1, Builder->getInt(C->byteSwap())); + Cmp.setOperand(1, ConstantInt::get(Ty, C->byteSwap())); return &Cmp; + case Intrinsic::ctlz: case Intrinsic::cttz: // ctz(A) == bitwidth(A) -> A == 0 and likewise for != if (*C == C->getBitWidth()) { Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); - Cmp.setOperand(1, ConstantInt::getNullValue(II->getType())); + Cmp.setOperand(1, ConstantInt::getNullValue(Ty)); return &Cmp; } break; + case Intrinsic::ctpop: { // popcount(A) == 0 -> A == 0 and likewise for != // popcount(A) == bitwidth(A) -> A == -1 and likewise for != @@ -2764,8 +2767,8 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, if (IsZero || *C == C->getBitWidth()) { Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); - auto *NewOp = IsZero ? Constant::getNullValue(II->getType()) - : Constant::getAllOnesValue(II->getType()); + auto *NewOp = + IsZero ? 
Constant::getNullValue(Ty) : Constant::getAllOnesValue(Ty); Cmp.setOperand(1, NewOp); return &Cmp; } @@ -2774,6 +2777,7 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, default: break; } + return nullptr; } @@ -2841,11 +2845,11 @@ Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) { } if (Transform) { if (!Op1) - Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC, - I.getName()); + Op1 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC, + I.getName()); if (!Op2) - Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC, - I.getName()); + Op2 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC, + I.getName()); return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); } break; @@ -3029,12 +3033,12 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { APInt AP1Abs = C1->getValue().abs(); APInt AP2Abs = C2->getValue().abs(); if (AP1Abs.uge(AP2Abs)) { - ConstantInt *C3 = Builder->getInt(AP1 - AP2); - Value *NewAdd = Builder->CreateNSWAdd(A, C3); + ConstantInt *C3 = Builder.getInt(AP1 - AP2); + Value *NewAdd = Builder.CreateNSWAdd(A, C3); return new ICmpInst(Pred, NewAdd, C); } else { - ConstantInt *C3 = Builder->getInt(AP2 - AP1); - Value *NewAdd = Builder->CreateNSWAdd(C, C3); + ConstantInt *C3 = Builder.getInt(AP2 - AP1); + Value *NewAdd = Builder.CreateNSWAdd(C, C3); return new ICmpInst(Pred, A, NewAdd); } } @@ -3157,8 +3161,8 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { Constant *Mask = ConstantInt::get( BO0->getType(), APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs)); - Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask); - Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask); + Value *And1 = Builder.CreateAnd(BO0->getOperand(0), Mask); + Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask); return new ICmpInst(Pred, And1, And2); } // If there are no trailing zeros in the multiplier, just eliminate @@ -3315,8 +3319,8 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { ConstantInt *C1, *C2; if (match(B, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) { - Constant *NC = Builder->getInt(C1->getValue() ^ C2->getValue()); - Value *Xor = Builder->CreateXor(C, NC); + Constant *NC = Builder.getInt(C1->getValue() ^ C2->getValue()); + Value *Xor = Builder.CreateXor(C, NC); return new ICmpInst(Pred, A, Xor); } @@ -3362,8 +3366,8 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { } if (X) { // Build (X^Y) & Z - Op1 = Builder->CreateXor(X, Y); - Op1 = Builder->CreateAnd(Op1, Z); + Op1 = Builder.CreateXor(X, Y); + Op1 = Builder.CreateAnd(Op1, Z); I.setOperand(0, Op1); I.setOperand(1, Constant::getNullValue(Op1->getType())); return &I; @@ -3380,7 +3384,7 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { APInt Pow2 = Cst1->getValue() + 1; if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) && Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth()) - return new ICmpInst(Pred, A, Builder->CreateTrunc(B, A->getType())); + return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType())); } // (A >> C) == (B >> C) --> (A^B) u< (1 << C) @@ -3394,9 +3398,9 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { if (ShAmt < TypeBits && ShAmt != 0) { ICmpInst::Predicate NewPred = Pred == ICmpInst::ICMP_NE ? 
ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; - Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted"); + Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted"); APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt); - return new ICmpInst(NewPred, Xor, Builder->getInt(CmpVal)); + return new ICmpInst(NewPred, Xor, Builder.getInt(CmpVal)); } } @@ -3406,9 +3410,9 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { unsigned TypeBits = Cst1->getBitWidth(); unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits); if (ShAmt < TypeBits && ShAmt != 0) { - Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted"); + Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted"); APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt); - Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal), + Value *And = Builder.CreateAnd(Xor, Builder.getInt(AndVal), I.getName() + ".mask"); return new ICmpInst(Pred, And, Constant::getNullValue(Cst1->getType())); } @@ -3433,11 +3437,20 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { APInt CmpV = Cst1->getValue().zext(ASize); CmpV <<= ShAmt; - Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV)); - return new ICmpInst(Pred, Mask, Builder->getInt(CmpV)); + Value *Mask = Builder.CreateAnd(A, Builder.getInt(MaskV)); + return new ICmpInst(Pred, Mask, Builder.getInt(CmpV)); } } + // If both operands are byte-swapped or bit-reversed, just compare the + // original values. + // TODO: Move this to a function similar to foldICmpIntrinsicWithConstant() + // and handle more intrinsics. + if ((match(Op0, m_BSwap(m_Value(A))) && match(Op1, m_BSwap(m_Value(B)))) || + (match(Op0, m_BitReverse(m_Value(A))) && + match(Op1, m_BitReverse(m_Value(B))))) + return new ICmpInst(Pred, A, B); + return nullptr; } @@ -3462,7 +3475,7 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) { RHSOp = RHSC->getOperand(0); // If the pointer types don't match, insert a bitcast. if (LHSCIOp->getType() != RHSOp->getType()) - RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType()); + RHSOp = Builder.CreateBitCast(RHSOp, LHSCIOp->getType()); } } else if (auto *RHSC = dyn_cast<Constant>(ICmp.getOperand(1))) { RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); @@ -3546,7 +3559,7 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) { // We're performing an unsigned comp with a sign extended value. // This is true if the input is >= 0. [aka >s -1] Constant *NegOne = Constant::getAllOnesValue(SrcTy); - Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICmp.getName()); + Value *Result = Builder.CreateICmpSGT(LHSCIOp, NegOne, ICmp.getName()); // Finally, return the value computed. if (ICmp.getPredicate() == ICmpInst::ICMP_ULT) @@ -3574,7 +3587,7 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, // may be pointing to the compare. We want to insert the new instructions // before the add in case there are uses of the add between the add and the // compare. 
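The switch that follows strength-reduces each with.overflow flavor once the overflow bit is statically known. For the unsigned-add case the justification is a simple bit bound: if both operands are below the top bit, the add cannot wrap, so the check folds to false and the add gains nuw. A standalone sketch of that bound; the sample values are arbitrary:

#include <cassert>
#include <cstdint>

int main() {
  // Operands known to be < 2^31 (top bit clear, as a known-bits analysis
  // might prove) make a 32-bit unsigned add wrap-free.
  for (uint32_t A : {0u, 1u, 1234567u, 0x7fffffffu})
    for (uint32_t B : {0u, 42u, 0x7fffffffu}) {
      uint64_t Exact = (uint64_t)A + B; // infinitely precise sum
      uint32_t Wrapped = A + B;         // what the i32 add produces
      assert(Exact == Wrapped);         // never overflows => identical
    }
  return 0;
}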
- Builder->SetInsertPoint(&OrigI); + Builder.SetInsertPoint(&OrigI); switch (OCF) { case OCF_INVALID: @@ -3583,11 +3596,11 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, case OCF_UNSIGNED_ADD: { OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI); if (OR == OverflowResult::NeverOverflows) - return SetResult(Builder->CreateNUWAdd(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNUWAdd(LHS, RHS), Builder.getFalse(), true); if (OR == OverflowResult::AlwaysOverflows) - return SetResult(Builder->CreateAdd(LHS, RHS), Builder->getTrue(), true); + return SetResult(Builder.CreateAdd(LHS, RHS), Builder.getTrue(), true); // Fall through uadd into sadd LLVM_FALLTHROUGH; @@ -3595,13 +3608,13 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, case OCF_SIGNED_ADD: { // X + 0 -> {X, false} if (match(RHS, m_Zero())) - return SetResult(LHS, Builder->getFalse(), false); + return SetResult(LHS, Builder.getFalse(), false); // We can strength reduce this signed add into a regular add if we can prove // that it will never overflow. if (OCF == OCF_SIGNED_ADD) if (willNotOverflowSignedAdd(LHS, RHS, OrigI)) - return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNSWAdd(LHS, RHS), Builder.getFalse(), true); break; } @@ -3610,15 +3623,15 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, case OCF_SIGNED_SUB: { // X - 0 -> {X, false} if (match(RHS, m_Zero())) - return SetResult(LHS, Builder->getFalse(), false); + return SetResult(LHS, Builder.getFalse(), false); if (OCF == OCF_SIGNED_SUB) { if (willNotOverflowSignedSub(LHS, RHS, OrigI)) - return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNSWSub(LHS, RHS), Builder.getFalse(), true); } else { if (willNotOverflowUnsignedSub(LHS, RHS, OrigI)) - return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNUWSub(LHS, RHS), Builder.getFalse(), true); } break; @@ -3627,28 +3640,28 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, case OCF_UNSIGNED_MUL: { OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI); if (OR == OverflowResult::NeverOverflows) - return SetResult(Builder->CreateNUWMul(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNUWMul(LHS, RHS), Builder.getFalse(), true); if (OR == OverflowResult::AlwaysOverflows) - return SetResult(Builder->CreateMul(LHS, RHS), Builder->getTrue(), true); + return SetResult(Builder.CreateMul(LHS, RHS), Builder.getTrue(), true); LLVM_FALLTHROUGH; } case OCF_SIGNED_MUL: // X * undef -> undef if (isa<UndefValue>(RHS)) - return SetResult(RHS, UndefValue::get(Builder->getInt1Ty()), false); + return SetResult(RHS, UndefValue::get(Builder.getInt1Ty()), false); // X * 0 -> {0, false} if (match(RHS, m_Zero())) - return SetResult(RHS, Builder->getFalse(), false); + return SetResult(RHS, Builder.getFalse(), false); // X * 1 -> {X, false} if (match(RHS, m_One())) - return SetResult(LHS, Builder->getFalse(), false); + return SetResult(LHS, Builder.getFalse(), false); if (OCF == OCF_SIGNED_MUL) if (willNotOverflowSignedMul(LHS, RHS, OrigI)) - return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNSWMul(LHS, RHS), Builder.getFalse(), true); break; } @@ -3813,25 +3826,25 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, return nullptr; } - 
InstCombiner::BuilderTy *Builder = IC.Builder;
-  Builder->SetInsertPoint(MulInstr);
+  InstCombiner::BuilderTy &Builder = IC.Builder;
+  Builder.SetInsertPoint(MulInstr);
 
   // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B)
   Value *MulA = A, *MulB = B;
   if (WidthA < MulWidth)
-    MulA = Builder->CreateZExt(A, MulType);
+    MulA = Builder.CreateZExt(A, MulType);
   if (WidthB < MulWidth)
-    MulB = Builder->CreateZExt(B, MulType);
+    MulB = Builder.CreateZExt(B, MulType);
   Value *F = Intrinsic::getDeclaration(I.getModule(),
                                        Intrinsic::umul_with_overflow, MulType);
-  CallInst *Call = Builder->CreateCall(F, {MulA, MulB}, "umul");
+  CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul");
   IC.Worklist.Add(MulInstr);
 
   // If there are uses of the mul result other than the comparison, we know
   // that they are truncation or binary AND. Change them to use the result of
   // mul.with.overflow and properly adjust the mask/size.
   if (MulVal->hasNUsesOrMore(2)) {
-    Value *Mul = Builder->CreateExtractValue(Call, 0, "umul.value");
+    Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value");
     for (User *U : MulVal->users()) {
       if (U == &I || U == OtherVal)
         continue;
@@ -3843,17 +3856,18 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
       } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
         assert(BO->getOpcode() == Instruction::And);
         // Replace (mul & mask) --> zext (mul.with.overflow & short_mask)
-        ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
-        APInt ShortMask = CI->getValue().trunc(MulWidth);
-        Value *ShortAnd = Builder->CreateAnd(Mul, ShortMask);
-        Instruction *Zext =
-            cast<Instruction>(Builder->CreateZExt(ShortAnd, BO->getType()));
-        IC.Worklist.Add(Zext);
+        Value *ShortMask =
+            Builder.CreateTrunc(BO->getOperand(1), Builder.getIntNTy(MulWidth));
+        Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask);
+        Value *Zext = Builder.CreateZExt(ShortAnd, BO->getType());
+        if (auto *ZextI = dyn_cast<Instruction>(Zext))
+          IC.Worklist.Add(ZextI);
         IC.replaceInstUsesWith(*BO, Zext);
       } else {
         llvm_unreachable("Unexpected Binary operation");
       }
-      IC.Worklist.Add(cast<Instruction>(U));
+      if (auto *UI = dyn_cast<Instruction>(U))
+        IC.Worklist.Add(UI);
     }
   }
   if (isa<Instruction>(OtherVal))
@@ -3884,7 +3898,7 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
     llvm_unreachable("Unexpected predicate");
   }
   if (Inverse) {
-    Value *Res = Builder->CreateExtractValue(Call, 1);
+    Value *Res = Builder.CreateExtractValue(Call, 1);
     return BinaryOperator::CreateNot(Res);
   }
@@ -4239,7 +4253,7 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
       if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
         if (Op1Max == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
           return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
-                              Builder->getInt(CI->getValue() - 1));
+                              Builder.getInt(CI->getValue() - 1));
       }
       break;
     case ICmpInst::ICMP_SGT:
@@ -4253,7 +4267,7 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
       if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
         if (Op1Min == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
           return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
-                              Builder->getInt(CI->getValue() + 1));
+                              Builder.getInt(CI->getValue() + 1));
       }
       break;
     case ICmpInst::ICMP_SGE:
@@ -4358,7 +4372,7 @@ static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
 static Instruction *canonicalizeICmpBool(ICmpInst &I,
                                          InstCombiner::BuilderTy &Builder) {
   Value *A = I.getOperand(0), *B = I.getOperand(1);
-  assert(A->getType()->getScalarType()->isIntegerTy(1) && "Bools only");
+
assert(A->getType()->isIntOrIntVectorTy(1) && "Bools only"); // A boolean compared to true/false can be simplified to Op0/true/false in // 14 out of the 20 (10 predicates * 2 constants) possible combinations. @@ -4465,8 +4479,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } } - if (Op0->getType()->getScalarType()->isIntegerTy(1)) - if (Instruction *Res = canonicalizeICmpBool(I, *Builder)) + if (Op0->getType()->isIntOrIntVectorTy(1)) + if (Instruction *Res = canonicalizeICmpBool(I, Builder)) return Res; if (ICmpInst *NewICmp = canonicalizeCmpWithConstant(I)) @@ -4559,7 +4573,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType()); } else { // Otherwise, cast the RHS right before the icmp - Op1 = Builder->CreateBitCast(Op1, Op0->getType()); + Op1 = Builder.CreateBitCast(Op1, Op0->getType()); } } return new ICmpInst(I.getPredicate(), Op0, Op1); @@ -4592,8 +4606,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(A, false, 0, &I) && I.isEquality()) - return new ICmpInst(I.getInversePredicate(), - Builder->CreateAnd(A, B), + return new ICmpInst(I.getInversePredicate(), Builder.CreateAnd(A, B), Op1); // ~X < ~Y --> Y < X @@ -4693,10 +4706,10 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, RHSRoundInt.roundToIntegral(APFloat::rmNearestTiesToEven); if (RHS.compare(RHSRoundInt) != APFloat::cmpEqual) { if (P == FCmpInst::FCMP_OEQ || P == FCmpInst::FCMP_UEQ) - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getFalse()); assert(P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE); - return replaceInstUsesWith(I, Builder->getTrue()); + return replaceInstUsesWith(I, Builder.getTrue()); } } @@ -4762,9 +4775,9 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, Pred = ICmpInst::ICMP_NE; break; case FCmpInst::FCMP_ORD: - return replaceInstUsesWith(I, Builder->getTrue()); + return replaceInstUsesWith(I, Builder.getTrue()); case FCmpInst::FCMP_UNO: - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getFalse()); } // Now we know that the APFloat is a normal number, zero or inf. @@ -4782,8 +4795,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) - return replaceInstUsesWith(I, Builder->getTrue()); - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getTrue()); + return replaceInstUsesWith(I, Builder.getFalse()); } } else { // If the RHS value is > UnsignedMax, fold the comparison. 
This handles @@ -4794,8 +4807,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) - return replaceInstUsesWith(I, Builder->getTrue()); - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getTrue()); + return replaceInstUsesWith(I, Builder.getFalse()); } } @@ -4807,8 +4820,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) - return replaceInstUsesWith(I, Builder->getTrue()); - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getTrue()); + return replaceInstUsesWith(I, Builder.getFalse()); } } else { // See if the RHS value is < UnsignedMin. @@ -4818,8 +4831,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) - return replaceInstUsesWith(I, Builder->getTrue()); - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getTrue()); + return replaceInstUsesWith(I, Builder.getFalse()); } } @@ -4841,14 +4854,14 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, switch (Pred) { default: llvm_unreachable("Unexpected integer comparison!"); case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true - return replaceInstUsesWith(I, Builder->getTrue()); + return replaceInstUsesWith(I, Builder.getTrue()); case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getFalse()); case ICmpInst::ICMP_ULE: // (float)int <= 4.4 --> int <= 4 // (float)int <= -4.4 --> false if (RHS.isNegative()) - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getFalse()); break; case ICmpInst::ICMP_SLE: // (float)int <= 4.4 --> int <= 4 @@ -4860,7 +4873,7 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, // (float)int < -4.4 --> false // (float)int < 4.4 --> int <= 4 if (RHS.isNegative()) - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getFalse()); Pred = ICmpInst::ICMP_ULE; break; case ICmpInst::ICMP_SLT: @@ -4873,7 +4886,7 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, // (float)int > 4.4 --> int > 4 // (float)int > -4.4 --> true if (RHS.isNegative()) - return replaceInstUsesWith(I, Builder->getTrue()); + return replaceInstUsesWith(I, Builder.getTrue()); break; case ICmpInst::ICMP_SGT: // (float)int > 4.4 --> int > 4 @@ -4885,7 +4898,7 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, // (float)int >= -4.4 --> true // (float)int >= 4.4 --> int > 4 if (RHS.isNegative()) - return replaceInstUsesWith(I, Builder->getTrue()); + return replaceInstUsesWith(I, Builder.getTrue()); Pred = ICmpInst::ICMP_UGT; break; case ICmpInst::ICMP_SGE: diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 87f11467b95e2..c38a4981bf1dc 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h 
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -21,8 +21,6 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -212,7 +210,7 @@ public: /// \brief An IRBuilder that automatically inserts new instructions into the /// worklist. typedef IRBuilder<TargetFolder, IRBuilderCallbackInserter> BuilderTy; - BuilderTy *Builder; + BuilderTy &Builder; private: // Mode in which we are running the combiner. @@ -235,7 +233,7 @@ private: bool MadeIRChange; public: - InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder, + InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder, bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, const DataLayout &DL, LoopInfo *LI) @@ -598,9 +596,8 @@ private: /// This tries to simplify binary operations by factorizing out common terms /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). - Value *tryFactorization(InstCombiner::BuilderTy *, BinaryOperator &, - Instruction::BinaryOps, Value *, Value *, Value *, - Value *); + Value *tryFactorization(BinaryOperator &, Instruction::BinaryOps, Value *, + Value *, Value *, Value *); /// Match a select chain which produces one of three values based on whether /// the LHS is less than, equal to, or greater than RHS respectively. @@ -639,7 +636,6 @@ private: APInt &UndefElts, unsigned Depth = 0); Value *SimplifyVectorOp(BinaryOperator &Inst); - Value *SimplifyBSwap(BinaryOperator &Inst); /// Given a binary operator, cast instruction, or select which has a PHI node diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 26bee204e5a44..c59e1ce69ac22 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -189,7 +189,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) { return nullptr; // Canonicalize it. - Value *V = IC.Builder->getInt32(1); + Value *V = IC.Builder.getInt32(1); AI.setOperand(0, V); return &AI; } @@ -197,7 +197,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) { // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName()); + AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName()); New->setAlignment(AI.getAlignment()); // Scan to the end of the allocation instructions, to skip over a block of @@ -229,7 +229,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) { // any casting is exposed early. 
Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType()); if (AI.getArraySize()->getType() != IntPtrTy) { - Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false); + Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false); AI.setOperand(0, V); return &AI; } @@ -458,10 +458,10 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT SmallVector<std::pair<unsigned, MDNode *>, 8> MD; LI.getAllMetadata(MD); - LoadInst *NewLoad = IC.Builder->CreateAlignedLoad( - IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)), + LoadInst *NewLoad = IC.Builder.CreateAlignedLoad( + IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS)), LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix); - NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); + NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); MDBuilder MDB(NewLoad->getContext()); for (const auto &MDPair : MD) { unsigned ID = MDPair.first; @@ -518,10 +518,10 @@ static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value SmallVector<std::pair<unsigned, MDNode *>, 8> MD; SI.getAllMetadata(MD); - StoreInst *NewStore = IC.Builder->CreateAlignedStore( - V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)), + StoreInst *NewStore = IC.Builder.CreateAlignedStore( + V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)), SI.getAlignment(), SI.isVolatile()); - NewStore->setAtomic(SI.getOrdering(), SI.getSynchScope()); + NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); for (const auto &MDPair : MD) { unsigned ID = MDPair.first; MDNode *N = MDPair.second; @@ -613,7 +613,7 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) { // Replace all the stores with stores of the newly loaded value. for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) { auto *SI = cast<StoreInst>(*UI++); - IC.Builder->SetInsertPoint(SI); + IC.Builder.SetInsertPoint(SI); combineStoreToNewValue(IC, *SI, NewLoad); IC.eraseInstFromFunction(*SI); } @@ -664,7 +664,7 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) { AAMDNodes AAMD; LI.getAAMetadata(AAMD); NewLoad->setAAMetadata(AAMD); - return IC.replaceInstUsesWith(LI, IC.Builder->CreateInsertValue( + return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue( UndefValue::get(T), NewLoad, 0, Name)); } @@ -689,15 +689,15 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) { Zero, ConstantInt::get(IdxType, i), }; - auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), - Name + ".elt"); + auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), + Name + ".elt"); auto EltAlign = MinAlign(Align, SL->getElementOffset(i)); - auto *L = IC.Builder->CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack"); + auto *L = IC.Builder.CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack"); // Propagate AA metadata. It'll still be valid on the narrowed load. 
      AAMDNodes AAMD;
       LI.getAAMetadata(AAMD);
       L->setAAMetadata(AAMD);
-      V = IC.Builder->CreateInsertValue(V, L, i);
+      V = IC.Builder.CreateInsertValue(V, L, i);
     }
 
     V->setName(Name);
@@ -712,7 +712,7 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
       AAMDNodes AAMD;
       LI.getAAMetadata(AAMD);
       NewLoad->setAAMetadata(AAMD);
-      return IC.replaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
+      return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
         UndefValue::get(T), NewLoad, 0, Name));
     }
@@ -740,14 +740,14 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
         Zero,
         ConstantInt::get(IdxType, i),
       };
-      auto *Ptr = IC.Builder->CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
-                                                Name + ".elt");
-      auto *L = IC.Builder->CreateAlignedLoad(Ptr, MinAlign(Align, Offset),
-                                              Name + ".unpack");
+      auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
+                                               Name + ".elt");
+      auto *L = IC.Builder.CreateAlignedLoad(Ptr, MinAlign(Align, Offset),
+                                             Name + ".unpack");
       AAMDNodes AAMD;
       LI.getAAMetadata(AAMD);
       L->setAAMetadata(AAMD);
-      V = IC.Builder->CreateInsertValue(V, L, i);
+      V = IC.Builder.CreateInsertValue(V, L, i);
       Offset += EltSize;
     }
@@ -982,8 +982,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
     combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI);
     return replaceInstUsesWith(
-        LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),
-                                            LI.getName() + ".cast"));
+        LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
+                                           LI.getName() + ".cast"));
   }
 
   // None of the following transforms are legal for volatile/ordered atomic
@@ -1019,15 +1019,15 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
     unsigned Align = LI.getAlignment();
     if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, DL, SI) &&
         isSafeToLoadUnconditionally(SI->getOperand(2), Align, DL, SI)) {
-      LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
-                                         SI->getOperand(1)->getName()+".val");
-      LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
-                                         SI->getOperand(2)->getName()+".val");
+      LoadInst *V1 = Builder.CreateLoad(SI->getOperand(1),
+                                        SI->getOperand(1)->getName()+".val");
+      LoadInst *V2 = Builder.CreateLoad(SI->getOperand(2),
+                                        SI->getOperand(2)->getName()+".val");
       assert(LI.isUnordered() && "implied by above");
       V1->setAlignment(Align);
-      V1->setAtomic(LI.getOrdering(), LI.getSynchScope());
+      V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
       V2->setAlignment(Align);
-      V2->setAtomic(LI.getOrdering(), LI.getSynchScope());
+      V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
       return SelectInst::Create(SI->getCondition(), V1, V2);
     }
@@ -1172,7 +1172,7 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
     // If the struct only has one element, we unpack.
     unsigned Count = ST->getNumElements();
     if (Count == 1) {
-      V = IC.Builder->CreateExtractValue(V, 0);
+      V = IC.Builder.CreateExtractValue(V, 0);
       combineStoreToNewValue(IC, SI, V);
       return true;
     }
@@ -1201,12 +1201,11 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
         Zero,
         ConstantInt::get(IdxType, i),
       };
-      auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
-                                                AddrName);
-      auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
+      auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
+                                               AddrName);
+      auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
       auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
-      llvm::Instruction *NS =
-          IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign);
+      llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
       AAMDNodes AAMD;
       SI.getAAMetadata(AAMD);
       NS->setAAMetadata(AAMD);
@@ -1219,7 +1218,7 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
     // If the array only has one element, we unpack.
     auto NumElements = AT->getNumElements();
     if (NumElements == 1) {
-      V = IC.Builder->CreateExtractValue(V, 0);
+      V = IC.Builder.CreateExtractValue(V, 0);
       combineStoreToNewValue(IC, SI, V);
       return true;
     }
@@ -1252,11 +1251,11 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
         Zero,
         ConstantInt::get(IdxType, i),
       };
-      auto *Ptr = IC.Builder->CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
-                                                AddrName);
-      auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
+      auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
+                                               AddrName);
+      auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
       auto EltAlign = MinAlign(Align, Offset);
-      Instruction *NS = IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign);
+      Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
       AAMDNodes AAMD;
       SI.getAAMetadata(AAMD);
       NS->setAAMetadata(AAMD);
@@ -1541,7 +1540,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
                                    SI.isVolatile(),
                                    SI.getAlignment(),
                                    SI.getOrdering(),
-                                   SI.getSynchScope());
+                                   SI.getSyncScopeID());
   InsertNewInstBefore(NewSI, *BBI);
   // The debug locations of the original instructions might differ; merge them.
   NewSI->setDebugLoc(DILocation::getMergedLocation(SI.getDebugLoc(),
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 579639a6194e9..e3a50220f94e2 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -39,8 +39,8 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
   Value *A = nullptr, *B = nullptr, *One = nullptr;
   if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(One), m_Value(A))), m_Value(B))) &&
       match(One, m_One())) {
-    A = IC.Builder->CreateSub(A, B);
-    return IC.Builder->CreateShl(One, A);
+    A = IC.Builder.CreateSub(A, B);
+    return IC.Builder.CreateShl(One, A);
   }
 
   // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
@@ -250,9 +250,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       ConstantInt *C1;
       Value *Sub = nullptr;
       if (match(Op0, m_Sub(m_Value(Y), m_Value(X))))
-        Sub = Builder->CreateSub(X, Y, "suba");
+        Sub = Builder.CreateSub(X, Y, "suba");
       else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1))))
-        Sub = Builder->CreateSub(Builder->CreateNeg(C1), Y, "subc");
+        Sub = Builder.CreateSub(Builder.CreateNeg(C1), Y, "subc");
       if (Sub)
         return BinaryOperator::CreateMul(Sub,
@@ -272,11 +272,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       Value *X;
       Constant *C1;
       if (match(Op0, m_OneUse(m_Add(m_Value(X), m_Constant(C1))))) {
-        Value *Mul = Builder->CreateMul(C1, Op1);
+        Value *Mul = Builder.CreateMul(C1, Op1);
         // Only go forward with the transform if C1*CI simplifies to a tidier
         // constant.
         if (!match(Mul, m_Mul(m_Value(), m_Value())))
-          return BinaryOperator::CreateAdd(Builder->CreateMul(X, Op1), Mul);
+          return BinaryOperator::CreateAdd(Builder.CreateMul(X, Op1), Mul);
       }
     }
   }
@@ -318,7 +318,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       auto RemOpc = Div->getOpcode() == Instruction::UDiv ? Instruction::URem
                                                           : Instruction::SRem;
-      Value *Rem = Builder->CreateBinOp(RemOpc, X, DivOp1);
+      Value *Rem = Builder.CreateBinOp(RemOpc, X, DivOp1);
       if (DivOp1 == Y)
         return BinaryOperator::CreateSub(X, Rem);
       return BinaryOperator::CreateSub(Rem, X);
@@ -326,7 +326,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   }
 
   /// i1 mul -> i1 and.
-  if (I.getType()->getScalarType()->isIntegerTy(1))
+  if (I.getType()->isIntOrIntVectorTy(1))
     return BinaryOperator::CreateAnd(Op0, Op1);
 
   // X*(1 << Y) --> X << Y
@@ -368,7 +368,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   }
 
   if (BoolCast) {
-    Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()),
+    Value *V = Builder.CreateSub(Constant::getNullValue(I.getType()),
                                   BoolCast);
     return BinaryOperator::CreateAnd(V, OtherOp);
   }
@@ -386,7 +386,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
         willNotOverflowSignedMul(Op0Conv->getOperand(0), CI, I)) {
       // Insert the new, smaller mul.
       Value *NewMul =
-          Builder->CreateNSWMul(Op0Conv->getOperand(0), CI, "mulconv");
+          Builder.CreateNSWMul(Op0Conv->getOperand(0), CI, "mulconv");
       return new SExtInst(NewMul, I.getType());
     }
   }
@@ -403,7 +403,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
           willNotOverflowSignedMul(Op0Conv->getOperand(0),
                                    Op1Conv->getOperand(0), I)) {
         // Insert the new integer mul.
-        Value *NewMul = Builder->CreateNSWMul(
+        Value *NewMul = Builder.CreateNSWMul(
             Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv");
         return new SExtInst(NewMul, I.getType());
       }
@@ -422,7 +422,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
         willNotOverflowUnsignedMul(Op0Conv->getOperand(0), CI, I)) {
       // Insert the new, smaller mul.
       Value *NewMul =
-          Builder->CreateNUWMul(Op0Conv->getOperand(0), CI, "mulconv");
+          Builder.CreateNUWMul(Op0Conv->getOperand(0), CI, "mulconv");
       return new ZExtInst(NewMul, I.getType());
     }
   }
@@ -439,7 +439,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
           willNotOverflowUnsignedMul(Op0Conv->getOperand(0),
                                      Op1Conv->getOperand(0), I)) {
         // Insert the new integer mul.
-        Value *NewMul = Builder->CreateNUWMul(
+        Value *NewMul = Builder.CreateNUWMul(
             Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv");
         return new ZExtInst(NewMul, I.getType());
       }
@@ -698,11 +698,11 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
       }
 
       // If the pattern is detected, emit the alternate sequence.
       if (OpX && OpY) {
-        BuilderTy::FastMathFlagGuard Guard(*Builder);
-        Builder->setFastMathFlags(Log2->getFastMathFlags());
+        BuilderTy::FastMathFlagGuard Guard(Builder);
+        Builder.setFastMathFlags(Log2->getFastMathFlags());
         Log2->setArgOperand(0, OpY);
-        Value *FMulVal = Builder->CreateFMul(OpX, Log2);
-        Value *FSub = Builder->CreateFSub(FMulVal, OpX);
+        Value *FMulVal = Builder.CreateFMul(OpX, Log2);
+        Value *FSub = Builder.CreateFSub(FMulVal, OpX);
         FSub->takeName(&I);
         return replaceInstUsesWith(I, FSub);
       }
@@ -714,23 +714,23 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
   for (int i = 0; i < 2; i++) {
     bool IgnoreZeroSign = I.hasNoSignedZeros();
     if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) {
-      BuilderTy::FastMathFlagGuard Guard(*Builder);
-      Builder->setFastMathFlags(I.getFastMathFlags());
+      BuilderTy::FastMathFlagGuard Guard(Builder);
+      Builder.setFastMathFlags(I.getFastMathFlags());
 
       Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign);
       Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);
 
       // -X * -Y => X*Y
       if (N1) {
-        Value *FMul = Builder->CreateFMul(N0, N1);
+        Value *FMul = Builder.CreateFMul(N0, N1);
         FMul->takeName(&I);
         return replaceInstUsesWith(I, FMul);
       }
 
       if (Opnd0->hasOneUse()) {
         // -X * Y => -(X*Y) (Promote negation as high as possible)
-        Value *T = Builder->CreateFMul(N0, Opnd1);
-        Value *Neg = Builder->CreateFNeg(T);
+        Value *T = Builder.CreateFMul(N0, Opnd1);
+        Value *Neg = Builder.CreateFNeg(T);
         Neg->takeName(&I);
         return replaceInstUsesWith(I, Neg);
       }
@@ -755,10 +755,10 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
         Y = Opnd0_0;
 
       if (Y) {
-        BuilderTy::FastMathFlagGuard Guard(*Builder);
-        Builder->setFastMathFlags(I.getFastMathFlags());
-        Value *T = Builder->CreateFMul(Opnd1, Opnd1);
-        Value *R = Builder->CreateFMul(T, Y);
+        BuilderTy::FastMathFlagGuard Guard(Builder);
+        Builder.setFastMathFlags(I.getFastMathFlags());
+        Value *T = Builder.CreateFMul(Opnd1, Opnd1);
+        Value *R = Builder.CreateFMul(T, Y);
         R->takeName(&I);
         return replaceInstUsesWith(I, R);
       }
@@ -824,7 +824,7 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
           *I = SI->getOperand(NonNullOperand);
           Worklist.Add(&*BBI);
         } else if (*I == SelectCond) {
-          *I = Builder->getInt1(NonNullOperand == 1);
+          *I = Builder.getInt1(NonNullOperand == 1);
           Worklist.Add(&*BBI);
         }
       }
@@ -938,20 +938,18 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
   }
 
   if (match(Op0, m_One())) {
-    assert(!I.getType()->getScalarType()->isIntegerTy(1) &&
not removed?"); + assert(!I.getType()->isIntOrIntVectorTy(1) && "i1 divide not removed?"); if (I.getOpcode() == Instruction::SDiv) { // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the // result is one, if Op1 is -1 then the result is minus one, otherwise // it's zero. - Value *Inc = Builder->CreateAdd(Op1, Op0); - Value *Cmp = Builder->CreateICmpULT( - Inc, ConstantInt::get(I.getType(), 3)); + Value *Inc = Builder.CreateAdd(Op1, Op0); + Value *Cmp = Builder.CreateICmpULT(Inc, ConstantInt::get(I.getType(), 3)); return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0)); } else { // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the // result is one, otherwise it's zero. - return new ZExtInst(Builder->CreateICmpEQ(Op1, Op0), I.getType()); + return new ZExtInst(Builder.CreateICmpEQ(Op1, Op0), I.getType()); } } @@ -1026,7 +1024,7 @@ static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1, // X udiv C, where C >= signbit static Instruction *foldUDivNegCst(Value *Op0, Value *Op1, const BinaryOperator &I, InstCombiner &IC) { - Value *ICI = IC.Builder->CreateICmpULT(Op0, cast<ConstantInt>(Op1)); + Value *ICI = IC.Builder.CreateICmpULT(Op0, cast<ConstantInt>(Op1)); return SelectInst::Create(ICI, Constant::getNullValue(I.getType()), ConstantInt::get(I.getType(), 1)); @@ -1045,10 +1043,9 @@ static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I, if (!match(ShiftLeft, m_Shl(m_APInt(CI), m_Value(N)))) llvm_unreachable("match should never fail here!"); if (*CI != 1) - N = IC.Builder->CreateAdd(N, - ConstantInt::get(N->getType(), CI->logBase2())); + N = IC.Builder.CreateAdd(N, ConstantInt::get(N->getType(), CI->logBase2())); if (Op1 != ShiftLeft) - N = IC.Builder->CreateZExt(N, Op1->getType()); + N = IC.Builder.CreateZExt(N, Op1->getType()); BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, N); if (I.isExact()) LShr->setIsExact(); @@ -1134,7 +1131,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0)) if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy())) return new ZExtInst( - Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", I.isExact()), + Builder.CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", I.isExact()), I.getType()); // (LHS udiv (select (select (...)))) -> (LHS >> (select (select (...)))) @@ -1209,7 +1206,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { Constant *NarrowDivisor = ConstantExpr::getTrunc(cast<Constant>(Op1), Op0Src->getType()); - Value *NarrowOp = Builder->CreateSDiv(Op0Src, NarrowDivisor); + Value *NarrowOp = Builder.CreateSDiv(Op0Src, NarrowDivisor); return new SExtInst(NarrowOp, Op0->getType()); } } @@ -1217,7 +1214,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { if (Constant *RHS = dyn_cast<Constant>(Op1)) { // X/INT_MIN -> X == INT_MIN if (RHS->isMinSignedValue()) - return new ZExtInst(Builder->CreateICmpEQ(Op0, Op1), I.getType()); + return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), I.getType()); // -X/C --> X/-C provided the negation doesn't overflow. 
     Value *X;
@@ -1380,7 +1377,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
       // (X/Y) / Z => X / (Y*Z)
       //
       if (!isa<Constant>(Y) || !isa<Constant>(Op1)) {
-        NewInst = Builder->CreateFMul(Y, Op1);
+        NewInst = Builder.CreateFMul(Y, Op1);
         if (Instruction *RI = dyn_cast<Instruction>(NewInst)) {
           FastMathFlags Flags = I.getFastMathFlags();
           Flags &= cast<Instruction>(Op0)->getFastMathFlags();
@@ -1392,7 +1389,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
       // Z / (X/Y) => Z*Y / X
       //
       if (!isa<Constant>(Y) || !isa<Constant>(Op0)) {
-        NewInst = Builder->CreateFMul(Op0, Y);
+        NewInst = Builder.CreateFMul(Op0, Y);
         if (Instruction *RI = dyn_cast<Instruction>(NewInst)) {
           FastMathFlags Flags = I.getFastMathFlags();
           Flags &= cast<Instruction>(Op1)->getFastMathFlags();
@@ -1483,28 +1480,28 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
   // (zext A) urem (zext B) --> zext (A urem B)
   if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
     if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
-      return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1),
+      return new ZExtInst(Builder.CreateURem(ZOp0->getOperand(0), ZOp1),
                           I.getType());
 
   // X urem Y -> X and Y-1, where Y is a power of 2.
   if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) {
     Constant *N1 = Constant::getAllOnesValue(I.getType());
-    Value *Add = Builder->CreateAdd(Op1, N1);
+    Value *Add = Builder.CreateAdd(Op1, N1);
     return BinaryOperator::CreateAnd(Op0, Add);
   }
 
   // 1 urem X -> zext(X != 1)
   if (match(Op0, m_One())) {
-    Value *Cmp = Builder->CreateICmpNE(Op1, Op0);
-    Value *Ext = Builder->CreateZExt(Cmp, I.getType());
+    Value *Cmp = Builder.CreateICmpNE(Op1, Op0);
+    Value *Ext = Builder.CreateZExt(Cmp, I.getType());
     return replaceInstUsesWith(I, Ext);
   }
 
   // X urem C -> X < C ? X : X - C, where C >= signbit.
   const APInt *DivisorC;
   if (match(Op1, m_APInt(DivisorC)) && DivisorC->isNegative()) {
-    Value *Cmp = Builder->CreateICmpULT(Op0, Op1);
-    Value *Sub = Builder->CreateSub(Op0, Op1);
+    Value *Cmp = Builder.CreateICmpULT(Op0, Op1);
+    Value *Sub = Builder.CreateSub(Op0, Op1);
     return SelectInst::Create(Cmp, Op0, Sub);
   }
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 5dbf1e85b05b9..0011412c2bf47 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -636,10 +636,10 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
 /// Return an existing non-zero constant if this phi node has one, otherwise
 /// return constant 1.
 static ConstantInt *GetAnyNonZeroConstInt(PHINode &PN) {
-  assert(isa<IntegerType>(PN.getType()) && "Expect only intger type phi");
+  assert(isa<IntegerType>(PN.getType()) && "Expect only integer type phi");
   for (Value *V : PN.operands())
     if (auto *ConstVA = dyn_cast<ConstantInt>(V))
-      if (!ConstVA->isZeroValue())
+      if (!ConstVA->isZero())
         return ConstVA;
   return ConstantInt::get(cast<IntegerType>(PN.getType()), 1);
 }
@@ -836,12 +836,12 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
       }
 
       // Otherwise, do an extract in the predecessor.
-      Builder->SetInsertPoint(Pred->getTerminator());
+      Builder.SetInsertPoint(Pred->getTerminator());
       Value *Res = InVal;
       if (Offset)
-        Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
+        Res = Builder.CreateLShr(Res, ConstantInt::get(InVal->getType(),
                                                         Offset), "extract");
-      Res = Builder->CreateTrunc(Res, Ty, "extract.t");
+      Res = Builder.CreateTrunc(Res, Ty, "extract.t");
       PredVal = Res;
       EltPHI->addIncoming(Res, Pred);
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 80c6595904e11..4eebe8255998c 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -61,12 +61,12 @@ static CmpInst::Predicate getCmpPredicateForMinMax(SelectPatternFlavor SPF,
   }
 }
 
-static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy *Builder,
+static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy &Builder,
                                           SelectPatternFlavor SPF, Value *A,
                                           Value *B) {
   CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF);
   assert(CmpInst::isIntPredicate(Pred));
-  return Builder->CreateSelect(Builder->CreateICmp(Pred, A, B), A, B);
+  return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
 }
 
 /// We want to turn code that looks like this:
@@ -167,8 +167,8 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
 
     // Fold this by inserting a select from the input values.
     Value *NewSI =
-        Builder->CreateSelect(SI.getCondition(), TI->getOperand(0),
-                              FI->getOperand(0), SI.getName() + ".v", &SI);
+        Builder.CreateSelect(SI.getCondition(), TI->getOperand(0),
+                             FI->getOperand(0), SI.getName() + ".v", &SI);
     return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
                             TI->getType());
   }
@@ -211,8 +211,8 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
   }
 
   // If we reach here, they do have operations in common.
-  Value *NewSI = Builder->CreateSelect(SI.getCondition(), OtherOpT, OtherOpF,
-                                       SI.getName() + ".v", &SI);
+  Value *NewSI = Builder.CreateSelect(SI.getCondition(), OtherOpT, OtherOpF,
+                                      SI.getName() + ".v", &SI);
   Value *Op0 = MatchIsOpZero ? MatchOp : NewSI;
   Value *Op1 = MatchIsOpZero ? NewSI : MatchOp;
   return BinaryOperator::Create(BO->getOpcode(), Op0, Op1);
@@ -227,8 +227,8 @@ static bool isSelect01(Constant *C1, Constant *C2) {
     return false;
   if (!C1I->isZero() && !C2I->isZero()) // One side must be zero.
     return false;
-  return C1I->isOne() || C1I->isAllOnesValue() ||
-         C2I->isOne() || C2I->isAllOnesValue();
+  return C1I->isOne() || C1I->isMinusOne() ||
+         C2I->isOne() || C2I->isMinusOne();
 }
 
 /// Try to fold the select into one of the operands to allow further
@@ -254,7 +254,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
         // Avoid creating select between 2 constants unless it's selecting
         // between 0, 1 and -1.
         if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
-          Value *NewSel = Builder->CreateSelect(SI.getCondition(), OOp, C);
+          Value *NewSel = Builder.CreateSelect(SI.getCondition(), OOp, C);
          NewSel->takeName(TVI);
           BinaryOperator *TVI_BO = cast<BinaryOperator>(TVI);
           BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(),
@@ -284,7 +284,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
         // Avoid creating select between 2 constants unless it's selecting
         // between 0, 1 and -1.
         if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
-          Value *NewSel = Builder->CreateSelect(SI.getCondition(), C, OOp);
+          Value *NewSel = Builder.CreateSelect(SI.getCondition(), C, OOp);
           NewSel->takeName(FVI);
           BinaryOperator *FVI_BO = cast<BinaryOperator>(FVI);
           BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(),
@@ -315,7 +315,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
 /// 3. The magnitude of C2 and C1 are flipped
 static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
                                   Value *FalseVal,
-                                  InstCombiner::BuilderTy *Builder) {
+                                  InstCombiner::BuilderTy &Builder) {
   const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
   if (!IC || !SI.getType()->isIntegerTy())
     return nullptr;
@@ -383,22 +383,22 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
   if (NeedAnd) {
     // Insert the AND instruction on the input to the truncate.
     APInt C1 = APInt::getOneBitSet(V->getType()->getScalarSizeInBits(), C1Log);
-    V = Builder->CreateAnd(V, ConstantInt::get(V->getType(), C1));
+    V = Builder.CreateAnd(V, ConstantInt::get(V->getType(), C1));
   }
 
   if (C2Log > C1Log) {
-    V = Builder->CreateZExtOrTrunc(V, Y->getType());
-    V = Builder->CreateShl(V, C2Log - C1Log);
+    V = Builder.CreateZExtOrTrunc(V, Y->getType());
+    V = Builder.CreateShl(V, C2Log - C1Log);
   } else if (C1Log > C2Log) {
-    V = Builder->CreateLShr(V, C1Log - C2Log);
-    V = Builder->CreateZExtOrTrunc(V, Y->getType());
+    V = Builder.CreateLShr(V, C1Log - C2Log);
+    V = Builder.CreateZExtOrTrunc(V, Y->getType());
   } else
-    V = Builder->CreateZExtOrTrunc(V, Y->getType());
+    V = Builder.CreateZExtOrTrunc(V, Y->getType());
 
   if (NeedXor)
-    V = Builder->CreateXor(V, *C2);
+    V = Builder.CreateXor(V, *C2);
 
-  return Builder->CreateOr(V, Y);
+  return Builder.CreateOr(V, Y);
 }
 
 /// Attempt to fold a cttz/ctlz followed by an icmp plus select into a single
@@ -414,7 +414,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
 /// into:
 /// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
 static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
-                                 InstCombiner::BuilderTy *Builder) {
+                                 InstCombiner::BuilderTy &Builder) {
   ICmpInst::Predicate Pred = ICI->getPredicate();
   Value *CmpLHS = ICI->getOperand(0);
   Value *CmpRHS = ICI->getOperand(1);
@@ -449,8 +449,8 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
     IntrinsicInst *NewI = cast<IntrinsicInst>(II->clone());
     Type *Ty = NewI->getArgOperand(1)->getType();
     NewI->setArgOperand(1, Constant::getNullValue(Ty));
-    Builder->Insert(NewI);
-    return Builder->CreateZExtOrTrunc(NewI, ValueOnZero->getType());
+    Builder.Insert(NewI);
+    return Builder.CreateZExtOrTrunc(NewI, ValueOnZero->getType());
   }
 
   return nullptr;
@@ -597,7 +597,7 @@ canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp,
 
 /// Visit a SelectInst that has an ICmpInst as its first operand.
 Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
                                                   ICmpInst *ICI) {
-  if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, *Builder))
+  if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, Builder))
     return NewSel;
 
   bool Changed = adjustMinMax(SI, *ICI);
@@ -617,23 +617,23 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
   if (TrueVal->getType() == Ty) {
     if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) {
       ConstantInt *C1 = nullptr, *C2 = nullptr;
-      if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) {
+      if (Pred == ICmpInst::ICMP_SGT && Cmp->isMinusOne()) {
         C1 = dyn_cast<ConstantInt>(TrueVal);
         C2 = dyn_cast<ConstantInt>(FalseVal);
-      } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isNullValue()) {
+      } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isZero()) {
         C1 = dyn_cast<ConstantInt>(FalseVal);
         C2 = dyn_cast<ConstantInt>(TrueVal);
       }
       if (C1 && C2) {
         // This shift results in either -1 or 0.
-        Value *AShr = Builder->CreateAShr(CmpLHS, Ty->getBitWidth()-1);
+        Value *AShr = Builder.CreateAShr(CmpLHS, Ty->getBitWidth() - 1);
 
         // Check if we can express the operation with a single or.
-        if (C2->isAllOnesValue())
-          return replaceInstUsesWith(SI, Builder->CreateOr(AShr, C1));
+        if (C2->isMinusOne())
+          return replaceInstUsesWith(SI, Builder.CreateOr(AShr, C1));
 
-        Value *And = Builder->CreateAnd(AShr, C2->getValue()-C1->getValue());
-        return replaceInstUsesWith(SI, Builder->CreateAdd(And, C1));
+        Value *And = Builder.CreateAnd(AShr, C2->getValue() - C1->getValue());
+        return replaceInstUsesWith(SI, Builder.CreateAdd(And, C1));
       }
     }
   }
@@ -684,19 +684,19 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
     // (X & Y) == 0 ? X : X ^ Y  --> X & ~Y
     if (TrueWhenUnset && TrueVal == X &&
         match(FalseVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)
-      V = Builder->CreateAnd(X, ~(*Y));
+      V = Builder.CreateAnd(X, ~(*Y));
 
     // (X & Y) != 0 ? X ^ Y : X  --> X & ~Y
     else if (!TrueWhenUnset && FalseVal == X &&
             match(TrueVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)
-      V = Builder->CreateAnd(X, ~(*Y));
+      V = Builder.CreateAnd(X, ~(*Y));
 
     // (X & Y) == 0 ? X ^ Y : X  --> X | Y
     else if (TrueWhenUnset && FalseVal == X &&
             match(TrueVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)
-      V = Builder->CreateOr(X, *Y);
+      V = Builder.CreateOr(X, *Y);
 
     // (X & Y) != 0 ? X : X ^ Y  --> X | Y
     else if (!TrueWhenUnset && TrueVal == X &&
             match(FalseVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)
-      V = Builder->CreateOr(X, *Y);
+      V = Builder.CreateOr(X, *Y);
 
     if (V)
       return replaceInstUsesWith(SI, V);
@@ -809,8 +809,8 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner,
       (SPF1 == SPF_NABS && SPF2 == SPF_ABS)) {
     SelectInst *SI = cast<SelectInst>(Inner);
     Value *NewSI =
-        Builder->CreateSelect(SI->getCondition(), SI->getFalseValue(),
-                              SI->getTrueValue(), SI->getName(), SI);
+        Builder.CreateSelect(SI->getCondition(), SI->getFalseValue(),
+                             SI->getTrueValue(), SI->getName(), SI);
     return replaceInstUsesWith(Outer, NewSI);
   }
@@ -848,15 +848,15 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner,
         IsFreeOrProfitableToInvert(B, NotB, ElidesXor) &&
         IsFreeOrProfitableToInvert(C, NotC, ElidesXor) && ElidesXor) {
       if (!NotA)
-        NotA = Builder->CreateNot(A);
+        NotA = Builder.CreateNot(A);
       if (!NotB)
-        NotB = Builder->CreateNot(B);
+        NotB = Builder.CreateNot(B);
       if (!NotC)
-        NotC = Builder->CreateNot(C);
+        NotC = Builder.CreateNot(C);
 
       Value *NewInner = generateMinMaxSelectPattern(
           Builder, getInverseMinMaxSelectPattern(SPF1), NotA, NotB);
-      Value *NewOuter = Builder->CreateNot(generateMinMaxSelectPattern(
+      Value *NewOuter = Builder.CreateNot(generateMinMaxSelectPattern(
          Builder, getInverseMinMaxSelectPattern(SPF2), NewInner, NotC));
       return replaceInstUsesWith(Outer, NewOuter);
     }
@@ -868,9 +868,9 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner,
 /// icmp instruction with zero, and we have an 'and' with the non-constant value
 /// and a power of two we can turn the select into a shift on the result of the
 /// 'and'.
-static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
-                                ConstantInt *FalseVal,
-                                InstCombiner::BuilderTy *Builder) {
+static Value *foldSelectICmpAnd(const SelectInst &SI, APInt TrueVal,
+                                APInt FalseVal,
+                                InstCombiner::BuilderTy &Builder) {
   const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
   if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
     return nullptr;
@@ -886,56 +886,53 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
   // If both select arms are non-zero see if we have a select of the form
   // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic
   // for 'x ? 2^n : 0' and fix the thing up at the end.
-  ConstantInt *Offset = nullptr;
-  if (!TrueVal->isZero() && !FalseVal->isZero()) {
-    if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2())
+  APInt Offset(TrueVal.getBitWidth(), 0);
+  if (!TrueVal.isNullValue() && !FalseVal.isNullValue()) {
+    if ((TrueVal - FalseVal).isPowerOf2())
       Offset = FalseVal;
-    else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2())
+    else if ((FalseVal - TrueVal).isPowerOf2())
       Offset = TrueVal;
     else
       return nullptr;
 
     // Adjust TrueVal and FalseVal to the offset.
-    TrueVal = ConstantInt::get(Builder->getContext(),
-                               TrueVal->getValue() - Offset->getValue());
-    FalseVal = ConstantInt::get(Builder->getContext(),
-                                FalseVal->getValue() - Offset->getValue());
+    TrueVal -= Offset;
+    FalseVal -= Offset;
   }
 
   // Make sure the mask in the 'and' and one of the select arms is a power of 2.
   if (!AndRHS->getValue().isPowerOf2() ||
-      (!TrueVal->getValue().isPowerOf2() &&
-       !FalseVal->getValue().isPowerOf2()))
+      (!TrueVal.isPowerOf2() && !FalseVal.isPowerOf2()))
    return nullptr;
 
   // Determine which shift is needed to transform result of the 'and' into the
   // desired result.
-  ConstantInt *ValC = !TrueVal->isZero() ? TrueVal : FalseVal;
-  unsigned ValZeros = ValC->getValue().logBase2();
+  const APInt &ValC = !TrueVal.isNullValue() ? TrueVal : FalseVal;
+  unsigned ValZeros = ValC.logBase2();
   unsigned AndZeros = AndRHS->getValue().logBase2();
 
   // If types don't match we can still convert the select by introducing a zext
   // or a trunc of the 'and'. The trunc case requires that all of the truncated
   // bits are zero, we can figure that out by looking at the 'and' mask.
-  if (AndZeros >= ValC->getBitWidth())
+  if (AndZeros >= ValC.getBitWidth())
     return nullptr;
 
-  Value *V = Builder->CreateZExtOrTrunc(LHS, SI.getType());
+  Value *V = Builder.CreateZExtOrTrunc(LHS, SI.getType());
   if (ValZeros > AndZeros)
-    V = Builder->CreateShl(V, ValZeros - AndZeros);
+    V = Builder.CreateShl(V, ValZeros - AndZeros);
   else if (ValZeros < AndZeros)
-    V = Builder->CreateLShr(V, AndZeros - ValZeros);
+    V = Builder.CreateLShr(V, AndZeros - ValZeros);
 
   // Okay, now we know that everything is set up, we just don't know whether we
   // have a icmp_ne or icmp_eq and whether the true or false val is the zero.
-  bool ShouldNotVal = !TrueVal->isZero();
+  bool ShouldNotVal = !TrueVal.isNullValue();
   ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
   if (ShouldNotVal)
-    V = Builder->CreateXor(V, ValC);
+    V = Builder.CreateXor(V, ValC);
 
   // Apply an offset if needed.
-  if (Offset)
-    V = Builder->CreateAdd(V, Offset);
+  if (!Offset.isNullValue())
+    V = Builder.CreateAdd(V, ConstantInt::get(V->getType(), Offset));
   return V;
 }
@@ -1024,7 +1021,7 @@ Instruction *InstCombiner::foldSelectExtConst(SelectInst &Sel) {
   // TODO: Handle larger types? That requires adjusting FoldOpIntoSelect too.
   Value *X = ExtInst->getOperand(0);
   Type *SmallType = X->getType();
-  if (!SmallType->getScalarType()->isIntegerTy(1))
+  if (!SmallType->isIntOrIntVectorTy(1))
     return nullptr;
 
   Constant *C;
@@ -1045,7 +1042,7 @@ Instruction *InstCombiner::foldSelectExtConst(SelectInst &Sel) {
 
     // select Cond, (ext X), C --> ext(select Cond, X, C')
     // select Cond, C, (ext X) --> ext(select Cond, C', X)
-    Value *NewSel = Builder->CreateSelect(Cond, X, TruncCVal, "narrow", &Sel);
+    Value *NewSel = Builder.CreateSelect(Cond, X, TruncCVal, "narrow", &Sel);
     return CastInst::Create(Instruction::CastOps(ExtOpcode), NewSel, SelType);
   }
@@ -1184,7 +1181,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
     return &SI;
   }
 
-  if (SelType->getScalarType()->isIntegerTy(1) &&
+  if (SelType->isIntOrIntVectorTy(1) &&
      TrueVal->getType() == CondVal->getType()) {
     if (match(TrueVal, m_One())) {
       // Change: A = select B, true, C --> A = or B, C
@@ -1192,7 +1189,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
     }
     if (match(TrueVal, m_Zero())) {
       // Change: A = select B, false, C --> A = and !B, C
-      Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName());
+      Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
       return BinaryOperator::CreateAnd(NotCond, FalseVal);
     }
     if (match(FalseVal, m_Zero())) {
@@ -1201,7 +1198,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
     }
     if (match(FalseVal, m_One())) {
       // Change: A = select B, C, true --> A = or !B, C
-      Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName());
+      Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
       return BinaryOperator::CreateOr(NotCond, TrueVal);
     }
@@ -1226,7 +1223,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   //   select i1 %c, <2 x i8> <1, 1>, <2 x i8> <0, 0>
   // because that may need 3 instructions to splat the condition value:
   // extend, insertelement, shufflevector.
-  if (CondVal->getType()->isVectorTy() == SelType->isVectorTy()) {
+  if (SelType->isIntOrIntVectorTy() &&
+      CondVal->getType()->isVectorTy() == SelType->isVectorTy()) {
     // select C, 1, 0 -> zext C to int
     if (match(TrueVal, m_One()) && match(FalseVal, m_Zero()))
       return new ZExtInst(CondVal, SelType);
@@ -1237,20 +1235,21 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
 
     // select C, 0, 1 -> zext !C to int
     if (match(TrueVal, m_Zero()) && match(FalseVal, m_One())) {
-      Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName());
+      Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
       return new ZExtInst(NotCond, SelType);
     }
 
     // select C, 0, -1 -> sext !C to int
     if (match(TrueVal, m_Zero()) && match(FalseVal, m_AllOnes())) {
-      Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName());
+      Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());
       return new SExtInst(NotCond, SelType);
     }
   }
 
   if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal))
     if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal))
-      if (Value *V = foldSelectICmpAnd(SI, TrueValC, FalseValC, Builder))
+      if (Value *V = foldSelectICmpAnd(SI, TrueValC->getValue(),
+                                       FalseValC->getValue(), Builder))
        return replaceInstUsesWith(SI, V);
 
   // See if we are selecting two values based on a comparison of the two values.
@@ -1288,10 +1287,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       // (X ugt Y) ? X : Y -> (X ole Y) ? Y : X
       if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) {
         FCmpInst::Predicate InvPred = FCI->getInversePredicate();
-        IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
-        Builder->setFastMathFlags(FCI->getFastMathFlags());
-        Value *NewCond = Builder->CreateFCmp(InvPred, TrueVal, FalseVal,
-                                             FCI->getName() + ".inv");
+        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+        Builder.setFastMathFlags(FCI->getFastMathFlags());
+        Value *NewCond = Builder.CreateFCmp(InvPred, TrueVal, FalseVal,
+                                            FCI->getName() + ".inv");
 
         return SelectInst::Create(NewCond, FalseVal, TrueVal,
                                   SI.getName() + ".p");
@@ -1331,10 +1330,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       // (X ugt Y) ? X : Y -> (X ole Y) ? X : Y
       if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) {
         FCmpInst::Predicate InvPred = FCI->getInversePredicate();
-        IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
-        Builder->setFastMathFlags(FCI->getFastMathFlags());
-        Value *NewCond = Builder->CreateFCmp(InvPred, FalseVal, TrueVal,
-                                             FCI->getName() + ".inv");
+        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+        Builder.setFastMathFlags(FCI->getFastMathFlags());
+        Value *NewCond = Builder.CreateFCmp(InvPred, FalseVal, TrueVal,
+                                            FCI->getName() + ".inv");
 
         return SelectInst::Create(NewCond, FalseVal, TrueVal,
                                   SI.getName() + ".p");
@@ -1350,7 +1349,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
     if (Instruction *Result = foldSelectInstWithICmp(SI, ICI))
       return Result;
 
-  if (Instruction *Add = foldAddSubSelect(SI, *Builder))
+  if (Instruction *Add = foldAddSubSelect(SI, Builder))
     return Add;
 
   // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
@@ -1381,16 +1380,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
 
       Value *Cmp;
       if (CmpInst::isIntPredicate(Pred)) {
-        Cmp = Builder->CreateICmp(Pred, LHS, RHS);
+        Cmp = Builder.CreateICmp(Pred, LHS, RHS);
       } else {
-        IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
+        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
         auto FMF = cast<FPMathOperator>(SI.getCondition())->getFastMathFlags();
-        Builder->setFastMathFlags(FMF);
-        Cmp = Builder->CreateFCmp(Pred, LHS, RHS);
+        Builder.setFastMathFlags(FMF);
+        Cmp = Builder.CreateFCmp(Pred, LHS, RHS);
       }
 
-      Value *NewSI = Builder->CreateCast(
-          CastOp, Builder->CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI),
+      Value *NewSI = Builder.CreateCast(
+          CastOp, Builder.CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI),
           SelType);
       return replaceInstUsesWith(SI, NewSI);
     }
@@ -1425,13 +1424,12 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
         (SI.hasOneUse() && match(*SI.user_begin(), m_Not(m_Value())));
 
     if (NumberOfNots >= 2) {
-      Value *NewLHS = Builder->CreateNot(LHS);
-      Value *NewRHS = Builder->CreateNot(RHS);
-      Value *NewCmp = SPF == SPF_SMAX
-                          ? Builder->CreateICmpSLT(NewLHS, NewRHS)
-                          : Builder->CreateICmpULT(NewLHS, NewRHS);
+      Value *NewLHS = Builder.CreateNot(LHS);
+      Value *NewRHS = Builder.CreateNot(RHS);
+      Value *NewCmp = SPF == SPF_SMAX ? Builder.CreateICmpSLT(NewLHS, NewRHS)
+                                      : Builder.CreateICmpULT(NewLHS, NewRHS);
       Value *NewSI =
-          Builder->CreateNot(Builder->CreateSelect(NewCmp, NewLHS, NewRHS));
+          Builder.CreateNot(Builder.CreateSelect(NewCmp, NewLHS, NewRHS));
       return replaceInstUsesWith(SI, NewSI);
     }
   }
@@ -1461,7 +1459,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
      // We choose this as normal form to enable folding on the And and shortening
      // paths for the values (this helps GetUnderlyingObjects() for example).
       if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) {
-        Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition());
+        Value *And = Builder.CreateAnd(CondVal, TrueSI->getCondition());
         SI.setOperand(0, And);
         SI.setOperand(1, TrueSI->getTrueValue());
         return &SI;
       }
@@ -1479,7 +1477,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       }
       // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b)
       if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) {
-        Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition());
+        Value *Or = Builder.CreateOr(CondVal, FalseSI->getCondition());
         SI.setOperand(0, Or);
         SI.setOperand(2, FalseSI->getFalseValue());
         return &SI;
       }
@@ -1541,7 +1539,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       return replaceInstUsesWith(SI, FalseVal);
   }
 
-  if (Instruction *BitCastSel = foldSelectCmpBitcasts(SI, *Builder))
+  if (Instruction *BitCastSel = foldSelectCmpBitcasts(SI, Builder))
     return BitCastSel;
 
   return nullptr;
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 1bb1a85367d1b..7ed141c7fd79d 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -47,7 +47,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
     if (isKnownNonNegative(A, DL, 0, &AC, &I, &DT) &&
         isKnownNonNegative(C, DL, 0, &AC, &I, &DT))
       return BinaryOperator::Create(
-          I.getOpcode(), Builder->CreateBinOp(I.getOpcode(), Op0, C), A);
+          I.getOpcode(), Builder.CreateBinOp(I.getOpcode(), Op0, C), A);
 
   // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2.
   // Because shifts by negative values (which could occur if A were negative)
  // are undefined.
   if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) {
     // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't
     // demand the sign bit (and many others) here??
-    Value *Rem = Builder->CreateAnd(A, ConstantInt::get(I.getType(), *B-1),
-                                    Op1->getName());
+    Value *Rem = Builder.CreateAnd(A, ConstantInt::get(I.getType(), *B - 1),
+                                   Op1->getName());
     I.setOperand(1, Rem);
     return &I;
   }
@@ -260,9 +260,9 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
   // We can always evaluate constants shifted.
   if (Constant *C = dyn_cast<Constant>(V)) {
     if (isLeftShift)
-      V = IC.Builder->CreateShl(C, NumBits);
+      V = IC.Builder.CreateShl(C, NumBits);
     else
-      V = IC.Builder->CreateLShr(C, NumBits);
+      V = IC.Builder.CreateLShr(C, NumBits);
     // If we got a constantexpr back, try to simplify it with TD info.
     if (auto *C = dyn_cast<Constant>(V))
       if (auto *FoldedC =
@@ -289,7 +289,7 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
   case Instruction::Shl:
   case Instruction::LShr:
     return foldShiftedShift(cast<BinaryOperator>(I), NumBits, isLeftShift,
-                            *(IC.Builder));
+                            IC.Builder);
 
   case Instruction::Select:
     I->setOperand(
@@ -353,7 +353,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
       Constant *ShAmt =
          ConstantExpr::getZExt(cast<Constant>(Op1), TrOp->getType());
       // (shift2 (shift1 & 0x00FF), c2)
-      Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());
+      Value *NSh = Builder.CreateBinOp(I.getOpcode(), TrOp, ShAmt, I.getName());
 
      // For logical shifts, the truncation has the effect of making the high
      // part of the register be zeros.  Emulate this by inserting an AND to
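[Editor's note] The common thread in the InstCombine hunks above and below is mechanical: InstCombiner's IRBuilder member changes from a pointer to a reference, so `Builder->Foo(...)` becomes `Builder.Foo(...)`, `*Builder` arguments become plain `Builder`, and helpers take `InstCombiner::BuilderTy &` instead of `InstCombiner::BuilderTy *`. A minimal compilable sketch of the pattern, using stand-in types rather than the real LLVM classes (all names here are illustrative assumptions, not LLVM API):

    #include <string>

    struct BuilderStub {                  // stand-in for IRBuilder<...>
      std::string CreateNot(const std::string &V) { return "!" + V; }
    };

    struct CombinerStub {
      // Before: `BuilderStub *Builder;` -- call sites wrote Builder->CreateNot(V)
      // and passed *Builder to helpers. After: a reference member, so call sites
      // write Builder.CreateNot(V) and pass Builder directly; the member can
      // never be null and needs no dereference.
      BuilderStub &Builder;
      explicit CombinerStub(BuilderStub &B) : Builder(B) {}

      std::string invert(const std::string &V) {
        return Builder.CreateNot(V);      // '.' instead of '->'
      }
    };

One visible wrinkle: a callee that still expects a pointer (llvm::EmitGEPOffset in the InstructionCombining.cpp hunk below) now receives `&Builder` explicitly.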
@@ -375,9 +375,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
       }
 
       // shift1 & 0x00FF
-      Value *And = Builder->CreateAnd(NSh,
-                                      ConstantInt::get(I.getContext(), MaskV),
-                                      TI->getName());
+      Value *And = Builder.CreateAnd(NSh,
+                                     ConstantInt::get(I.getContext(), MaskV),
+                                     TI->getName());
 
       // Return the value truncated to the interesting size.
       return new TruncInst(And, I.getType());
@@ -401,10 +401,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
           match(Op0BO->getOperand(1), m_Shr(m_Value(V1), m_Specific(Op1)))) {
         Value *YS =         // (Y << C)
-            Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
+            Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
         // (X + (Y << C))
-        Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
-                                        Op0BO->getOperand(1)->getName());
+        Value *X = Builder.CreateBinOp(Op0BO->getOpcode(), YS, V1,
+                                       Op0BO->getOperand(1)->getName());
         unsigned Op1Val = Op1C->getLimitedValue(TypeBits);
 
         APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
@@ -421,11 +421,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
           m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))),
                 m_ConstantInt(CC)))) {
         Value *YS =   // (Y << C)
-            Builder->CreateShl(Op0BO->getOperand(0), Op1,
-                               Op0BO->getName());
+            Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
         // X & (CC << C)
-        Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
-                                       V1->getName()+".mask");
+        Value *XM = Builder.CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+                                      V1->getName()+".mask");
         return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
       }
       LLVM_FALLTHROUGH;
@@ -437,10 +436,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
           match(Op0BO->getOperand(0), m_Shr(m_Value(V1), m_Specific(Op1)))) {
         Value *YS =  // (Y << C)
-            Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+            Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
         // (X + (Y << C))
-        Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
-                                        Op0BO->getOperand(0)->getName());
+        Value *X = Builder.CreateBinOp(Op0BO->getOpcode(), V1, YS,
+                                       Op0BO->getOperand(0)->getName());
         unsigned Op1Val = Op1C->getLimitedValue(TypeBits);
 
         APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
@@ -456,10 +455,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
           m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))),
                 m_ConstantInt(CC))) && V2 == Op1) {
         Value *YS =   // (Y << C)
-            Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
+            Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
         // X & (CC << C)
-        Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
-                                       V1->getName()+".mask");
+        Value *XM = Builder.CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
+                                      V1->getName()+".mask");
         return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
       }
@@ -502,7 +501,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
         Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
 
         Value *NewShift =
-            Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
+            Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
         NewShift->takeName(Op0BO);
 
         return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
@@ -541,7 +540,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
       unsigned SrcWidth = X->getType()->getScalarSizeInBits();
       if (ShAmt < SrcWidth &&
           MaskedValueIsZero(X, APInt::getHighBitsSet(SrcWidth, ShAmt), 0, &I))
-        return new ZExtInst(Builder->CreateShl(X, ShAmt), Ty);
+        return new ZExtInst(Builder.CreateShl(X, ShAmt), Ty);
     }
 
     // (X >>u C) << C --> X & (-1 << C)
@@ -641,7 +640,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
       // ctpop.i32(x)>>5  --> zext(x == -1)
       bool IsPop = II->getIntrinsicID() == Intrinsic::ctpop;
       Constant *RHS = ConstantInt::getSigned(Ty, IsPop ? -1 : 0);
-      Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS);
+      Value *Cmp = Builder.CreateICmpEQ(II->getArgOperand(0), RHS);
       return new ZExtInst(Cmp, Ty);
     }
@@ -658,7 +657,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
         return NewLShr;
       }
       // (X << C1) >>u C2  --> (X >>u (C2 - C1)) & (-1 >> C2)
-      Value *NewLShr = Builder->CreateLShr(X, ShiftDiff, "", I.isExact());
+      Value *NewLShr = Builder.CreateLShr(X, ShiftDiff, "", I.isExact());
       APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
       return BinaryOperator::CreateAnd(NewLShr, ConstantInt::get(Ty, Mask));
     }
@@ -671,7 +670,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
         return NewShl;
       }
       // (X << C1) >>u C2  --> X << (C1 - C2) & (-1 >> C2)
-      Value *NewShl = Builder->CreateShl(X, ShiftDiff);
+      Value *NewShl = Builder.CreateShl(X, ShiftDiff);
       APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
       return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
     }
@@ -692,7 +691,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
       // lshr (sext iM X to iN), N-1 --> zext (lshr X, M-1) to iN
       if (Op0->hasOneUse()) {
-        Value *NewLShr = Builder->CreateLShr(X, SrcTyBitWidth - 1);
+        Value *NewLShr = Builder.CreateLShr(X, SrcTyBitWidth - 1);
         return new ZExtInst(NewLShr, Ty);
       }
     }
@@ -701,7 +700,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
     if (ShAmt == BitWidth - SrcTyBitWidth && Op0->hasOneUse()) {
       // The new shift amount can't be more than the narrow source type.
       unsigned NewShAmt = std::min(ShAmt, SrcTyBitWidth - 1);
-      Value *AShr = Builder->CreateAShr(X, NewShAmt);
+      Value *AShr = Builder.CreateAShr(X, NewShAmt);
       return new ZExtInst(AShr, Ty);
     }
   }
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 03841164b58de..5689c06042391 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -548,7 +548,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
       // X % -1 demands all the bits because we don't want to introduce
       // INT_MIN % -1 (== undef) by accident.
-      if (Rem->isAllOnesValue())
+      if (Rem->isMinusOne())
         break;
       APInt RA = Rem->getValue().abs();
       if (RA.isPowerOf2()) {
@@ -1627,10 +1627,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
         Args.push_back(II->getArgOperand(I));
 
-      IRBuilderBase::InsertPointGuard Guard(*Builder);
-      Builder->SetInsertPoint(II);
+      IRBuilderBase::InsertPointGuard Guard(Builder);
+      Builder.SetInsertPoint(II);
 
-      CallInst *NewCall = Builder->CreateCall(NewIntrin, Args);
+      CallInst *NewCall = Builder.CreateCall(NewIntrin, Args);
       NewCall->takeName(II);
       NewCall->copyMetadata(*II);
@@ -1657,15 +1657,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
 
       if (NewNumElts == 1) {
-        return Builder->CreateInsertElement(UndefValue::get(V->getType()),
-                                            NewCall, static_cast<uint64_t>(0));
+        return Builder.CreateInsertElement(UndefValue::get(V->getType()),
+                                           NewCall, static_cast<uint64_t>(0));
       }
 
       SmallVector<uint32_t, 8> EltMask;
       for (unsigned I = 0; I < VWidth; ++I)
         EltMask.push_back(I);
 
-      Value *Shuffle = Builder->CreateShuffleVector(
+      Value *Shuffle = Builder.CreateShuffleVector(
           NewCall, UndefValue::get(NewTy), EltMask);
 
       MadeChange = true;
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 926e46655eb86..dd71a31b644b3 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -204,11 +204,11 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
       if (I->hasOneUse() &&
          cheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
         Value *newEI0 =
-            Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
-                                          EI.getName()+".lhs");
+            Builder.CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
+                                         EI.getName()+".lhs");
         Value *newEI1 =
-            Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
-                                          EI.getName()+".rhs");
+            Builder.CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
+                                         EI.getName()+".rhs");
         return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(),
                                                      newEI0, newEI1, BO);
       }
@@ -250,8 +250,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
       // Bitcasts can change the number of vector elements, and they cost
      // nothing.
       if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
-        Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
-                                                  EI.getIndexOperand());
+        Value *EE = Builder.CreateExtractElement(CI->getOperand(0),
+                                                 EI.getIndexOperand());
         Worklist.AddValue(EE);
         return CastInst::Create(CI->getOpcode(), EE, EI.getType());
       }
@@ -269,20 +269,20 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
         Value *Cond = SI->getCondition();
         if (Cond->getType()->isVectorTy()) {
-          Cond = Builder->CreateExtractElement(Cond,
-                                               EI.getIndexOperand(),
-                                               Cond->getName() + ".elt");
+          Cond = Builder.CreateExtractElement(Cond,
+                                              EI.getIndexOperand(),
+                                              Cond->getName() + ".elt");
         }
 
         Value *V1Elem
-            = Builder->CreateExtractElement(TrueVal,
-                                            EI.getIndexOperand(),
-                                            TrueVal->getName() + ".elt");
+            = Builder.CreateExtractElement(TrueVal,
+                                           EI.getIndexOperand(),
+                                           TrueVal->getName() + ".elt");
 
         Value *V2Elem
-            = Builder->CreateExtractElement(FalseVal,
-                                            EI.getIndexOperand(),
-                                            FalseVal->getName() + ".elt");
+            = Builder.CreateExtractElement(FalseVal,
+                                           EI.getIndexOperand(),
+                                           FalseVal->getName() + ".elt");
         return SelectInst::Create(Cond,
                                   V1Elem,
                                   V2Elem,
@@ -837,7 +837,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
   if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
     return Shuf;
 
-  if (Instruction *NewInsElt = hoistInsEltConst(IE, *Builder))
+  if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
     return NewInsElt;
 
   // Turn a sequence of inserts that broadcasts a scalar into a single
@@ -1020,9 +1020,9 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
     SmallVector<Constant *, 16> MaskValues;
     for (int i = 0, e = Mask.size(); i != e; ++i) {
       if (Mask[i] == -1)
-        MaskValues.push_back(UndefValue::get(Builder->getInt32Ty()));
+        MaskValues.push_back(UndefValue::get(Builder.getInt32Ty()));
       else
-        MaskValues.push_back(Builder->getInt32(Mask[i]));
+        MaskValues.push_back(Builder.getInt32(Mask[i]));
     }
     return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
                                           ConstantVector::get(MaskValues));
@@ -1095,7 +1095,7 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
       Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
       return InsertElementInst::Create(V, I->getOperand(1),
-                                       Builder->getInt32(Index), "", I);
+                                       Builder.getInt32(Index), "", I);
     }
   }
   llvm_unreachable("failed to reorder elements of vector instruction!");
@@ -1275,9 +1275,9 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
                                       UndefValue::get(Int32Ty));
       for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
         ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
-      V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()),
-                                       ConstantVector::get(ShuffleMask),
-                                       SVI.getName() + ".extract");
+      V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
+                                      ConstantVector::get(ShuffleMask),
+                                      SVI.getName() + ".extract");
       BegIdx = 0;
     }
     unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
@@ -1287,10 +1287,10 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
     auto *NewBC =
         BCAlreadyExists
             ? NewBCs[CastSrcTy]
-            : Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
+            : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
     if (!BCAlreadyExists)
       NewBCs[CastSrcTy] = NewBC;
-    auto *Ext = Builder->CreateExtractElement(
+    auto *Ext = Builder.CreateExtractElement(
         NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
     // The shufflevector isn't being replaced: the bitcast that used it
     // is. InstCombine will visit the newly-created instructions.
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 723414635d6fb..90e2323991555 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -88,7 +88,7 @@ MaxArraySize("instcombine-maxarray-size", cl::init(1024),
              cl::desc("Maximum array size considered when doing a combine"));
 
 Value *InstCombiner::EmitGEPOffset(User *GEP) {
-  return llvm::EmitGEPOffset(Builder, DL, GEP);
+  return llvm::EmitGEPOffset(&Builder, DL, GEP);
 }
 
 /// Return true if it is desirable to convert an integer computation from a
@@ -498,8 +498,7 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode,
 
 /// This tries to simplify binary operations by factorizing out common terms
 /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
-Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder,
-                                      BinaryOperator &I,
+Value *InstCombiner::tryFactorization(BinaryOperator &I,
                                       Instruction::BinaryOps InnerOpcode,
                                       Value *A, Value *B, Value *C, Value *D) {
   assert(A && B && C && D && "All values must be provided");
@@ -525,9 +524,9 @@ Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder,
       // If "B op D" doesn't simplify then only go on if both of the existing
       // operations "A op' B" and "C op' D" will be zapped as no longer used.
       if (!V && LHS->hasOneUse() && RHS->hasOneUse())
-        V = Builder->CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
+        V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
       if (V) {
-        SimplifiedInst = Builder->CreateBinOp(InnerOpcode, A, V);
+        SimplifiedInst = Builder.CreateBinOp(InnerOpcode, A, V);
       }
     }
@@ -545,9 +544,9 @@ Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder,
       // If "A op C" doesn't simplify then only go on if both of the existing
      // operations "A op' B" and "C op' D" will be zapped as no longer used.
       if (!V && LHS->hasOneUse() && RHS->hasOneUse())
-        V = Builder->CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
+        V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
       if (V) {
-        SimplifiedInst = Builder->CreateBinOp(InnerOpcode, V, B);
+        SimplifiedInst = Builder.CreateBinOp(InnerOpcode, V, B);
       }
     }
@@ -610,7 +609,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
   // The instruction has the form "(A op' B) op (C op' D)".  Try to factorize
   // a common term.
   if (Op0 && Op1 && LHSOpcode == RHSOpcode)
-    if (Value *V = tryFactorization(Builder, I, LHSOpcode, A, B, C, D))
+    if (Value *V = tryFactorization(I, LHSOpcode, A, B, C, D))
      return V;
 
   // The instruction has the form "(A op' B) op (C)".  Try to factorize common
@@ -618,7 +617,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
   if (Op0)
     if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
       if (Value *V =
-              tryFactorization(Builder, I, LHSOpcode, A, B, RHS, Ident))
+              tryFactorization(I, LHSOpcode, A, B, RHS, Ident))
        return V;
 
   // The instruction has the form "(B) op (C op' D)".  Try to factorize common
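[Editor's note] tryFactorization drops its explicit Builder parameter in the hunks above because the member is now directly usable; the rewrite it performs is the distributive-law factorization named in its own comment, "(A*B)+(A*C)" -> "A*(B+C)". A quick self-contained check of that identity on plain integers (illustration only, not the pass itself):

    #include <cassert>

    int main() {
      // The factorization InstCombine applies at the IR level:
      // (A op' B) op (A op' C)  ==>  A op' (B op C),
      // here with op' = * and op = +, over a small exhaustive range.
      for (int A = -3; A <= 3; ++A)
        for (int B = -3; B <= 3; ++B)
          for (int C = -3; C <= 3; ++C)
            assert(A * B + A * C == A * (B + C));
      return 0;
    }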
@@ -626,7 +625,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
   if (Op1)
     if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
       if (Value *V =
-              tryFactorization(Builder, I, RHSOpcode, LHS, Ident, C, D))
+              tryFactorization(I, RHSOpcode, LHS, Ident, C, D))
         return V;
   }
@@ -644,7 +643,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
             SimplifyBinOp(TopLevelOpcode, B, C, SQ.getWithInstruction(&I))) {
       // They do! Return "L op' R".
       ++NumExpand;
-      C = Builder->CreateBinOp(InnerOpcode, L, R);
+      C = Builder.CreateBinOp(InnerOpcode, L, R);
       C->takeName(&I);
       return C;
     }
@@ -663,7 +662,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
             SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I))) {
       // They do! Return "L op' R".
       ++NumExpand;
-      A = Builder->CreateBinOp(InnerOpcode, L, R);
+      A = Builder.CreateBinOp(InnerOpcode, L, R);
       A->takeName(&I);
       return A;
     }
@@ -678,18 +677,18 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
     if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(),
                                  SI1->getFalseValue(),
                                  SQ.getWithInstruction(&I)))
-      SI = Builder->CreateSelect(SI0->getCondition(),
-                                 Builder->CreateBinOp(TopLevelOpcode,
-                                                      SI0->getTrueValue(),
-                                                      SI1->getTrueValue()),
-                                 V);
+      SI = Builder.CreateSelect(SI0->getCondition(),
+                                Builder.CreateBinOp(TopLevelOpcode,
+                                                    SI0->getTrueValue(),
+                                                    SI1->getTrueValue()),
+                                V);
     if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(),
                                  SI1->getTrueValue(),
                                  SQ.getWithInstruction(&I)))
-      SI = Builder->CreateSelect(
+      SI = Builder.CreateSelect(
           SI0->getCondition(), V,
-          Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(),
-                               SI1->getFalseValue()));
+          Builder.CreateBinOp(TopLevelOpcode, SI0->getFalseValue(),
+                              SI1->getFalseValue()));
     if (SI) {
       SI->takeName(&I);
       return SI;
@@ -751,9 +750,9 @@ Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const {
 }
 
 static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
-                                             InstCombiner *IC) {
+                                             InstCombiner::BuilderTy &Builder) {
   if (auto *Cast = dyn_cast<CastInst>(&I))
-    return IC->Builder->CreateCast(Cast->getOpcode(), SO, I.getType());
+    return Builder.CreateCast(Cast->getOpcode(), SO, I.getType());
 
   assert(I.isBinaryOp() && "Unexpected opcode for select folding");
@@ -772,8 +771,8 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
     std::swap(Op0, Op1);
 
   auto *BO = cast<BinaryOperator>(&I);
-  Value *RI = IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
-                                       SO->getName() + ".op");
+  Value *RI = Builder.CreateBinOp(BO->getOpcode(), Op0, Op1,
+                                  SO->getName() + ".op");
   auto *FPInst = dyn_cast<Instruction>(RI);
   if (FPInst && isa<FPMathOperator>(FPInst))
     FPInst->copyFastMathFlags(BO);
@@ -791,7 +790,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
     return nullptr;
 
   // Bool selects with constant operands can be folded to logical ops.
- if (SI->getType()->getScalarType()->isIntegerTy(1)) + if (SI->getType()->isIntOrIntVectorTy(1)) return nullptr; // If it's a bitcast involving vectors, make sure it has the same number of @@ -825,13 +824,13 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { } } - Value *NewTV = foldOperationIntoSelectOperand(Op, TV, this); - Value *NewFV = foldOperationIntoSelectOperand(Op, FV, this); + Value *NewTV = foldOperationIntoSelectOperand(Op, TV, Builder); + Value *NewFV = foldOperationIntoSelectOperand(Op, FV, Builder); return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI); } static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV, - InstCombiner *IC) { + InstCombiner::BuilderTy &Builder) { bool ConstIsRHS = isa<Constant>(I->getOperand(1)); Constant *C = cast<Constant>(I->getOperand(ConstIsRHS)); @@ -845,7 +844,7 @@ static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV, if (!ConstIsRHS) std::swap(Op0, Op1); - Value *RI = IC->Builder->CreateBinOp(I->getOpcode(), Op0, Op1, "phitmp"); + Value *RI = Builder.CreateBinOp(I->getOpcode(), Op0, Op1, "phitmp"); auto *FPInst = dyn_cast<Instruction>(RI); if (FPInst && isa<FPMathOperator>(FPInst)) FPInst->copyFastMathFlags(I); @@ -916,7 +915,7 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { // If we are going to have to insert a new computation, do so right before the // predecessor's terminator. if (NonConstBB) - Builder->SetInsertPoint(NonConstBB->getTerminator()); + Builder.SetInsertPoint(NonConstBB->getTerminator()); // Next, add all of the operands to the PHI. if (SelectInst *SI = dyn_cast<SelectInst>(&I)) { @@ -948,9 +947,9 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { // folded to TrueVInPred or FalseVInPred as done for ConstantInt. For // non-vector phis, this transformation was always profitable because // the select would be generated exactly once in the NonConstBB. 
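(For context on the select folding that FoldOpIntoSelect performs once both arms are known to simplify, a hedged sketch; the helper is hypothetical, assumes the constant operand sits at index 1, and omits the cast and fast-math handling the in-tree code has.)

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch only: "op (select C, T, F), K" -> "select C, (op T, K), (op F, K)".
static Value *distributeOverSelect(BinaryOperator &BO, SelectInst *SI,
                                   IRBuilder<> &Builder) {
  Value *K = BO.getOperand(1); // assumed to be the constant operand
  Value *T = Builder.CreateBinOp(BO.getOpcode(), SI->getTrueValue(), K,
                                 SI->getName() + ".t");
  Value *F = Builder.CreateBinOp(BO.getOpcode(), SI->getFalseValue(), K,
                                 SI->getName() + ".f");
  return Builder.CreateSelect(SI->getCondition(), T, F);
}
```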
- Builder->SetInsertPoint(ThisBB->getTerminator()); - InV = Builder->CreateSelect(PN->getIncomingValue(i), - TrueVInPred, FalseVInPred, "phitmp"); + Builder.SetInsertPoint(ThisBB->getTerminator()); + InV = Builder.CreateSelect(PN->getIncomingValue(i), TrueVInPred, + FalseVInPred, "phitmp"); } NewPN->addIncoming(InV, ThisBB); } @@ -961,16 +960,17 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); else if (isa<ICmpInst>(CI)) - InV = Builder->CreateICmp(CI->getPredicate(), PN->getIncomingValue(i), - C, "phitmp"); + InV = Builder.CreateICmp(CI->getPredicate(), PN->getIncomingValue(i), + C, "phitmp"); else - InV = Builder->CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i), - C, "phitmp"); + InV = Builder.CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i), + C, "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } else if (auto *BO = dyn_cast<BinaryOperator>(&I)) { for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i), this); + Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i), + Builder); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } else { @@ -981,8 +981,8 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); else - InV = Builder->CreateCast(CI->getOpcode(), - PN->getIncomingValue(i), I.getType(), "phitmp"); + InV = Builder.CreateCast(CI->getOpcode(), PN->getIncomingValue(i), + I.getType(), "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } @@ -1328,8 +1328,8 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { /// \brief Creates node of binary operation with the same attributes as the /// specified one but with other operands. static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS, - InstCombiner::BuilderTy *B) { - Value *BO = B->CreateBinOp(Inst.getOpcode(), LHS, RHS); + InstCombiner::BuilderTy &B) { + Value *BO = B.CreateBinOp(Inst.getOpcode(), LHS, RHS); // If LHS and RHS are constant, BO won't be a binary operator. if (BinaryOperator *NewBO = dyn_cast<BinaryOperator>(BO)) NewBO->copyIRFlags(&Inst); @@ -1365,7 +1365,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { LShuf->getOperand(0)->getType() == RShuf->getOperand(0)->getType()) { Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0), RShuf->getOperand(0), Builder); - return Builder->CreateShuffleVector( + return Builder.CreateShuffleVector( NewBO, UndefValue::get(NewBO->getType()), LShuf->getMask()); } @@ -1404,7 +1404,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { Value *NewLHS = isa<Constant>(LHS) ? C2 : Shuffle->getOperand(0); Value *NewRHS = isa<Constant>(LHS) ? Shuffle->getOperand(0) : C2; Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder); - return Builder->CreateShuffleVector(NewBO, + return Builder.CreateShuffleVector(NewBO, UndefValue::get(Inst.getType()), Shuffle->getMask()); } } @@ -1452,7 +1452,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // If we are using a wider index than needed for this platform, shrink // it to what we need. If narrower, sign-extend it to what we need. // This explicit cast can make subsequent optimizations more obvious. 
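(The cast that comment refers to can be sketched in isolation; the helper is hypothetical, and DL is assumed to be the module's DataLayout.)

```cpp
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch only: canonicalize a GEP index to the pointer-width integer type.
// CreateIntCast sign-extends a narrower index, truncates a wider one, and
// is a no-op when the types already match.
static Value *canonicalizeGEPIndex(IRBuilder<> &Builder, const DataLayout &DL,
                                   GetElementPtrInst &GEP, Value *Idx) {
  Type *IndexTy = DL.getIntPtrType(GEP.getType());
  return Builder.CreateIntCast(Idx, IndexTy, /*isSigned=*/true);
}
```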
- *I = Builder->CreateIntCast(*I, NewIndexType, true); + *I = Builder.CreateIntCast(*I, NewIndexType, true); MadeChange = true; } } @@ -1546,10 +1546,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // set that index. PHINode *NewPN; { - IRBuilderBase::InsertPointGuard Guard(*Builder); - Builder->SetInsertPoint(PN); - NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(), - PN->getNumOperands()); + IRBuilderBase::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(PN); + NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(), + PN->getNumOperands()); } for (auto &I : PN->operands()) @@ -1669,8 +1669,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // pointer arithmetic. if (match(V, m_Neg(m_PtrToInt(m_Value())))) { Operator *Index = cast<Operator>(V); - Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType()); - Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1)); + Value *PtrToInt = Builder.CreatePtrToInt(PtrOp, Index->getType()); + Value *NewSub = Builder.CreateSub(PtrToInt, Index->getOperand(1)); return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType()); } // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) @@ -1723,7 +1723,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // -> // %0 = GEP i8 addrspace(1)* X, ... // addrspacecast i8 addrspace(1)* %0 to i8* - return new AddrSpaceCastInst(Builder->Insert(Res), GEP.getType()); + return new AddrSpaceCastInst(Builder.Insert(Res), GEP.getType()); } if (ArrayType *XATy = @@ -1751,10 +1751,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // addrspacecast i8 addrspace(1)* %0 to i8* SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end()); Value *NewGEP = GEP.isInBounds() - ? Builder->CreateInBoundsGEP( + ? Builder.CreateInBoundsGEP( nullptr, StrippedPtr, Idx, GEP.getName()) - : Builder->CreateGEP(nullptr, StrippedPtr, Idx, - GEP.getName()); + : Builder.CreateGEP(nullptr, StrippedPtr, Idx, + GEP.getName()); return new AddrSpaceCastInst(NewGEP, GEP.getType()); } } @@ -1772,9 +1772,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; Value *NewGEP = GEP.isInBounds() - ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, Idx, - GEP.getName()) - : Builder->CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName()); + ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, Idx, + GEP.getName()) + : Builder.CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName()); // V and GEP are both pointer types --> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1807,10 +1807,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // GEP may not be "inbounds". Value *NewGEP = GEP.isInBounds() && NSW - ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx, - GEP.getName()) - : Builder->CreateGEP(nullptr, StrippedPtr, NewIdx, - GEP.getName()); + ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx, + GEP.getName()) + : Builder.CreateGEP(nullptr, StrippedPtr, NewIdx, + GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1849,10 +1849,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { NewIdx}; Value *NewGEP = GEP.isInBounds() && NSW - ? Builder->CreateInBoundsGEP( + ? 
Builder.CreateInBoundsGEP( SrcElTy, StrippedPtr, Off, GEP.getName()) - : Builder->CreateGEP(SrcElTy, StrippedPtr, Off, - GEP.getName()); + : Builder.CreateGEP(SrcElTy, StrippedPtr, Off, + GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEP.getType()); @@ -1916,8 +1916,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) { Value *NGEP = GEP.isInBounds() - ? Builder->CreateInBoundsGEP(nullptr, Operand, NewIndices) - : Builder->CreateGEP(nullptr, Operand, NewIndices); + ? Builder.CreateInBoundsGEP(nullptr, Operand, NewIndices) + : Builder.CreateGEP(nullptr, Operand, NewIndices); if (NGEP->getType() == GEP.getType()) return replaceInstUsesWith(GEP, NGEP); @@ -2166,8 +2166,8 @@ Instruction *InstCombiner::visitFree(CallInst &FI) { // free undef -> unreachable. if (isa<UndefValue>(Op)) { // Insert a new store to null because we cannot modify the CFG here. - Builder->CreateStore(ConstantInt::getTrue(FI.getContext()), - UndefValue::get(Type::getInt1PtrTy(FI.getContext()))); + Builder.CreateStore(ConstantInt::getTrue(FI.getContext()), + UndefValue::get(Type::getInt1PtrTy(FI.getContext()))); return eraseInstFromFunction(FI); } @@ -2281,8 +2281,8 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { // the backend should extend back to a legal type for the target. if (NewWidth > 0 && NewWidth < Known.getBitWidth()) { IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth); - Builder->SetInsertPoint(&SI); - Value *NewCond = Builder->CreateTrunc(Cond, Ty, "trunc"); + Builder.SetInsertPoint(&SI); + Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc"); SI.setCondition(NewCond); for (auto Case : SI.cases()) { @@ -2339,8 +2339,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // %E = insertvalue { i32 } %X, i32 42, 0 // by switching the order of the insert and extract (though the // insertvalue should be left in, since it may have other uses). - Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(), - EV.getIndices()); + Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(), + EV.getIndices()); return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), makeArrayRef(insi, inse)); } @@ -2415,17 +2415,17 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // extractvalue has integer indices, getelementptr has Value*s. Convert. SmallVector<Value*, 4> Indices; // Prefix an i32 0 since we need the first element. - Indices.push_back(Builder->getInt32(0)); + Indices.push_back(Builder.getInt32(0)); for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end(); I != E; ++I) - Indices.push_back(Builder->getInt32(*I)); + Indices.push_back(Builder.getInt32(*I)); // We need to insert these at the location of the old load, not at that of // the extractvalue. - Builder->SetInsertPoint(L); - Value *GEP = Builder->CreateInBoundsGEP(L->getType(), - L->getPointerOperand(), Indices); - Instruction *NL = Builder->CreateLoad(GEP); + Builder.SetInsertPoint(L); + Value *GEP = Builder.CreateInBoundsGEP(L->getType(), + L->getPointerOperand(), Indices); + Instruction *NL = Builder.CreateLoad(GEP); // Whatever aliasing information we had for the orignal load must also // hold for the smaller load, so propagate the annotations. 
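(Condensed, the extractvalue-of-load rewrite just shown amounts to the following; the helper is hypothetical, and the real code also validates the index chain and, as the next hunk notes, copies the AA metadata onto the narrower load.)

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch only: load one field through an inbounds GEP placed at the old
// load, instead of loading the whole aggregate and extracting from it.
static Value *loadSingleField(IRBuilder<> &Builder, LoadInst *L,
                              unsigned FieldIdx) {
  Builder.SetInsertPoint(L); // insert at the old load, not the extractvalue
  Value *Idx[] = {Builder.getInt32(0), Builder.getInt32(FieldIdx)};
  Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
                                         L->getPointerOperand(), Idx);
  return Builder.CreateLoad(GEP, L->getName() + ".field");
}
```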
AAMDNodes Nodes; @@ -2922,8 +2922,8 @@ bool InstCombiner::run() { } // Now that we have an instruction, try combining it to simplify it. - Builder->SetInsertPoint(I); - Builder->SetCurrentDebugLocation(I->getDebugLoc()); + Builder.SetInsertPoint(I); + Builder.SetCurrentDebugLocation(I->getDebugLoc()); #ifndef NDEBUG std::string OrigI; @@ -3160,7 +3160,7 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist); - InstCombiner IC(Worklist, &Builder, F.optForMinSize(), ExpensiveCombines, + InstCombiner IC(Worklist, Builder, F.optForMinSize(), ExpensiveCombines, AA, AC, TLI, DT, DL, LI); IC.MaxArraySizeForCombine = MaxArraySize; diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 7eea44d6aca03..184940b7ea583 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1230,7 +1230,7 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass, if (auto *Vector = dyn_cast<ConstantVector>(Mask)) { // dyn_cast as we might get UndefValue if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) { - if (Masked->isNullValue()) + if (Masked->isZero()) // Mask is constant false, so no instrumentation needed. continue; // If we have a true or undef value, fall through to doInstrumentAddress diff --git a/lib/Transforms/Instrumentation/CFGMST.h b/lib/Transforms/Instrumentation/CFGMST.h index 3802f9fbf7dbe..16e2e6b4e7304 100644 --- a/lib/Transforms/Instrumentation/CFGMST.h +++ b/lib/Transforms/Instrumentation/CFGMST.h @@ -12,6 +12,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H +#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H + #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -24,10 +27,10 @@ #include <utility> #include <vector> -namespace llvm { - #define DEBUG_TYPE "cfgmst" +namespace llvm { + /// \brief An union-find based Minimum Spanning Tree for CFG /// /// Implements a Union-find algorithm to compute Minimum Spanning Tree @@ -220,5 +223,8 @@ public: } }; -#undef DEBUG_TYPE // "cfgmst" } // end namespace llvm + +#undef DEBUG_TYPE // "cfgmst" + +#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index 9c14b0149fdc1..db8fa89779479 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -112,7 +112,7 @@ cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore, cl::desc("Do counter register promotion"), cl::init(false)); cl::opt<unsigned> MaxNumOfPromotionsPerLoop( - cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(10), + cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20), cl::desc("Max number counter promotions per loop to avoid" " increasing register pressure too much")); @@ -121,10 +121,21 @@ cl::opt<int> MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1), cl::desc("Max number of allowed counter promotions")); -cl::opt<bool> SpeculativeCounterPromotion( - cl::ZeroOrMore, "speculative-counter-promotion", cl::init(false), - cl::desc("Allow counter promotion for loops with multiple exiting blocks " - " or top-tested loops. 
")); +cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting( + cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3), + cl::desc("The max number of exiting blocks of a loop to allow " + " speculative counter promotion")); + +cl::opt<bool> SpeculativeCounterPromotionToLoop( + cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false), + cl::desc("When the option is false, if the target block is in a loop, " + "the promotion will be disallowed unless the promoted counter " + " update can be further/iteratively promoted into an acyclic " + " region.")); + +cl::opt<bool> IterativeCounterPromotion( + cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true), + cl::desc("Allow counter promotion across the whole loop nest.")); class InstrProfilingLegacyPass : public ModulePass { InstrProfiling InstrProf; @@ -150,6 +161,7 @@ public: } }; +/// /// A helper class to promote one counter RMW operation in the loop /// into register update. /// @@ -158,16 +170,19 @@ public: /// class PGOCounterPromoterHelper : public LoadAndStorePromoter { public: - PGOCounterPromoterHelper(Instruction *L, Instruction *S, SSAUpdater &SSA, - Value *Init, BasicBlock *PH, - ArrayRef<BasicBlock *> ExitBlocks, - ArrayRef<Instruction *> InsertPts) + PGOCounterPromoterHelper( + Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init, + BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks, + ArrayRef<Instruction *> InsertPts, + DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands, + LoopInfo &LI) : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks), - InsertPts(InsertPts) { + InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) { assert(isa<LoadInst>(L)); assert(isa<StoreInst>(S)); SSA.AddAvailableValue(PH, Init); } + void doExtraRewritesBeforeFinalDeletion() const override { for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = ExitBlocks[i]; @@ -179,12 +194,21 @@ public: Value *Addr = cast<StoreInst>(Store)->getPointerOperand(); IRBuilder<> Builder(InsertPos); if (AtomicCounterUpdatePromoted) + // automic update currently can only be promoted across the current + // loop, not the whole loop nest. 
Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue, AtomicOrdering::SequentiallyConsistent); else { LoadInst *OldVal = Builder.CreateLoad(Addr, "pgocount.promoted"); auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue); - Builder.CreateStore(NewVal, Addr); + auto *NewStore = Builder.CreateStore(NewVal, Addr); + + // Now update the parent loop's candidate list: + if (IterativeCounterPromotion) { + auto *TargetLoop = LI.getLoopFor(ExitBlock); + if (TargetLoop) + LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore); + } } } } @@ -193,6 +217,8 @@ private: Instruction *Store; ArrayRef<BasicBlock *> ExitBlocks; ArrayRef<Instruction *> InsertPts; + DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates; + LoopInfo &LI; }; /// A helper class to do register promotion for all profile counter @@ -200,12 +226,15 @@ private: /// class PGOCounterPromoter { public: - PGOCounterPromoter(ArrayRef<LoadStorePair> Cands, Loop &Loop) - : Candidates(Cands), ExitBlocks(), InsertPts(), ParentLoop(Loop) { + PGOCounterPromoter( + DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands, + Loop &CurLoop, LoopInfo &LI) + : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop), + LI(LI) { SmallVector<BasicBlock *, 8> LoopExitBlocks; SmallPtrSet<BasicBlock *, 8> BlockSet; - ParentLoop.getExitBlocks(LoopExitBlocks); + L.getExitBlocks(LoopExitBlocks); for (BasicBlock *ExitBlock : LoopExitBlocks) { if (BlockSet.insert(ExitBlock).second) { @@ -216,55 +245,97 @@ public: } bool run(int64_t *NumPromoted) { - // We can't insert into a catchswitch. - bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) { - return isa<CatchSwitchInst>(Exit->getTerminator()); - }); - - if (HasCatchSwitch) - return false; - - if (!ParentLoop.hasDedicatedExits()) - return false; - - BasicBlock *PH = ParentLoop.getLoopPreheader(); - if (!PH) - return false; - - BasicBlock *H = ParentLoop.getHeader(); - bool TopTested = - ((ParentLoop.getBlocks().size() > 1) && ParentLoop.isLoopExiting(H)); - if (!SpeculativeCounterPromotion && - (TopTested || ParentLoop.getExitingBlock() == nullptr)) + unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L); + if (MaxProm == 0) return false; unsigned Promoted = 0; - for (auto &Cand : Candidates) { + for (auto &Cand : LoopToCandidates[&L]) { SmallVector<PHINode *, 4> NewPHIs; SSAUpdater SSA(&NewPHIs); Value *InitVal = ConstantInt::get(Cand.first->getType(), 0); + PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal, - PH, ExitBlocks, InsertPts); + L.getLoopPreheader(), ExitBlocks, + InsertPts, LoopToCandidates, LI); Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second})); Promoted++; - if (Promoted >= MaxNumOfPromotionsPerLoop) + if (Promoted >= MaxProm) break; + (*NumPromoted)++; if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions) break; } DEBUG(dbgs() << Promoted << " counters promoted for loop (depth=" - << ParentLoop.getLoopDepth() << ")\n"); + << L.getLoopDepth() << ")\n"); return Promoted != 0; } private: - ArrayRef<LoadStorePair> Candidates; + bool allowSpeculativeCounterPromotion(Loop *LP) { + SmallVector<BasicBlock *, 8> ExitingBlocks; + L.getExitingBlocks(ExitingBlocks); + // Not considered speculative. + if (ExitingBlocks.size() == 1) + return true; + if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting) + return false; + return true; + } + + // Returns the max number of Counter Promotions for LP.
+ unsigned getMaxNumOfPromotionsInLoop(Loop *LP) { + // We can't insert into a catchswitch. + SmallVector<BasicBlock *, 8> LoopExitBlocks; + LP->getExitBlocks(LoopExitBlocks); + if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) { + return isa<CatchSwitchInst>(Exit->getTerminator()); + })) + return 0; + + if (!LP->hasDedicatedExits()) + return 0; + + BasicBlock *PH = LP->getLoopPreheader(); + if (!PH) + return 0; + + SmallVector<BasicBlock *, 8> ExitingBlocks; + LP->getExitingBlocks(ExitingBlocks); + // Not considered speculative. + if (ExitingBlocks.size() == 1) + return MaxNumOfPromotionsPerLoop; + + if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting) + return 0; + + // Whether the target block is in a loop does not matter: + if (SpeculativeCounterPromotionToLoop) + return MaxNumOfPromotionsPerLoop; + + // Now check the target block: + unsigned MaxProm = MaxNumOfPromotionsPerLoop; + for (auto *TargetBlock : LoopExitBlocks) { + auto *TargetLoop = LI.getLoopFor(TargetBlock); + if (!TargetLoop) + continue; + unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop); + unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size(); + MaxProm = + std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) - + PendingCandsInTarget); + } + return MaxProm; + } + + DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates; SmallVector<BasicBlock *, 8> ExitBlocks; SmallVector<Instruction *, 8> InsertPts; - Loop &ParentLoop; + Loop &L; + LoopInfo &LI; }; } // end anonymous namespace @@ -349,8 +420,10 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) { SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder(); - for (auto *Loop : Loops) { - PGOCounterPromoter Promoter(LoopPromotionCandidates[Loop], *Loop); + // Do a post-order traversal of the loops so that counter updates can be + // iteratively hoisted outside the loop nest.
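(The iterative case composes the same rewrite across the nest: the inner loop's exit flush is itself recorded via LoopToCandidates and promoted again when the parent loop is visited. A plain-C++ model with illustrative names:)

```cpp
#include <cstdint>

extern uint64_t Counters[1];

// Round 1 promotes the inner counter; its flush (Outer += Inner) becomes
// a candidate of the parent loop. Round 2 promotes that flush too, so a
// single store remains at the exit of the whole nest.
void afterIterativePromotion(int N, int M) {
  uint64_t Outer = 0;
  for (int I = 0; I < N; ++I) {
    uint64_t Inner = 0;
    for (int J = 0; J < M; ++J)
      Inner += 1;
    Outer += Inner;      // was the inner loop's exit flush
  }
  Counters[0] += Outer;  // the only memory update left
}
```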
+ for (auto *Loop : llvm::reverse(Loops)) { + PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI); Promoter.run(&TotalCountersPromoted); } } diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h index 363539b2886f3..4eb758c69c581 100644 --- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H -#define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H +#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H +#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/IR/BasicBlock.h" @@ -108,4 +108,4 @@ namespace llvm { } // End llvm namespace -#endif +#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index df4ee9969c02f..1348e0ed0ed00 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2918,8 +2918,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { if (ClDumpStrictInstructions) dumpInst(I); DEBUG(dbgs() << "DEFAULT: " << I << "\n"); - for (size_t i = 0, n = I.getNumOperands(); i < n; i++) - insertShadowCheck(I.getOperand(i), &I); + for (size_t i = 0, n = I.getNumOperands(); i < n; i++) { + Value *Operand = I.getOperand(i); + if (Operand->getType()->isSized()) + insertShadowCheck(Operand, &I); + } setShadow(&I, getCleanShadow(&I)); setOrigin(&I, getCleanOrigin()); } diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 0e7d11c553977..8e4bfc0b91bc5 100644 --- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -224,7 +224,7 @@ std::string getBranchCondString(Instruction *TI) { OS << "_Zero"; else if (CV->isOne()) OS << "_One"; - else if (CV->isAllOnesValue()) + else if (CV->isMinusOne()) OS << "_MinusOne"; else OS << "_Const"; diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index a991792bf5a39..ec6904486e109 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -379,10 +379,11 @@ void ThreadSanitizer::chooseInstructionsToInstrument( } static bool isAtomic(Instruction *I) { + // TODO: Ask TTI whether synchronization scope is between threads. if (LoadInst *LI = dyn_cast<LoadInst>(I)) - return LI->isAtomic() && LI->getSynchScope() == CrossThread; + return LI->isAtomic() && LI->getSyncScopeID() != SyncScope::SingleThread; if (StoreInst *SI = dyn_cast<StoreInst>(I)) - return SI->isAtomic() && SI->getSynchScope() == CrossThread; + return SI->isAtomic() && SI->getSyncScopeID() != SyncScope::SingleThread; if (isa<AtomicRMWInst>(I)) return true; if (isa<AtomicCmpXchgInst>(I)) @@ -676,7 +677,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { I->eraseFromParent(); } else if (FenceInst *FI = dyn_cast<FenceInst>(I)) { Value *Args[] = {createOrdering(&IRB, FI->getOrdering())}; - Function *F = FI->getSynchScope() == SingleThread ? + Function *F = FI->getSyncScopeID() == SyncScope::SingleThread ? 
TsanAtomicSignalFence : TsanAtomicThreadFence; CallInst *C = CallInst::Create(F, Args); ReplaceInstWithInst(I, C); diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index a49c9b68c97d0..122c9314e022a 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -44,6 +44,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include <tuple> using namespace llvm; @@ -55,7 +56,7 @@ STATISTIC(NumConstantsHoisted, "Number of constants hoisted"); STATISTIC(NumConstantsRebased, "Number of constants rebased"); static cl::opt<bool> ConstHoistWithBlockFrequency( - "consthoist-with-block-frequency", cl::init(false), cl::Hidden, + "consthoist-with-block-frequency", cl::init(true), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to reduce the " "chance to execute const materialization more frequently than " "without hoisting.")); @@ -231,7 +232,8 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, // Return the optimal insert points in BBs. if (Node == Entry) { BBs.clear(); - if (InsertPtsFreq > BFI.getBlockFreq(Node)) + if (InsertPtsFreq > BFI.getBlockFreq(Node) || + (InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1)) BBs.insert(Entry); else BBs.insert(InsertPts.begin(), InsertPts.end()); @@ -244,7 +246,15 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, SmallPtrSet<BasicBlock *, 16> &ParentInsertPts = InsertPtsMap[Parent].first; BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second; // Choose to insert in Node or in subtree of Node. - if (InsertPtsFreq > BFI.getBlockFreq(Node) || NodeInBBs) { + // Don't hoist to EHPad because we may not find a proper place to insert + // in EHPad. + // If the total frequency of InsertPts is the same as the frequency of the + // target Node, and InsertPts contains more than one node, choose hoisting + // to reduce code size. + if (NodeInBBs || + (!Node->isEHPad() && + (InsertPtsFreq > BFI.getBlockFreq(Node) || + (InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1)))) { ParentInsertPts.insert(Node); ParentPtsFreq += BFI.getBlockFreq(Node); } else { @@ -392,42 +402,15 @@ void ConstantHoistingPass::collectConstantCandidates( if (Inst->isCast()) return; - // Can't handle inline asm. Skip it. - if (auto Call = dyn_cast<CallInst>(Inst)) - if (isa<InlineAsm>(Call->getCalledValue())) - return; - - // Switch cases must remain constant, and if the value being tested is - // constant the entire thing should disappear. - if (isa<SwitchInst>(Inst)) - return; - - // Static allocas (constant size in the entry block) are handled by - // prologue/epilogue insertion so they're free anyway. We definitely don't - // want to make them non-constant. - auto AI = dyn_cast<AllocaInst>(Inst); - if (AI && AI->isStaticAlloca()) - return; - - // Constants in GEPs that index into a struct type should not be hoisted. - if (isa<GetElementPtrInst>(Inst)) { - gep_type_iterator GTI = gep_type_begin(Inst); - - // Collect constant for first operand. - collectConstantCandidates(ConstCandMap, Inst, 0); - // Scan rest operands. - for (unsigned Idx = 1, E = Inst->getNumOperands(); Idx != E; ++Idx, ++GTI) { - // Only collect constants that index into a non struct type. - if (!GTI.isStruct()) { - collectConstantCandidates(ConstCandMap, Inst, Idx); - } - } - return; - } - // Scan all operands.
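(For orientation before the rewritten operand scan, a plain-C++ model of the payoff constant hoisting aims at; the constants are made up, and the rebased form only wins on targets where base-plus-offset folds cheaply.)

```cpp
#include <cstdint>

// Before: two expensive 64-bit immediates are materialized independently.
uint64_t beforeHoist(uint64_t A, uint64_t B) {
  return (A + 0x123456789A00ULL) ^ (B + 0x123456789A08ULL);
}

// After: one base constant is hoisted and the second use is rebased.
uint64_t afterHoist(uint64_t A, uint64_t B) {
  const uint64_t Base = 0x123456789A00ULL; // materialized once
  return (A + Base) ^ (B + (Base + 8));
}
```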
for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) { - collectConstantCandidates(ConstCandMap, Inst, Idx); + // The cost of materializing the constants (defined in + // `TargetTransformInfo::getIntImmCost`) for instructions which only take + // constant variables is lower than `TargetTransformInfo::TCC_Basic`. So + // it's safe for us to collect constant candidates from all IntrinsicInsts. + if (canReplaceOperandWithVariable(Inst, Idx) || isa<IntrinsicInst>(Inst)) { + collectConstantCandidates(ConstCandMap, Inst, Idx); + } } // end of for all operands } diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 0f92760a874b5..7fd77a082b822 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -670,7 +670,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { if (auto *KnownCond = AvailableValues.lookup(CondI)) { // Is the condition known to be true? if (isa<ConstantInt>(KnownCond) && - cast<ConstantInt>(KnownCond)->isOneValue()) { + cast<ConstantInt>(KnownCond)->isOne()) { DEBUG(dbgs() << "EarlyCSE removing guard: " << *Inst << '\n'); removeMSSA(Inst); Inst->eraseFromParent(); diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index c0f628eb61e61..0fe72f3f73318 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -80,10 +80,9 @@ MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore, struct llvm::GVN::Expression { uint32_t opcode; Type *type; - bool commutative; SmallVector<uint32_t, 4> varargs; - Expression(uint32_t o = ~2U) : opcode(o), commutative(false) {} + Expression(uint32_t o = ~2U) : opcode(o) {} bool operator==(const Expression &other) const { if (opcode != other.opcode) @@ -247,7 +246,6 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!"); if (e.varargs[0] > e.varargs[1]) std::swap(e.varargs[0], e.varargs[1]); - e.commutative = true; } if (CmpInst *C = dyn_cast<CmpInst>(I)) { @@ -258,7 +256,6 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { Predicate = CmpInst::getSwappedPredicate(Predicate); } e.opcode = (C->getOpcode() << 8) | Predicate; - e.commutative = true; } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) { for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); II != IE; ++II) @@ -284,7 +281,6 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode, Predicate = CmpInst::getSwappedPredicate(Predicate); } e.opcode = (Opcode << 8) | Predicate; - e.commutative = true; return e; } @@ -352,25 +348,25 @@ GVN::ValueTable::~ValueTable() = default; /// add - Insert a value into the table with a specified value number. 
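(The GVN hunks that follow restore a single numbering idiom in several places, so it is worth spelling out once; a self-contained sketch, with std::unordered_map and a string key standing in for the pass's DenseMap over Expressions.)

```cpp
#include <cstdint>
#include <string>
#include <unordered_map>

// Value numbers start at 1, so a default-constructed (zero) mapped value
// means "not numbered yet"; operator[] yields a reference we can test and
// assign through in one lookup.
uint32_t lookupOrAssign(std::unordered_map<std::string, uint32_t> &Numbering,
                        const std::string &Key, uint32_t &NextValueNumber) {
  uint32_t &E = Numbering[Key];
  if (!E)
    E = NextValueNumber++;
  return E;
}
```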
void GVN::ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); - if (PHINode *PN = dyn_cast<PHINode>(V)) - NumberingPhi[num] = PN; } uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) { if (AA->doesNotAccessMemory(C)) { Expression exp = createExpr(C); - uint32_t e = assignExpNewValueNum(exp).first; + uint32_t &e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; valueNumbering[C] = e; return e; } else if (AA->onlyReadsMemory(C)) { Expression exp = createExpr(C); - auto ValNum = assignExpNewValueNum(exp); - if (ValNum.second) { - valueNumbering[C] = ValNum.first; - return ValNum.first; + uint32_t &e = expressionNumbering[exp]; + if (!e) { + e = nextValueNumber++; + valueNumbering[C] = e; + return e; } if (!MD) { - uint32_t e = assignExpNewValueNum(exp).first; + e = nextValueNumber++; valueNumbering[C] = e; return e; } @@ -526,29 +522,23 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) { case Instruction::ExtractValue: exp = createExtractvalueExpr(cast<ExtractValueInst>(I)); break; - case Instruction::PHI: - valueNumbering[V] = nextValueNumber; - NumberingPhi[nextValueNumber] = cast<PHINode>(V); - return nextValueNumber++; default: valueNumbering[V] = nextValueNumber; return nextValueNumber++; } - uint32_t e = assignExpNewValueNum(exp).first; + uint32_t& e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; valueNumbering[V] = e; return e; } /// Returns the value number of the specified value. Fails if /// the value has not yet been numbered. -uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const { +uint32_t GVN::ValueTable::lookup(Value *V) const { DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V); - if (Verify) { - assert(VI != valueNumbering.end() && "Value not numbered?"); - return VI->second; - } - return (VI != valueNumbering.end()) ? VI->second : 0; + assert(VI != valueNumbering.end() && "Value not numbered?"); + return VI->second; } /// Returns the value number of the given comparison, @@ -559,28 +549,21 @@ uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) { Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS); - return assignExpNewValueNum(exp).first; + uint32_t& e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; + return e; } /// Remove all entries from the ValueTable. void GVN::ValueTable::clear() { valueNumbering.clear(); expressionNumbering.clear(); - NumberingPhi.clear(); - PhiTranslateTable.clear(); nextValueNumber = 1; - Expressions.clear(); - ExprIdx.clear(); - nextExprNumber = 0; } /// Remove a value from the value numbering. void GVN::ValueTable::erase(Value *V) { - uint32_t Num = valueNumbering.lookup(V); valueNumbering.erase(V); - // If V is PHINode, V <--> value number is an one-to-one mapping. - if (isa<PHINode>(V)) - NumberingPhi.erase(Num); } /// verifyRemoved - Verify that the value is removed from all internal data @@ -1183,7 +1166,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, auto *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", LI->isVolatile(), LI->getAlignment(), - LI->getOrdering(), LI->getSynchScope(), + LI->getOrdering(), LI->getSyncScopeID(), UnavailablePred->getTerminator()); // Transfer the old load's AA tags to the new load. 
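(The new LoadInst above also picks up the SynchScope-to-SyncScopeID migration seen in the ThreadSanitizer hunk earlier; the predicate shape, as a sketch with a hypothetical helper name:)

```cpp
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Sketch only: "cross-thread" is now expressed as "any scope other than
// SyncScope::SingleThread" instead of the removed CrossThread enumerator.
static bool isCrossThreadAtomicLoad(const LoadInst *LI) {
  return LI->isAtomic() && LI->getSyncScopeID() != SyncScope::SingleThread;
}
```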
@@ -1219,7 +1202,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, V->takeName(LI); if (Instruction *I = dyn_cast<Instruction>(V)) I->setDebugLoc(LI->getDebugLoc()); - if (V->getType()->getScalarType()->isPointerTy()) + if (V->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(LI); ORE->emit(OptimizationRemark(DEBUG_TYPE, "LoadPRE", LI) @@ -1306,7 +1289,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { // to propagate LI's DebugLoc because LI may not post-dominate I. if (LI->getDebugLoc() && LI->getParent() == I->getParent()) I->setDebugLoc(LI->getDebugLoc()); - if (V->getType()->getScalarType()->isPointerTy()) + if (V->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(LI); ++NumGVNLoad; @@ -1460,7 +1443,7 @@ bool GVN::processLoad(LoadInst *L) { reportLoadElim(L, AvailableValue, ORE); // Tell MDA to rexamine the reused pointer since we might have more // information after forwarding it. - if (MD && AvailableValue->getType()->getScalarType()->isPointerTy()) + if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(AvailableValue); return true; } @@ -1468,95 +1451,6 @@ bool GVN::processLoad(LoadInst *L) { return false; } -/// Return a pair the first field showing the value number of \p Exp and the -/// second field showing whether it is a value number newly created. -std::pair<uint32_t, bool> -GVN::ValueTable::assignExpNewValueNum(Expression &Exp) { - uint32_t &e = expressionNumbering[Exp]; - bool CreateNewValNum = !e; - if (CreateNewValNum) { - Expressions.push_back(Exp); - if (ExprIdx.size() < nextValueNumber + 1) - ExprIdx.resize(nextValueNumber * 2); - e = nextValueNumber; - ExprIdx[nextValueNumber++] = nextExprNumber++; - } - return {e, CreateNewValNum}; -} - -/// Return whether all the values related with the same \p num are -/// defined in \p BB. -bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB, - GVN &Gvn) { - LeaderTableEntry *Vals = &Gvn.LeaderTable[Num]; - while (Vals && Vals->BB == BB) - Vals = Vals->Next; - return !Vals; -} - -/// Wrap phiTranslateImpl to provide caching functionality. -uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred, - const BasicBlock *PhiBlock, uint32_t Num, - GVN &Gvn) { - auto FindRes = PhiTranslateTable.find({Num, Pred}); - if (FindRes != PhiTranslateTable.end()) - return FindRes->second; - uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn); - PhiTranslateTable.insert({{Num, Pred}, NewNum}); - return NewNum; -} - -/// Translate value number \p Num using phis, so that it has the values of -/// the phis in BB. -uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred, - const BasicBlock *PhiBlock, - uint32_t Num, GVN &Gvn) { - if (PHINode *PN = NumberingPhi[Num]) { - for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { - if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred) - if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false)) - return TransVal; - } - return Num; - } - - // If there is any value related with Num is defined in a BB other than - // PhiBlock, it cannot depend on a phi in PhiBlock without going through - // a backedge. We can do an early exit in that case to save compile time. 
- if (!areAllValsInBB(Num, PhiBlock, Gvn)) - return Num; - - if (Num >= ExprIdx.size() || ExprIdx[Num] == 0) - return Num; - Expression Exp = Expressions[ExprIdx[Num]]; - - for (unsigned i = 0; i < Exp.varargs.size(); i++) { - // For InsertValue and ExtractValue, some varargs are index numbers - // instead of value numbers. Those index numbers should not be - // translated. - if ((i > 1 && Exp.opcode == Instruction::InsertValue) || - (i > 0 && Exp.opcode == Instruction::ExtractValue)) - continue; - Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn); - } - - if (Exp.commutative) { - assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!"); - if (Exp.varargs[0] > Exp.varargs[1]) { - std::swap(Exp.varargs[0], Exp.varargs[1]); - uint32_t Opcode = Exp.opcode >> 8; - if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) - Exp.opcode = (Opcode << 8) | - CmpInst::getSwappedPredicate( - static_cast<CmpInst::Predicate>(Exp.opcode & 255)); - } - } - - if (uint32_t NewNum = expressionNumbering[Exp]) - return NewNum; - return Num; -} - // In order to find a leader for a given value number at a // specific basic block, we first obtain the list of all Values for that number, // and then scan the list to find one whose block dominates the block in @@ -1601,15 +1495,6 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, return Pred != nullptr; } - -void GVN::assignBlockRPONumber(Function &F) { - uint32_t NextBlockNumber = 1; - ReversePostOrderTraversal<Function *> RPOT(&F); - for (BasicBlock *BB : RPOT) - BlockRPONumber[BB] = NextBlockNumber++; -} - - // Tries to replace instruction with const, using information from // ReplaceWithConstMap. bool GVN::replaceOperandsWithConsts(Instruction *Instr) const { @@ -1713,7 +1598,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root, // RHS neither 'true' nor 'false' - bail out. continue; // Whether RHS equals 'true'. Otherwise it equals 'false'. - bool isKnownTrue = CI->isAllOnesValue(); + bool isKnownTrue = CI->isMinusOne(); bool isKnownFalse = !isKnownTrue; // If "A && B" is known true then both A and B are known true. If "A || B" @@ -1813,7 +1698,7 @@ bool GVN::processInstruction(Instruction *I) { Changed = true; } if (Changed) { - if (MD && V->getType()->getScalarType()->isPointerTy()) + if (MD && V->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(V); ++NumGVNSimpl; return true; @@ -1924,7 +1809,7 @@ bool GVN::processInstruction(Instruction *I) { // Remove it! patchAndReplaceAllUsesWith(I, Repl); - if (MD && Repl->getType()->getScalarType()->isPointerTy()) + if (MD && Repl->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(Repl); markInstructionForDeletion(I); return true; @@ -1971,7 +1856,6 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, // Fabricate val-num for dead-code in order to suppress assertion in // performPRE(). assignValNumForDeadCode(); - assignBlockRPONumber(F); bool PREChanged = true; while (PREChanged) { PREChanged = performPRE(F); @@ -2043,7 +1927,7 @@ bool GVN::processBlock(BasicBlock *BB) { // Instantiate an expression in a predecessor that lacked it. bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, - BasicBlock *Curr, unsigned int ValNo) { + unsigned int ValNo) { // Because we are going top-down through the block, all value numbers // will be available in the predecessor by the time we need them. 
Any // that weren't originally present will have been instantiated earlier @@ -2061,9 +1945,7 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, success = false; break; } - uint32_t TValNo = - VN.phiTranslate(Pred, Curr, VN.lookup(Op), *this); - if (Value *V = findLeader(Pred, TValNo)) { + if (Value *V = findLeader(Pred, VN.lookup(Op))) { Instr->setOperand(i, V); } else { success = false; @@ -2080,12 +1962,10 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, Instr->insertBefore(Pred->getTerminator()); Instr->setName(Instr->getName() + ".pre"); Instr->setDebugLoc(Instr->getDebugLoc()); - - unsigned Num = VN.lookupOrAdd(Instr); - VN.add(Instr, Num); + VN.add(Instr, ValNo); // Update the availability map to include the new instruction. - addToLeaderTable(Num, Instr, Pred); + addToLeaderTable(ValNo, Instr, Pred); return true; } @@ -2123,27 +2003,18 @@ bool GVN::performScalarPRE(Instruction *CurInst) { SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap; for (BasicBlock *P : predecessors(CurrentBlock)) { - // We're not interested in PRE where blocks with predecessors that are - // not reachable. - if (!DT->isReachableFromEntry(P)) { + // We're not interested in PRE where the block is its + // own predecessor, or in blocks with predecessors + // that are not reachable. + if (P == CurrentBlock) { NumWithout = 2; break; - } - // It is not safe to do PRE when P->CurrentBlock is a loop backedge, and - // when CurInst has operand defined in CurrentBlock (so it may be defined - // by phi in the loop header). - if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock] && - any_of(CurInst->operands(), [&](const Use &U) { - if (auto *Inst = dyn_cast<Instruction>(U.get())) - return Inst->getParent() == CurrentBlock; - return false; - })) { + } else if (!DT->isReachableFromEntry(P)) { NumWithout = 2; break; } - uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this); - Value *predV = findLeader(P, TValNo); + Value *predV = findLeader(P, ValNo); if (!predV) { predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P)); PREPred = P; @@ -2183,7 +2054,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) { } // We need to insert somewhere, so let's give it a shot PREInstr = CurInst->clone(); - if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) { + if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) { // If we failed insertion, make sure we remove the instruction. 
DEBUG(verifyRemoved(PREInstr)); PREInstr->deleteValue(); @@ -2212,7 +2083,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) { addToLeaderTable(ValNo, Phi, CurrentBlock); Phi->setDebugLoc(CurInst->getDebugLoc()); CurInst->replaceAllUsesWith(Phi); - if (MD && Phi->getType()->getScalarType()->isPointerTy()) + if (MD && Phi->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(Phi); VN.erase(CurInst); removeFromLeaderTable(ValNo, CurInst, CurrentBlock); @@ -2297,7 +2168,6 @@ bool GVN::iterateOnFunction(Function &F) { void GVN::cleanupGlobalSets() { VN.clear(); LeaderTable.clear(); - BlockRPONumber.clear(); TableAllocator.Reset(); } diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp index 3c8fbd35bf8c1..89b28f0aeee6b 100644 --- a/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -232,7 +232,7 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II, case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_dec:{ const ConstantInt *IsVolatile = dyn_cast<ConstantInt>(II->getArgOperand(4)); - if (!IsVolatile || !IsVolatile->isNullValue()) + if (!IsVolatile || !IsVolatile->isZero()) return false; LLVM_FALLTHROUGH; @@ -358,7 +358,8 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { // If the operands of the expression on the top are already explored, // adds that expression to the resultant postorder. if (PostorderStack.back().second) { - Postorder.push_back(TopVal); + if (TopVal->getType()->getPointerAddressSpace() == FlatAddrSpace) + Postorder.push_back(TopVal); PostorderStack.pop_back(); continue; } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 05293eb0079fc..ee3de51b13606 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -1212,7 +1212,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) { LoadInst *NewVal = new LoadInst( LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred), LI->getName() + ".pr", false, LI->getAlignment(), LI->getOrdering(), - LI->getSynchScope(), UnavailablePred->getTerminator()); + LI->getSyncScopeID(), UnavailablePred->getTerminator()); NewVal->setDebugLoc(LI->getDebugLoc()); if (AATags) NewVal->setAAMetadata(AATags); diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index c41cc42db5e2c..ac4dd44a0e906 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -148,25 +148,27 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, LPMUpdater *Updater = nullptr) { assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); - // We can only remove the loop if there is a preheader that we can - // branch from after removing it. + // We can only remove the loop if there is a preheader that we can branch from + // after removing it. Also, if LoopSimplify form is not available, stay out + // of trouble. BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) + if (!Preheader || !L->hasDedicatedExits()) { + DEBUG(dbgs() + << "Deletion requires Loop with preheader and dedicated exits.\n"); return false; - - // If LoopSimplify form is not available, stay out of trouble. - if (!L->hasDedicatedExits()) - return false; - + } // We can't remove loops that contain subloops. If the subloops were dead, // they would already have been removed in earlier executions of this pass. 
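(For reference, the kind of loop LoopDeletion removes, as illustrative plain C++:)

```cpp
// Deletable: a preheader, a unique exit, a computable trip count, no
// subloops, and nothing live out of the loop.
void deadLoop(int N) {
  int Sum = 0;
  for (int I = 0; I < N; ++I)
    Sum += I; // Sum is never read again, so the whole loop is dead
  // After deletion the function falls straight through to its return.
}
```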
- if (L->begin() != L->end()) + if (L->begin() != L->end()) { + DEBUG(dbgs() << "Loop contains subloops.\n"); return false; + } BasicBlock *ExitBlock = L->getUniqueExitBlock(); if (ExitBlock && isLoopNeverExecuted(L)) { + DEBUG(dbgs() << "Loop is proven to never execute, delete it!"); // Set incoming value to undef for phi nodes in the exit block. BasicBlock::iterator BI = ExitBlock->begin(); while (PHINode *P = dyn_cast<PHINode>(BI)) { @@ -188,20 +190,26 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, // be in the situation of needing to be able to solve statically which exit // block will be branched to, or trying to preserve the branching logic in // a loop invariant manner. - if (!ExitBlock) + if (!ExitBlock) { + DEBUG(dbgs() << "Deletion requires single exit block\n"); return false; - + } // Finally, we have to check that the loop really is dead. bool Changed = false; - if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader)) + if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader)) { + DEBUG(dbgs() << "Loop is not invariant, cannot delete.\n"); return Changed; + } // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. const SCEV *S = SE.getMaxBackedgeTakenCount(L); - if (isa<SCEVCouldNotCompute>(S)) + if (isa<SCEVCouldNotCompute>(S)) { + DEBUG(dbgs() << "Could not compute SCEV MaxBackedgeTakenCount.\n"); return Changed; + } + DEBUG(dbgs() << "Loop is invariant, delete it!"); deleteDeadLoop(L, DT, SE, LI, Updater); ++NumDeleted; @@ -311,6 +319,9 @@ static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &Updater) { + + DEBUG(dbgs() << "Analyzing Loop for deletion: "); + DEBUG(L.dump()); if (!deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, &Updater)) return PreservedAnalyses::all(); @@ -350,5 +361,7 @@ bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &) { ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + DEBUG(dbgs() << "Analyzing Loop for deletion: "); + DEBUG(L->dump()); return deleteLoopIfDead(L, DT, SE, LI); } diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 8b435050ac769..4a6a35c0ab1b9 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1160,7 +1160,7 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, if (!Dec || !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) || (SubInst->getOpcode() == Instruction::Add && - Dec->isAllOnesValue()))) { + Dec->isMinusOne()))) { return false; } } diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp index 9f3875a3027f4..606136dc31a4b 100644 --- a/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/lib/Transforms/Scalar/LoopInterchange.cpp @@ -757,8 +757,11 @@ bool LoopInterchangeLegality::currentLimitations() { PHINode *InnerInductionVar; SmallVector<PHINode *, 8> Inductions; SmallVector<PHINode *, 8> Reductions; - if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) + if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) { + DEBUG(dbgs() << "Only inner loops with induction or reduction PHI nodes " + << "are supported currently.\n"); return true; + } // 
TODO: Currently we handle only loops with 1 induction variable. if (Inductions.size() != 1) { @@ -771,16 +774,25 @@ bool LoopInterchangeLegality::currentLimitations() { InnerInductionVar = Inductions.pop_back_val(); Reductions.clear(); - if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) + if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) { + DEBUG(dbgs() << "Only outer loops with induction or reduction PHI nodes " + << "are supported currently.\n"); return true; + } // Outer loop cannot have reduction because then loops will not be tightly // nested. - if (!Reductions.empty()) + if (!Reductions.empty()) { + DEBUG(dbgs() << "Outer loops with reductions are not supported " + << "currently.\n"); return true; + } // TODO: Currently we handle only loops with 1 induction variable. - if (Inductions.size() != 1) + if (Inductions.size() != 1) { + DEBUG(dbgs() << "Loops with more than 1 induction variable are not " + << "supported currently.\n"); return true; + } // TODO: Triangular loops are not handled for now. if (!isLoopStructureUnderstood(InnerInductionVar)) { @@ -791,12 +803,16 @@ bool LoopInterchangeLegality::currentLimitations() { // TODO: We only handle LCSSA PHI's corresponding to reduction for now. BasicBlock *LoopExitBlock = getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader); - if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) + if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) { + DEBUG(dbgs() << "Can only handle LCSSA PHIs in outer loops currently.\n"); return true; + } LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader); - if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) + if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) { + DEBUG(dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n"); return true; + } // TODO: Current limitation: Since we split the inner loop latch at the point // were induction variable is incremented (induction.next); We cannot have // more than 1 induction variable increment and branch // ... InnerIndexVarInc = dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0)); - if (!InnerIndexVarInc) + if (!InnerIndexVarInc) { + DEBUG(dbgs() << "Did not find an instruction to increment the induction " + << "variable.\n"); return true; + } // Since we split the inner loop latch on this induction variable. Make sure // we do not have any instruction between the induction variable and branch // ... for (const Instruction &I : reverse(*InnerLoopLatch)) { if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I)) continue; + // We found an instruction. If this is not induction variable then it is not // safe to split this loop latch. - if (!I.isIdenticalTo(InnerIndexVarInc)) + if (!I.isIdenticalTo(InnerIndexVarInc)) { + DEBUG(dbgs() << "Found unsupported instructions between induction " + << "variable increment and branch.\n"); return true; + } FoundInduction = true; break; } // The loop latch ended and we didn't find the induction variable return as // current limitation.
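(The loop shapes behind the limitations traced above, as illustrative plain C++:)

```cpp
// Tightly nested, one induction variable per loop: a legal candidate for
// interchange (e.g. to improve locality of the column-major access).
void tight(int A[256][256]) {
  for (int I = 0; I < 256; ++I)
    for (int J = 0; J < 256; ++J)
      A[J][I] += 1;
}

// Triangular: the inner bound depends on the outer induction variable,
// which isLoopStructureUnderstood currently rejects.
void triangular(int A[256][256]) {
  for (int I = 0; I < 256; ++I)
    for (int J = 0; J < I; ++J)
      A[J][I] += 1;
}
```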
- if (!FoundInduction) + if (!FoundInduction) { + DEBUG(dbgs() << "Did not find the induction variable.\n"); return true; - + } return false; } diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 7312d97f8efe1..3506ac343d594 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -485,10 +485,22 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { DomTreeNode *Node = HeaderChildren[I]; BasicBlock *BB = Node->getBlock(); - pred_iterator PI = pred_begin(BB); - BasicBlock *NearestDom = *PI; - for (pred_iterator PE = pred_end(BB); PI != PE; ++PI) - NearestDom = DT->findNearestCommonDominator(NearestDom, *PI); + BasicBlock *NearestDom = nullptr; + for (BasicBlock *Pred : predecessors(BB)) { + // Consider only reachable basic blocks. + if (!DT->getNode(Pred)) + continue; + + if (!NearestDom) { + NearestDom = Pred; + continue; + } + + NearestDom = DT->findNearestCommonDominator(NearestDom, Pred); + assert(NearestDom && "No NearestCommonDominator found"); + } + + assert(NearestDom && "Nearest dominator not found"); // Remember if this changes the DomTree. if (Node->getIDom()->getBlock() != NearestDom) { diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 73436f13c94e4..3638da118cb7e 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -140,6 +140,13 @@ static cl::opt<bool> LSRExpNarrow( cl::desc("Narrow LSR complex solution using" " expectation of registers number")); +// Flag to narrow search space by filtering non-optimal formulae with +// the same ScaledReg and Scale. +static cl::opt<bool> FilterSameScaledReg( + "lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), + cl::desc("Narrow LSR search space by filtering non-optimal formulae" + " with the same ScaledReg and Scale")); + #ifndef NDEBUG // Stress test IV chain generation. static cl::opt<bool> StressIVChain( @@ -1902,6 +1909,7 @@ class LSRInstance { void NarrowSearchSpaceByDetectingSupersets(); void NarrowSearchSpaceByCollapsingUnrolledCode(); void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + void NarrowSearchSpaceByFilterFormulaWithSameScaledReg(); void NarrowSearchSpaceByDeletingCostlyFormulas(); void NarrowSearchSpaceByPickingWinnerRegs(); void NarrowSearchSpaceUsingHeuristics(); @@ -2318,7 +2326,7 @@ LSRInstance::OptimizeLoopTermCond() { dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) { const ConstantInt *C = D->getValue(); // Stride of one or negative one can have reuse with non-addresses. - if (C->isOne() || C->isAllOnesValue()) + if (C->isOne() || C->isMinusOne()) goto decline_post_inc; // Avoid weird situations. if (C->getValue().getMinSignedBits() >= 64 || @@ -4306,6 +4314,104 @@ void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){ } } +/// If an LSRUse has multiple formulae with the same ScaledReg and Scale, +/// pick the best one and delete the others. +/// This narrowing heuristic is to keep as many formulae with different +/// Scale and ScaledReg pairs as possible while narrowing the search space. +/// The benefit is that it is more likely to find a better solution +/// from a formulae set with more Scale and ScaledReg variations than +/// a formulae set with the same Scale and ScaledReg. The picking-winner-regs +/// heuristic will often keep the formulae with the same Scale and +/// ScaledReg and filter others, and we want to avoid that if possible.
+void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() { + if (EstimateSearchSpaceComplexity() < ComplexityLimit) + return; + + DEBUG(dbgs() << "The search space is too complex.\n" + "Narrowing the search space by choosing the best Formula " + "from the Formulae with the same Scale and ScaledReg.\n"); + + // Map the "Scale * ScaledReg" pair to the best formula of current LSRUse. + typedef DenseMap<std::pair<const SCEV *, int64_t>, size_t> BestFormulaeTy; + BestFormulaeTy BestFormulae; +#ifndef NDEBUG + bool ChangedFormulae = false; +#endif + DenseSet<const SCEV *> VisitedRegs; + SmallPtrSet<const SCEV *, 16> Regs; + + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n'); + + // Return true if Formula FA is better than Formula FB. + auto IsBetterThan = [&](Formula &FA, Formula &FB) { + // First we will try to choose the Formula with fewer new registers. + // For a register used by current Formula, the more the register is + // shared among LSRUses, the less we increase the register number + // counter of the formula. + size_t FARegNum = 0; + for (const SCEV *Reg : FA.BaseRegs) { + const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg); + FARegNum += (NumUses - UsedByIndices.count() + 1); + } + size_t FBRegNum = 0; + for (const SCEV *Reg : FB.BaseRegs) { + const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg); + FBRegNum += (NumUses - UsedByIndices.count() + 1); + } + if (FARegNum != FBRegNum) + return FARegNum < FBRegNum; + + // If the new register numbers are the same, choose the Formula with + // less Cost. + Cost CostFA, CostFB; + Regs.clear(); + CostFA.RateFormula(TTI, FA, Regs, VisitedRegs, L, SE, DT, LU); + Regs.clear(); + CostFB.RateFormula(TTI, FB, Regs, VisitedRegs, L, SE, DT, LU); + return CostFA.isLess(CostFB, TTI); + }; + + bool Any = false; + for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms; + ++FIdx) { + Formula &F = LU.Formulae[FIdx]; + if (!F.ScaledReg) + continue; + auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx}); + if (P.second) + continue; + + Formula &Best = LU.Formulae[P.first->second]; + if (IsBetterThan(F, Best)) + std::swap(F, Best); + DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); + dbgs() << "\n" + " in favor of formula "; + Best.print(dbgs()); dbgs() << '\n'); +#ifndef NDEBUG + ChangedFormulae = true; +#endif + LU.DeleteFormula(F); + --FIdx; + --NumForms; + Any = true; + } + if (Any) + LU.RecomputeRegs(LUIdx, RegUses); + + // Reset this to prepare for the next use. + BestFormulae.clear(); + } + + DEBUG(if (ChangedFormulae) { + dbgs() << "\n" + "After filtering out undesirable candidates:\n"; + print_uses(dbgs()); + }); +} + /// The function delete formulas with high registers number expectation. 
/// Assuming we don't know the value of each formula (already delete
/// all inefficient), generate probability of not selecting for each
@@ -4516,6 +4622,8 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
NarrowSearchSpaceByDetectingSupersets();
NarrowSearchSpaceByCollapsingUnrolledCode();
NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ if (FilterSameScaledReg)
+ NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
if (LSRExpNarrow)
NarrowSearchSpaceByDeletingCostlyFormulas();
else
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp index acd3ef6791bed..6727cf0179c18 100644
--- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -238,7 +238,7 @@ PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0, &BB->front());
NewPN->addIncoming(Opd1, S0->getParent());
NewPN->addIncoming(Opd2, S1->getParent());
- if (MD && NewPN->getType()->getScalarType()->isPointerTy())
+ if (MD && NewPN->getType()->isPtrOrPtrVectorTy())
MD->invalidateCachedPointerInfo(NewPN);
return NewPN;
}
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 9cf01c6582b58..9d018563618ea 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -866,9 +866,7 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
// Things in TOPClass are equivalent to everything.
if (ValueToClass.lookup(*U) == TOPClass)
return false;
- if (lookupOperandLeader(*U) == PN)
- return false;
- return true;
+ return lookupOperandLeader(*U) != PN;
});
std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), [&](const Use *U) -> Value * {
@@ -2063,9 +2061,10 @@ Value *NewGVN::getNextValueLeader(CongruenceClass *CC) const {
//
// The invariants of this function are:
//
-// I must be moving to NewClass from OldClass The StoreCount of OldClass and
-// NewClass is expected to have been updated for I already if it is is a store.
-// The OldClass memory leader has not been updated yet if I was the leader.
+// - I must be moving to NewClass from OldClass
+// - The StoreCount of OldClass and NewClass is expected to have been updated
+// for I already if it is a store.
+// - The OldClass memory leader has not been updated yet if I was the leader.
void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I, MemoryAccess *InstMA, CongruenceClass *OldClass,
@@ -2074,7 +2073,8 @@ void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I,
// be the MemoryAccess of OldClass.
assert((!InstMA || !OldClass->getMemoryLeader() || OldClass->getLeader() != I ||
- OldClass->getMemoryLeader() == InstMA) &&
+ MemoryAccessToClass.lookup(OldClass->getMemoryLeader()) ==
+ MemoryAccessToClass.lookup(InstMA)) &&
"Representative MemoryAccess mismatch");
// First, see what happens to the new class
if (!NewClass->getMemoryLeader()) {
@@ -2136,7 +2136,7 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
<< NewClass->getID() << " from " << *NewClass->getLeader() << " to " << *SI << " because store joined class\n");
// If we changed the leader, we have to mark it changed because we don't
- // know what it will do to symbolic evlauation.
+ // know what it will do to symbolic evaluation.
NewClass->setLeader(SI);
}
// We rely on the code below handling the MemoryAccess change.
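Several of the hunks in this area (LoopStrengthReduce above, and Reassociate,
SCCP, Evaluator, CmpInstAnalysis and SimplifyLibCalls below) are the same
mechanical API migration from isAllOnesValue()/isNullValue() to
isMinusOne()/isZero(). For a ConstantInt the predicates test the same bit
pattern; the new names simply state the intent at the call site. A minimal
sketch (illustrative only, not from this patch):

  // For a ConstantInt *C, these are equivalent in two's complement:
  if (C->isAllOnesValue()) { /* "every bit of the value is set" */ }
  if (C->isMinusOne())     { /* "the value is -1", clearer for signed logic */ }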
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index cdba0062953f1..29d1ba406ae49 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -2148,7 +2148,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) { if (I->getOpcode() == Instruction::Mul && cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add && isa<ConstantInt>(Ops.back().Op) && - cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) { + cast<ConstantInt>(Ops.back().Op)->isMinusOne()) { ValueEntry Tmp = Ops.pop_back_val(); Ops.insert(Ops.begin(), Tmp); } else if (I->getOpcode() == Instruction::FMul && diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index a73e9aec06170..f19d45329d238 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1994,7 +1994,7 @@ static void rematerializeLiveValues(CallSite CS, Instruction *LastClonedValue = nullptr; Instruction *LastValue = nullptr; for (Instruction *Instr: ChainToBase) { - // Only GEP's and casts are suported as we need to be careful to not + // Only GEP's and casts are supported as we need to be careful to not // introduce any new uses of pointers not in the liveset. // Note that it's fine to introduce new uses of pointers which were // otherwise not used after this statepoint. diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 7a6fa1711411d..a738ebb4607e4 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -963,7 +963,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { } else { // X or -1 = -1 if (ConstantInt *CI = NonOverdefVal->getConstantInt()) - if (CI->isAllOnesValue()) + if (CI->isMinusOne()) return markConstant(IV, &I, NonOverdefVal->getConstant()); } } diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 4729f4ef59567..b9cee5b2ba956 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -1673,8 +1673,7 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, // See if we need inttoptr for this type pair. A cast involving both scalars // and vectors requires and additional bitcast. - if (OldTy->getScalarType()->isIntegerTy() && - NewTy->getScalarType()->isPointerTy()) { + if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) { // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8* if (OldTy->isVectorTy() && !NewTy->isVectorTy()) return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)), @@ -1690,8 +1689,7 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, // See if we need ptrtoint for this type pair. A cast involving both scalars // and vectors requires and additional bitcast. 
- if (OldTy->getScalarType()->isPointerTy() && - NewTy->getScalarType()->isIntegerTy()) { + if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) { // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128 if (OldTy->isVectorTy() && !NewTy->isVectorTy()) return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)), @@ -2400,7 +2398,7 @@ private: LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), LI.isVolatile(), LI.getName()); if (LI.isVolatile()) - NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope()); + NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); // Any !nonnull metadata or !range metadata on the old load is also valid // on the new load. This is even true in some cases even when the loads @@ -2435,7 +2433,7 @@ private: getSliceAlign(TargetTy), LI.isVolatile(), LI.getName()); if (LI.isVolatile()) - NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope()); + NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); V = NewLI; IsPtrAdjusted = true; @@ -2578,7 +2576,7 @@ private: } NewSI->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access); if (SI.isVolatile()) - NewSI->setAtomic(SI.getOrdering(), SI.getSynchScope()); + NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); Pass.DeadInsts.insert(&SI); deleteIfTriviallyDead(OldOp); diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 486f3e5a43d49..0cccb415efdb1 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -329,7 +329,7 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) { Loops[Exit] = N->getEntry(); } else { - // Test for sucessors as back edge + // Test for successors as back edge BasicBlock *BB = N->getNodeAs<BasicBlock>(); BranchInst *Term = cast<BranchInst>(BB->getTerminator()); diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 314c990293cc5..7e75e88477852 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -46,13 +46,21 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; - + Module *TheModule = F ? F->getParent() : nullptr; + // Loop over all instructions, and copy them over. for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { - if (DIFinder && F->getParent() && II->getDebugLoc()) - DIFinder->processLocation(*F->getParent(), II->getDebugLoc().get()); + if (DIFinder && TheModule) { + if (auto *DDI = dyn_cast<DbgDeclareInst>(II)) + DIFinder->processDeclare(*TheModule, DDI); + else if (auto *DVI = dyn_cast<DbgValueInst>(II)) + DIFinder->processValue(*TheModule, DVI); + + if (auto DbgLoc = II->getDebugLoc()) + DIFinder->processLocation(*TheModule, DbgLoc.get()); + } Instruction *NewInst = II->clone(); if (II->hasName()) @@ -153,6 +161,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // When we remap instructions, we want to avoid duplicating inlined // DISubprograms, so record all subprograms we find as we duplicate // instructions and then freeze them in the MD map. + // We also record information about dbg.value and dbg.declare to avoid + // duplicating the types. 
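// Concretely, every DIType recorded here is later mapped to itself in
// VMap.MD() (see the loop added to CloneFunctionInto below), which "freezes"
// the node: the metadata remapper then reuses the original type graph instead
// of cloning it once per cloned function.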
DebugInfoFinder DIFinder; // Loop over all of the basic blocks in the function, cloning them as @@ -193,6 +203,10 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, } } + for (auto *Type : DIFinder.types()) { + VMap.MD()[Type].reset(Type); + } + // Loop over all of the instructions in the function, fixing up operand // references as we go. This uses VMap to do all the hard work. for (Function::iterator BB = diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp index 9f4d9c7e39810..d9294c4993091 100644 --- a/lib/Transforms/Utils/CmpInstAnalysis.cpp +++ b/lib/Transforms/Utils/CmpInstAnalysis.cpp @@ -81,7 +81,7 @@ bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred, break; case ICmpInst::ICMP_SGT: // X > -1 is equivalent to (X & SignMask) == 0. - if (!C->isAllOnesValue()) + if (!C->isMinusOne()) return false; Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth())); Pred = ICmpInst::ICMP_EQ; diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 30d8856cfbef1..1189714dfab10 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -1116,12 +1116,6 @@ Function *CodeExtractor::extractCodeRegion() { } } - //cerr << "NEW FUNCTION: " << *newFunction; - // verifyFunction(*newFunction); - - // cerr << "OLD FUNCTION: " << *oldFunction; - // verifyFunction(*oldFunction); - DEBUG(if (verifyFunction(*newFunction)) report_fatal_error("verifyFunction failed!")); return newFunction; diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp index c97e544e620a9..1328f2f3ec012 100644 --- a/lib/Transforms/Utils/Evaluator.cpp +++ b/lib/Transforms/Utils/Evaluator.cpp @@ -402,7 +402,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Value *Ptr = PtrArg->stripPointerCasts(); if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { Type *ElemTy = GV->getValueType(); - if (!Size->isAllOnesValue() && + if (!Size->isMinusOne() && Size->getValue().getLimitedValue() >= DL.getTypeStoreSize(ElemTy)) { Invariants.insert(GV); diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp index 0457294361b56..4a2be3a531767 100644 --- a/lib/Transforms/Utils/FunctionComparator.cpp +++ b/lib/Transforms/Utils/FunctionComparator.cpp @@ -513,8 +513,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering())) return Res; - if (int Res = - cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope())) + if (int Res = cmpNumbers(LI->getSyncScopeID(), + cast<LoadInst>(R)->getSyncScopeID())) return Res; return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range), cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range)); @@ -529,7 +529,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering())) return Res; - return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope()); + return cmpNumbers(SI->getSyncScopeID(), + cast<StoreInst>(R)->getSyncScopeID()); } if (const CmpInst *CI = dyn_cast<CmpInst>(L)) return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate()); @@ -584,7 +585,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering())) return Res; - return 
cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope()); + return cmpNumbers(FI->getSyncScopeID(), + cast<FenceInst>(R)->getSyncScopeID()); } if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) { if (int Res = cmpNumbers(CXI->isVolatile(), @@ -601,8 +603,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, cmpOrderings(CXI->getFailureOrdering(), cast<AtomicCmpXchgInst>(R)->getFailureOrdering())) return Res; - return cmpNumbers(CXI->getSynchScope(), - cast<AtomicCmpXchgInst>(R)->getSynchScope()); + return cmpNumbers(CXI->getSyncScopeID(), + cast<AtomicCmpXchgInst>(R)->getSyncScopeID()); } if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) { if (int Res = cmpNumbers(RMWI->getOperation(), @@ -614,8 +616,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(RMWI->getOrdering(), cast<AtomicRMWInst>(R)->getOrdering())) return Res; - return cmpNumbers(RMWI->getSynchScope(), - cast<AtomicRMWInst>(R)->getSynchScope()); + return cmpNumbers(RMWI->getSyncScopeID(), + cast<AtomicRMWInst>(R)->getSyncScopeID()); } if (const PHINode *PNL = dyn_cast<PHINode>(L)) { const PHINode *PNR = cast<PHINode>(R); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 5127eba3f9aea..74610613001c6 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -1662,9 +1662,10 @@ void llvm::removeUnwindEdge(BasicBlock *BB) { TI->eraseFromParent(); } -/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// removeUnreachableBlocks - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false -/// otherwise. +/// otherwise. If `LVI` is passed, this function preserves LazyValueInfo +/// after modifying the CFG. bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { SmallPtrSet<BasicBlock*, 16> Reachable; bool Changed = markAliveBlocks(F, Reachable); @@ -2168,6 +2169,9 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) { return true; case Instruction::Call: case Instruction::Invoke: + // Can't handle inline asm. Skip it. + if (isa<InlineAsm>(ImmutableCallSite(I).getCalledValue())) + return false; // Many arithmetic intrinsics have no issue taking a // variable, however it's hard to distingish these from // specials such as @llvm.frameaddress that require a constant. @@ -2182,12 +2186,18 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) { case Instruction::ShuffleVector: // Shufflevector masks are constant. return OpIdx != 2; + case Instruction::Switch: case Instruction::ExtractValue: - case Instruction::InsertValue: // All operands apart from the first are constant. return OpIdx == 0; + case Instruction::InsertValue: + // All operands apart from the first and the second are constant. + return OpIdx < 2; case Instruction::Alloca: - return false; + // Static allocas (constant size in the entry block) are handled by + // prologue/epilogue insertion so they're free anyway. We definitely don't + // want to make them non-constant. 
+ return !dyn_cast<AllocaInst>(I)->isStaticAlloca();
case Instruction::GetElementPtr:
if (OpIdx == 0)
return true;
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 9ad2b707e6b23..5170c68e2915a 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -65,9 +65,11 @@ static cl::opt<bool> UnrollRuntimeMultiExit(
/// than the unroll factor.
///
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
- BasicBlock *PrologExit, BasicBlock *PreHeader,
- BasicBlock *NewPreHeader, ValueToValueMapTy &VMap,
- DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) {
+ BasicBlock *PrologExit,
+ BasicBlock *OriginalLoopLatchExit,
+ BasicBlock *PreHeader, BasicBlock *NewPreHeader,
+ ValueToValueMapTy &VMap, DominatorTree *DT,
+ LoopInfo *LI, bool PreserveLCSSA) {
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Loop must have a latch");
BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
@@ -142,17 +144,15 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
// then (BECount + 1) cannot unsigned-overflow.
Value *BrLoopExit = B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
- BasicBlock *Exit = L->getUniqueExitBlock();
- assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
- SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
- SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI,
+ SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
+ SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
PreserveLCSSA);
// Add the branch to the exit block (around the unrolled loop)
- B.CreateCondBr(BrLoopExit, Exit, NewPreHeader);
+ B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
InsertPt->eraseFromParent();
if (DT)
- DT->changeImmediateDominator(Exit, PrologExit);
+ DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit);
}
/// Connect the unrolling epilog code to the original loop.
@@ -427,6 +427,50 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, return nullptr; }
+/// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits
+/// is populated with all the loop exit blocks other than the LatchExit block.
+static bool
+canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
+ BasicBlock *LatchExit, bool PreserveLCSSA,
+ bool UseEpilogRemainder) {
+
+ // Support runtime unrolling for multiple exit blocks and multiple exiting
+ // blocks.
+ if (!UnrollRuntimeMultiExit)
+ return false;
+ // Even if runtime multi exit is enabled, we currently have some correctness
+ // constraints in unrolling a multi-exit loop.
+ // We rely on LCSSA form being preserved when the exit blocks are transformed.
+ if (!PreserveLCSSA)
+ return false;
+ SmallVector<BasicBlock *, 4> Exits;
+ L->getUniqueExitBlocks(Exits);
+ for (auto *BB : Exits)
+ if (BB != LatchExit)
+ OtherExits.push_back(BB);
+
+ // TODO: Support multiple exiting blocks jumping to the `LatchExit` when
+ // UnrollRuntimeMultiExit is true. This will require updating the logic in
+ // connectEpilog/connectProlog.
+ if (!LatchExit->getSinglePredecessor()) {
+ DEBUG(dbgs() << "Bailout for multi-exit handling when latch exit has >1 " "predecessor.\n");
+ return false;
+ }
+ // FIXME: We bail out of multi-exit unrolling when epilog loop is generated and L is an inner loop.
This is because in presence of multiple exits, the + // outer loop is incorrect: we do not add the EpilogPreheader and exit to the + // outer loop. This is automatically handled in the prolog case, so we do not + // have that bug in prolog generation. + if (UseEpilogRemainder && L->getParentLoop()) + return false; + + // All constraints have been satisfied. + return true; +} + + + /// Insert code in the prolog/epilog code when unrolling a loop with a /// run-time trip-count. /// @@ -470,53 +514,40 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool UseEpilogRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, bool PreserveLCSSA) { - // for now, only unroll loops that contain a single exit - if (!UnrollRuntimeMultiExit && !L->getExitingBlock()) - return false; + DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); + DEBUG(L->dump()); // Make sure the loop is in canonical form. - if (!L->isLoopSimplifyForm()) + if (!L->isLoopSimplifyForm()) { + DEBUG(dbgs() << "Not in simplify form!\n"); return false; + } // Guaranteed by LoopSimplifyForm. BasicBlock *Latch = L->getLoopLatch(); BasicBlock *Header = L->getHeader(); - BasicBlock *LatchExit = L->getUniqueExitBlock(); // successor out of loop - if (!LatchExit && !UnrollRuntimeMultiExit) - return false; - // These are exit blocks other than the target of the latch exiting block. - SmallVector<BasicBlock *, 4> OtherExits; BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); - unsigned int ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; + unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; + BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex); // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the // targets of the Latch be an exit block out of the loop. This needs // to be guaranteed by the callers of UnrollRuntimeLoopRemainder. - assert(!L->contains(LatchBR->getSuccessor(ExitIndex)) && + assert(!L->contains(LatchExit) && "one of the loop latch successors should be the exit block!"); - // Support runtime unrolling for multiple exit blocks and multiple exiting - // blocks. - if (!LatchExit) { - assert(UseEpilogRemainder && "Multi exit unrolling is currently supported " - "unrolling with epilog remainder only!"); - LatchExit = LatchBR->getSuccessor(ExitIndex); - // We rely on LCSSA form being preserved when the exit blocks are - // transformed. - if (!PreserveLCSSA) - return false; - // TODO: Support multiple exiting blocks jumping to the `LatchExit`. This - // will need updating the logic in connectEpilog. - if (!LatchExit->getSinglePredecessor()) - return false; - SmallVector<BasicBlock *, 4> Exits; - L->getUniqueExitBlocks(Exits); - for (auto *BB : Exits) - if (BB != LatchExit) - OtherExits.push_back(BB); + // These are exit blocks other than the target of the latch exiting block. + SmallVector<BasicBlock *, 4> OtherExits; + bool isMultiExitUnrollingEnabled = canSafelyUnrollMultiExitLoop( + L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder); + // Support only single exit and exiting block unless multi-exit loop unrolling is enabled. + if (!isMultiExitUnrollingEnabled && + (!L->getExitingBlock() || OtherExits.size())) { + DEBUG( + dbgs() + << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " + "enabled!\n"); + return false; } - - assert(LatchExit && "Latch Exit should exist!"); - // Use Scalar Evolution to compute the trip count. This allows more loops to // be unrolled than relying on induction var simplification. 
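// As a mental model, runtime unrolling by Count rewrites (sketch only, shown
// in epilog form; the names are illustrative, not from this patch):
//
//   for (i = 0; i < n; i++) body(i);
//
// into
//
//   for (i = 0; i + Count <= n; i += Count) {
//     body(i); body(i + 1); /* ... */ body(i + Count - 1);  // unrolled
//   }
//   for (; i < n; i++) body(i);                              // remainder loop
//
// The prolog form instead peels n % Count iterations off the front. The
// SCEV-based trip count computed below is what sizes that remainder.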
if (!SE) @@ -530,29 +561,38 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // exiting blocks). const SCEV *BECountSC = SE->getExitCount(L, Latch); if (isa<SCEVCouldNotCompute>(BECountSC) || - !BECountSC->getType()->isIntegerTy()) + !BECountSC->getType()->isIntegerTy()) { + DEBUG(dbgs() << "Could not compute exit block SCEV\n"); return false; + } unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); // Add 1 since the backedge count doesn't include the first loop iteration. const SCEV *TripCountSC = SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); - if (isa<SCEVCouldNotCompute>(TripCountSC)) + if (isa<SCEVCouldNotCompute>(TripCountSC)) { + DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); return false; + } BasicBlock *PreHeader = L->getLoopPreheader(); BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && - Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) + Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) { + DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); return false; + } // This constraint lets us deal with an overflowing trip count easily; see the // comment on ModVal below. - if (Log2_32(Count) > BEWidth) + if (Log2_32(Count) > BEWidth) { + DEBUG(dbgs() + << "Count failed constraint on overflow trip count calculation.\n"); return false; + } // Loop structure is the following: // @@ -711,11 +751,10 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // node. for (unsigned i =0; i < oldNumOperands; i++){ Value *newVal = VMap[Phi->getIncomingValue(i)]; - if (!newVal) { - assert(isa<Constant>(Phi->getIncomingValue(i)) && - "VMap should exist for all values except constants!"); + // newVal can be a constant or derived from values outside the loop, and + // hence need not have a VMap value. + if (!newVal) newVal = Phi->getIncomingValue(i); - } Phi->addIncoming(newVal, cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)])); } @@ -781,8 +820,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, } else { // Connect the prolog code to the original loop and update the // PHI functions. 
- ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader, - VMap, DT, LI, PreserveLCSSA); + ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader, + NewPreHeader, VMap, DT, LI, PreserveLCSSA); } // If this loop is nested, then the loop unroller changes the code in the diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp index 1c2a60a6b8b24..900450b400612 100644 --- a/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -8,12 +8,256 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; +static unsigned getLoopOperandSizeInBytes(Type *Type) { + if (VectorType *VTy = dyn_cast<VectorType>(Type)) { + return VTy->getBitWidth() / 8; + } + + return Type->getPrimitiveSizeInBits() / 8; +} + +void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, + Value *DstAddr, ConstantInt *CopyLen, + unsigned SrcAlign, unsigned DestAlign, + bool SrcIsVolatile, bool DstIsVolatile, + const TargetTransformInfo &TTI) { + // No need to expand zero length copies. + if (CopyLen->isZero()) + return; + + BasicBlock *PreLoopBB = InsertBefore->getParent(); + BasicBlock *PostLoopBB = nullptr; + Function *ParentFunc = PreLoopBB->getParent(); + LLVMContext &Ctx = PreLoopBB->getContext(); + + Type *TypeOfCopyLen = CopyLen->getType(); + Type *LoopOpType = + TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); + + unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); + uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize; + + unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); + unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); + + if (LoopEndCount != 0) { + // Split + PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split"); + BasicBlock *LoopBB = + BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB); + PreLoopBB->getTerminator()->setSuccessor(0, LoopBB); + + IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); + + // Cast the Src and Dst pointers to pointers to the loop operand type (if + // needed). + PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); + PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); + if (SrcAddr->getType() != SrcOpType) { + SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); + } + if (DstAddr->getType() != DstOpType) { + DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); + } + + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index"); + LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB); + // Loop Body + Value *SrcGEP = + LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); + Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile); + Value *DstGEP = + LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); + LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); + + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U)); + LoopIndex->addIncoming(NewIndex, LoopBB); + + // Create the loop branch condition. 
+ Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
+ LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI),
+ LoopBB, PostLoopBB);
+ }
+
+ uint64_t BytesCopied = LoopEndCount * LoopOpSize;
+ uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
+ if (RemainingBytes) {
+ IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI() : InsertBefore);
+
+ // Update the alignment based on the copy size used in the loop body.
+ SrcAlign = std::min(SrcAlign, LoopOpSize);
+ DestAlign = std::min(DestAlign, LoopOpSize);
+
+ SmallVector<Type *, 5> RemainingOps;
+ TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
+ SrcAlign, DestAlign);
+
+ for (auto OpTy : RemainingOps) {
+ // Calculate the new index
+ unsigned OperandSize = getLoopOperandSizeInBytes(OpTy);
+ uint64_t GepIndex = BytesCopied / OperandSize;
+ assert(GepIndex * OperandSize == BytesCopied &&
+ "Division should have no Remainder!");
+ // Cast source to operand type and load
+ PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS);
+ Value *CastedSrc = SrcAddr->getType() == SrcPtrType
+ ? SrcAddr
+ : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
+ Value *SrcGEP = RBuilder.CreateInBoundsGEP(
+ OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
+ Value *Load = RBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+
+ // Cast destination to operand type and store.
+ PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
+ Value *CastedDst = DstAddr->getType() == DstPtrType
+ ? DstAddr
+ : RBuilder.CreateBitCast(DstAddr, DstPtrType);
+ Value *DstGEP = RBuilder.CreateInBoundsGEP(
+ OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
+ RBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
+
+ BytesCopied += OperandSize;
+ }
+ }
+ assert(BytesCopied == CopyLen->getZExtValue() &&
+ "Bytes copied should match size in the call!");
+}
+
+void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
+ Value *SrcAddr, Value *DstAddr,
+ Value *CopyLen, unsigned SrcAlign,
+ unsigned DestAlign, bool SrcIsVolatile,
+ bool DstIsVolatile,
+ const TargetTransformInfo &TTI) {
+ BasicBlock *PreLoopBB = InsertBefore->getParent();
+ BasicBlock *PostLoopBB =
+ PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");
+
+ Function *ParentFunc = PreLoopBB->getParent();
+ LLVMContext &Ctx = PreLoopBB->getContext();
+
+ Type *LoopOpType =
+ TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign);
+ unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType);
+
+ IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
+
+ unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
+ unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
+ PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
+ PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
+ if (SrcAddr->getType() != SrcOpType) {
+ SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
+ }
+ if (DstAddr->getType() != DstOpType) {
+ DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
+ }
+
+ // Calculate the loop trip count, and remaining bytes to copy after the loop.
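// A sketch of the quantities computed next, with LoopOpSize bytes copied per
// iteration of the main loop (names as in the code below):
//
//   RuntimeLoopCount   = CopyLen udiv LoopOpSize    // main-loop trip count
//   RuntimeResidual    = CopyLen urem LoopOpSize    // bytes for the byte loop
//   RuntimeBytesCopied = CopyLen - RuntimeResidual  // residual start offset
//
// For example, CopyLen = 27 with an 8-byte loop type gives three wide
// iterations plus a 3-byte residual starting at offset 24.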
+ Type *CopyLenType = CopyLen->getType();
+ IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
+ assert(ILengthType &&
+ "expected size argument to memcpy to be an integer type!");
+ ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
+ Value *RuntimeLoopCount = PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
+ Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
+ Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
+
+ BasicBlock *LoopBB =
+ BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, nullptr);
+ IRBuilder<> LoopBuilder(LoopBB);
+
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
+ LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);
+
+ Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
+ Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+ Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
+ LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
+
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
+
+ Type *Int8Type = Type::getInt8Ty(Ctx);
+ if (LoopOpType != Int8Type) {
+ // Loop body for the residual copy.
+ BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
+ PreLoopBB->getParent(), nullptr);
+ // Residual loop header.
+ BasicBlock *ResHeaderBB = BasicBlock::Create(
+ Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);
+
+ // Need to update the pre-loop basic block to branch to the correct place:
+ // branch to the main loop if the count is non-zero, branch to the residual
+ // loop if the copy size is smaller than one iteration of the main loop but
+ // non-zero, and finally branch to after the residual loop if the memcpy
+ // size is zero.
+ ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
+ PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
+ LoopBB, ResHeaderBB);
+ PreLoopBB->getTerminator()->eraseFromParent();
+
+ LoopBuilder.CreateCondBr(
+ LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
+ ResHeaderBB);
+
+ // Determine if we need to branch to the residual loop or bypass it.
+ IRBuilder<> RHBuilder(ResHeaderBB);
+ RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
+ ResLoopBB, PostLoopBB);
+
+ // Copy the residual with a single byte load/store loop.
+ IRBuilder<> ResBuilder(ResLoopBB);
+ PHINode *ResidualIndex =
+ ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
+ ResidualIndex->addIncoming(Zero, ResHeaderBB);
+
+ Value *SrcAsInt8 =
+ ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS));
+ Value *DstAsInt8 =
+ ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS));
+ Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
+ Value *SrcGEP =
+ ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset);
+ Value *Load = ResBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+ Value *DstGEP =
+ ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset);
+ ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
+
+ Value *ResNewIndex =
+ ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U));
+ ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);
+
+ // Create the loop branch condition.
+ ResBuilder.CreateCondBr(
+ ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
+ PostLoopBB);
+ } else {
+ // In this case the loop operand type was a byte, and there is no need for a
+ // residual loop to copy the remaining memory after the main loop.
+ // We do however need to patch up the control flow by creating the
+ // terminators for the preloop block and the memcpy loop.
+ ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
+ PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
+ LoopBB, PostLoopBB);
+ PreLoopBB->getTerminator()->eraseFromParent();
+ LoopBuilder.CreateCondBr(
+ LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
+ PostLoopBB);
+ }
+}
+
void llvm::createMemCpyLoop(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, unsigned SrcAlign, unsigned DestAlign,
@@ -208,15 +452,41 @@ static void createMemSetLoop(Instruction *InsertBefore, NewBB); }
-void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy) {
- createMemCpyLoop(/* InsertBefore */ Memcpy,
- /* SrcAddr */ Memcpy->getRawSource(),
- /* DstAddr */ Memcpy->getRawDest(),
- /* CopyLen */ Memcpy->getLength(),
- /* SrcAlign */ Memcpy->getAlignment(),
- /* DestAlign */ Memcpy->getAlignment(),
- /* SrcIsVolatile */ Memcpy->isVolatile(),
- /* DstIsVolatile */ Memcpy->isVolatile());
+void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
+ const TargetTransformInfo &TTI) {
+ // Original implementation
+ if (!TTI.useWideIRMemcpyLoopLowering()) {
+ createMemCpyLoop(/* InsertBefore */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ Memcpy->getLength(),
+ /* SrcAlign */ Memcpy->getAlignment(),
+ /* DestAlign */ Memcpy->getAlignment(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile());
+ } else {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
+ createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ CI,
+ /* SrcAlign */ Memcpy->getAlignment(),
+ /* DestAlign */ Memcpy->getAlignment(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile(),
+ /* TargetTransformInfo */ TTI);
+ } else {
+ createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ Memcpy->getLength(),
+ /* SrcAlign */ Memcpy->getAlignment(),
+ /* DestAlign */ Memcpy->getAlignment(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile(),
+ /* TargetTransformInfo */ TTI);
+ }
+ }
}
void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index e724b0a28c322..dee658f983932 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5754,8 +5754,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BasicBlock *Dom = BB->getSinglePredecessor()) {
auto *PBI = dyn_cast_or_null<BranchInst>(Dom->getTerminator());
if (PBI && PBI->isConditional() &&
- PBI->getSuccessor(0) != PBI->getSuccessor(1) &&
- (PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB)) {
+ PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+ assert(PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB);
bool CondIsFalse = PBI->getSuccessor(1) == BB;
Optional<bool> Implication = isImpliedCondition( PBI->getCondition(),
BI->getCondition(), DL, CondIsFalse);
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index ec8b0d426265a..6d90e6b48358a 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -80,6 +81,7 @@ namespace {
bool IsSigned);
bool eliminateSDiv(BinaryOperator *SDiv);
bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand);
+ bool strengthenRightShift(BinaryOperator *BO, Value *IVOperand);
};
}
@@ -154,6 +156,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
unsigned IVOperIdx = 0;
ICmpInst::Predicate Pred = ICmp->getPredicate();
+ ICmpInst::Predicate OriginalPred = Pred;
if (IVOperand != ICmp->getOperand(0)) {
// Swapped
assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
@@ -262,6 +265,16 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
ICmp->setPredicate(InvariantPredicate);
ICmp->setOperand(0, NewLHS);
ICmp->setOperand(1, NewRHS);
+ } else if (ICmpInst::isSigned(OriginalPred) &&
+ SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
+ // If we were unable to make anything above, all we can do is canonicalize
+ // the comparison hoping that it will open the doors for other
+ // optimizations. If we find out that we compare two non-negative values,
+ // we turn the instruction's predicate to its unsigned version. Note that
+ // we cannot rely on Pred here unless we check if we have swapped it.
+ assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
+ DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp << '\n');
+ ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
} else
return;
@@ -583,6 +596,35 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, return Changed; }
+/// Annotate the Shr in (X << IVOperand) >> C as exact using the
+/// information from the IV's range. Returns true if anything changed, false
+/// otherwise.
+bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO,
+ Value *IVOperand) {
+ using namespace llvm::PatternMatch;
+
+ if (BO->getOpcode() == Instruction::Shl) {
+ bool Changed = false;
+ ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand));
+ for (auto *U : BO->users()) {
+ const APInt *C;
+ if (match(U,
+ m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) ||
+ match(U,
+ m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) {
+ BinaryOperator *Shr = cast<BinaryOperator>(U);
+ if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) {
+ Shr->setIsExact(true);
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+ }
+
+ return false;
+}
+
/// Add all uses of Def to the current IV's worklist.
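// The exactness reasoning in strengthenRightShift above: if the IV's unsigned
// range proves the shift amount IV is always >= C, then (X << IV) has at
// least C low zero bits, so (X << IV) >> C (lshr or ashr) only ever discards
// zero bits, which is precisely what the `exact` flag asserts.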
static void pushIVUsers( Instruction *Def, @@ -675,8 +717,9 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { } if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseOper.first)) { - if (isa<OverflowingBinaryOperator>(BO) && - strengthenOverflowingOperation(BO, IVOperand)) { + if ((isa<OverflowingBinaryOperator>(BO) && + strengthenOverflowingOperation(BO, IVOperand)) || + (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) { // re-queue uses of the now modified binary operator and fall // through to the checks that remain. pushIVUsers(IVOperand, Simplified, SimpleIVUsers); diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index b723b65f35e59..77c0a41929ac7 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -656,7 +656,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); // memchr(x, y, 0) -> null - if (LenC && LenC->isNullValue()) + if (LenC && LenC->isZero()) return Constant::getNullValue(CI->getType()); // From now on we need at least constant length and string. @@ -2280,7 +2280,7 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, return true; if (ConstantInt *ObjSizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) { - if (ObjSizeCI->isAllOnesValue()) + if (ObjSizeCI->isMinusOne()) return true; // If the object size wasn't -1 (unknown), bail out if we were asked to. if (OnlyLowerUnknownSize) diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp index 60d9ede2c4871..c3feea6a0a414 100644 --- a/lib/Transforms/Utils/VNCoercion.cpp +++ b/lib/Transforms/Utils/VNCoercion.cpp @@ -51,25 +51,24 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy, // If the store and reload are the same size, we can always reuse it. if (StoredValSize == LoadedValSize) { // Pointer to Pointer -> use bitcast. - if (StoredValTy->getScalarType()->isPointerTy() && - LoadedTy->getScalarType()->isPointerTy()) { + if (StoredValTy->isPtrOrPtrVectorTy() && LoadedTy->isPtrOrPtrVectorTy()) { StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy); } else { // Convert source pointers to integers, which can be bitcast. - if (StoredValTy->getScalarType()->isPointerTy()) { + if (StoredValTy->isPtrOrPtrVectorTy()) { StoredValTy = DL.getIntPtrType(StoredValTy); StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy); } Type *TypeToCastTo = LoadedTy; - if (TypeToCastTo->getScalarType()->isPointerTy()) + if (TypeToCastTo->isPtrOrPtrVectorTy()) TypeToCastTo = DL.getIntPtrType(TypeToCastTo); if (StoredValTy != TypeToCastTo) StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo); // Cast to pointer if the load needs a pointer type. - if (LoadedTy->getScalarType()->isPointerTy()) + if (LoadedTy->isPtrOrPtrVectorTy()) StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy); } @@ -86,7 +85,7 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy, "canCoerceMustAliasedValueToLoad fail"); // Convert source pointers to integers, which can be manipulated. 
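// Pointers cannot be truncated or extended directly, so this helper always
// routes them through integers first (sketch):
//
//   ptr -> ptrtoint -> iPTR -> bitcast/trunc -> iN -> inttoptr -> ptr
//
// with the pointer-sized integer width supplied by DL.getIntPtrType().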
- if (StoredValTy->getScalarType()->isPointerTy()) { + if (StoredValTy->isPtrOrPtrVectorTy()) { StoredValTy = DL.getIntPtrType(StoredValTy); StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy); } @@ -112,7 +111,7 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy, if (LoadedTy != NewIntTy) { // If the result is a pointer, inttoptr. - if (LoadedTy->getScalarType()->isPointerTy()) + if (LoadedTy->isPtrOrPtrVectorTy()) StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy); else // Otherwise, bitcast. @@ -316,7 +315,7 @@ static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy, uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8; // Compute which bits of the stored value are being used by the load. Convert // to an integer type to start with. - if (SrcVal->getType()->getScalarType()->isPointerTy()) + if (SrcVal->getType()->isPtrOrPtrVectorTy()) SrcVal = Helper.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType())); if (!SrcVal->getType()->isIntegerTy()) SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8)); diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 193cc4d137870..eb82ee283d449 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5315,8 +5315,13 @@ void LoopVectorizationLegality::addInductionPhi( // Both the PHI node itself, and the "post-increment" value feeding // back into the PHI node may have external users. - AllowedExit.insert(Phi); - AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); + // We can allow those uses, except if the SCEVs we have for them rely + // on predicates that only hold within the loop, since allowing the exit + // currently means re-using this SCEV outside the loop. + if (PSE.getUnionPredicate().isAlwaysTrue()) { + AllowedExit.insert(Phi); + AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); + } DEBUG(dbgs() << "LV: Found an induction variable.\n"); return; diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index b494526369d6a..4425043ad39a0 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -860,7 +860,7 @@ private: bool tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP); /// Un-bundles a group of instructions. - void cancelScheduling(ArrayRef<Value *> VL); + void cancelScheduling(ArrayRef<Value *> VL, Value *OpValue); /// Extends the scheduling region so that V is inside the region. /// \returns true if the region size is within the limit. 
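The SLPVectorizer change below is mechanical but easy to misread in squashed
form: every cancelScheduling call site now passes the bundle's representative
value explicitly (VL0 inside buildTree_rec, VL[0] in tryScheduleBundle), so
the ScheduleData lookup inside cancelScheduling is keyed off a named OpValue
rather than implicitly off VL[0]:

  BS.cancelScheduling(VL, VL0); // previously: BS.cancelScheduling(VL);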
@@ -1258,7 +1258,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, cast<PHINode>(VL[j])->getIncomingValueForBlock(PH->getIncomingBlock(i))); if (Term) { DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n"); - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); return; } @@ -1284,7 +1284,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, if (Reuse) { DEBUG(dbgs() << "SLP: Reusing extract sequence.\n"); } else { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); } newTreeEntry(VL, Reuse, UserTreeIdx); return; @@ -1301,7 +1301,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, if (DL->getTypeSizeInBits(ScalarTy) != DL->getTypeAllocSizeInBits(ScalarTy)) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n"); return; @@ -1312,7 +1312,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) { LoadInst *L = cast<LoadInst>(VL[i]); if (!L->isSimple()) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n"); return; @@ -1349,7 +1349,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, break; } - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); if (ReverseConsecutive) { @@ -1376,7 +1376,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, for (unsigned i = 0; i < VL.size(); ++i) { Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType(); if (Ty != SrcTy || !isValidElementType(Ty)) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n"); return; @@ -1404,7 +1404,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, CmpInst *Cmp = cast<CmpInst>(VL[i]); if (Cmp->getPredicate() != P0 || Cmp->getOperand(0)->getType() != ComparedTy) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n"); return; @@ -1471,7 +1471,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, for (unsigned j = 0; j < VL.size(); ++j) { if (cast<Instruction>(VL[j])->getNumOperands() != 2) { DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n"); - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); return; } @@ -1484,7 +1484,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType(); if (Ty0 != CurTy) { DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n"); - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); return; } @@ -1496,7 +1496,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, if (!isa<ConstantInt>(Op)) { DEBUG( dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n"); - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); return; } @@ -1518,7 +1518,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, // Check if the stores are consecutive or of we need to swizzle them. 
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Non-consecutive store.\n"); return; @@ -1541,7 +1541,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, // represented by an intrinsic call Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); if (!isTriviallyVectorizable(ID)) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Non-vectorizable call.\n"); return; @@ -1555,7 +1555,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, if (!CI2 || CI2->getCalledFunction() != Int || getVectorIntrinsicIDForCall(CI2, TLI) != ID || !CI->hasIdenticalOperandBundleSchema(*CI2)) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i] << "\n"); @@ -1566,7 +1566,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, if (hasVectorInstrinsicScalarOpd(ID, 1)) { Value *A1J = CI2->getArgOperand(1); if (A1I != A1J) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI << " argument "<< A1I<<"!=" << A1J @@ -1579,7 +1579,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, !std::equal(CI->op_begin() + CI->getBundleOperandsStartIndex(), CI->op_begin() + CI->getBundleOperandsEndIndex(), CI2->op_begin() + CI2->getBundleOperandsStartIndex())) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" << *CI << "!=" << *VL[i] << '\n'); @@ -1603,7 +1603,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, // If this is not an alternate sequence of opcode like add-sub // then do not vectorize this instruction. if (!isAltShuffle) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); return; @@ -1631,7 +1631,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, return; } default: - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n"); return; @@ -3177,17 +3177,18 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, } } if (!Bundle->isReady()) { - cancelScheduling(VL); + cancelScheduling(VL, VL[0]); return false; } return true; } -void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) { - if (isa<PHINode>(VL[0])) +void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL, + Value *OpValue) { + if (isa<PHINode>(OpValue)) return; - ScheduleData *Bundle = getScheduleData(VL[0]); + ScheduleData *Bundle = getScheduleData(OpValue); DEBUG(dbgs() << "SLP: cancel scheduling of " << *Bundle << "\n"); assert(!Bundle->IsScheduled && "Can't cancel bundle which is already scheduled"); |