vendor/llvm/llvm-trunk-r304460

author: Dimitry Andric <dim@FreeBSD.org> 2017-06-01 20:58:36 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2017-06-01 20:58:36 +0000
commit: f382538d471e38a9b98f016c4caebd24c8d60b62 (patch)
tree: d30f3d58b1044b5355d50c17a6a96c6a0b35703a /lib/Transforms
parent: ee2f195dd3e40f49698ca4dc2666ec09c770e80d (diff)
17 files changed, 309 insertions, 352 deletions
diff --git a/lib/Transforms/Coroutines/CoroCleanup.cpp b/lib/Transforms/Coroutines/CoroCleanup.cpp
index 5cf2a8c25d837..359876627fce1 100644
--- a/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -101,7 +101,9 @@ namespace {
 struct CoroCleanup : FunctionPass {
   static char ID; // Pass identification, replacement for typeid
 
-  CoroCleanup() : FunctionPass(ID) {}
+  CoroCleanup() : FunctionPass(ID) {
+    initializeCoroCleanupPass(*PassRegistry::getPassRegistry());
+  }
 
   std::unique_ptr<Lowerer> L;
 
diff --git a/lib/Transforms/Coroutines/CoroEarly.cpp b/lib/Transforms/Coroutines/CoroEarly.cpp
index b529891861651..ba05896af150c 100644
--- a/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -183,7 +183,9 @@ namespace {
 
 struct CoroEarly : public FunctionPass {
   static char ID; // Pass identification, replacement for typeid.
-  CoroEarly() : FunctionPass(ID) {}
+  CoroEarly() : FunctionPass(ID) {
+    initializeCoroEarlyPass(*PassRegistry::getPassRegistry());
+  }
 
   std::unique_ptr<Lowerer> L;
 
diff --git a/lib/Transforms/Coroutines/CoroElide.cpp b/lib/Transforms/Coroutines/CoroElide.cpp
index acb22449142b9..42fd6d7461459 100644
--- a/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/lib/Transforms/Coroutines/CoroElide.cpp
@@ -258,7 +258,9 @@ static bool replaceDevirtTrigger(Function &F) {
 namespace {
 struct CoroElide : FunctionPass {
   static char ID;
-  CoroElide() : FunctionPass(ID) {}
+  CoroElide() : FunctionPass(ID) {
+    initializeCoroElidePass(*PassRegistry::getPassRegistry());
+  }
 
   std::unique_ptr<Lowerer> L;
 
diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp
index cd549e4be2826..613b4a7f03e98 100644
--- a/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -681,7 +681,9 @@ namespace {
 
 struct CoroSplit : public CallGraphSCCPass {
   static char ID; // Pass identification, replacement for typeid
-  CoroSplit() : CallGraphSCCPass(ID) {}
+  CoroSplit() : CallGraphSCCPass(ID) {
+    initializeCoroSplitPass(*PassRegistry::getPassRegistry());
+  }
 
   bool Run = false;
 
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 4c417f1c55ebe..bc0967448cddf 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -652,12 +652,21 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
   // only split block when necessary:
   PHINode *FirstPhi = getFirstPHI(PreReturn);
   unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size();
+  auto IsTrivialPhi = [](PHINode *PN) -> Value * {
+    Value *CommonValue = PN->getIncomingValue(0);
+    if (all_of(PN->incoming_values(),
+               [&](Value *V) { return V == CommonValue; }))
+      return CommonValue;
+    return nullptr;
+  };
+
   if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) {
 
     NewReturnBlock = NewReturnBlock->splitBasicBlock(
         NewReturnBlock->getFirstNonPHI()->getIterator());
     BasicBlock::iterator I = PreReturn->begin();
     Instruction *Ins = &NewReturnBlock->front();
+    SmallVector<Instruction *, 4> DeadPhis;
     while (I != PreReturn->end()) {
       PHINode *OldPhi = dyn_cast<PHINode>(I);
       if (!OldPhi)
@@ -674,8 +683,22 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
         RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE);
         OldPhi->removeIncomingValue(NewE);
       }
+
+      // After incoming values splitting, the old phi may become trivial.
+      // Keeping the trivial phi can introduce definition inside the outline
+      // region which is live-out, causing necessary overhead (load, store
+      // arg passing etc).
+      if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
+        OldPhi->replaceAllUsesWith(OldPhiVal);
+        DeadPhis.push_back(OldPhi);
+      }
+
       ++I;
     }
+
+    for (auto *DP : DeadPhis)
+      DP->eraseFromParent();
+
     for (auto E : OI->ReturnBlockPreds) {
       BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
       NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 659cb9df00a2c..9dede4cedd1de 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -6,14 +6,8 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This pass prepares a module containing type metadata for ThinLTO by splitting
-// it into regular and thin LTO parts if possible, and writing both parts to
-// a multi-module bitcode file. Modules that do not contain type metadata are
-// written unmodified as a single module.
-//
-//===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
@@ -436,3 +430,15 @@ ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str,
                                                 raw_ostream *ThinLinkOS) {
   return new WriteThinLTOBitcode(Str, ThinLinkOS);
 }
+
+PreservedAnalyses
+llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
+  FunctionAnalysisManager &FAM =
+      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  writeThinLTOBitcode(OS, ThinLinkOS,
+                      [&FAM](Function &F) -> AAResults & {
+                        return FAM.getResult<AAManager>(F);
+                      },
+                      M, &AM.getResult<ModuleSummaryIndexAnalysis>(M));
+  return PreservedAnalyses::all();
+}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 92a38f26dde7a..b44499ec4be9f 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3838,24 +3838,24 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
   // Mark any parameters that are known to be non-null with the nonnull
   // attribute.  This is helpful for inlining calls to functions with null
   // checks on their arguments.
-  SmallVector<unsigned, 4> Indices;
+  SmallVector<unsigned, 4> ArgNos;
   unsigned ArgNo = 0;
 
   for (Value *V : CS.args()) {
     if (V->getType()->isPointerTy() &&
         !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
         isKnownNonNullAt(V, CS.getInstruction(), &DT))
-      Indices.push_back(ArgNo + AttributeList::FirstArgIndex);
+      ArgNos.push_back(ArgNo);
     ArgNo++;
   }
 
   assert(ArgNo == CS.arg_size() && "sanity check");
 
-  if (!Indices.empty()) {
+  if (!ArgNos.empty()) {
     AttributeList AS = CS.getAttributes();
     LLVMContext &Ctx = CS.getInstruction()->getContext();
-    AS = AS.addAttribute(Ctx, Indices,
-                         Attribute::get(Ctx, Attribute::NonNull));
+    AS = AS.addParamAttribute(Ctx, ArgNos,
+                              Attribute::get(Ctx, Attribute::NonNull));
     CS.setAttributes(AS);
     Changed = true;
   }
diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 1e30dbf6b55a8..b2d95271479c3 100644
--- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -182,6 +182,14 @@ static cl::opt<bool>
                   cl::desc("Use this option to turn on/off "
                            "memory intrinsic size profiling."));
 
+// Emit branch probability as optimization remarks.
+static cl::opt<bool>
+    EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
+                          cl::desc("When this option is on, the annotated "
+                                   "branch probability will be emitted as "
+                                   " optimization remarks: -Rpass-analysis="
+                                   "pgo-instr-use"));
+
 // Command line option to turn on CFG dot dump after profile annotation.
 // Defined in Analysis/BlockFrequencyInfo.cpp:  -pgo-view-counts
 extern cl::opt<bool> PGOViewCounts;
@@ -192,6 +200,39 @@ extern cl::opt<std::string> ViewBlockFreqFuncName;
 
 namespace {
 
+// Return a string describing the branch condition that can be
+// used in static branch probability heuristics:
+std::string getBranchCondString(Instruction *TI) {
+  BranchInst *BI = dyn_cast<BranchInst>(TI);
+  if (!BI || !BI->isConditional())
+    return std::string();
+
+  Value *Cond = BI->getCondition();
+  ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
+  if (!CI)
+    return std::string();
+
+  std::string result;
+  raw_string_ostream OS(result);
+  OS << CmpInst::getPredicateName(CI->getPredicate()) << "_";
+  CI->getOperand(0)->getType()->print(OS, true);
+
+  Value *RHS = CI->getOperand(1);
+  ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
+  if (CV) {
+    if (CV->isZero())
+      OS << "_Zero";
+    else if (CV->isOne())
+      OS << "_One";
+    else if (CV->isAllOnesValue())
+      OS << "_MinusOne";
+    else
+      OS << "_Const";
+  }
+  OS.flush();
+  return result;
+}
+
 /// The select instruction visitor plays three roles specified
 /// by the mode. In \c VM_counting mode, it simply counts the number of
 /// select instructions. In \c VM_instrument mode, it inserts code to count
@@ -1424,6 +1465,29 @@ void setProfMetadata(Module *M, Instruction *TI, ArrayRef<uint64_t> EdgeCounts,
         for (const auto &W : Weights) { dbgs() << W << " "; }
         dbgs() << "\n";);
   TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+  if (EmitBranchProbability) {
+    std::string BrCondStr = getBranchCondString(TI);
+    if (BrCondStr.empty())
+      return;
+
+    unsigned WSum =
+        std::accumulate(Weights.begin(), Weights.end(), 0,
+                        [](unsigned w1, unsigned w2) { return w1 + w2; });
+    uint64_t TotalCount =
+        std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), 0,
+                        [](uint64_t c1, uint64_t c2) { return c1 + c2; });
+    BranchProbability BP(Weights[0], WSum);
+    std::string BranchProbStr;
+    raw_string_ostream OS(BranchProbStr);
+    OS << BP;
+    OS << " (total count : " << TotalCount << ")";
+    OS.flush();
+    Function *F = TI->getParent()->getParent();
+    emitOptimizationRemarkAnalysis(
+        F->getContext(), "pgo-use-annot", *F, TI->getDebugLoc(),
+        Twine(BrCondStr) +
+            " is true with probability : " + Twine(BranchProbStr));
+  }
 }
 
 template <> struct GraphTraits<PGOUseFunc *> {
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 300085eccb0c2..325b64cd8b436 100644
--- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -7,24 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// Coverage instrumentation that works with AddressSanitizer
-// and potentially with other Sanitizers.
-//
-// We create a Guard variable with the same linkage
-// as the function and inject this code into the entry block (SCK_Function)
-// or all blocks (SCK_BB):
-// if (Guard < 0) {
-//    __sanitizer_cov(&Guard);
-// }
-// The accesses to Guard are atomic. The rest of the logic is
-// in __sanitizer_cov (it's fine to call it more than once).
-//
-// With SCK_Edge we also split critical edges this effectively
-// instrumenting all edges.
-//
-// This coverage implementation provides very limited data:
-// it only tells if a given function (block) was ever executed. No counters.
-// But for many use cases this is what we need and the added slowdown small.
+// Coverage instrumentation done on LLVM IR level, works with Sanitizers.
 //
 //===----------------------------------------------------------------------===//
 
@@ -56,9 +39,6 @@ using namespace llvm;
 
 #define DEBUG_TYPE "sancov"
 
-static const char *const SanCovModuleInitName = "__sanitizer_cov_module_init";
-static const char *const SanCovName = "__sanitizer_cov";
-static const char *const SanCovWithCheckName = "__sanitizer_cov_with_check";
 static const char *const SanCovTracePCIndirName =
     "__sanitizer_cov_trace_pc_indir";
 static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc";
@@ -84,12 +64,6 @@ static cl::opt<int> ClCoverageLevel(
              "3: all blocks and critical edges"),
     cl::Hidden, cl::init(0));
 
-static cl::opt<unsigned> ClCoverageBlockThreshold(
-    "sanitizer-coverage-block-threshold",
-    cl::desc("Use a callback with a guard check inside it if there are"
-             " more than this number of blocks."),
-    cl::Hidden, cl::init(0));
-
 static cl::opt<bool> ClExperimentalTracePC("sanitizer-coverage-trace-pc",
                                            cl::desc("Experimental pc tracing"),
                                            cl::Hidden, cl::init(false));
@@ -151,6 +125,8 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
   Options.TraceGep |= ClGEPTracing;
   Options.TracePC |= ClExperimentalTracePC;
   Options.TracePCGuard |= ClTracePCGuard;
+  if (!Options.TracePCGuard && !Options.TracePC)
+    Options.TracePCGuard = true; // TracePCGuard is default.
   Options.NoPrune |= !ClPruneBlocks;
   return Options;
 }
@@ -184,18 +160,10 @@ private:
                             ArrayRef<Instruction *> SwitchTraceTargets);
   bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks);
   void CreateFunctionGuardArray(size_t NumGuards, Function &F);
-  void SetNoSanitizeMetadata(Instruction *I);
-  void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx,
-                             bool UseCalls);
-  unsigned NumberOfInstrumentedBlocks() {
-    return SanCovFunction->getNumUses() +
-           SanCovWithCheckFunction->getNumUses();
-  }
+  void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx);
   StringRef getSanCovTracePCGuardSection() const;
   StringRef getSanCovTracePCGuardSectionStart() const;
   StringRef getSanCovTracePCGuardSectionEnd() const;
-  Function *SanCovFunction;
-  Function *SanCovWithCheckFunction;
   Function *SanCovTracePCIndir;
   Function *SanCovTracePC, *SanCovTracePCGuard;
   Function *SanCovTraceCmpFunction[4];
@@ -209,7 +177,6 @@ private:
   LLVMContext *C;
   const DataLayout *DL;
 
-  GlobalVariable *GuardArray;
   GlobalVariable *FunctionGuardArray;  // for trace-pc-guard.
   bool HasSancovGuardsSection;
 
@@ -230,16 +197,11 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
   IntptrPtrTy = PointerType::getUnqual(IntptrTy);
   Type *VoidTy = Type::getVoidTy(*C);
   IRBuilder<> IRB(*C);
-  Type *Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
   Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty());
   Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
   Int64Ty = IRB.getInt64Ty();
   Int32Ty = IRB.getInt32Ty();
 
-  SanCovFunction = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction(SanCovName, VoidTy, Int32PtrTy));
-  SanCovWithCheckFunction = checkSanitizerInterfaceFunction(
-      M.getOrInsertFunction(SanCovWithCheckName, VoidTy, Int32PtrTy));
   SanCovTracePCIndir = checkSanitizerInterfaceFunction(
       M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy));
   SanCovTraceCmpFunction[0] =
@@ -278,41 +240,10 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
   SanCovTracePCGuard = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
       SanCovTracePCGuardName, VoidTy, Int32PtrTy));
 
-  // At this point we create a dummy array of guards because we don't
-  // know how many elements we will need.
-  Type *Int32Ty = IRB.getInt32Ty();
-
-  if (!Options.TracePCGuard)
-    GuardArray =
-        new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
-                           nullptr, "__sancov_gen_cov_tmp");
-
   for (auto &F : M)
     runOnFunction(F);
 
-  auto N = NumberOfInstrumentedBlocks();
-
-  GlobalVariable *RealGuardArray = nullptr;
-  if (!Options.TracePCGuard) {
-    // Now we know how many elements we need. Create an array of guards
-    // with one extra element at the beginning for the size.
-    Type *Int32ArrayNTy = ArrayType::get(Int32Ty, N + 1);
-    RealGuardArray = new GlobalVariable(
-        M, Int32ArrayNTy, false, GlobalValue::PrivateLinkage,
-        Constant::getNullValue(Int32ArrayNTy), "__sancov_gen_cov");
-
-    // Replace the dummy array with the real one.
-    GuardArray->replaceAllUsesWith(
-        IRB.CreatePointerCast(RealGuardArray, Int32PtrTy));
-    GuardArray->eraseFromParent();
-  }
-
   // Create variable for module (compilation unit) name
-  Constant *ModNameStrConst =
-      ConstantDataArray::getString(M.getContext(), M.getName(), true);
-  GlobalVariable *ModuleName = new GlobalVariable(
-      M, ModNameStrConst->getType(), true, GlobalValue::PrivateLinkage,
-      ModNameStrConst, "__sancov_gen_modname");
   if (Options.TracePCGuard) {
     if (HasSancovGuardsSection) {
       Function *CtorFunc;
@@ -339,18 +270,7 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
         appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
       }
     }
-  } else if (!Options.TracePC) {
-    Function *CtorFunc;
-    std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
-        M, SanCovModuleCtorName, SanCovModuleInitName,
-        {Int32PtrTy, IntptrTy, Int8PtrTy, Int8PtrTy},
-        {IRB.CreatePointerCast(RealGuardArray, Int32PtrTy),
-         ConstantInt::get(IntptrTy, N), Constant::getNullValue(Int8PtrTy),
-         IRB.CreatePointerCast(ModuleName, Int8PtrTy)});
-
-    appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
   }
-
   return true;
 }
 
@@ -494,13 +414,12 @@ bool SanitizerCoverageModule::InjectCoverage(Function &F,
     return false;
   case SanitizerCoverageOptions::SCK_Function:
     CreateFunctionGuardArray(1, F);
-    InjectCoverageAtBlock(F, F.getEntryBlock(), 0, false);
+    InjectCoverageAtBlock(F, F.getEntryBlock(), 0);
     return true;
   default: {
-    bool UseCalls = ClCoverageBlockThreshold < AllBlocks.size();
     CreateFunctionGuardArray(AllBlocks.size(), F);
     for (size_t i = 0, N = AllBlocks.size(); i < N; i++)
-      InjectCoverageAtBlock(F, *AllBlocks[i], i, UseCalls);
+      InjectCoverageAtBlock(F, *AllBlocks[i], i);
     return true;
   }
   }
@@ -517,8 +436,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
     Function &F, ArrayRef<Instruction *> IndirCalls) {
   if (IndirCalls.empty())
     return;
-  if (!Options.TracePC && !Options.TracePCGuard)
-    return;
+  assert(Options.TracePC || Options.TracePCGuard);
   for (auto I : IndirCalls) {
     IRBuilder<> IRB(I);
     CallSite CS(I);
@@ -625,13 +543,8 @@ void SanitizerCoverageModule::InjectTraceForCmp(
   }
 }
 
-void SanitizerCoverageModule::SetNoSanitizeMetadata(Instruction *I) {
-  I->setMetadata(I->getModule()->getMDKindID("nosanitize"),
-                 MDNode::get(*C, None));
-}
-
 void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
-                                                    size_t Idx, bool UseCalls) {
+                                                    size_t Idx) {
   BasicBlock::iterator IP = BB.getFirstInsertionPt();
   bool IsEntryBB = &BB == &F.getEntryBlock();
   DebugLoc EntryLoc;
@@ -651,47 +564,14 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
   if (Options.TracePC) {
     IRB.CreateCall(SanCovTracePC); // gets the PC using GET_CALLER_PC.
     IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
-  } else if (Options.TracePCGuard) {
+  } else {
+    assert(Options.TracePCGuard);
     auto GuardPtr = IRB.CreateIntToPtr(
         IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
                       ConstantInt::get(IntptrTy, Idx * 4)),
         Int32PtrTy);
-    if (!UseCalls) {
-      auto GuardLoad = IRB.CreateLoad(GuardPtr);
-      GuardLoad->setAtomic(AtomicOrdering::Monotonic);
-      GuardLoad->setAlignment(8);
-      SetNoSanitizeMetadata(GuardLoad);  // Don't instrument with e.g. asan.
-      auto Cmp = IRB.CreateICmpNE(
-          GuardLoad, Constant::getNullValue(GuardLoad->getType()));
-      auto Ins = SplitBlockAndInsertIfThen(
-          Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
-      IRB.SetInsertPoint(Ins);
-      IRB.SetCurrentDebugLocation(EntryLoc);
-    }
     IRB.CreateCall(SanCovTracePCGuard, GuardPtr);
     IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
-  } else {
-    Value *GuardP = IRB.CreateAdd(
-        IRB.CreatePointerCast(GuardArray, IntptrTy),
-        ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4));
-    GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy);
-    if (UseCalls) {
-      IRB.CreateCall(SanCovWithCheckFunction, GuardP);
-    } else {
-      LoadInst *Load = IRB.CreateLoad(GuardP);
-      Load->setAtomic(AtomicOrdering::Monotonic);
-      Load->setAlignment(4);
-      SetNoSanitizeMetadata(Load);
-      Value *Cmp =
-          IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load);
-      Instruction *Ins = SplitBlockAndInsertIfThen(
-          Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
-      IRB.SetInsertPoint(Ins);
-      IRB.SetCurrentDebugLocation(EntryLoc);
-      // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC.
-      IRB.CreateCall(SanCovFunction, GuardP);
-      IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
-    }
   }
 }
 
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index ee493a8ec7e18..7b625b9b136ec 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -305,7 +305,7 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) {
 
 /// Infer nonnull attributes for the arguments at the specified callsite.
 static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
-  SmallVector<unsigned, 4> Indices;
+  SmallVector<unsigned, 4> ArgNos;
   unsigned ArgNo = 0;
 
   for (Value *V : CS.args()) {
@@ -318,18 +318,19 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
         LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
                             ConstantPointerNull::get(Type),
                             CS.getInstruction()) == LazyValueInfo::False)
-      Indices.push_back(ArgNo + AttributeList::FirstArgIndex);
+      ArgNos.push_back(ArgNo);
     ArgNo++;
   }
 
   assert(ArgNo == CS.arg_size() && "sanity check");
 
-  if (Indices.empty())
+  if (ArgNos.empty())
     return false;
 
   AttributeList AS = CS.getAttributes();
   LLVMContext &Ctx = CS.getInstruction()->getContext();
-  AS = AS.addAttribute(Ctx, Indices, Attribute::get(Ctx, Attribute::NonNull));
+  AS = AS.addParamAttribute(Ctx, ArgNos,
+                            Attribute::get(Ctx, Attribute::NonNull));
   CS.setAttributes(AS);
 
   return true;
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 0d6e0538261d0..0490d93f64553 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -80,10 +80,9 @@ MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
 struct llvm::GVN::Expression {
   uint32_t opcode;
   Type *type;
-  bool commutative;
   SmallVector<uint32_t, 4> varargs;
 
-  Expression(uint32_t o = ~2U) : opcode(o), commutative(false) {}
+  Expression(uint32_t o = ~2U) : opcode(o) {}
 
   bool operator==(const Expression &other) const {
     if (opcode != other.opcode)
@@ -247,7 +246,6 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
     assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
     if (e.varargs[0] > e.varargs[1])
       std::swap(e.varargs[0], e.varargs[1]);
-    e.commutative = true;
   }
 
   if (CmpInst *C = dyn_cast<CmpInst>(I)) {
@@ -258,7 +256,6 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
       Predicate = CmpInst::getSwappedPredicate(Predicate);
     }
     e.opcode = (C->getOpcode() << 8) | Predicate;
-    e.commutative = true;
   } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
     for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
          II != IE; ++II)
@@ -284,7 +281,6 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
     Predicate = CmpInst::getSwappedPredicate(Predicate);
   }
   e.opcode = (Opcode << 8) | Predicate;
-  e.commutative = true;
   return e;
 }
 
@@ -352,25 +348,25 @@ GVN::ValueTable::~ValueTable() = default;
 /// add - Insert a value into the table with a specified value number.
 void GVN::ValueTable::add(Value *V, uint32_t num) {
   valueNumbering.insert(std::make_pair(V, num));
-  if (PHINode *PN = dyn_cast<PHINode>(V))
-    NumberingPhi[num] = PN;
 }
 
 uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
   if (AA->doesNotAccessMemory(C)) {
     Expression exp = createExpr(C);
-    uint32_t e = assignExpNewValueNum(exp).first;
+    uint32_t &e = expressionNumbering[exp];
+    if (!e) e = nextValueNumber++;
     valueNumbering[C] = e;
     return e;
   } else if (AA->onlyReadsMemory(C)) {
     Expression exp = createExpr(C);
-    auto ValNum = assignExpNewValueNum(exp);
-    if (ValNum.second) {
-      valueNumbering[C] = ValNum.first;
-      return ValNum.first;
+    uint32_t &e = expressionNumbering[exp];
+    if (!e) {
+      e = nextValueNumber++;
+      valueNumbering[C] = e;
+      return e;
     }
     if (!MD) {
-      uint32_t e = assignExpNewValueNum(exp).first;
+      e = nextValueNumber++;
       valueNumbering[C] = e;
       return e;
     }
@@ -526,29 +522,23 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
     case Instruction::ExtractValue:
       exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
       break;
-    case Instruction::PHI:
-      valueNumbering[V] = nextValueNumber;
-      NumberingPhi[nextValueNumber] = cast<PHINode>(V);
-      return nextValueNumber++;
     default:
       valueNumbering[V] = nextValueNumber;
       return nextValueNumber++;
   }
 
-  uint32_t e = assignExpNewValueNum(exp).first;
+  uint32_t& e = expressionNumbering[exp];
+  if (!e) e = nextValueNumber++;
   valueNumbering[V] = e;
   return e;
 }
 
 /// Returns the value number of the specified value. Fails if
 /// the value has not yet been numbered.
-uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const {
+uint32_t GVN::ValueTable::lookup(Value *V) const {
   DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
-  if (Verify) {
-    assert(VI != valueNumbering.end() && "Value not numbered?");
-    return VI->second;
-  }
-  return (VI != valueNumbering.end()) ? VI->second : 0;
+  assert(VI != valueNumbering.end() && "Value not numbered?");
+  return VI->second;
 }
 
 /// Returns the value number of the given comparison,
@@ -559,29 +549,21 @@ uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode,
                                          CmpInst::Predicate Predicate,
                                          Value *LHS, Value *RHS) {
   Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
-  return assignExpNewValueNum(exp).first;
+  uint32_t& e = expressionNumbering[exp];
+  if (!e) e = nextValueNumber++;
+  return e;
 }
 
 /// Remove all entries from the ValueTable.
 void GVN::ValueTable::clear() {
   valueNumbering.clear();
   expressionNumbering.clear();
-  NumberingPhi.clear();
-  PhiTranslateTable.clear();
-  BlockRPONumber.clear();
   nextValueNumber = 1;
-  Expressions.clear();
-  ExprIdx.clear();
-  nextExprNumber = 0;
 }
 
 /// Remove a value from the value numbering.
 void GVN::ValueTable::erase(Value *V) {
-  uint32_t Num = valueNumbering.lookup(V);
   valueNumbering.erase(V);
-  // If V is PHINode, V <--> value number is an one-to-one mapping.
-  if (isa<PHINode>(V))
-    NumberingPhi.erase(Num);
 }
 
 /// verifyRemoved - Verify that the value is removed from all internal data
@@ -1469,104 +1451,6 @@ bool GVN::processLoad(LoadInst *L) {
   return false;
 }
 
-/// Return a pair the first field showing the value number of \p Exp and the
-/// second field showing whether it is a value number newly created.
-std::pair<uint32_t, bool>
-GVN::ValueTable::assignExpNewValueNum(Expression &Exp) {
-  uint32_t &e = expressionNumbering[Exp];
-  bool CreateNewValNum = !e;
-  if (CreateNewValNum) {
-    Expressions.push_back(Exp);
-    if (ExprIdx.size() < nextValueNumber + 1)
-      ExprIdx.resize(nextValueNumber * 2);
-    e = nextValueNumber;
-    ExprIdx[nextValueNumber++] = nextExprNumber++;
-  }
-  return {e, CreateNewValNum};
-}
-
-void GVN::ValueTable::assignBlockRPONumber(Function &F) {
-  uint32_t NextBlockNumber = 1;
-  ReversePostOrderTraversal<Function *> RPOT(&F);
-  for (BasicBlock *BB : RPOT)
-    BlockRPONumber[BB] = NextBlockNumber++;
-}
-
-/// Return whether all the values related with the same \p num are
-/// defined in \p BB.
-bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
-                                     GVN &Gvn) {
-  LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
-  while (Vals && Vals->BB == BB)
-    Vals = Vals->Next;
-  return !Vals;
-}
-
-/// Wrap phiTranslateImpl to provide caching functionality.
-uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
-                                       const BasicBlock *PhiBlock, uint32_t Num,
-                                       GVN &Gvn) {
-  auto FindRes = PhiTranslateTable.find({Num, Pred});
-  if (FindRes != PhiTranslateTable.end())
-    return FindRes->second;
-  uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn);
-  PhiTranslateTable.insert({{Num, Pred}, NewNum});
-  return NewNum;
-}
-
-/// Translate value number \p Num using phis, so that it has the values of
-/// the phis in BB.
-uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
-                                           const BasicBlock *PhiBlock,
-                                           uint32_t Num, GVN &Gvn) {
-  if (PHINode *PN = NumberingPhi[Num]) {
-    if (BlockRPONumber[Pred] >= BlockRPONumber[PhiBlock])
-      return Num;
-    for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
-      if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred)
-        if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false))
-          return TransVal;
-    }
-    return Num;
-  }
-
-  // If there is any value related with Num is defined in a BB other than
-  // PhiBlock, it cannot depend on a phi in PhiBlock without going through
-  // a backedge. We can do an early exit in that case to save compile time.
-  if (!areAllValsInBB(Num, PhiBlock, Gvn))
-    return Num;
-
-  if (ExprIdx[Num] == 0 || Num >= ExprIdx.size())
-    return Num;
-  Expression Exp = Expressions[ExprIdx[Num]];
-
-  for (unsigned i = 0; i < Exp.varargs.size(); i++) {
-    // For InsertValue and ExtractValue, some varargs are index numbers
-    // instead of value numbers. Those index numbers should not be
-    // translated.
-    if ((i > 1 && Exp.opcode == Instruction::InsertValue) ||
-        (i > 0 && Exp.opcode == Instruction::ExtractValue))
-      continue;
-    Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn);
-  }
-
-  if (Exp.commutative) {
-    assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!");
-    if (Exp.varargs[0] > Exp.varargs[1]) {
-      std::swap(Exp.varargs[0], Exp.varargs[1]);
-      uint32_t Opcode = Exp.opcode >> 8;
-      if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
-        Exp.opcode = (Opcode << 8) |
-                     CmpInst::getSwappedPredicate(
-                         static_cast<CmpInst::Predicate>(Exp.opcode & 255));
-    }
-  }
-
-  if (uint32_t NewNum = expressionNumbering[Exp])
-    return NewNum;
-  return Num;
-}
-
 // In order to find a leader for a given value number at a
 // specific basic block, we first obtain the list of all Values for that number,
 // and then scan the list to find one whose block dominates the block in
@@ -1972,7 +1856,6 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
     // Fabricate val-num for dead-code in order to suppress assertion in
     // performPRE().
     assignValNumForDeadCode();
-    VN.assignBlockRPONumber(F);
     bool PREChanged = true;
     while (PREChanged) {
       PREChanged = performPRE(F);
@@ -2062,9 +1945,7 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
       success = false;
       break;
     }
-    uint32_t TValNo =
-        VN.phiTranslate(Pred, Instr->getParent(), VN.lookup(Op), *this);
-    if (Value *V = findLeader(Pred, TValNo)) {
+    if (Value *V = findLeader(Pred, VN.lookup(Op))) {
       Instr->setOperand(i, V);
     } else {
       success = false;
@@ -2081,12 +1962,10 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
   Instr->insertBefore(Pred->getTerminator());
   Instr->setName(Instr->getName() + ".pre");
   Instr->setDebugLoc(Instr->getDebugLoc());
-
-  unsigned Num = VN.lookupOrAdd(Instr);
-  VN.add(Instr, Num);
+  VN.add(Instr, ValNo);
 
   // Update the availability map to include the new instruction.
-  addToLeaderTable(Num, Instr, Pred);
+  addToLeaderTable(ValNo, Instr, Pred);
   return true;
 }
 
@@ -2135,8 +2014,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
       break;
     }
 
-    uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
-    Value *predV = findLeader(P, TValNo);
+    Value *predV = findLeader(P, ValNo);
     if (!predV) {
       predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
       PREPred = P;
diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index a143b9a3c645f..930696b036c00 100644
--- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -98,11 +98,20 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
   CallInst *CI;
 
   ICmpInst *CmpI = dyn_cast<ICmpInst>(BSI.getCondition());
+  CmpInst::Predicate Predicate;
+  uint64_t ValueComparedTo = 0;
   if (!CmpI) {
     CI = dyn_cast<CallInst>(BSI.getCondition());
+    Predicate = CmpInst::ICMP_NE;
+    ValueComparedTo = 0;
   } else {
-    if (CmpI->getPredicate() != CmpInst::ICMP_NE)
+    Predicate = CmpI->getPredicate();
+    if (Predicate != CmpInst::ICMP_NE && Predicate != CmpInst::ICMP_EQ)
       return false;
+    ConstantInt *CmpConstOperand = dyn_cast<ConstantInt>(CmpI->getOperand(1));
+    if (!CmpConstOperand)
+      return false;
+    ValueComparedTo = CmpConstOperand->getZExtValue();
     CI = dyn_cast<CallInst>(CmpI->getOperand(0));
   }
 
@@ -121,9 +130,8 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
   MDBuilder MDB(CI->getContext());
   MDNode *Node;
 
-  // If expect value is equal to 1 it means that we are more likely to take
-  // branch 0, in other case more likely is branch 1.
-  if (ExpectedValue->isOne())
+  if ((ExpectedValue->getZExtValue() == ValueComparedTo) ==
+      (Predicate == CmpInst::ICMP_EQ))
     Node = MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight);
   else
     Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight);
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 5e9f40019ce8c..27809f5b6f661 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -613,7 +613,7 @@ private:
     return CClass;
   }
   void initializeCongruenceClasses(Function &F);
-  const Expression *makePossiblePhiOfOps(Instruction *, bool,
+  const Expression *makePossiblePhiOfOps(Instruction *,
                                          SmallPtrSetImpl<Value *> &);
   void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue);
 
@@ -1937,7 +1937,8 @@ void NewGVN::touchAndErase(Map &M, const KeyType &Key) {
 }
 
 void NewGVN::addAdditionalUsers(Value *To, Value *User) const {
-  AdditionalUsers[To].insert(User);
+  if (isa<Instruction>(To))
+    AdditionalUsers[To].insert(User);
 }
 
 void NewGVN::markUsersTouched(Value *V) {
@@ -2423,7 +2424,7 @@ static bool okayForPHIOfOps(const Instruction *I) {
 // When we see an instruction that is an op of phis, generate the equivalent phi
 // of ops form.
 const Expression *
-NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge,
+NewGVN::makePossiblePhiOfOps(Instruction *I,
                              SmallPtrSetImpl<Value *> &Visited) {
   if (!okayForPHIOfOps(I))
     return nullptr;
@@ -2438,24 +2439,6 @@ NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge,
     return nullptr;
 
   unsigned IDFSNum = InstrToDFSNum(I);
-  // Pretty much all of the instructions we can convert to phi of ops over a
-  // backedge that are adds, are really induction variables, and those are
-  // pretty much pointless to convert.  This is very coarse-grained for a
-  // test, so if we do find some value, we can change it later.
-  // But otherwise, what can happen is we convert the induction variable from
-  //
-  // i = phi (0, tmp)
-  // tmp = i + 1
-  //
-  // to
-  // i = phi (0, tmpphi)
-  // tmpphi = phi(1, tmpphi+1)
-  //
-  // Which we don't want to happen.  We could just avoid this for all non-cycle
-  // free phis, and we made go that route.
-  if (HasBackedge && I->getOpcode() == Instruction::Add)
-    return nullptr;
-
   SmallPtrSet<const Value *, 8> ProcessedPHIs;
   // TODO: We don't do phi translation on memory accesses because it's
   // complicated. For a load, we'd need to be able to simulate a new memoryuse,
@@ -2470,6 +2453,16 @@ NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge,
 
   // Convert op of phis to phi of ops
   for (auto &Op : I->operands()) {
+    // TODO: We can't handle expressions that must be recursively translated
+    // IE
+    // a = phi (b, c)
+    // f = use a
+    // g = f + phi of something
+    // To properly make a phi of ops for g, we'd have to properly translate and
+    // use the instruction for f.  We should add this by splitting out the
+    // instruction creation we do below.
+    if (isa<Instruction>(Op) && PHINodeUses.count(cast<Instruction>(Op)))
+      return nullptr;
     if (!isa<PHINode>(Op))
       continue;
     auto *OpPHI = cast<PHINode>(Op);
@@ -2782,8 +2775,7 @@ void NewGVN::valueNumberInstruction(Instruction *I) {
       // Make a phi of ops if necessary
       if (Symbolized && !isa<ConstantExpression>(Symbolized) &&
           !isa<VariableExpression>(Symbolized) && PHINodeUses.count(I)) {
-        // FIXME: Backedge argument
-        auto *PHIE = makePossiblePhiOfOps(I, false, Visited);
+        auto *PHIE = makePossiblePhiOfOps(I, Visited);
         if (PHIE)
           Symbolized = PHIE;
       }
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index ed72099ec3ed6..24d28a6c28311 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -27,6 +27,7 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
@@ -141,16 +142,77 @@ static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) {
   return false;
 }
 
-void CodeExtractor::findInputsOutputs(ValueSet &Inputs,
-                                      ValueSet &Outputs) const {
+void CodeExtractor::findAllocas(ValueSet &SinkCands) const {
+  Function *Func = (*Blocks.begin())->getParent();
+  for (BasicBlock &BB : *Func) {
+    if (Blocks.count(&BB))
+      continue;
+    for (Instruction &II : BB) {
+      auto *AI = dyn_cast<AllocaInst>(&II);
+      if (!AI)
+        continue;
+
+      // Returns true if matching life time markers are found within
+      // the outlined region.
+      auto GetLifeTimeMarkers = [&](Instruction *Addr) {
+        Instruction *LifeStart = nullptr, *LifeEnd = nullptr;
+        for (User *U : Addr->users()) {
+          if (!definedInRegion(Blocks, U))
+            return false;
+
+          IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
+          if (IntrInst) {
+            if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start)
+              LifeStart = IntrInst;
+            if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
+              LifeEnd = IntrInst;
+          }
+        }
+        return LifeStart && LifeEnd;
+      };
+
+      if (GetLifeTimeMarkers(AI)) {
+        SinkCands.insert(AI);
+        continue;
+      }
+
+      // Follow the bitcast:
+      Instruction *MarkerAddr = nullptr;
+      for (User *U : AI->users()) {
+        if (U->stripPointerCasts() == AI) {
+          Instruction *Bitcast = cast<Instruction>(U);
+          if (GetLifeTimeMarkers(Bitcast)) {
+            MarkerAddr = Bitcast;
+            continue;
+          }
+        }
+        if (!definedInRegion(Blocks, U)) {
+          MarkerAddr = nullptr;
+          break;
+        }
+      }
+      if (MarkerAddr) {
+        if (!definedInRegion(Blocks, MarkerAddr))
+          SinkCands.insert(MarkerAddr);
+        SinkCands.insert(AI);
+      }
+    }
+  }
+}
+
+void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
+                                      const ValueSet &SinkCands) const {
+
   for (BasicBlock *BB : Blocks) {
     // If a used value is defined outside the region, it's an input.  If an
     // instruction is used outside the region, it's an output.
     for (Instruction &II : *BB) {
       for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE;
-           ++OI)
-        if (definedInCaller(Blocks, *OI))
-          Inputs.insert(*OI);
+           ++OI) {
+        Value *V = *OI;
+        if (!SinkCands.count(V) && definedInCaller(Blocks, V))
+          Inputs.insert(V);
+      }
 
       for (User *U : II.users())
         if (!definedInRegion(Blocks, U)) {
@@ -718,7 +780,7 @@ Function *CodeExtractor::extractCodeRegion() {
   if (!isEligible())
     return nullptr;
 
-  ValueSet inputs, outputs;
+  ValueSet inputs, outputs, SinkingCands;
 
   // Assumption: this is a single-entry code region, and the header is the first
   // block in the region.
@@ -757,8 +819,15 @@ Function *CodeExtractor::extractCodeRegion() {
                                                "newFuncRoot");
   newFuncRoot->getInstList().push_back(BranchInst::Create(header));
 
+  findAllocas(SinkingCands);
+
   // Find inputs to, outputs from the code region.
-  findInputsOutputs(inputs, outputs);
+  findInputsOutputs(inputs, outputs, SinkingCands);
+
+  // Now sink all instructions which only have non-phi uses inside the region
+  for (auto *II : SinkingCands)
+    cast<Instruction>(II)->moveBefore(*newFuncRoot,
+                                      newFuncRoot->getFirstInsertionPt());
 
   // Calculate the exit blocks for the extracted region and the total exit
   //  weights for each of those blocks.
diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp
index 8877aeafecdec..9e71cba4f1b7a 100644
--- a/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/lib/Transforms/Utils/PredicateInfo.cpp
@@ -541,7 +541,40 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,
 //
 // TODO: Use this algorithm to perform fast single-variable renaming in
 // promotememtoreg and memoryssa.
-void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpsToRename) {
+void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
+  // Sort OpsToRename since we are going to iterate it.
+  SmallVector<Value *, 8> OpsToRename(OpSet.begin(), OpSet.end());
+  std::sort(OpsToRename.begin(), OpsToRename.end(), [&](const Value *A,
+                                                        const Value *B) {
+    auto *ArgA = dyn_cast_or_null<Argument>(A);
+    auto *ArgB = dyn_cast_or_null<Argument>(B);
+
+    // If A and B are args, order them based on their arg no.
+    if (ArgA && !ArgB)
+      return true;
+    if (ArgB && !ArgA)
+      return false;
+    if (ArgA && ArgB)
+      return ArgA->getArgNo() < ArgB->getArgNo();
+
+    // Else, A are B are instructions.
+    // If they belong to different BBs, order them by the dominance of BBs.
+    auto *AInst = cast<Instruction>(A);
+    auto *BInst = cast<Instruction>(B);
+    if (AInst->getParent() != BInst->getParent())
+      return DT.dominates(AInst->getParent(), BInst->getParent());
+
+    // Else, A and B belong to the same BB.
+    // Order A and B by their dominance.
+    auto *BB = AInst->getParent();
+    auto LookupResult = OBBMap.find(BB);
+    if (LookupResult != OBBMap.end())
+      return LookupResult->second->dominates(AInst, BInst);
+
+    auto Result = OBBMap.insert({BB, make_unique<OrderedBasicBlock>(BB)});
+    return Result.first->second->dominates(AInst, BInst);
+  });
+
   ValueDFS_Compare Compare(OBBMap);
   // Compute liveness, and rename in O(uses) per Op.
   for (auto *Op : OpsToRename) {
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 49effda5d833c..cc6c47e8f978d 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -85,20 +85,6 @@ static bool isCallingConvCCompatible(CallInst *CI) {
   return false;
 }
 
-/// Return true if it only matters that the value is equal or not-equal to zero.
-static bool isOnlyUsedInZeroEqualityComparison(Value *V) {
-  for (User *U : V->users()) {
-    if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
-      if (IC->isEquality())
-        if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
-          if (C->isNullValue())
-            continue;
-    // Unknown instruction.
-    return false;
-  }
-  return true;
-}
-
 /// Return true if it is only used in equality comparisons with With.
 static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
   for (User *U : V->users()) {
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2b83b8426d147..8b9a64c220ccd 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7170,10 +7170,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
   Type *VectorTy;
   unsigned C = getInstructionCost(I, VF, VectorTy);
 
-  // Note: Even if all instructions are scalarized, return true if any memory
-  // accesses appear in the loop to get benefits from address folding etc.
   bool TypeNotScalarized =
-      VF > 1 && !VectorTy->isVoidTy() && TTI.getNumberOfParts(VectorTy) < VF;
+      VF > 1 && VectorTy->isVectorTy() && TTI.getNumberOfParts(VectorTy) < VF;
   return VectorizationCostTy(C, TypeNotScalarized);
 }
 
@@ -7312,7 +7310,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   Type *RetTy = I->getType();
   if (canTruncateToMinimalBitwidth(I, VF))
     RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
-  VectorTy = ToVectorTy(RetTy, VF);
+  VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF);
   auto SE = PSE.getSE();
 
   // TODO: We need to estimate the cost of intrinsic calls.
@@ -7445,9 +7443,10 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
     } else if (Legal->isUniform(Op2)) {
       Op2VK = TargetTransformInfo::OK_UniformValue;
     }
-    SmallVector<const Value *, 4> Operands(I->operand_values()); 
-    return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK,
-                                      Op2VK, Op1VP, Op2VP, Operands);
+    SmallVector<const Value *, 4> Operands(I->operand_values());
+    unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
+    return N * TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK,
+                                          Op2VK, Op1VP, Op2VP, Operands);
   }
   case Instruction::Select: {
     SelectInst *SI = cast<SelectInst>(I);
@@ -7470,7 +7469,15 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   }
   case Instruction::Store:
   case Instruction::Load: {
-    VectorTy = ToVectorTy(getMemInstValueType(I), VF);
+    unsigned Width = VF;
+    if (Width > 1) {
+      InstWidening Decision = getWideningDecision(I, Width);
+      assert(Decision != CM_Unknown &&
+             "CM decision should be taken at this point");
+      if (Decision == CM_Scalarize)
+        Width = 1;
+    }
+    VectorTy = ToVectorTy(getMemInstValueType(I), Width);
     return getMemoryInstructionCost(I, VF);
   }
   case Instruction::ZExt:
@@ -7495,7 +7502,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
     }
 
     Type *SrcScalarTy = I->getOperand(0)->getType();
-    Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF);
+    Type *SrcVecTy =
+        VectorTy->isVectorTy() ? ToVectorTy(SrcScalarTy, VF) : SrcScalarTy;
     if (canTruncateToMinimalBitwidth(I, VF)) {
       // This cast is going to be shrunk. This may remove the cast or it might
       // turn it into slightly different cast. For example, if MinBW == 16,
@@ -7515,7 +7523,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
       }
     }
 
-    return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, I);
+    unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
+    return N * TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, I);
   }
   case Instruction::Call: {
     bool NeedToScalarize;
author	Dimitry Andric <dim@FreeBSD.org>	2017-06-01 20:58:36 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-06-01 20:58:36 +0000
commit	f382538d471e38a9b98f016c4caebd24c8d60b62 (patch)
tree	d30f3d58b1044b5355d50c17a6a96c6a0b35703a /lib/Transforms
parent	ee2f195dd3e40f49698ca4dc2666ec09c770e80d (diff)