Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r-- lib/Transforms/Scalar/CMakeLists.txt | 1
-rw-r--r-- lib/Transforms/Scalar/ConstantHoisting.cpp | 6
-rw-r--r-- lib/Transforms/Scalar/GVN.cpp | 164
-rw-r--r-- lib/Transforms/Scalar/GVNSink.cpp | 872
-rw-r--r-- lib/Transforms/Scalar/GuardWidening.cpp | 4
-rw-r--r-- lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp | 7
-rw-r--r-- lib/Transforms/Scalar/JumpThreading.cpp | 42
-rw-r--r-- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 33
-rw-r--r-- lib/Transforms/Scalar/LoopUnswitch.cpp | 7
-rw-r--r-- lib/Transforms/Scalar/NewGVN.cpp | 70
-rw-r--r-- lib/Transforms/Scalar/SCCP.cpp | 3
-rw-r--r-- lib/Transforms/Scalar/SROA.cpp | 2
-rw-r--r-- lib/Transforms/Scalar/Scalar.cpp | 1
-rw-r--r-- lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 76
14 files changed, 1206 insertions, 82 deletions
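Editor's note: the largest GVN.cpp change below teaches scalar PRE to translate a value number through PHI nodes when looking for a leader in a predecessor block: a phi's number resolves to its incoming number for that predecessor, and an expression is rebuilt from translated operands and looked up again. What follows is a minimal standalone sketch of that idea only; Expr, PhiInfo, and translate() are invented toy names, not LLVM's GVN::ValueTable API, and the real patch additionally caches translations and refuses to translate across backedges using RPO numbers.

#include <cstdint>
#include <map>
#include <tuple>
#include <vector>

// A toy expression: an opcode plus operand value numbers, like GVN::Expression.
struct Expr {
  unsigned Opcode;
  std::vector<uint32_t> Args;
  bool operator<(const Expr &O) const {
    return std::tie(Opcode, Args) < std::tie(O.Opcode, O.Args);
  }
};

// For each value number that is a phi: its incoming value number per
// predecessor (predecessors are plain ints in this sketch).
using PhiInfo = std::map<uint32_t, std::map<int, uint32_t>>;

// Translate value number Num into predecessor Pred: a phi resolves to its
// incoming number, an expression is rebuilt from translated operands and
// looked up again, and anything unknown translates to itself.
uint32_t translate(uint32_t Num, int Pred, const PhiInfo &Phis,
                   const std::map<uint32_t, Expr> &NumToExpr,
                   const std::map<Expr, uint32_t> &ExprToNum) {
  auto P = Phis.find(Num);
  if (P != Phis.end()) {
    auto In = P->second.find(Pred);
    return In != P->second.end() ? In->second : Num;
  }
  auto E = NumToExpr.find(Num);
  if (E == NumToExpr.end())
    return Num; // opaque value; nothing to translate
  Expr T = E->second;
  for (uint32_t &A : T.Args)
    A = translate(A, Pred, Phis, NumToExpr, ExprToNum);
  auto Known = ExprToNum.find(T);
  return Known != ExprToNum.end() ? Known->second : Num;
}

int main() {
  // VN 10 is a phi merging VN 1 (from pred 0) and VN 2 (from pred 1);
  // VN 3 is add(VN 10), and add(VN 1) already has number 7 in pred 0.
  PhiInfo Phis{{10, {{0, 1}, {1, 2}}}};
  std::map<uint32_t, Expr> NumToExpr{{3, Expr{13, {10}}}};
  std::map<Expr, uint32_t> ExprToNum{{Expr{13, {1}}, 7}};
  // Translating add(%phi) into pred 0 finds the existing leader, VN 7.
  return translate(3, 0, Phis, NumToExpr, ExprToNum) == 7 ? 0 : 1;
}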
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 523390758769..f5196cc46181 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -13,6 +13,7 @@ add_llvm_library(LLVMScalarOpts GuardWidening.cpp GVN.cpp GVNHoist.cpp + GVNSink.cpp IVUsersPrinter.cpp InductiveRangeCheckElimination.cpp IndVarSimplify.cpp diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index f62e111460ca..c3810366bf22 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -164,9 +164,9 @@ Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst, /// \brief Given \p BBs as input, find another set of BBs which collectively /// dominates \p BBs and has the minimal sum of frequencies. Return the BB /// set found in \p BBs. -void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, - BasicBlock *Entry, - SmallPtrSet<BasicBlock *, 8> &BBs) { +static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, + BasicBlock *Entry, + SmallPtrSet<BasicBlock *, 8> &BBs) { assert(!BBs.count(Entry) && "Assume Entry is not in BBs"); // Nodes on the current path to the root. SmallPtrSet<BasicBlock *, 8> Path; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 0490d93f6455..0d6e0538261d 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -80,9 +80,10 @@ MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore, struct llvm::GVN::Expression { uint32_t opcode; Type *type; + bool commutative; SmallVector<uint32_t, 4> varargs; - Expression(uint32_t o = ~2U) : opcode(o) {} + Expression(uint32_t o = ~2U) : opcode(o), commutative(false) {} bool operator==(const Expression &other) const { if (opcode != other.opcode) @@ -246,6 +247,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!"); if (e.varargs[0] > e.varargs[1]) std::swap(e.varargs[0], e.varargs[1]); + e.commutative = true; } if (CmpInst *C = dyn_cast<CmpInst>(I)) { @@ -256,6 +258,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { Predicate = CmpInst::getSwappedPredicate(Predicate); } e.opcode = (C->getOpcode() << 8) | Predicate; + e.commutative = true; } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) { for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); II != IE; ++II) @@ -281,6 +284,7 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode, Predicate = CmpInst::getSwappedPredicate(Predicate); } e.opcode = (Opcode << 8) | Predicate; + e.commutative = true; return e; } @@ -348,25 +352,25 @@ GVN::ValueTable::~ValueTable() = default; /// add - Insert a value into the table with a specified value number.
void GVN::ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); + if (PHINode *PN = dyn_cast<PHINode>(V)) + NumberingPhi[num] = PN; } uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) { if (AA->doesNotAccessMemory(C)) { Expression exp = createExpr(C); - uint32_t &e = expressionNumbering[exp]; - if (!e) e = nextValueNumber++; + uint32_t e = assignExpNewValueNum(exp).first; valueNumbering[C] = e; return e; } else if (AA->onlyReadsMemory(C)) { Expression exp = createExpr(C); - uint32_t &e = expressionNumbering[exp]; - if (!e) { - e = nextValueNumber++; - valueNumbering[C] = e; - return e; + auto ValNum = assignExpNewValueNum(exp); + if (ValNum.second) { + valueNumbering[C] = ValNum.first; + return ValNum.first; } if (!MD) { - e = nextValueNumber++; + uint32_t e = assignExpNewValueNum(exp).first; valueNumbering[C] = e; return e; } @@ -522,23 +526,29 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) { case Instruction::ExtractValue: exp = createExtractvalueExpr(cast<ExtractValueInst>(I)); break; + case Instruction::PHI: + valueNumbering[V] = nextValueNumber; + NumberingPhi[nextValueNumber] = cast<PHINode>(V); + return nextValueNumber++; default: valueNumbering[V] = nextValueNumber; return nextValueNumber++; } - uint32_t& e = expressionNumbering[exp]; - if (!e) e = nextValueNumber++; + uint32_t e = assignExpNewValueNum(exp).first; valueNumbering[V] = e; return e; } /// Returns the value number of the specified value. Fails if /// the value has not yet been numbered. -uint32_t GVN::ValueTable::lookup(Value *V) const { +uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const { DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V); - assert(VI != valueNumbering.end() && "Value not numbered?"); - return VI->second; + if (Verify) { + assert(VI != valueNumbering.end() && "Value not numbered?"); + return VI->second; + } + return (VI != valueNumbering.end()) ? VI->second : 0; } /// Returns the value number of the given comparison, @@ -549,21 +559,29 @@ uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) { Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS); - uint32_t& e = expressionNumbering[exp]; - if (!e) e = nextValueNumber++; - return e; + return assignExpNewValueNum(exp).first; } /// Remove all entries from the ValueTable. void GVN::ValueTable::clear() { valueNumbering.clear(); expressionNumbering.clear(); + NumberingPhi.clear(); + PhiTranslateTable.clear(); + BlockRPONumber.clear(); nextValueNumber = 1; + Expressions.clear(); + ExprIdx.clear(); + nextExprNumber = 0; } /// Remove a value from the value numbering. void GVN::ValueTable::erase(Value *V) { + uint32_t Num = valueNumbering.lookup(V); valueNumbering.erase(V); + // If V is a PHINode, V <--> value number is a one-to-one mapping. + if (isa<PHINode>(V)) + NumberingPhi.erase(Num); } /// verifyRemoved - Verify that the value is removed from all internal data @@ -1451,6 +1469,104 @@ bool GVN::processLoad(LoadInst *L) { return false; } +/// Return a pair whose first field is the value number of \p Exp and whose +/// second field indicates whether the value number is newly created.
+std::pair<uint32_t, bool> +GVN::ValueTable::assignExpNewValueNum(Expression &Exp) { + uint32_t &e = expressionNumbering[Exp]; + bool CreateNewValNum = !e; + if (CreateNewValNum) { + Expressions.push_back(Exp); + if (ExprIdx.size() < nextValueNumber + 1) + ExprIdx.resize(nextValueNumber * 2); + e = nextValueNumber; + ExprIdx[nextValueNumber++] = nextExprNumber++; + } + return {e, CreateNewValNum}; +} + +void GVN::ValueTable::assignBlockRPONumber(Function &F) { + uint32_t NextBlockNumber = 1; + ReversePostOrderTraversal<Function *> RPOT(&F); + for (BasicBlock *BB : RPOT) + BlockRPONumber[BB] = NextBlockNumber++; +} + +/// Return whether all the values related to the same \p Num are +/// defined in \p BB. +bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB, + GVN &Gvn) { + LeaderTableEntry *Vals = &Gvn.LeaderTable[Num]; + while (Vals && Vals->BB == BB) + Vals = Vals->Next; + return !Vals; } + +/// Wrap phiTranslateImpl to provide caching functionality. +uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred, + const BasicBlock *PhiBlock, uint32_t Num, + GVN &Gvn) { + auto FindRes = PhiTranslateTable.find({Num, Pred}); + if (FindRes != PhiTranslateTable.end()) + return FindRes->second; + uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn); + PhiTranslateTable.insert({{Num, Pred}, NewNum}); + return NewNum; +} + +/// Translate value number \p Num using phis, so that it has the values of +/// the phis in BB. +uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred, + const BasicBlock *PhiBlock, + uint32_t Num, GVN &Gvn) { + if (PHINode *PN = NumberingPhi[Num]) { + if (BlockRPONumber[Pred] >= BlockRPONumber[PhiBlock]) + return Num; + for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { + if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred) + if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false)) + return TransVal; + } + return Num; + } + + // If any value related to Num is defined in a BB other than + // PhiBlock, it cannot depend on a phi in PhiBlock without going through + // a backedge. We can do an early exit in that case to save compile time. + if (!areAllValsInBB(Num, PhiBlock, Gvn)) + return Num; + + if (Num >= ExprIdx.size() || ExprIdx[Num] == 0) + return Num; + Expression Exp = Expressions[ExprIdx[Num]]; + + for (unsigned i = 0; i < Exp.varargs.size(); i++) { + // For InsertValue and ExtractValue, some varargs are index numbers + // instead of value numbers. Those index numbers should not be + // translated.
+ if ((i > 1 && Exp.opcode == Instruction::InsertValue) || + (i > 0 && Exp.opcode == Instruction::ExtractValue)) + continue; + Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn); + } + + if (Exp.commutative) { + assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!"); + if (Exp.varargs[0] > Exp.varargs[1]) { + std::swap(Exp.varargs[0], Exp.varargs[1]); + uint32_t Opcode = Exp.opcode >> 8; + if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) + Exp.opcode = (Opcode << 8) | + CmpInst::getSwappedPredicate( + static_cast<CmpInst::Predicate>(Exp.opcode & 255)); + } + } + + if (uint32_t NewNum = expressionNumbering[Exp]) + return NewNum; + return Num; +} + // In order to find a leader for a given value number at a // specific basic block, we first obtain the list of all Values for that number, // and then scan the list to find one whose block dominates the block in @@ -1856,6 +1972,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, // Fabricate val-num for dead-code in order to suppress assertion in // performPRE(). assignValNumForDeadCode(); + VN.assignBlockRPONumber(F); bool PREChanged = true; while (PREChanged) { PREChanged = performPRE(F); @@ -1945,7 +2062,9 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, success = false; break; } - if (Value *V = findLeader(Pred, VN.lookup(Op))) { + uint32_t TValNo = + VN.phiTranslate(Pred, Instr->getParent(), VN.lookup(Op), *this); + if (Value *V = findLeader(Pred, TValNo)) { Instr->setOperand(i, V); } else { success = false; @@ -1962,10 +2081,12 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, Instr->insertBefore(Pred->getTerminator()); Instr->setName(Instr->getName() + ".pre"); Instr->setDebugLoc(Instr->getDebugLoc()); - VN.add(Instr, ValNo); + + unsigned Num = VN.lookupOrAdd(Instr); + VN.add(Instr, Num); // Update the availability map to include the new instruction. - addToLeaderTable(ValNo, Instr, Pred); + addToLeaderTable(Num, Instr, Pred); return true; } @@ -2014,7 +2135,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) { break; } - Value *predV = findLeader(P, ValNo); + uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this); + Value *predV = findLeader(P, TValNo); if (!predV) { predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P)); PREPred = P; diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp new file mode 100644 index 000000000000..5c75f39e381d --- /dev/null +++ b/lib/Transforms/Scalar/GVNSink.cpp @@ -0,0 +1,872 @@ +//===- GVNSink.cpp - sink expressions into successors -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file GVNSink.cpp +/// This pass attempts to sink instructions into successors, reducing static +/// instruction count and enabling if-conversion. +/// +/// We use a variant of global value numbering to decide what can be sunk. +/// Consider: +/// +/// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ] +/// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ] +/// \ / +/// [ %e = phi i32 %a2, %c2 ] +/// [ add i32 %e, 4 ] +/// +/// +/// GVN would number %a1 and %c1 differently because they compute different +/// results - the VN of an instruction is a function of its opcode and the +/// transitive closure of its operands. 
This is the key property for hoisting +/// and CSE. +/// +/// What we want when sinking however is for a numbering that is a function of +/// the *uses* of an instruction, which allows us to answer the question "if I +/// replace %a1 with %c1, will it contribute in an equivalent way to all +/// successive instructions?". The PostValueTable class in GVN provides this +/// mapping. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/GVNExpression.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include <unordered_set> +using namespace llvm; + +#define DEBUG_TYPE "gvn-sink" + +STATISTIC(NumRemoved, "Number of instructions removed"); + +namespace { + +static bool isMemoryInst(const Instruction *I) { + return isa<LoadInst>(I) || isa<StoreInst>(I) || + (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) || + (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory()); +} + +/// Iterates through instructions in a set of blocks in reverse order from the +/// first non-terminator. For example (assume all blocks have size n): +/// LockstepReverseIterator I([B1, B2, B3]); +/// *I-- = [B1[n], B2[n], B3[n]]; +/// *I-- = [B1[n-1], B2[n-1], B3[n-1]]; +/// *I-- = [B1[n-2], B2[n-2], B3[n-2]]; +/// ... +/// +/// It continues until all blocks have been exhausted. Use \c getActiveBlocks() +/// to +/// determine which blocks are still going and the order they appear in the +/// list returned by operator*. +class LockstepReverseIterator { + ArrayRef<BasicBlock *> Blocks; + SmallPtrSet<BasicBlock *, 4> ActiveBlocks; + SmallVector<Instruction *, 4> Insts; + bool Fail; + +public: + LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) { + reset(); + } + + void reset() { + Fail = false; + ActiveBlocks.clear(); + for (BasicBlock *BB : Blocks) + ActiveBlocks.insert(BB); + Insts.clear(); + for (BasicBlock *BB : Blocks) { + if (BB->size() <= 1) { + // Block wasn't big enough - only contained a terminator. 
+ ActiveBlocks.erase(BB); + continue; + } + Insts.push_back(BB->getTerminator()->getPrevNode()); + } + if (Insts.empty()) + Fail = true; + } + + bool isValid() const { return !Fail; } + ArrayRef<Instruction *> operator*() const { return Insts; } + SmallPtrSet<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; } + + void restrictToBlocks(SmallPtrSetImpl<BasicBlock *> &Blocks) { + for (auto II = Insts.begin(); II != Insts.end();) { + if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) == + Blocks.end()) { + ActiveBlocks.erase((*II)->getParent()); + II = Insts.erase(II); + } else { + ++II; + } + } + } + + void operator--() { + if (Fail) + return; + SmallVector<Instruction *, 4> NewInsts; + for (auto *Inst : Insts) { + if (Inst == &Inst->getParent()->front()) + ActiveBlocks.erase(Inst->getParent()); + else + NewInsts.push_back(Inst->getPrevNode()); + } + if (NewInsts.empty()) { + Fail = true; + return; + } + Insts = NewInsts; + } +}; + +//===----------------------------------------------------------------------===// + +/// Candidate solution for sinking. There may be different ways to +/// sink instructions, differing in the number of instructions sunk, +/// the number of predecessors sunk from and the number of PHIs +/// required. +struct SinkingInstructionCandidate { + unsigned NumBlocks; + unsigned NumInstructions; + unsigned NumPHIs; + unsigned NumMemoryInsts; + int Cost = -1; + SmallVector<BasicBlock *, 4> Blocks; + + void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) { + unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs; + unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0; + Cost = (NumInstructions * (NumBlocks - 1)) - + (NumExtraPHIs * + NumExtraPHIs) // PHIs are expensive, so make sure they're worth it. + - SplitEdgeCost; + } + bool operator>=(const SinkingInstructionCandidate &Other) const { + return Cost >= Other.Cost; + } +}; + +#ifndef NDEBUG +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const SinkingInstructionCandidate &C) { + OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks + << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">"; + return OS; +} +#endif + +//===----------------------------------------------------------------------===// + +/// Describes a PHI node that may or may not exist. These track the PHIs +/// that must be created if we sunk a sequence of instructions. It provides +/// a hash function for efficient equality comparisons. +class ModelledPHI { + SmallVector<Value *, 4> Values; + SmallVector<BasicBlock *, 4> Blocks; + +public: + ModelledPHI() {} + ModelledPHI(const PHINode *PN) { + for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) + Blocks.push_back(PN->getIncomingBlock(I)); + std::sort(Blocks.begin(), Blocks.end()); + + // This assumes the PHI is already well-formed and there aren't conflicting + // incoming values for the same block. + for (auto *B : Blocks) + Values.push_back(PN->getIncomingValueForBlock(B)); + } + /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI + /// without the same ID. + /// \note This is specifically for DenseMapInfo - do not use this! + static ModelledPHI createDummy(size_t ID) { + ModelledPHI M; + M.Values.push_back(reinterpret_cast<Value*>(ID)); + return M; + } + + /// Create a PHI from an array of incoming values and incoming blocks. 
+ template <typename VArray, typename BArray> + ModelledPHI(const VArray &V, const BArray &B) { + std::copy(V.begin(), V.end(), std::back_inserter(Values)); + std::copy(B.begin(), B.end(), std::back_inserter(Blocks)); + } + + /// Create a PHI from [I[OpNum] for I in Insts]. + template <typename BArray> + ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) { + std::copy(B.begin(), B.end(), std::back_inserter(Blocks)); + for (auto *I : Insts) + Values.push_back(I->getOperand(OpNum)); + } + + /// Restrict the PHI's contents down to only \c NewBlocks. + /// \c NewBlocks must be a subset of \c this->Blocks. + void restrictToBlocks(const SmallPtrSetImpl<BasicBlock *> &NewBlocks) { + auto BI = Blocks.begin(); + auto VI = Values.begin(); + while (BI != Blocks.end()) { + assert(VI != Values.end()); + if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) == + NewBlocks.end()) { + BI = Blocks.erase(BI); + VI = Values.erase(VI); + } else { + ++BI; + ++VI; + } + } + assert(Blocks.size() == NewBlocks.size()); + } + + ArrayRef<Value *> getValues() const { return Values; } + + bool areAllIncomingValuesSame() const { + return all_of(Values, [&](Value *V) { return V == Values[0]; }); + } + bool areAllIncomingValuesSameType() const { + return all_of( + Values, [&](Value *V) { return V->getType() == Values[0]->getType(); }); + } + bool areAnyIncomingValuesConstant() const { + return any_of(Values, [&](Value *V) { return isa<Constant>(V); }); + } + // Hash functor + unsigned hash() const { + return (unsigned)hash_combine_range(Values.begin(), Values.end()); + } + bool operator==(const ModelledPHI &Other) const { + return Values == Other.Values && Blocks == Other.Blocks; + } +}; + +template <typename ModelledPHI> struct DenseMapInfo { + static inline ModelledPHI &getEmptyKey() { + static ModelledPHI Dummy = ModelledPHI::createDummy(0); + return Dummy; + } + static inline ModelledPHI &getTombstoneKey() { + static ModelledPHI Dummy = ModelledPHI::createDummy(1); + return Dummy; + } + static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); } + static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) { + return LHS == RHS; + } +}; + +typedef DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>> ModelledPHISet; + +//===----------------------------------------------------------------------===// +// ValueTable +//===----------------------------------------------------------------------===// +// This is a value number table where the value number is a function of the +// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know +// that the program would be equivalent if we replaced A with PHI(A, B). +//===----------------------------------------------------------------------===// + +/// A GVN expression describing how an instruction is used. The operands +/// field of BasicExpression is used to store uses, not operands. +/// +/// This class also contains fields for discriminators used when determining +/// equivalence of instructions with sideeffects. 
+class InstructionUseExpr : public GVNExpression::BasicExpression { + unsigned MemoryUseOrder = -1; + bool Volatile = false; + +public: + InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R, + BumpPtrAllocator &A) + : GVNExpression::BasicExpression(I->getNumUses()) { + allocateOperands(R, A); + setOpcode(I->getOpcode()); + setType(I->getType()); + + for (auto &U : I->uses()) + op_push_back(U.getUser()); + std::sort(op_begin(), op_end()); + } + void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; } + void setVolatile(bool V) { Volatile = V; } + + virtual hash_code getHashValue() const { + return hash_combine(GVNExpression::BasicExpression::getHashValue(), + MemoryUseOrder, Volatile); + } + + template <typename Function> hash_code getHashValue(Function MapFn) { + hash_code H = + hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile); + for (auto *V : operands()) + H = hash_combine(H, MapFn(V)); + return H; + } +}; + +class ValueTable { + DenseMap<Value *, uint32_t> ValueNumbering; + DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering; + DenseMap<size_t, uint32_t> HashNumbering; + BumpPtrAllocator Allocator; + ArrayRecycler<Value *> Recycler; + uint32_t nextValueNumber; + + /// Create an expression for I based on its opcode and its uses. If I + /// touches or reads memory, the expression is also based upon its memory + /// order - see \c getMemoryUseOrder(). + InstructionUseExpr *createExpr(Instruction *I) { + InstructionUseExpr *E = + new (Allocator) InstructionUseExpr(I, Recycler, Allocator); + if (isMemoryInst(I)) + E->setMemoryUseOrder(getMemoryUseOrder(I)); + + if (CmpInst *C = dyn_cast<CmpInst>(I)) { + CmpInst::Predicate Predicate = C->getPredicate(); + E->setOpcode((C->getOpcode() << 8) | Predicate); + } + return E; + } + + /// Helper to compute the value number for a memory instruction + /// (LoadInst/StoreInst), including checking the memory ordering and + /// volatility. + template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) { + if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic()) + return nullptr; + InstructionUseExpr *E = createExpr(I); + E->setVolatile(I->isVolatile()); + return E; + } + +public: + /// Returns the value number for the specified value, assigning + /// it a new number if it did not have one before. 
+ uint32_t lookupOrAdd(Value *V) { + auto VI = ValueNumbering.find(V); + if (VI != ValueNumbering.end()) + return VI->second; + + if (!isa<Instruction>(V)) { + ValueNumbering[V] = nextValueNumber; + return nextValueNumber++; + } + + Instruction *I = cast<Instruction>(V); + InstructionUseExpr *exp = nullptr; + switch (I->getOpcode()) { + case Instruction::Load: + exp = createMemoryExpr(cast<LoadInst>(I)); + break; + case Instruction::Store: + exp = createMemoryExpr(cast<StoreInst>(I)); + break; + case Instruction::Call: + case Instruction::Invoke: + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::ICmp: + case Instruction::FCmp: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: + case Instruction::Select: + case Instruction::ExtractElement: + case Instruction::InsertElement: + case Instruction::ShuffleVector: + case Instruction::InsertValue: + case Instruction::GetElementPtr: + exp = createExpr(I); + break; + default: + break; + } + + if (!exp) { + ValueNumbering[V] = nextValueNumber; + return nextValueNumber++; + } + + uint32_t e = ExpressionNumbering[exp]; + if (!e) { + hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); }); + auto I = HashNumbering.find(H); + if (I != HashNumbering.end()) { + e = I->second; + } else { + e = nextValueNumber++; + HashNumbering[H] = e; + ExpressionNumbering[exp] = e; + } + } + ValueNumbering[V] = e; + return e; + } + + /// Returns the value number of the specified value. Fails if the value has + /// not yet been numbered. + uint32_t lookup(Value *V) const { + auto VI = ValueNumbering.find(V); + assert(VI != ValueNumbering.end() && "Value not numbered?"); + return VI->second; + } + + /// Removes all value numberings and resets the value table. + void clear() { + ValueNumbering.clear(); + ExpressionNumbering.clear(); + HashNumbering.clear(); + Recycler.clear(Allocator); + nextValueNumber = 1; + } + + ValueTable() : nextValueNumber(1) {} + + /// \c Inst uses or touches memory. Return an ID describing the memory state + /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2), + /// the exact same memory operations happen after I1 and I2. + /// + /// This is a very hard problem in general, so we use domain-specific + /// knowledge that we only ever check for equivalence between blocks sharing a + /// single immediate successor that is common, and when determining if I1 == + /// I2 we will have already determined that next(I1) == next(I2). This + /// inductive property allows us to simply return the value number of the next + /// instruction that defines memory. 
+ uint32_t getMemoryUseOrder(Instruction *Inst) { + auto *BB = Inst->getParent(); + for (auto I = std::next(Inst->getIterator()), E = BB->end(); + I != E && !I->isTerminator(); ++I) { + if (!isMemoryInst(&*I)) + continue; + if (isa<LoadInst>(&*I)) + continue; + CallInst *CI = dyn_cast<CallInst>(&*I); + if (CI && CI->onlyReadsMemory()) + continue; + InvokeInst *II = dyn_cast<InvokeInst>(&*I); + if (II && II->onlyReadsMemory()) + continue; + return lookupOrAdd(&*I); + } + return 0; + } +}; + +//===----------------------------------------------------------------------===// + +class GVNSink { +public: + GVNSink() : VN() {} + bool run(Function &F) { + DEBUG(dbgs() << "GVNSink: running on function @" << F.getName() << "\n"); + + unsigned NumSunk = 0; + ReversePostOrderTraversal<Function*> RPOT(&F); + for (auto *N : RPOT) + NumSunk += sinkBB(N); + + return NumSunk > 0; + } + +private: + ValueTable VN; + + bool isInstructionBlacklisted(Instruction *I) { + // These instructions may change or break semantics if moved. + if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) || + I->getType()->isTokenTy()) + return true; + return false; + } + + /// The main heuristic function. Analyze the set of instructions pointed to by + /// LRI and return a candidate solution if these instructions can be sunk, or + /// None otherwise. + Optional<SinkingInstructionCandidate> analyzeInstructionForSinking( + LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum, + ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents); + + /// Create a ModelledPHI for each PHI in BB, adding to PHIs. + void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs, + SmallPtrSetImpl<Value *> &PHIContents) { + for (auto &I : *BB) { + auto *PN = dyn_cast<PHINode>(&I); + if (!PN) + return; + + auto MPHI = ModelledPHI(PN); + PHIs.insert(MPHI); + for (auto *V : MPHI.getValues()) + PHIContents.insert(V); + } + } + + /// The main instruction sinking driver. Set up state and try and sink + /// instructions into BBEnd from its predecessors. + unsigned sinkBB(BasicBlock *BBEnd); + + /// Perform the actual mechanics of sinking an instruction from Blocks into + /// BBEnd, which is their only successor. + void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd); + + /// Remove PHIs that all have the same incoming value. 
+ void foldPointlessPHINodes(BasicBlock *BB) { + auto I = BB->begin(); + while (PHINode *PN = dyn_cast<PHINode>(I++)) { + if (!all_of(PN->incoming_values(), + [&](const Value *V) { return V == PN->getIncomingValue(0); })) + continue; + if (PN->getIncomingValue(0) != PN) + PN->replaceAllUsesWith(PN->getIncomingValue(0)); + else + PN->replaceAllUsesWith(UndefValue::get(PN->getType())); + PN->eraseFromParent(); + } + } +}; + +Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking( + LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum, + ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) { + auto Insts = *LRI; + DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I + : Insts) { + I->dump(); + } dbgs() << " ]\n";); + + DenseMap<uint32_t, unsigned> VNums; + for (auto *I : Insts) { + uint32_t N = VN.lookupOrAdd(I); + DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n"); + if (N == ~0U) + return None; + VNums[N]++; + } + unsigned VNumToSink = + std::max_element(VNums.begin(), VNums.end(), + [](const std::pair<uint32_t, unsigned> &I, + const std::pair<uint32_t, unsigned> &J) { + return I.second < J.second; + }) + ->first; + + if (VNums[VNumToSink] == 1) + // Can't sink anything! + return None; + + // Now restrict the number of incoming blocks down to only those with + // VNumToSink. + auto &ActivePreds = LRI.getActiveBlocks(); + unsigned InitialActivePredSize = ActivePreds.size(); + SmallVector<Instruction *, 4> NewInsts; + for (auto *I : Insts) { + if (VN.lookup(I) != VNumToSink) + ActivePreds.erase(I->getParent()); + else + NewInsts.push_back(I); + } + for (auto *I : NewInsts) + if (isInstructionBlacklisted(I)) + return None; + + // If we've restricted the incoming blocks, restrict all needed PHIs also + // to that set. + bool RecomputePHIContents = false; + if (ActivePreds.size() != InitialActivePredSize) { + ModelledPHISet NewNeededPHIs; + for (auto P : NeededPHIs) { + P.restrictToBlocks(ActivePreds); + NewNeededPHIs.insert(P); + } + NeededPHIs = NewNeededPHIs; + LRI.restrictToBlocks(ActivePreds); + RecomputePHIContents = true; + } + + // The sunk instruction's results. + ModelledPHI NewPHI(NewInsts, ActivePreds); + + // Does sinking this instruction render previous PHIs redundant? + if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) { + NeededPHIs.erase(NewPHI); + RecomputePHIContents = true; + } + + if (RecomputePHIContents) { + // The needed PHIs have changed, so recompute the set of all needed + // values. + PHIContents.clear(); + for (auto &PHI : NeededPHIs) + PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end()); + } + + // Is this instruction required by a later PHI that doesn't match this PHI? + // If so, we can't sink this instruction. + for (auto *V : NewPHI.getValues()) + if (PHIContents.count(V)) + // V exists in this PHI, but the whole PHI is different to NewPHI + // (else it would have been removed earlier). We cannot continue + // because this isn't representable. + return None; + + // Which operands need PHIs? + // FIXME: If any of these fail, we should partition up the candidates to + // try and continue making progress. + Instruction *I0 = NewInsts[0]; + for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) { + ModelledPHI PHI(NewInsts, OpNum, ActivePreds); + if (PHI.areAllIncomingValuesSame()) + continue; + if (!canReplaceOperandWithVariable(I0, OpNum)) + // We can't create a PHI from this instruction!
+ return None; + if (NeededPHIs.count(PHI)) + continue; + if (!PHI.areAllIncomingValuesSameType()) + return None; + // Don't create indirect calls! The called value is the final operand. + if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 && + PHI.areAnyIncomingValuesConstant()) + return None; + + NeededPHIs.reserve(NeededPHIs.size()); + NeededPHIs.insert(PHI); + PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end()); + } + + if (isMemoryInst(NewInsts[0])) + ++MemoryInstNum; + + SinkingInstructionCandidate Cand; + Cand.NumInstructions = ++InstNum; + Cand.NumMemoryInsts = MemoryInstNum; + Cand.NumBlocks = ActivePreds.size(); + Cand.NumPHIs = NeededPHIs.size(); + for (auto *C : ActivePreds) + Cand.Blocks.push_back(C); + + return Cand; +} + +unsigned GVNSink::sinkBB(BasicBlock *BBEnd) { + DEBUG(dbgs() << "GVNSink: running on basic block "; + BBEnd->printAsOperand(dbgs()); dbgs() << "\n"); + SmallVector<BasicBlock *, 4> Preds; + for (auto *B : predecessors(BBEnd)) { + auto *T = B->getTerminator(); + if (isa<BranchInst>(T) || isa<SwitchInst>(T)) + Preds.push_back(B); + else + return 0; + } + if (Preds.size() < 2) + return 0; + std::sort(Preds.begin(), Preds.end()); + + unsigned NumOrigPreds = Preds.size(); + // We can only sink instructions through unconditional branches. + for (auto I = Preds.begin(); I != Preds.end();) { + if ((*I)->getTerminator()->getNumSuccessors() != 1) + I = Preds.erase(I); + else + ++I; + } + + LockstepReverseIterator LRI(Preds); + SmallVector<SinkingInstructionCandidate, 4> Candidates; + unsigned InstNum = 0, MemoryInstNum = 0; + ModelledPHISet NeededPHIs; + SmallPtrSet<Value *, 4> PHIContents; + analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents); + unsigned NumOrigPHIs = NeededPHIs.size(); + + while (LRI.isValid()) { + auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum, + NeededPHIs, PHIContents); + if (!Cand) + break; + Cand->calculateCost(NumOrigPHIs, Preds.size()); + Candidates.emplace_back(*Cand); + --LRI; + } + + std::stable_sort( + Candidates.begin(), Candidates.end(), + [](const SinkingInstructionCandidate &A, + const SinkingInstructionCandidate &B) { return A >= B; }); + DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C + : Candidates) dbgs() + << " " << C << "\n";); + + // Pick the top candidate, as long as it is positive! + if (Candidates.empty() || Candidates.front().Cost <= 0) + return 0; + auto C = Candidates.front(); + + DEBUG(dbgs() << " -- Sinking: " << C << "\n"); + BasicBlock *InsertBB = BBEnd; + if (C.Blocks.size() < NumOrigPreds) { + DEBUG(dbgs() << " -- Splitting edge to "; BBEnd->printAsOperand(dbgs()); + dbgs() << "\n"); + InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split"); + if (!InsertBB) { + DEBUG(dbgs() << " -- FAILED to split edge!\n"); + // Edge couldn't be split.
+ return 0; + } + } + + for (unsigned I = 0; I < C.NumInstructions; ++I) + sinkLastInstruction(C.Blocks, InsertBB); + + return C.NumInstructions; +} + +void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, + BasicBlock *BBEnd) { + SmallVector<Instruction *, 4> Insts; + for (BasicBlock *BB : Blocks) + Insts.push_back(BB->getTerminator()->getPrevNode()); + Instruction *I0 = Insts.front(); + + SmallVector<Value *, 4> NewOperands; + for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) { + bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) { + return I->getOperand(O) != I0->getOperand(O); + }); + if (!NeedPHI) { + NewOperands.push_back(I0->getOperand(O)); + continue; + } + + // Create a new PHI in the successor block and populate it. + auto *Op = I0->getOperand(O); + assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!"); + auto *PN = PHINode::Create(Op->getType(), Insts.size(), + Op->getName() + ".sink", &BBEnd->front()); + for (auto *I : Insts) + PN->addIncoming(I->getOperand(O), I->getParent()); + NewOperands.push_back(PN); + } + + // Arbitrarily use I0 as the new "common" instruction; remap its operands + // and move it to the start of the successor block. + for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) + I0->getOperandUse(O).set(NewOperands[O]); + I0->moveBefore(&*BBEnd->getFirstInsertionPt()); + + // Update metadata and IR flags. + for (auto *I : Insts) + if (I != I0) { + combineMetadataForCSE(I0, I); + I0->andIRFlags(I); + } + + for (auto *I : Insts) + if (I != I0) + I->replaceAllUsesWith(I0); + foldPointlessPHINodes(BBEnd); + + // Finally nuke all instructions apart from the common instruction. + for (auto *I : Insts) + if (I != I0) + I->eraseFromParent(); + + NumRemoved += Insts.size() - 1; +} + +//////////////////////////////////////////////////////////////////////////////// +// Pass machinery / boilerplate + +class GVNSinkLegacyPass : public FunctionPass { +public: + static char ID; + + GVNSinkLegacyPass() : FunctionPass(ID) { + initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + GVNSink G; + return G.run(F); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<GlobalsAAWrapperPass>(); + } +}; +} // namespace + +PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) { + GVNSink G; + if (!G.run(F)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserve<GlobalsAA>(); + return PA; +} + +char GVNSinkLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink", + "Early GVN sinking of Expressions", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) +INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink", + "Early GVN sinking of Expressions", false, false) + +FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); } diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp index 198d2b2b024f..65a2cd955672 100644 --- a/lib/Transforms/Scalar/GuardWidening.cpp +++ b/lib/Transforms/Scalar/GuardWidening.cpp @@ -537,9 +537,7 @@ bool GuardWideningImpl::parseRangeChecks( Changed = true; } else if (match(Check.getBase(), m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) { - unsigned BitWidth = OpLHS->getType()->getScalarSizeInBits(); - KnownBits Known(BitWidth); - computeKnownBits(OpLHS, Known, DL); + KnownBits Known = computeKnownBits(OpLHS, DL); 
if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) { Check.setBase(OpLHS); APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue(); diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index 85db6e5e1105..e21b0feb7c5a 100644 --- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -1228,7 +1228,12 @@ void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) { Loop *LoopConstrainer::createClonedLoopStructure(Loop *Original, Loop *Parent, ValueToValueMapTy &VM) { - Loop &New = LPM.addLoop(Parent); + Loop &New = *new Loop(); + if (Parent) + Parent->addChildLoop(&New); + else + LI.addTopLevelLoop(&New); + LPM.addLoop(New); // Add all of the blocks in Original to the new loop. for (auto *BB : Original->blocks()) diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index ada22ae38eb8..2ef8f8563bb9 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -253,6 +253,35 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, return EverChanged; } +// Replace uses of Cond with ToVal when safe to do so. If all uses are +// replaced, we can remove Cond. We cannot blindly replace all uses of Cond +// because we may incorrectly replace uses when guards/assumes are uses +// of `Cond` and we used the guards/assume to reason about the `Cond` value +// at the end of block. RAUW unconditionally replaces all uses +// including the guards/assumes themselves and the uses before the +// guard/assume. +static void ReplaceFoldableUses(Instruction *Cond, Value *ToVal) { + assert(Cond->getType() == ToVal->getType()); + auto *BB = Cond->getParent(); + // We can unconditionally replace all uses in non-local blocks (i.e. uses + // strictly dominated by BB), since LVI information is true from the + // terminator of BB. + replaceNonLocalUsesWith(Cond, ToVal); + for (Instruction &I : reverse(*BB)) { + // Reached the Cond whose uses we are trying to replace, so there are no + // more uses. + if (&I == Cond) + break; + // We only replace uses in instructions that are guaranteed to reach the end + // of BB, where we know Cond is ToVal. + if (!isGuaranteedToTransferExecutionToSuccessor(&I)) + break; + I.replaceUsesOfWith(Cond, ToVal); + } + if (Cond->use_empty() && !Cond->mayHaveSideEffects()) + Cond->eraseFromParent(); +} + /// Return the cost of duplicating a piece of this block from first non-phi /// and before StopAt instruction to thread across it. Stop scanning the block /// when exceeding the threshold. If duplication is impossible, returns ~0U. @@ -833,13 +862,19 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { CondBr->eraseFromParent(); if (CondCmp->use_empty()) CondCmp->eraseFromParent(); - // TODO: We can safely replace *some* uses of the CondInst if it has + // We can safely replace *some* uses of the CondInst if it has // exactly one value as returned by LVI. RAUW is incorrect in the // presence of guards and assumes, that have the `Cond` as the use. This // is because we use the guards/assume to reason about the `Cond` value // at the end of block, but RAUW unconditionally replaces all uses // including the guards/assumes themselves and the uses before the // guard/assume. + else if (CondCmp->getParent() == BB) { + auto *CI = Ret == LazyValueInfo::True ?
ConstantInt::getTrue(CondCmp->getType()) : + ConstantInt::getFalse(CondCmp->getType()); + ReplaceFoldableUses(CondCmp, CI); + } return true; } @@ -1325,13 +1360,16 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, if (auto *CondInst = dyn_cast<Instruction>(Cond)) { if (CondInst->use_empty() && !CondInst->mayHaveSideEffects()) CondInst->eraseFromParent(); - // TODO: We can safely replace *some* uses of the CondInst if it has + // We can safely replace *some* uses of the CondInst if it has // exactly one value as returned by LVI. RAUW is incorrect in the // presence of guards and assumes, that have the `Cond` as the use. This // is because we use the guards/assume to reason about the `Cond` value // at the end of block, but RAUW unconditionally replaces all uses // including the guards/assumes themselves and the uses before the // guard/assume. + else if (OnlyVal && OnlyVal != MultipleVal && + CondInst->getParent() == BB) + ReplaceFoldableUses(CondInst, OnlyVal); } return true; } diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 97337ea5ba62..c6a05ecbd0b1 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1035,6 +1035,17 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) { return nullptr; } +// Check if the recurrence variable `VarX` is in the right form to create +// the idiom. Returns the value coerced to a PHINode if so. +static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX, + BasicBlock *LoopEntry) { + auto *PhiX = dyn_cast<PHINode>(VarX); + if (PhiX && PhiX->getParent() == LoopEntry && + (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX)) + return PhiX; + return nullptr; +} + /// Return true iff the idiom is detected in the loop. /// /// Additionally: @@ -1110,13 +1121,9 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, } // step 3: Check the recurrence of variable X - { - PhiX = dyn_cast<PHINode>(VarX1); - if (!PhiX || - (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) { - return false; - } - } + PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry); + if (!PhiX) + return false; // step 4: Find the instruction which counts the population: cnt2 = cnt1 + 1 { @@ -1132,8 +1139,8 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, if (!Inc || !Inc->isOne()) continue; - PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0)); - if (!Phi || Phi->getParent() != LoopEntry) + PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry); + if (!Phi) continue; // Check if the result of the instruction is live out of the loop.
@@ -1227,8 +1234,8 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX, VarX = DefX->getOperand(0); // step 3: Check the recurrence of variable X - PhiX = dyn_cast<PHINode>(VarX); - if (!PhiX || (PhiX->getOperand(0) != DefX && PhiX->getOperand(1) != DefX)) + PhiX = getRecurrenceVar(VarX, DefX, LoopEntry); + if (!PhiX) return false; // step 4: Find the instruction which counts the CTLZ: cnt.next = cnt + 1 @@ -1248,8 +1255,8 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX, if (!Inc || !Inc->isOne()) continue; - PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0)); - if (!Phi || Phi->getParent() != LoopEntry) + PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry); + if (!Phi) continue; CntInst = Inst; diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 6ef1464e9338..19daebd0613a 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -831,7 +831,12 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val, /// mapping the blocks with the specified map. static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM) { - Loop &New = LPM->addLoop(PL); + Loop &New = *new Loop(); + if (PL) + PL->addChildLoop(&New); + else + LI->addTopLevelLoop(&New); + LPM->addLoop(New); // Add all of the blocks in L to the new loop. for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 5cfbf6baeaa9..67abc3116988 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -858,7 +858,14 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, // Filter out unreachable phi operands. auto Filtered = make_filter_range(PHIOperands, [&](const Use *U) { - return ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock}); + if (*U == PN) + return false; + if (!ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock})) + return false; + // Things in TOPClass are equivalent to everything. + if (ValueToClass.lookup(*U) == TOPClass) + return false; + return true; }); std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), [&](const Use *U) -> Value * { @@ -866,14 +873,6 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, HasBackedge = HasBackedge || isBackedge(BB, PHIBlock); OriginalOpsConstant = OriginalOpsConstant && isa<Constant>(*U); - // Use nullptr to distinguish between things that were - // originally self-defined and those that have an operand - // leader that is self-defined. - if (*U == PN) - return nullptr; - // Things in TOPClass are equivalent to everything. - if (ValueToClass.lookup(*U) == TOPClass) - return nullptr; return lookupOperandLeader(*U); }); return E; @@ -955,6 +954,10 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E, CongruenceClass *CC = ValueToClass.lookup(V); if (CC && CC->getDefiningExpr()) { + // If we simplified to something else, we need to communicate + // that we're users of the value we simplified to. + if (I != V) + addAdditionalUsers(V, I); if (I) DEBUG(dbgs() << "Simplified " << *I << " to " << " expression " << *CC->getDefiningExpr() << "\n"); @@ -1581,6 +1584,30 @@ bool NewGVN::isCycleFree(const Instruction *I) const { // Evaluate PHI nodes symbolically, and create an expression result.
const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { + // Resolve irreducible and reducible phi cycles. + // FIXME: This is hopefully a temporary solution while we resolve the issues + // with fixpointing self-cycles. It currently should be "guaranteed" to be + // correct, but non-optimal. The SCCFinder does not, for example, take + // reachability of arguments into account, etc. + SCCFinder.Start(I); + bool CanOptimize = true; + SmallPtrSet<Value *, 8> OuterOps; + + auto &Component = SCCFinder.getComponentFor(I); + for (auto *Member : Component) { + if (!isa<PHINode>(Member)) { + CanOptimize = false; + break; + } + for (auto &PHIOp : cast<PHINode>(Member)->operands()) + if (!isa<PHINode>(PHIOp) || !Component.count(cast<PHINode>(PHIOp))) + OuterOps.insert(PHIOp); + } + if (CanOptimize && OuterOps.size() == 1) { + DEBUG(dbgs() << "Resolving cyclic phi to value " << *(*OuterOps.begin()) + << "\n"); + return createVariableOrConstant(*OuterOps.begin()); + } // True if one of the incoming phi edges is a backedge. bool HasBackedge = false; // All constant tracks the state of whether all the *original* phi operands @@ -1594,17 +1621,7 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { // See if all arguments are the same. // We track if any were undef because they need special handling. bool HasUndef = false; - bool CycleFree = isCycleFree(I); auto Filtered = make_filter_range(E->operands(), [&](Value *Arg) { - if (Arg == nullptr) - return false; - // Original self-operands are already eliminated during expression creation. - // We can only eliminate value-wise self-operands if it's cycle - // free. Otherwise, eliminating the operand can cause our value to change, - // which can cause us to not eliminate the operand, which changes the value - // back to what it was before, cycling forever. - if (CycleFree && Arg == I) - return false; if (isa<UndefValue>(Arg)) { HasUndef = true; return false; @@ -1613,6 +1630,14 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { }); // If we are left with no operands, it's dead. if (Filtered.begin() == Filtered.end()) { + // If it has undef at this point, it means there are no non-undef arguments, + // and thus, the value of the phi node must be undef. + if (HasUndef) { + DEBUG(dbgs() << "PHI Node " << *I + << " has no non-undef arguments, valuing it as undef\n"); + return createConstantExpression(UndefValue::get(I->getType())); + } + DEBUG(dbgs() << "No arguments of PHI node " << *I << " are live\n"); deleteExpression(E); return createDeadExpression(); @@ -1642,7 +1667,7 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { // constants, or all operands are ignored but the undef, it also must be // cycle free. if (!AllConstant && HasBackedge && NumOps > 0 && - !isa<UndefValue>(AllSameValue) && !CycleFree) + !isa<UndefValue>(AllSameValue) && !isCycleFree(I)) return E; // Only have to check for instructions @@ -3556,6 +3581,7 @@ bool NewGVN::eliminateInstructions(Function &F) { // Map to store the use counts DenseMap<const Value *, unsigned int> UseCounts; for (auto *CC : reverse(CongruenceClasses)) { + DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID() << "\n"); // Track the equivalent store info so we can decide whether to try // dead store elimination.
SmallVector<ValueDFS, 8> PossibleDeadStores; @@ -3602,8 +3628,6 @@ bool NewGVN::eliminateInstructions(Function &F) { } CC->swap(MembersLeft); } else { - DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID() - << "\n"); // If this is a singleton, we can skip it. if (CC->size() != 1 || RealToTemp.lookup(Leader)) { // This is a stack because equality replacement/etc may place @@ -3846,6 +3870,7 @@ bool NewGVN::shouldSwapOperands(const Value *A, const Value *B) const { return std::make_pair(getRank(A), A) > std::make_pair(getRank(B), B); } +namespace { class NewGVNLegacyPass : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid. @@ -3865,6 +3890,7 @@ private: AU.addPreserved<GlobalsAAWrapperPass>(); } }; +} // namespace bool NewGVNLegacyPass::runOnFunction(Function &F) { if (skipFunction(F)) diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 8908dae2f545..1d0e8396f6a2 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -1779,8 +1779,9 @@ static bool runIPSCCP(Module &M, const DataLayout &DL, // arguments and return value aggressively, and can assume it is not called // unless we see evidence to the contrary. if (F.hasLocalLinkage()) { - if (AddressIsTaken(&F)) + if (F.hasAddressTaken()) { AddressTakenFunctions.insert(&F); + } else { Solver.AddArgumentTrackedFunction(&F); continue; diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 24bd0a2b7bdf..6e113bccff94 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -326,7 +326,7 @@ private: /// partition. uint64_t BeginOffset, EndOffset; - /// \brief The start end end iterators of this partition. + /// \brief The start and end iterators of this partition. iterator SI, SJ; /// \brief A collection of split slice tails overlapping the partition. diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 52201d8f3e51..9fa43da99da9 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -48,6 +48,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeEarlyCSELegacyPassPass(Registry); initializeEarlyCSEMemSSALegacyPassPass(Registry); initializeGVNHoistLegacyPassPass(Registry); + initializeGVNSinkLegacyPassPass(Registry); initializeFlattenCFGPassPass(Registry); initializeInductiveRangeCheckEliminationPass(Registry); initializeIndVarSimplifyLegacyPassPass(Registry); diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index b32a61a7e8f8..0f170e26ce5f 100644 --- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -123,11 +123,62 @@ static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, // exit block. DT.changeImmediateDominator(UnswitchedNode, OldPHNode); - // Blocks reachable from the unswitched block may need to change their IDom - // as well. + // For everything that moves up the dominator tree, we need to examine the + // dominator frontier to see if it additionally should move up the dominator + // tree. This lambda appends the dominator frontier for a node on the + // worklist. + // + // Note that we don't currently use the IDFCalculator here for two reasons: + // 1) It computes dominator tree levels for the entire function on each run + // of 'compute'. 
While this isn't terrible, given that we expect to update + // relatively small subtrees of the domtree, it isn't necessarily the right + // tradeoff. + // 2) The interface doesn't fit this usage well. It doesn't operate in + // append-only, and builds several sets that we don't need. + // + // FIXME: Neither of these issues are a big deal and could be addressed with + // some amount of refactoring of IDFCalculator. That would allow us to share + // the core logic here (which is solving the same core problem). SmallSetVector<BasicBlock *, 4> Worklist; - for (auto *SuccBB : successors(UnswitchedBB)) - Worklist.insert(SuccBB); + SmallVector<DomTreeNode *, 4> DomNodes; + SmallPtrSet<BasicBlock *, 4> DomSet; + auto AppendDomFrontier = [&](DomTreeNode *Node) { + assert(DomNodes.empty() && "Must start with no dominator nodes."); + assert(DomSet.empty() && "Must start with an empty dominator set."); + + // First flatten this subtree into sequence of nodes by doing a pre-order + // walk. + DomNodes.push_back(Node); + // We intentionally re-evaluate the size as each node can add new children. + // Because this is a tree walk, this cannot add any duplicates. + for (int i = 0; i < (int)DomNodes.size(); ++i) + DomNodes.insert(DomNodes.end(), DomNodes[i]->begin(), DomNodes[i]->end()); + + // Now create a set of the basic blocks so we can quickly test for + // dominated successors. We could in theory use the DFS numbers of the + // dominator tree for this, but we want this to remain predictably fast + // even while we mutate the dominator tree in ways that would invalidate + // the DFS numbering. + for (DomTreeNode *InnerN : DomNodes) + DomSet.insert(InnerN->getBlock()); + + // Now re-walk the nodes, appending every successor of every node that isn't + // in the set. Note that we don't append the node itself, even though if it + // is a successor it does not strictly dominate itself and thus it would be + // part of the dominance frontier. The reason we don't append it is that + // the node passed in came *from* the worklist and so it has already been + // processed. + for (DomTreeNode *InnerN : DomNodes) + for (BasicBlock *SuccBB : successors(InnerN->getBlock())) + if (!DomSet.count(SuccBB)) + Worklist.insert(SuccBB); + + DomNodes.clear(); + DomSet.clear(); + }; + + // Append the initial dom frontier nodes. + AppendDomFrontier(UnswitchedNode); // Walk the worklist. We grow the list in the loop and so must recompute size. for (int i = 0; i < (int)Worklist.size(); ++i) { @@ -136,20 +187,17 @@ static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, DomTreeNode *Node = DT[BB]; assert(!DomChain.count(Node) && "Cannot be dominated by a block you can reach!"); - // If this block doesn't have an immediate dominator somewhere in the chain - // we hoisted over, then its position in the domtree hasn't changed. Either - // it is above the region hoisted and still valid, or it is below the - // hoisted block and so was trivially updated. This also applies to - // everything reachable from this block so we're completely done with the - // it. + + // If this block had an immediate dominator somewhere in the chain + // we hoisted over, then its position in the domtree needs to move as it is + // reachable from a node hoisted over this chain. if (!DomChain.count(Node->getIDom())) continue; - // We need to change the IDom for this node but also walk its successors - // which could have similar dominance position. 
DT.changeImmediateDominator(Node, OldPHNode); - for (auto *SuccBB : successors(BB)) - Worklist.insert(SuccBB); + + // Now add this node's dominator frontier to the worklist as well. + AppendDomFrontier(Node); } }
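Editor's note: the SimpleLoopUnswitch change above replaces a blind successor walk with an on-demand dominance-frontier computation: flatten the dominator subtree pre-order, build a membership set, then collect the CFG successors that escape the subtree. The sketch below illustrates that one step on toy graph nodes; Node and domFrontier are invented for the example and are not LLVM types.

#include <cstddef>
#include <iostream>
#include <set>
#include <vector>

struct Node {
  std::vector<Node *> DomChildren; // children in the dominator tree
  std::vector<Node *> Succs;       // successors in the CFG
};

// Everything the subtree rooted at N reaches in one CFG step without
// dominating it: N's dominance frontier, which is what the patch's
// AppendDomFrontier lambda appends to its worklist.
std::set<Node *> domFrontier(Node *N) {
  // Flatten the dominator subtree with a pre-order walk. Indexing by i while
  // the vector grows is safe because a tree walk never enqueues a node twice.
  std::vector<Node *> Subtree{N};
  for (size_t i = 0; i < Subtree.size(); ++i)
    Subtree.insert(Subtree.end(), Subtree[i]->DomChildren.begin(),
                   Subtree[i]->DomChildren.end());

  std::set<Node *> InSubtree(Subtree.begin(), Subtree.end());
  std::set<Node *> Frontier;
  for (Node *Inner : Subtree)
    for (Node *Succ : Inner->Succs)
      if (!InSubtree.count(Succ))
        Frontier.insert(Succ);
  return Frontier;
}

int main() {
  // Diamond CFG: A -> {B, C}, B -> D, C -> D. A dominates every node, so B's
  // dominator subtree is just {B}, and its only escaping successor is D.
  Node A, B, C, D;
  A.Succs = {&B, &C};
  B.Succs = {&D};
  C.Succs = {&D};
  A.DomChildren = {&B, &C, &D};
  std::cout << (domFrontier(&B).count(&D) ? "D is in B's frontier\n" : "?\n");
}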