summaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt1
-rw-r--r--lib/Transforms/Scalar/ConstantHoisting.cpp6
-rw-r--r--lib/Transforms/Scalar/GVN.cpp164
-rw-r--r--lib/Transforms/Scalar/GVNSink.cpp872
-rw-r--r--lib/Transforms/Scalar/GuardWidening.cpp4
-rw-r--r--lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp7
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp42
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp33
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp7
-rw-r--r--lib/Transforms/Scalar/NewGVN.cpp70
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp3
-rw-r--r--lib/Transforms/Scalar/SROA.cpp2
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp1
-rw-r--r--lib/Transforms/Scalar/SimpleLoopUnswitch.cpp76
14 files changed, 1206 insertions, 82 deletions
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 523390758769..f5196cc46181 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -13,6 +13,7 @@ add_llvm_library(LLVMScalarOpts
GuardWidening.cpp
GVN.cpp
GVNHoist.cpp
+ GVNSink.cpp
IVUsersPrinter.cpp
InductiveRangeCheckElimination.cpp
IndVarSimplify.cpp
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index f62e111460ca..c3810366bf22 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -164,9 +164,9 @@ Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst,
/// \brief Given \p BBs as input, find another set of BBs which collectively
/// dominates \p BBs and have the minimal sum of frequencies. Return the BB
/// set found in \p BBs.
-void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
- BasicBlock *Entry,
- SmallPtrSet<BasicBlock *, 8> &BBs) {
+static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
+ BasicBlock *Entry,
+ SmallPtrSet<BasicBlock *, 8> &BBs) {
assert(!BBs.count(Entry) && "Assume Entry is not in BBs");
// Nodes on the current path to the root.
SmallPtrSet<BasicBlock *, 8> Path;
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 0490d93f6455..0d6e0538261d 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -80,9 +80,10 @@ MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore,
struct llvm::GVN::Expression {
uint32_t opcode;
Type *type;
+ bool commutative;
SmallVector<uint32_t, 4> varargs;
- Expression(uint32_t o = ~2U) : opcode(o) {}
+ Expression(uint32_t o = ~2U) : opcode(o), commutative(false) {}
bool operator==(const Expression &other) const {
if (opcode != other.opcode)
@@ -246,6 +247,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
if (e.varargs[0] > e.varargs[1])
std::swap(e.varargs[0], e.varargs[1]);
+ e.commutative = true;
}
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
@@ -256,6 +258,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
e.opcode = (C->getOpcode() << 8) | Predicate;
+ e.commutative = true;
} else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
II != IE; ++II)
@@ -281,6 +284,7 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
e.opcode = (Opcode << 8) | Predicate;
+ e.commutative = true;
return e;
}
@@ -348,25 +352,25 @@ GVN::ValueTable::~ValueTable() = default;
/// add - Insert a value into the table with a specified value number.
void GVN::ValueTable::add(Value *V, uint32_t num) {
valueNumbering.insert(std::make_pair(V, num));
+ if (PHINode *PN = dyn_cast<PHINode>(V))
+ NumberingPhi[num] = PN;
}
uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
if (AA->doesNotAccessMemory(C)) {
Expression exp = createExpr(C);
- uint32_t &e = expressionNumbering[exp];
- if (!e) e = nextValueNumber++;
+ uint32_t e = assignExpNewValueNum(exp).first;
valueNumbering[C] = e;
return e;
} else if (AA->onlyReadsMemory(C)) {
Expression exp = createExpr(C);
- uint32_t &e = expressionNumbering[exp];
- if (!e) {
- e = nextValueNumber++;
- valueNumbering[C] = e;
- return e;
+ auto ValNum = assignExpNewValueNum(exp);
+ if (ValNum.second) {
+ valueNumbering[C] = ValNum.first;
+ return ValNum.first;
}
if (!MD) {
- e = nextValueNumber++;
+ uint32_t e = assignExpNewValueNum(exp).first;
valueNumbering[C] = e;
return e;
}
@@ -522,23 +526,29 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
case Instruction::ExtractValue:
exp = createExtractvalueExpr(cast<ExtractValueInst>(I));
break;
+ case Instruction::PHI:
+ valueNumbering[V] = nextValueNumber;
+ NumberingPhi[nextValueNumber] = cast<PHINode>(V);
+ return nextValueNumber++;
default:
valueNumbering[V] = nextValueNumber;
return nextValueNumber++;
}
- uint32_t& e = expressionNumbering[exp];
- if (!e) e = nextValueNumber++;
+ uint32_t e = assignExpNewValueNum(exp).first;
valueNumbering[V] = e;
return e;
}
/// Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
-uint32_t GVN::ValueTable::lookup(Value *V) const {
+uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const {
DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
- assert(VI != valueNumbering.end() && "Value not numbered?");
- return VI->second;
+ if (Verify) {
+ assert(VI != valueNumbering.end() && "Value not numbered?");
+ return VI->second;
+ }
+ return (VI != valueNumbering.end()) ? VI->second : 0;
}
/// Returns the value number of the given comparison,
@@ -549,21 +559,29 @@ uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode,
CmpInst::Predicate Predicate,
Value *LHS, Value *RHS) {
Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
- uint32_t& e = expressionNumbering[exp];
- if (!e) e = nextValueNumber++;
- return e;
+ return assignExpNewValueNum(exp).first;
}
/// Remove all entries from the ValueTable.
void GVN::ValueTable::clear() {
valueNumbering.clear();
expressionNumbering.clear();
+ NumberingPhi.clear();
+ PhiTranslateTable.clear();
+ BlockRPONumber.clear();
nextValueNumber = 1;
+ Expressions.clear();
+ ExprIdx.clear();
+ nextExprNumber = 0;
}
/// Remove a value from the value numbering.
void GVN::ValueTable::erase(Value *V) {
+ uint32_t Num = valueNumbering.lookup(V);
valueNumbering.erase(V);
+ // If V is PHINode, V <--> value number is an one-to-one mapping.
+ if (isa<PHINode>(V))
+ NumberingPhi.erase(Num);
}
/// verifyRemoved - Verify that the value is removed from all internal data
@@ -1451,6 +1469,104 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
+/// Return a pair the first field showing the value number of \p Exp and the
+/// second field showing whether it is a value number newly created.
+std::pair<uint32_t, bool>
+GVN::ValueTable::assignExpNewValueNum(Expression &Exp) {
+ uint32_t &e = expressionNumbering[Exp];
+ bool CreateNewValNum = !e;
+ if (CreateNewValNum) {
+ Expressions.push_back(Exp);
+ if (ExprIdx.size() < nextValueNumber + 1)
+ ExprIdx.resize(nextValueNumber * 2);
+ e = nextValueNumber;
+ ExprIdx[nextValueNumber++] = nextExprNumber++;
+ }
+ return {e, CreateNewValNum};
+}
+
+void GVN::ValueTable::assignBlockRPONumber(Function &F) {
+ uint32_t NextBlockNumber = 1;
+ ReversePostOrderTraversal<Function *> RPOT(&F);
+ for (BasicBlock *BB : RPOT)
+ BlockRPONumber[BB] = NextBlockNumber++;
+}
+
+/// Return whether all the values related with the same \p num are
+/// defined in \p BB.
+bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
+ GVN &Gvn) {
+ LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
+ while (Vals && Vals->BB == BB)
+ Vals = Vals->Next;
+ return !Vals;
+}
+
+/// Wrap phiTranslateImpl to provide caching functionality.
+uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
+ const BasicBlock *PhiBlock, uint32_t Num,
+ GVN &Gvn) {
+ auto FindRes = PhiTranslateTable.find({Num, Pred});
+ if (FindRes != PhiTranslateTable.end())
+ return FindRes->second;
+ uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn);
+ PhiTranslateTable.insert({{Num, Pred}, NewNum});
+ return NewNum;
+}
+
+/// Translate value number \p Num using phis, so that it has the values of
+/// the phis in BB.
+uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
+ const BasicBlock *PhiBlock,
+ uint32_t Num, GVN &Gvn) {
+ if (PHINode *PN = NumberingPhi[Num]) {
+ if (BlockRPONumber[Pred] >= BlockRPONumber[PhiBlock])
+ return Num;
+ for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+ if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred)
+ if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false))
+ return TransVal;
+ }
+ return Num;
+ }
+
+ // If there is any value related with Num is defined in a BB other than
+ // PhiBlock, it cannot depend on a phi in PhiBlock without going through
+ // a backedge. We can do an early exit in that case to save compile time.
+ if (!areAllValsInBB(Num, PhiBlock, Gvn))
+ return Num;
+
+ if (ExprIdx[Num] == 0 || Num >= ExprIdx.size())
+ return Num;
+ Expression Exp = Expressions[ExprIdx[Num]];
+
+ for (unsigned i = 0; i < Exp.varargs.size(); i++) {
+ // For InsertValue and ExtractValue, some varargs are index numbers
+ // instead of value numbers. Those index numbers should not be
+ // translated.
+ if ((i > 1 && Exp.opcode == Instruction::InsertValue) ||
+ (i > 0 && Exp.opcode == Instruction::ExtractValue))
+ continue;
+ Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn);
+ }
+
+ if (Exp.commutative) {
+ assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!");
+ if (Exp.varargs[0] > Exp.varargs[1]) {
+ std::swap(Exp.varargs[0], Exp.varargs[1]);
+ uint32_t Opcode = Exp.opcode >> 8;
+ if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
+ Exp.opcode = (Opcode << 8) |
+ CmpInst::getSwappedPredicate(
+ static_cast<CmpInst::Predicate>(Exp.opcode & 255));
+ }
+ }
+
+ if (uint32_t NewNum = expressionNumbering[Exp])
+ return NewNum;
+ return Num;
+}
+
// In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that number,
// and then scan the list to find one whose block dominates the block in
@@ -1856,6 +1972,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
// Fabricate val-num for dead-code in order to suppress assertion in
// performPRE().
assignValNumForDeadCode();
+ VN.assignBlockRPONumber(F);
bool PREChanged = true;
while (PREChanged) {
PREChanged = performPRE(F);
@@ -1945,7 +2062,9 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
success = false;
break;
}
- if (Value *V = findLeader(Pred, VN.lookup(Op))) {
+ uint32_t TValNo =
+ VN.phiTranslate(Pred, Instr->getParent(), VN.lookup(Op), *this);
+ if (Value *V = findLeader(Pred, TValNo)) {
Instr->setOperand(i, V);
} else {
success = false;
@@ -1962,10 +2081,12 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
Instr->insertBefore(Pred->getTerminator());
Instr->setName(Instr->getName() + ".pre");
Instr->setDebugLoc(Instr->getDebugLoc());
- VN.add(Instr, ValNo);
+
+ unsigned Num = VN.lookupOrAdd(Instr);
+ VN.add(Instr, Num);
// Update the availability map to include the new instruction.
- addToLeaderTable(ValNo, Instr, Pred);
+ addToLeaderTable(Num, Instr, Pred);
return true;
}
@@ -2014,7 +2135,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
break;
}
- Value *predV = findLeader(P, ValNo);
+ uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
+ Value *predV = findLeader(P, TValNo);
if (!predV) {
predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
PREPred = P;
diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp
new file mode 100644
index 000000000000..5c75f39e381d
--- /dev/null
+++ b/lib/Transforms/Scalar/GVNSink.cpp
@@ -0,0 +1,872 @@
+//===- GVNSink.cpp - sink expressions into successors -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file GVNSink.cpp
+/// This pass attempts to sink instructions into successors, reducing static
+/// instruction count and enabling if-conversion.
+///
+/// We use a variant of global value numbering to decide what can be sunk.
+/// Consider:
+///
+/// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ]
+/// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ]
+/// \ /
+/// [ %e = phi i32 %a2, %c2 ]
+/// [ add i32 %e, 4 ]
+///
+///
+/// GVN would number %a1 and %c1 differently because they compute different
+/// results - the VN of an instruction is a function of its opcode and the
+/// transitive closure of its operands. This is the key property for hoisting
+/// and CSE.
+///
+/// What we want when sinking however is for a numbering that is a function of
+/// the *uses* of an instruction, which allows us to answer the question "if I
+/// replace %a1 with %c1, will it contribute in an equivalent way to all
+/// successive instructions?". The PostValueTable class in GVN provides this
+/// mapping.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/GVNExpression.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <unordered_set>
+using namespace llvm;
+
+#define DEBUG_TYPE "gvn-sink"
+
+STATISTIC(NumRemoved, "Number of instructions removed");
+
+namespace {
+
+static bool isMemoryInst(const Instruction *I) {
+ return isa<LoadInst>(I) || isa<StoreInst>(I) ||
+ (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
+ (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
+}
+
+/// Iterates through instructions in a set of blocks in reverse order from the
+/// first non-terminator. For example (assume all blocks have size n):
+/// LockstepReverseIterator I([B1, B2, B3]);
+/// *I-- = [B1[n], B2[n], B3[n]];
+/// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
+/// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
+/// ...
+///
+/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
+/// to
+/// determine which blocks are still going and the order they appear in the
+/// list returned by operator*.
+class LockstepReverseIterator {
+ ArrayRef<BasicBlock *> Blocks;
+ SmallPtrSet<BasicBlock *, 4> ActiveBlocks;
+ SmallVector<Instruction *, 4> Insts;
+ bool Fail;
+
+public:
+ LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
+ reset();
+ }
+
+ void reset() {
+ Fail = false;
+ ActiveBlocks.clear();
+ for (BasicBlock *BB : Blocks)
+ ActiveBlocks.insert(BB);
+ Insts.clear();
+ for (BasicBlock *BB : Blocks) {
+ if (BB->size() <= 1) {
+ // Block wasn't big enough - only contained a terminator.
+ ActiveBlocks.erase(BB);
+ continue;
+ }
+ Insts.push_back(BB->getTerminator()->getPrevNode());
+ }
+ if (Insts.empty())
+ Fail = true;
+ }
+
+ bool isValid() const { return !Fail; }
+ ArrayRef<Instruction *> operator*() const { return Insts; }
+ SmallPtrSet<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }
+
+ void restrictToBlocks(SmallPtrSetImpl<BasicBlock *> &Blocks) {
+ for (auto II = Insts.begin(); II != Insts.end();) {
+ if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) ==
+ Blocks.end()) {
+ ActiveBlocks.erase((*II)->getParent());
+ II = Insts.erase(II);
+ } else {
+ ++II;
+ }
+ }
+ }
+
+ void operator--() {
+ if (Fail)
+ return;
+ SmallVector<Instruction *, 4> NewInsts;
+ for (auto *Inst : Insts) {
+ if (Inst == &Inst->getParent()->front())
+ ActiveBlocks.erase(Inst->getParent());
+ else
+ NewInsts.push_back(Inst->getPrevNode());
+ }
+ if (NewInsts.empty()) {
+ Fail = true;
+ return;
+ }
+ Insts = NewInsts;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+
+/// Candidate solution for sinking. There may be different ways to
+/// sink instructions, differing in the number of instructions sunk,
+/// the number of predecessors sunk from and the number of PHIs
+/// required.
+struct SinkingInstructionCandidate {
+ unsigned NumBlocks;
+ unsigned NumInstructions;
+ unsigned NumPHIs;
+ unsigned NumMemoryInsts;
+ int Cost = -1;
+ SmallVector<BasicBlock *, 4> Blocks;
+
+ void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
+ unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
+ unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
+ Cost = (NumInstructions * (NumBlocks - 1)) -
+ (NumExtraPHIs *
+ NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
+ - SplitEdgeCost;
+ }
+ bool operator>=(const SinkingInstructionCandidate &Other) const {
+ return Cost >= Other.Cost;
+ }
+};
+
+#ifndef NDEBUG
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+ const SinkingInstructionCandidate &C) {
+ OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
+ << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
+ return OS;
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+
+/// Describes a PHI node that may or may not exist. These track the PHIs
+/// that must be created if we sunk a sequence of instructions. It provides
+/// a hash function for efficient equality comparisons.
+class ModelledPHI {
+ SmallVector<Value *, 4> Values;
+ SmallVector<BasicBlock *, 4> Blocks;
+
+public:
+ ModelledPHI() {}
+ ModelledPHI(const PHINode *PN) {
+ for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
+ Blocks.push_back(PN->getIncomingBlock(I));
+ std::sort(Blocks.begin(), Blocks.end());
+
+ // This assumes the PHI is already well-formed and there aren't conflicting
+ // incoming values for the same block.
+ for (auto *B : Blocks)
+ Values.push_back(PN->getIncomingValueForBlock(B));
+ }
+ /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI
+ /// without the same ID.
+ /// \note This is specifically for DenseMapInfo - do not use this!
+ static ModelledPHI createDummy(size_t ID) {
+ ModelledPHI M;
+ M.Values.push_back(reinterpret_cast<Value*>(ID));
+ return M;
+ }
+
+ /// Create a PHI from an array of incoming values and incoming blocks.
+ template <typename VArray, typename BArray>
+ ModelledPHI(const VArray &V, const BArray &B) {
+ std::copy(V.begin(), V.end(), std::back_inserter(Values));
+ std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
+ }
+
+ /// Create a PHI from [I[OpNum] for I in Insts].
+ template <typename BArray>
+ ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
+ std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
+ for (auto *I : Insts)
+ Values.push_back(I->getOperand(OpNum));
+ }
+
+ /// Restrict the PHI's contents down to only \c NewBlocks.
+ /// \c NewBlocks must be a subset of \c this->Blocks.
+ void restrictToBlocks(const SmallPtrSetImpl<BasicBlock *> &NewBlocks) {
+ auto BI = Blocks.begin();
+ auto VI = Values.begin();
+ while (BI != Blocks.end()) {
+ assert(VI != Values.end());
+ if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) ==
+ NewBlocks.end()) {
+ BI = Blocks.erase(BI);
+ VI = Values.erase(VI);
+ } else {
+ ++BI;
+ ++VI;
+ }
+ }
+ assert(Blocks.size() == NewBlocks.size());
+ }
+
+ ArrayRef<Value *> getValues() const { return Values; }
+
+ bool areAllIncomingValuesSame() const {
+ return all_of(Values, [&](Value *V) { return V == Values[0]; });
+ }
+ bool areAllIncomingValuesSameType() const {
+ return all_of(
+ Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
+ }
+ bool areAnyIncomingValuesConstant() const {
+ return any_of(Values, [&](Value *V) { return isa<Constant>(V); });
+ }
+ // Hash functor
+ unsigned hash() const {
+ return (unsigned)hash_combine_range(Values.begin(), Values.end());
+ }
+ bool operator==(const ModelledPHI &Other) const {
+ return Values == Other.Values && Blocks == Other.Blocks;
+ }
+};
+
+template <typename ModelledPHI> struct DenseMapInfo {
+ static inline ModelledPHI &getEmptyKey() {
+ static ModelledPHI Dummy = ModelledPHI::createDummy(0);
+ return Dummy;
+ }
+ static inline ModelledPHI &getTombstoneKey() {
+ static ModelledPHI Dummy = ModelledPHI::createDummy(1);
+ return Dummy;
+ }
+ static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
+ static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
+ return LHS == RHS;
+ }
+};
+
+typedef DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>> ModelledPHISet;
+
+//===----------------------------------------------------------------------===//
+// ValueTable
+//===----------------------------------------------------------------------===//
+// This is a value number table where the value number is a function of the
+// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
+// that the program would be equivalent if we replaced A with PHI(A, B).
+//===----------------------------------------------------------------------===//
+
+/// A GVN expression describing how an instruction is used. The operands
+/// field of BasicExpression is used to store uses, not operands.
+///
+/// This class also contains fields for discriminators used when determining
+/// equivalence of instructions with sideeffects.
+class InstructionUseExpr : public GVNExpression::BasicExpression {
+ unsigned MemoryUseOrder = -1;
+ bool Volatile = false;
+
+public:
+ InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
+ BumpPtrAllocator &A)
+ : GVNExpression::BasicExpression(I->getNumUses()) {
+ allocateOperands(R, A);
+ setOpcode(I->getOpcode());
+ setType(I->getType());
+
+ for (auto &U : I->uses())
+ op_push_back(U.getUser());
+ std::sort(op_begin(), op_end());
+ }
+ void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
+ void setVolatile(bool V) { Volatile = V; }
+
+ virtual hash_code getHashValue() const {
+ return hash_combine(GVNExpression::BasicExpression::getHashValue(),
+ MemoryUseOrder, Volatile);
+ }
+
+ template <typename Function> hash_code getHashValue(Function MapFn) {
+ hash_code H =
+ hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile);
+ for (auto *V : operands())
+ H = hash_combine(H, MapFn(V));
+ return H;
+ }
+};
+
+class ValueTable {
+ DenseMap<Value *, uint32_t> ValueNumbering;
+ DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
+ DenseMap<size_t, uint32_t> HashNumbering;
+ BumpPtrAllocator Allocator;
+ ArrayRecycler<Value *> Recycler;
+ uint32_t nextValueNumber;
+
+ /// Create an expression for I based on its opcode and its uses. If I
+ /// touches or reads memory, the expression is also based upon its memory
+ /// order - see \c getMemoryUseOrder().
+ InstructionUseExpr *createExpr(Instruction *I) {
+ InstructionUseExpr *E =
+ new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
+ if (isMemoryInst(I))
+ E->setMemoryUseOrder(getMemoryUseOrder(I));
+
+ if (CmpInst *C = dyn_cast<CmpInst>(I)) {
+ CmpInst::Predicate Predicate = C->getPredicate();
+ E->setOpcode((C->getOpcode() << 8) | Predicate);
+ }
+ return E;
+ }
+
+ /// Helper to compute the value number for a memory instruction
+ /// (LoadInst/StoreInst), including checking the memory ordering and
+ /// volatility.
+ template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
+ if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
+ return nullptr;
+ InstructionUseExpr *E = createExpr(I);
+ E->setVolatile(I->isVolatile());
+ return E;
+ }
+
+public:
+ /// Returns the value number for the specified value, assigning
+ /// it a new number if it did not have one before.
+ uint32_t lookupOrAdd(Value *V) {
+ auto VI = ValueNumbering.find(V);
+ if (VI != ValueNumbering.end())
+ return VI->second;
+
+ if (!isa<Instruction>(V)) {
+ ValueNumbering[V] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ Instruction *I = cast<Instruction>(V);
+ InstructionUseExpr *exp = nullptr;
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ exp = createMemoryExpr(cast<LoadInst>(I));
+ break;
+ case Instruction::Store:
+ exp = createMemoryExpr(cast<StoreInst>(I));
+ break;
+ case Instruction::Call:
+ case Instruction::Invoke:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ case Instruction::Select:
+ case Instruction::ExtractElement:
+ case Instruction::InsertElement:
+ case Instruction::ShuffleVector:
+ case Instruction::InsertValue:
+ case Instruction::GetElementPtr:
+ exp = createExpr(I);
+ break;
+ default:
+ break;
+ }
+
+ if (!exp) {
+ ValueNumbering[V] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ uint32_t e = ExpressionNumbering[exp];
+ if (!e) {
+ hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
+ auto I = HashNumbering.find(H);
+ if (I != HashNumbering.end()) {
+ e = I->second;
+ } else {
+ e = nextValueNumber++;
+ HashNumbering[H] = e;
+ ExpressionNumbering[exp] = e;
+ }
+ }
+ ValueNumbering[V] = e;
+ return e;
+ }
+
+ /// Returns the value number of the specified value. Fails if the value has
+ /// not yet been numbered.
+ uint32_t lookup(Value *V) const {
+ auto VI = ValueNumbering.find(V);
+ assert(VI != ValueNumbering.end() && "Value not numbered?");
+ return VI->second;
+ }
+
+ /// Removes all value numberings and resets the value table.
+ void clear() {
+ ValueNumbering.clear();
+ ExpressionNumbering.clear();
+ HashNumbering.clear();
+ Recycler.clear(Allocator);
+ nextValueNumber = 1;
+ }
+
+ ValueTable() : nextValueNumber(1) {}
+
+ /// \c Inst uses or touches memory. Return an ID describing the memory state
+ /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
+ /// the exact same memory operations happen after I1 and I2.
+ ///
+ /// This is a very hard problem in general, so we use domain-specific
+ /// knowledge that we only ever check for equivalence between blocks sharing a
+ /// single immediate successor that is common, and when determining if I1 ==
+ /// I2 we will have already determined that next(I1) == next(I2). This
+ /// inductive property allows us to simply return the value number of the next
+ /// instruction that defines memory.
+ uint32_t getMemoryUseOrder(Instruction *Inst) {
+ auto *BB = Inst->getParent();
+ for (auto I = std::next(Inst->getIterator()), E = BB->end();
+ I != E && !I->isTerminator(); ++I) {
+ if (!isMemoryInst(&*I))
+ continue;
+ if (isa<LoadInst>(&*I))
+ continue;
+ CallInst *CI = dyn_cast<CallInst>(&*I);
+ if (CI && CI->onlyReadsMemory())
+ continue;
+ InvokeInst *II = dyn_cast<InvokeInst>(&*I);
+ if (II && II->onlyReadsMemory())
+ continue;
+ return lookupOrAdd(&*I);
+ }
+ return 0;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+
+class GVNSink {
+public:
+ GVNSink() : VN() {}
+ bool run(Function &F) {
+ DEBUG(dbgs() << "GVNSink: running on function @" << F.getName() << "\n");
+
+ unsigned NumSunk = 0;
+ ReversePostOrderTraversal<Function*> RPOT(&F);
+ for (auto *N : RPOT)
+ NumSunk += sinkBB(N);
+
+ return NumSunk > 0;
+ }
+
+private:
+ ValueTable VN;
+
+ bool isInstructionBlacklisted(Instruction *I) {
+ // These instructions may change or break semantics if moved.
+ if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
+ I->getType()->isTokenTy())
+ return true;
+ return false;
+ }
+
+ /// The main heuristic function. Analyze the set of instructions pointed to by
+ /// LRI and return a candidate solution if these instructions can be sunk, or
+ /// None otherwise.
+ Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
+ LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
+ ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
+
+ /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
+ void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
+ SmallPtrSetImpl<Value *> &PHIContents) {
+ for (auto &I : *BB) {
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
+ return;
+
+ auto MPHI = ModelledPHI(PN);
+ PHIs.insert(MPHI);
+ for (auto *V : MPHI.getValues())
+ PHIContents.insert(V);
+ }
+ }
+
+ /// The main instruction sinking driver. Set up state and try and sink
+ /// instructions into BBEnd from its predecessors.
+ unsigned sinkBB(BasicBlock *BBEnd);
+
+ /// Perform the actual mechanics of sinking an instruction from Blocks into
+ /// BBEnd, which is their only successor.
+ void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);
+
+ /// Remove PHIs that all have the same incoming value.
+ void foldPointlessPHINodes(BasicBlock *BB) {
+ auto I = BB->begin();
+ while (PHINode *PN = dyn_cast<PHINode>(I++)) {
+ if (!all_of(PN->incoming_values(),
+ [&](const Value *V) { return V == PN->getIncomingValue(0); }))
+ continue;
+ if (PN->getIncomingValue(0) != PN)
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
+ else
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ PN->eraseFromParent();
+ }
+ }
+};
+
+Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
+ LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
+ ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
+ auto Insts = *LRI;
+ DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
+ : Insts) {
+ I->dump();
+ } dbgs() << " ]\n";);
+
+ DenseMap<uint32_t, unsigned> VNums;
+ for (auto *I : Insts) {
+ uint32_t N = VN.lookupOrAdd(I);
+ DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n");
+ if (N == ~0U)
+ return None;
+ VNums[N]++;
+ }
+ unsigned VNumToSink =
+ std::max_element(VNums.begin(), VNums.end(),
+ [](const std::pair<uint32_t, unsigned> &I,
+ const std::pair<uint32_t, unsigned> &J) {
+ return I.second < J.second;
+ })
+ ->first;
+
+ if (VNums[VNumToSink] == 1)
+ // Can't sink anything!
+ return None;
+
+ // Now restrict the number of incoming blocks down to only those with
+ // VNumToSink.
+ auto &ActivePreds = LRI.getActiveBlocks();
+ unsigned InitialActivePredSize = ActivePreds.size();
+ SmallVector<Instruction *, 4> NewInsts;
+ for (auto *I : Insts) {
+ if (VN.lookup(I) != VNumToSink)
+ ActivePreds.erase(I->getParent());
+ else
+ NewInsts.push_back(I);
+ }
+ for (auto *I : NewInsts)
+ if (isInstructionBlacklisted(I))
+ return None;
+
+ // If we've restricted the incoming blocks, restrict all needed PHIs also
+ // to that set.
+ bool RecomputePHIContents = false;
+ if (ActivePreds.size() != InitialActivePredSize) {
+ ModelledPHISet NewNeededPHIs;
+ for (auto P : NeededPHIs) {
+ P.restrictToBlocks(ActivePreds);
+ NewNeededPHIs.insert(P);
+ }
+ NeededPHIs = NewNeededPHIs;
+ LRI.restrictToBlocks(ActivePreds);
+ RecomputePHIContents = true;
+ }
+
+ // The sunk instruction's results.
+ ModelledPHI NewPHI(NewInsts, ActivePreds);
+
+ // Does sinking this instruction render previous PHIs redundant?
+ if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) {
+ NeededPHIs.erase(NewPHI);
+ RecomputePHIContents = true;
+ }
+
+ if (RecomputePHIContents) {
+ // The needed PHIs have changed, so recompute the set of all needed
+ // values.
+ PHIContents.clear();
+ for (auto &PHI : NeededPHIs)
+ PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
+ }
+
+ // Is this instruction required by a later PHI that doesn't match this PHI?
+ // if so, we can't sink this instruction.
+ for (auto *V : NewPHI.getValues())
+ if (PHIContents.count(V))
+ // V exists in this PHI, but the whole PHI is different to NewPHI
+ // (else it would have been removed earlier). We cannot continue
+ // because this isn't representable.
+ return None;
+
+ // Which operands need PHIs?
+ // FIXME: If any of these fail, we should partition up the candidates to
+ // try and continue making progress.
+ Instruction *I0 = NewInsts[0];
+ for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
+ ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
+ if (PHI.areAllIncomingValuesSame())
+ continue;
+ if (!canReplaceOperandWithVariable(I0, OpNum))
+ // We can 't create a PHI from this instruction!
+ return None;
+ if (NeededPHIs.count(PHI))
+ continue;
+ if (!PHI.areAllIncomingValuesSameType())
+ return None;
+ // Don't create indirect calls! The called value is the final operand.
+ if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
+ PHI.areAnyIncomingValuesConstant())
+ return None;
+
+ NeededPHIs.reserve(NeededPHIs.size());
+ NeededPHIs.insert(PHI);
+ PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
+ }
+
+ if (isMemoryInst(NewInsts[0]))
+ ++MemoryInstNum;
+
+ SinkingInstructionCandidate Cand;
+ Cand.NumInstructions = ++InstNum;
+ Cand.NumMemoryInsts = MemoryInstNum;
+ Cand.NumBlocks = ActivePreds.size();
+ Cand.NumPHIs = NeededPHIs.size();
+ for (auto *C : ActivePreds)
+ Cand.Blocks.push_back(C);
+
+ return Cand;
+}
+
+unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
+ DEBUG(dbgs() << "GVNSink: running on basic block ";
+ BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
+ SmallVector<BasicBlock *, 4> Preds;
+ for (auto *B : predecessors(BBEnd)) {
+ auto *T = B->getTerminator();
+ if (isa<BranchInst>(T) || isa<SwitchInst>(T))
+ Preds.push_back(B);
+ else
+ return 0;
+ }
+ if (Preds.size() < 2)
+ return 0;
+ std::sort(Preds.begin(), Preds.end());
+
+ unsigned NumOrigPreds = Preds.size();
+ // We can only sink instructions through unconditional branches.
+ for (auto I = Preds.begin(); I != Preds.end();) {
+ if ((*I)->getTerminator()->getNumSuccessors() != 1)
+ I = Preds.erase(I);
+ else
+ ++I;
+ }
+
+ LockstepReverseIterator LRI(Preds);
+ SmallVector<SinkingInstructionCandidate, 4> Candidates;
+ unsigned InstNum = 0, MemoryInstNum = 0;
+ ModelledPHISet NeededPHIs;
+ SmallPtrSet<Value *, 4> PHIContents;
+ analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
+ unsigned NumOrigPHIs = NeededPHIs.size();
+
+ while (LRI.isValid()) {
+ auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
+ NeededPHIs, PHIContents);
+ if (!Cand)
+ break;
+ Cand->calculateCost(NumOrigPHIs, Preds.size());
+ Candidates.emplace_back(*Cand);
+ --LRI;
+ }
+
+ std::stable_sort(
+ Candidates.begin(), Candidates.end(),
+ [](const SinkingInstructionCandidate &A,
+ const SinkingInstructionCandidate &B) { return A >= B; });
+ DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
+ : Candidates) dbgs()
+ << " " << C << "\n";);
+
+ // Pick the top candidate, as long it is positive!
+ if (Candidates.empty() || Candidates.front().Cost <= 0)
+ return 0;
+ auto C = Candidates.front();
+
+ DEBUG(dbgs() << " -- Sinking: " << C << "\n");
+ BasicBlock *InsertBB = BBEnd;
+ if (C.Blocks.size() < NumOrigPreds) {
+ DEBUG(dbgs() << " -- Splitting edge to "; BBEnd->printAsOperand(dbgs());
+ dbgs() << "\n");
+ InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
+ if (!InsertBB) {
+ DEBUG(dbgs() << " -- FAILED to split edge!\n");
+ // Edge couldn't be split.
+ return 0;
+ }
+ }
+
+ for (unsigned I = 0; I < C.NumInstructions; ++I)
+ sinkLastInstruction(C.Blocks, InsertBB);
+
+ return C.NumInstructions;
+}
+
+void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
+ BasicBlock *BBEnd) {
+ SmallVector<Instruction *, 4> Insts;
+ for (BasicBlock *BB : Blocks)
+ Insts.push_back(BB->getTerminator()->getPrevNode());
+ Instruction *I0 = Insts.front();
+
+ SmallVector<Value *, 4> NewOperands;
+ for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
+ bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
+ return I->getOperand(O) != I0->getOperand(O);
+ });
+ if (!NeedPHI) {
+ NewOperands.push_back(I0->getOperand(O));
+ continue;
+ }
+
+ // Create a new PHI in the successor block and populate it.
+ auto *Op = I0->getOperand(O);
+ assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
+ auto *PN = PHINode::Create(Op->getType(), Insts.size(),
+ Op->getName() + ".sink", &BBEnd->front());
+ for (auto *I : Insts)
+ PN->addIncoming(I->getOperand(O), I->getParent());
+ NewOperands.push_back(PN);
+ }
+
+ // Arbitrarily use I0 as the new "common" instruction; remap its operands
+ // and move it to the start of the successor block.
+ for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
+ I0->getOperandUse(O).set(NewOperands[O]);
+ I0->moveBefore(&*BBEnd->getFirstInsertionPt());
+
+ // Update metadata and IR flags.
+ for (auto *I : Insts)
+ if (I != I0) {
+ combineMetadataForCSE(I0, I);
+ I0->andIRFlags(I);
+ }
+
+ for (auto *I : Insts)
+ if (I != I0)
+ I->replaceAllUsesWith(I0);
+ foldPointlessPHINodes(BBEnd);
+
+ // Finally nuke all instructions apart from the common instruction.
+ for (auto *I : Insts)
+ if (I != I0)
+ I->eraseFromParent();
+
+ NumRemoved += Insts.size() - 1;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Pass machinery / boilerplate
+
+class GVNSinkLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ GVNSinkLegacyPass() : FunctionPass(ID) {
+ initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+ GVNSink G;
+ return G.run(F);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+} // namespace
+
+PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
+ GVNSink G;
+ if (!G.run(F))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
+
+char GVNSinkLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
+ "Early GVN sinking of Expressions", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
+ "Early GVN sinking of Expressions", false, false)
+
+FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }
diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp
index 198d2b2b024f..65a2cd955672 100644
--- a/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/lib/Transforms/Scalar/GuardWidening.cpp
@@ -537,9 +537,7 @@ bool GuardWideningImpl::parseRangeChecks(
Changed = true;
} else if (match(Check.getBase(),
m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
- unsigned BitWidth = OpLHS->getType()->getScalarSizeInBits();
- KnownBits Known(BitWidth);
- computeKnownBits(OpLHS, Known, DL);
+ KnownBits Known = computeKnownBits(OpLHS, DL);
if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) {
Check.setBase(OpLHS);
APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 85db6e5e1105..e21b0feb7c5a 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -1228,7 +1228,12 @@ void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) {
Loop *LoopConstrainer::createClonedLoopStructure(Loop *Original, Loop *Parent,
ValueToValueMapTy &VM) {
- Loop &New = LPM.addLoop(Parent);
+ Loop &New = *new Loop();
+ if (Parent)
+ Parent->addChildLoop(&New);
+ else
+ LI.addTopLevelLoop(&New);
+ LPM.addLoop(New);
// Add all of the blocks in Original to the new loop.
for (auto *BB : Original->blocks())
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index ada22ae38eb8..2ef8f8563bb9 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -253,6 +253,35 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
return EverChanged;
}
+// Replace uses of Cond with ToVal when safe to do so. If all uses are
+// replaced, we can remove Cond. We cannot blindly replace all uses of Cond
+// because we may incorrectly replace uses when guards/assumes are uses of
+// of `Cond` and we used the guards/assume to reason about the `Cond` value
+// at the end of block. RAUW unconditionally replaces all uses
+// including the guards/assumes themselves and the uses before the
+// guard/assume.
+static void ReplaceFoldableUses(Instruction *Cond, Value *ToVal) {
+ assert(Cond->getType() == ToVal->getType());
+ auto *BB = Cond->getParent();
+ // We can unconditionally replace all uses in non-local blocks (i.e. uses
+ // strictly dominated by BB), since LVI information is true from the
+ // terminator of BB.
+ replaceNonLocalUsesWith(Cond, ToVal);
+ for (Instruction &I : reverse(*BB)) {
+ // Reached the Cond whose uses we are trying to replace, so there are no
+ // more uses.
+ if (&I == Cond)
+ break;
+ // We only replace uses in instructions that are guaranteed to reach the end
+ // of BB, where we know Cond is ToVal.
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+ break;
+ I.replaceUsesOfWith(Cond, ToVal);
+ }
+ if (Cond->use_empty() && !Cond->mayHaveSideEffects())
+ Cond->eraseFromParent();
+}
+
/// Return the cost of duplicating a piece of this block from first non-phi
/// and before StopAt instruction to thread across it. Stop scanning the block
/// when exceeding the threshold. If duplication is impossible, returns ~0U.
@@ -833,13 +862,19 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
CondBr->eraseFromParent();
if (CondCmp->use_empty())
CondCmp->eraseFromParent();
- // TODO: We can safely replace *some* uses of the CondInst if it has
+ // We can safely replace *some* uses of the CondInst if it has
// exactly one value as returned by LVI. RAUW is incorrect in the
// presence of guards and assumes, that have the `Cond` as the use. This
// is because we use the guards/assume to reason about the `Cond` value
// at the end of block, but RAUW unconditionally replaces all uses
// including the guards/assumes themselves and the uses before the
// guard/assume.
+ else if (CondCmp->getParent() == BB) {
+ auto *CI = Ret == LazyValueInfo::True ?
+ ConstantInt::getTrue(CondCmp->getType()) :
+ ConstantInt::getFalse(CondCmp->getType());
+ ReplaceFoldableUses(CondCmp, CI);
+ }
return true;
}
@@ -1325,13 +1360,16 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
CondInst->eraseFromParent();
- // TODO: We can safely replace *some* uses of the CondInst if it has
+ // We can safely replace *some* uses of the CondInst if it has
// exactly one value as returned by LVI. RAUW is incorrect in the
// presence of guards and assumes, that have the `Cond` as the use. This
// is because we use the guards/assume to reason about the `Cond` value
// at the end of block, but RAUW unconditionally replaces all uses
// including the guards/assumes themselves and the uses before the
// guard/assume.
+ else if (OnlyVal && OnlyVal != MultipleVal &&
+ CondInst->getParent() == BB)
+ ReplaceFoldableUses(CondInst, OnlyVal);
}
return true;
}
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 97337ea5ba62..c6a05ecbd0b1 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1035,6 +1035,17 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) {
return nullptr;
}
+// Check if the recurrence variable `VarX` is in the right form to create
+// the idiom. Returns the value coerced to a PHINode if so.
+static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
+ BasicBlock *LoopEntry) {
+ auto *PhiX = dyn_cast<PHINode>(VarX);
+ if (PhiX && PhiX->getParent() == LoopEntry &&
+ (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX))
+ return PhiX;
+ return nullptr;
+}
+
/// Return true iff the idiom is detected in the loop.
///
/// Additionally:
@@ -1110,13 +1121,9 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
}
// step 3: Check the recurrence of variable X
- {
- PhiX = dyn_cast<PHINode>(VarX1);
- if (!PhiX ||
- (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) {
- return false;
- }
- }
+ PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry);
+ if (!PhiX)
+ return false;
// step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
{
@@ -1132,8 +1139,8 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
if (!Inc || !Inc->isOne())
continue;
- PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0));
- if (!Phi || Phi->getParent() != LoopEntry)
+ PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
+ if (!Phi)
continue;
// Check if the result of the instruction is live of the loop.
@@ -1227,8 +1234,8 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
VarX = DefX->getOperand(0);
// step 3: Check the recurrence of variable X
- PhiX = dyn_cast<PHINode>(VarX);
- if (!PhiX || (PhiX->getOperand(0) != DefX && PhiX->getOperand(1) != DefX))
+ PhiX = getRecurrenceVar(VarX, DefX, LoopEntry);
+ if (!PhiX)
return false;
// step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
@@ -1248,8 +1255,8 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
if (!Inc || !Inc->isOne())
continue;
- PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0));
- if (!Phi || Phi->getParent() != LoopEntry)
+ PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
+ if (!Phi)
continue;
CntInst = Inst;
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 6ef1464e9338..19daebd0613a 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -831,7 +831,12 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
/// mapping the blocks with the specified map.
static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
LoopInfo *LI, LPPassManager *LPM) {
- Loop &New = LPM->addLoop(PL);
+ Loop &New = *new Loop();
+ if (PL)
+ PL->addChildLoop(&New);
+ else
+ LI->addTopLevelLoop(&New);
+ LPM->addLoop(New);
// Add all of the blocks in L to the new loop.
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 5cfbf6baeaa9..67abc3116988 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -858,7 +858,14 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
// Filter out unreachable phi operands.
auto Filtered = make_filter_range(PHIOperands, [&](const Use *U) {
- return ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock});
+ if (*U == PN)
+ return false;
+ if (!ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock}))
+ return false;
+ // Things in TOPClass are equivalent to everything.
+ if (ValueToClass.lookup(*U) == TOPClass)
+ return false;
+ return true;
});
std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
[&](const Use *U) -> Value * {
@@ -866,14 +873,6 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
HasBackedge = HasBackedge || isBackedge(BB, PHIBlock);
OriginalOpsConstant =
OriginalOpsConstant && isa<Constant>(*U);
- // Use nullptr to distinguish between things that were
- // originally self-defined and those that have an operand
- // leader that is self-defined.
- if (*U == PN)
- return nullptr;
- // Things in TOPClass are equivalent to everything.
- if (ValueToClass.lookup(*U) == TOPClass)
- return nullptr;
return lookupOperandLeader(*U);
});
return E;
@@ -955,6 +954,10 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E,
CongruenceClass *CC = ValueToClass.lookup(V);
if (CC && CC->getDefiningExpr()) {
+ // If we simplified to something else, we need to communicate
+ // that we're users of the value we simplified to.
+ if (I != V)
+ addAdditionalUsers(V, I);
if (I)
DEBUG(dbgs() << "Simplified " << *I << " to "
<< " expression " << *CC->getDefiningExpr() << "\n");
@@ -1581,6 +1584,30 @@ bool NewGVN::isCycleFree(const Instruction *I) const {
// Evaluate PHI nodes symbolically, and create an expression result.
const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
+ // Resolve irreducible and reducible phi cycles.
+ // FIXME: This is hopefully a temporary solution while we resolve the issues
+ // with fixpointing self-cycles. It currently should be "guaranteed" to be
+ // correct, but non-optimal. The SCCFinder does not, for example, take
+ // reachability of arguments into account, etc.
+ SCCFinder.Start(I);
+ bool CanOptimize = true;
+ SmallPtrSet<Value *, 8> OuterOps;
+
+ auto &Component = SCCFinder.getComponentFor(I);
+ for (auto *Member : Component) {
+ if (!isa<PHINode>(Member)) {
+ CanOptimize = false;
+ break;
+ }
+ for (auto &PHIOp : cast<PHINode>(Member)->operands())
+ if (!isa<PHINode>(PHIOp) || !Component.count(cast<PHINode>(PHIOp)))
+ OuterOps.insert(PHIOp);
+ }
+ if (CanOptimize && OuterOps.size() == 1) {
+ DEBUG(dbgs() << "Resolving cyclic phi to value " << *(*OuterOps.begin())
+ << "\n");
+ return createVariableOrConstant(*OuterOps.begin());
+ }
// True if one of the incoming phi edges is a backedge.
bool HasBackedge = false;
// All constant tracks the state of whether all the *original* phi operands
@@ -1594,17 +1621,7 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
// See if all arguments are the same.
// We track if any were undef because they need special handling.
bool HasUndef = false;
- bool CycleFree = isCycleFree(I);
auto Filtered = make_filter_range(E->operands(), [&](Value *Arg) {
- if (Arg == nullptr)
- return false;
- // Original self-operands are already eliminated during expression creation.
- // We can only eliminate value-wise self-operands if it's cycle
- // free. Otherwise, eliminating the operand can cause our value to change,
- // which can cause us to not eliminate the operand, which changes the value
- // back to what it was before, cycling forever.
- if (CycleFree && Arg == I)
- return false;
if (isa<UndefValue>(Arg)) {
HasUndef = true;
return false;
@@ -1613,6 +1630,14 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
});
// If we are left with no operands, it's dead.
if (Filtered.begin() == Filtered.end()) {
+ // If it has undef at this point, it means there are no-non-undef arguments,
+ // and thus, the value of the phi node must be undef.
+ if (HasUndef) {
+ DEBUG(dbgs() << "PHI Node " << *I
+ << " has no non-undef arguments, valuing it as undef\n");
+ return createConstantExpression(UndefValue::get(I->getType()));
+ }
+
DEBUG(dbgs() << "No arguments of PHI node " << *I << " are live\n");
deleteExpression(E);
return createDeadExpression();
@@ -1642,7 +1667,7 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
// constants, or all operands are ignored but the undef, it also must be
// cycle free.
if (!AllConstant && HasBackedge && NumOps > 0 &&
- !isa<UndefValue>(AllSameValue) && !CycleFree)
+ !isa<UndefValue>(AllSameValue) && !isCycleFree(I))
return E;
// Only have to check for instructions
@@ -3556,6 +3581,7 @@ bool NewGVN::eliminateInstructions(Function &F) {
// Map to store the use counts
DenseMap<const Value *, unsigned int> UseCounts;
for (auto *CC : reverse(CongruenceClasses)) {
+ DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID() << "\n");
// Track the equivalent store info so we can decide whether to try
// dead store elimination.
SmallVector<ValueDFS, 8> PossibleDeadStores;
@@ -3602,8 +3628,6 @@ bool NewGVN::eliminateInstructions(Function &F) {
}
CC->swap(MembersLeft);
} else {
- DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID()
- << "\n");
// If this is a singleton, we can skip it.
if (CC->size() != 1 || RealToTemp.lookup(Leader)) {
// This is a stack because equality replacement/etc may place
@@ -3846,6 +3870,7 @@ bool NewGVN::shouldSwapOperands(const Value *A, const Value *B) const {
return std::make_pair(getRank(A), A) > std::make_pair(getRank(B), B);
}
+namespace {
class NewGVNLegacyPass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
@@ -3865,6 +3890,7 @@ private:
AU.addPreserved<GlobalsAAWrapperPass>();
}
};
+} // namespace
bool NewGVNLegacyPass::runOnFunction(Function &F) {
if (skipFunction(F))
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 8908dae2f545..1d0e8396f6a2 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1779,8 +1779,9 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
// arguments and return value aggressively, and can assume it is not called
// unless we see evidence to the contrary.
if (F.hasLocalLinkage()) {
- if (AddressIsTaken(&F))
+ if (F.hasAddressTaken()) {
AddressTakenFunctions.insert(&F);
+ }
else {
Solver.AddArgumentTrackedFunction(&F);
continue;
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 24bd0a2b7bdf..6e113bccff94 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -326,7 +326,7 @@ private:
/// partition.
uint64_t BeginOffset, EndOffset;
- /// \brief The start end end iterators of this partition.
+ /// \brief The start and end iterators of this partition.
iterator SI, SJ;
/// \brief A collection of split slice tails overlapping the partition.
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 52201d8f3e51..9fa43da99da9 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -48,6 +48,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeEarlyCSELegacyPassPass(Registry);
initializeEarlyCSEMemSSALegacyPassPass(Registry);
initializeGVNHoistLegacyPassPass(Registry);
+ initializeGVNSinkLegacyPassPass(Registry);
initializeFlattenCFGPassPass(Registry);
initializeInductiveRangeCheckEliminationPass(Registry);
initializeIndVarSimplifyLegacyPassPass(Registry);
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index b32a61a7e8f8..0f170e26ce5f 100644
--- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -123,11 +123,62 @@ static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH,
// exit block.
DT.changeImmediateDominator(UnswitchedNode, OldPHNode);
- // Blocks reachable from the unswitched block may need to change their IDom
- // as well.
+ // For everything that moves up the dominator tree, we need to examine the
+ // dominator frontier to see if it additionally should move up the dominator
+ // tree. This lambda appends the dominator frontier for a node on the
+ // worklist.
+ //
+ // Note that we don't currently use the IDFCalculator here for two reasons:
+ // 1) It computes dominator tree levels for the entire function on each run
+ // of 'compute'. While this isn't terrible, given that we expect to update
+ // relatively small subtrees of the domtree, it isn't necessarily the right
+ // tradeoff.
+ // 2) The interface doesn't fit this usage well. It doesn't operate in
+ // append-only, and builds several sets that we don't need.
+ //
+ // FIXME: Neither of these issues are a big deal and could be addressed with
+ // some amount of refactoring of IDFCalculator. That would allow us to share
+ // the core logic here (which is solving the same core problem).
SmallSetVector<BasicBlock *, 4> Worklist;
- for (auto *SuccBB : successors(UnswitchedBB))
- Worklist.insert(SuccBB);
+ SmallVector<DomTreeNode *, 4> DomNodes;
+ SmallPtrSet<BasicBlock *, 4> DomSet;
+ auto AppendDomFrontier = [&](DomTreeNode *Node) {
+ assert(DomNodes.empty() && "Must start with no dominator nodes.");
+ assert(DomSet.empty() && "Must start with an empty dominator set.");
+
+ // First flatten this subtree into sequence of nodes by doing a pre-order
+ // walk.
+ DomNodes.push_back(Node);
+ // We intentionally re-evaluate the size as each node can add new children.
+ // Because this is a tree walk, this cannot add any duplicates.
+ for (int i = 0; i < (int)DomNodes.size(); ++i)
+ DomNodes.insert(DomNodes.end(), DomNodes[i]->begin(), DomNodes[i]->end());
+
+ // Now create a set of the basic blocks so we can quickly test for
+ // dominated successors. We could in theory use the DFS numbers of the
+ // dominator tree for this, but we want this to remain predictably fast
+ // even while we mutate the dominator tree in ways that would invalidate
+ // the DFS numbering.
+ for (DomTreeNode *InnerN : DomNodes)
+ DomSet.insert(InnerN->getBlock());
+
+ // Now re-walk the nodes, appending every successor of every node that isn't
+ // in the set. Note that we don't append the node itself, even though if it
+ // is a successor it does not strictly dominate itself and thus it would be
+ // part of the dominance frontier. The reason we don't append it is that
+ // the node passed in came *from* the worklist and so it has already been
+ // processed.
+ for (DomTreeNode *InnerN : DomNodes)
+ for (BasicBlock *SuccBB : successors(InnerN->getBlock()))
+ if (!DomSet.count(SuccBB))
+ Worklist.insert(SuccBB);
+
+ DomNodes.clear();
+ DomSet.clear();
+ };
+
+ // Append the initial dom frontier nodes.
+ AppendDomFrontier(UnswitchedNode);
// Walk the worklist. We grow the list in the loop and so must recompute size.
for (int i = 0; i < (int)Worklist.size(); ++i) {
@@ -136,20 +187,17 @@ static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH,
DomTreeNode *Node = DT[BB];
assert(!DomChain.count(Node) &&
"Cannot be dominated by a block you can reach!");
- // If this block doesn't have an immediate dominator somewhere in the chain
- // we hoisted over, then its position in the domtree hasn't changed. Either
- // it is above the region hoisted and still valid, or it is below the
- // hoisted block and so was trivially updated. This also applies to
- // everything reachable from this block so we're completely done with the
- // it.
+
+ // If this block had an immediate dominator somewhere in the chain
+ // we hoisted over, then its position in the domtree needs to move as it is
+ // reachable from a node hoisted over this chain.
if (!DomChain.count(Node->getIDom()))
continue;
- // We need to change the IDom for this node but also walk its successors
- // which could have similar dominance position.
DT.changeImmediateDominator(Node, OldPHNode);
- for (auto *SuccBB : successors(BB))
- Worklist.insert(SuccBB);
+
+ // Now add this node's dominator frontier to the worklist as well.
+ AppendDomFrontier(Node);
}
}