summaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt1
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp17
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp49
-rw-r--r--lib/Transforms/Scalar/GVN.cpp2
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp31
-rw-r--r--lib/Transforms/Scalar/InferAddressSpaces.cpp85
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp62
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp4
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp12
-rw-r--r--lib/Transforms/Scalar/LoopSimplifyCFG.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp53
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp12
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp2
-rw-r--r--lib/Transforms/Scalar/NaryReassociate.cpp10
-rw-r--r--lib/Transforms/Scalar/NewGVN.cpp36
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp4
-rw-r--r--lib/Transforms/Scalar/RewriteStatepointsForGC.cpp72
-rw-r--r--lib/Transforms/Scalar/SROA.cpp26
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp2
-rw-r--r--lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp2
-rw-r--r--lib/Transforms/Scalar/SimpleLoopUnswitch.cpp626
22 files changed, 895 insertions, 217 deletions
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index b323ab3bd443..523390758769 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -55,6 +55,7 @@ add_llvm_library(LLVMScalarOpts
Scalar.cpp
Scalarizer.cpp
SeparateConstOffsetFromGEP.cpp
+ SimpleLoopUnswitch.cpp
SimplifyCFGPass.cpp
Sink.cpp
SpeculativeExecution.cpp
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index b5a4cc2f3953..dc864f48bf1f 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -151,7 +151,7 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI,
Changed = true;
}
- if (Value *V = SimplifyInstruction(P, SQ.getWithInstruction(P))) {
+ if (Value *V = SimplifyInstruction(P, SQ)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
Changed = true;
@@ -565,25 +565,14 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
return false;
LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr;
- auto *ACWP = getAnalysisIfAvailable<AssumptionCacheTracker>();
- auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr;
- const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC);
- return runImpl(F, LVI, SQ);
+ return runImpl(F, LVI, getBestSimplifyQuery(*this, F));
}
PreservedAnalyses
CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F);
- auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
- auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F);
- auto *AC = AM.getCachedResult<AssumptionAnalysis>(F);
- const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC);
- bool Changed = runImpl(F, LVI, SQ);
+ bool Changed = runImpl(F, LVI, getBestSimplifyQuery(AM, F));
if (!Changed)
return PreservedAnalyses::all();
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 04479b6e49ac..d8f8a58a5fdf 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -253,6 +253,7 @@ public:
const TargetTransformInfo &TTI;
DominatorTree &DT;
AssumptionCache &AC;
+ const SimplifyQuery SQ;
MemorySSA *MSSA;
std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
typedef RecyclingAllocator<
@@ -315,9 +316,10 @@ public:
unsigned CurrentGeneration;
/// \brief Set up the EarlyCSE runner for a particular function.
- EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI,
- DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA)
- : TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA),
+ EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
+ const TargetTransformInfo &TTI, DominatorTree &DT,
+ AssumptionCache &AC, MemorySSA *MSSA)
+ : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
MSSAUpdater(make_unique<MemorySSAUpdater>(MSSA)), CurrentGeneration(0) {
}
@@ -616,8 +618,6 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
/// stores which can occur in bitfield code among other things.
Instruction *LastStore = nullptr;
- const DataLayout &DL = BB->getModule()->getDataLayout();
-
// See if any instructions in the block can be eliminated. If so, do it. If
// not, add them to AvailableValues.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
@@ -635,10 +635,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// Skip assume intrinsics, they don't really have side effects (although
// they're marked as such to ensure preservation of control dependencies),
- // and this pass will not disturb any of the assumption's control
- // dependencies.
+ // and this pass will not bother with its removal. However, we should mark
+ // its condition as true for all dominated blocks.
if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
- DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
+ auto *CondI =
+ dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0));
+ if (CondI && SimpleValue::canHandle(CondI)) {
+ DEBUG(dbgs() << "EarlyCSE considering assumption: " << *Inst << '\n');
+ AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
+ } else
+ DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
continue;
}
@@ -658,10 +664,25 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (match(Inst, m_Intrinsic<Intrinsic::experimental_guard>())) {
if (auto *CondI =
dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
- // The condition we're on guarding here is true for all dominated
- // locations.
- if (SimpleValue::canHandle(CondI))
+ if (SimpleValue::canHandle(CondI)) {
+ // Do we already know the actual value of this condition?
+ if (auto *KnownCond = AvailableValues.lookup(CondI)) {
+ // Is the condition known to be true?
+ if (isa<ConstantInt>(KnownCond) &&
+ cast<ConstantInt>(KnownCond)->isOneValue()) {
+ DEBUG(dbgs() << "EarlyCSE removing guard: " << *Inst << '\n');
+ removeMSSA(Inst);
+ Inst->eraseFromParent();
+ Changed = true;
+ continue;
+ } else
+ // Use the known value if it wasn't true.
+ cast<CallInst>(Inst)->setArgOperand(0, KnownCond);
+ }
+ // The condition we're on guarding here is true for all dominated
+ // locations.
AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
+ }
}
// Guard intrinsics read all memory, but don't write any memory.
@@ -673,7 +694,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
- if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) {
+ if (Value *V = SimplifyInstruction(Inst, SQ)) {
DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
bool Killed = false;
if (!Inst->use_empty()) {
@@ -964,7 +985,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,
auto *MSSA =
UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
- EarlyCSE CSE(TLI, TTI, DT, AC, MSSA);
+ EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
if (!CSE.run())
return PreservedAnalyses::all();
@@ -1008,7 +1029,7 @@ public:
auto *MSSA =
UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
- EarlyCSE CSE(TLI, TTI, DT, AC, MSSA);
+ EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
return CSE.run();
}
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index be696df548d5..c04646eed49a 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1687,7 +1687,7 @@ bool GVN::processInstruction(Instruction *I) {
// example if it determines that %y is equal to %x then the instruction
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
const DataLayout &DL = I->getModule()->getDataLayout();
- if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) {
+ if (Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC})) {
bool Changed = false;
if (!I->use_empty()) {
I->replaceAllUsesWith(V);
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index dcb2a4a0c6e6..3953198fe605 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -97,7 +97,7 @@ class IndVarSimplify {
TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
- SmallVector<WeakVH, 16> DeadInsts;
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
bool Changed = false;
bool isValidRewrite(Value *FromVal, Value *ToVal);
@@ -415,8 +415,8 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
Compare->getName());
// In the following deletions, PN may become dead and may be deleted.
- // Use a WeakVH to observe whether this happens.
- WeakVH WeakPH = PN;
+ // Use a WeakTrackingVH to observe whether this happens.
+ WeakTrackingVH WeakPH = PN;
// Delete the old floating point exit comparison. The branch starts using the
// new comparison.
@@ -451,7 +451,7 @@ void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
//
BasicBlock *Header = L->getHeader();
- SmallVector<WeakVH, 8> PHIs;
+ SmallVector<WeakTrackingVH, 8> PHIs;
for (BasicBlock::iterator I = Header->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I)
PHIs.push_back(PN);
@@ -901,7 +901,7 @@ class WidenIV {
PHINode *WidePhi;
Instruction *WideInc;
const SCEV *WideIncExpr;
- SmallVectorImpl<WeakVH> &DeadInsts;
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts;
SmallPtrSet<Instruction *,16> Widened;
SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
@@ -941,20 +941,13 @@ class WidenIV {
}
public:
- WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
- ScalarEvolution *SEv, DominatorTree *DTree,
- SmallVectorImpl<WeakVH> &DI, bool HasGuards) :
- OrigPhi(WI.NarrowIV),
- WideType(WI.WidestNativeType),
- LI(LInfo),
- L(LI->getLoopFor(OrigPhi->getParent())),
- SE(SEv),
- DT(DTree),
- HasGuards(HasGuards),
- WidePhi(nullptr),
- WideInc(nullptr),
- WideIncExpr(nullptr),
- DeadInsts(DI) {
+ WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
+ DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
+ bool HasGuards)
+ : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
+ L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
+ HasGuards(HasGuards), WidePhi(nullptr), WideInc(nullptr),
+ WideIncExpr(nullptr), DeadInsts(DI) {
assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
}
diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 9e2563879da2..5e116ef2fe75 100644
--- a/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -138,7 +138,7 @@ private:
// Tries to infer the specific address space of each address expression in
// Postorder.
- void inferAddressSpaces(const std::vector<Value *> &Postorder,
+ void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
ValueToAddrSpaceMapTy *InferredAddrSpace) const;
bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
@@ -147,7 +147,7 @@ private:
// address spaces if InferredAddrSpace says so. Postorder is the postorder of
// all flat expressions in the use-def graph of function F.
bool
- rewriteWithNewAddressSpaces(const std::vector<Value *> &Postorder,
+ rewriteWithNewAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
const ValueToAddrSpaceMapTy &InferredAddrSpace,
Function *F) const;
@@ -162,7 +162,7 @@ private:
std::vector<std::pair<Value *, bool>> &PostorderStack,
DenseSet<Value *> &Visited) const;
- std::vector<Value *> collectFlatAddressExpressions(Function &F) const;
+ std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const;
Value *cloneValueWithNewAddressSpace(
Value *V, unsigned NewAddrSpace,
@@ -274,16 +274,36 @@ void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
DenseSet<Value *> &Visited) const {
assert(V->getType()->isPointerTy());
+
+ // Generic addressing expressions may be hidden in nested constant
+ // expressions.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ // TODO: Look in non-address parts, like icmp operands.
+ if (isAddressExpression(*CE) && Visited.insert(CE).second)
+ PostorderStack.push_back(std::make_pair(CE, false));
+
+ return;
+ }
+
if (isAddressExpression(*V) &&
V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
- if (Visited.insert(V).second)
+ if (Visited.insert(V).second) {
PostorderStack.push_back(std::make_pair(V, false));
+
+ Operator *Op = cast<Operator>(V);
+ for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
+ if (isAddressExpression(*CE) && Visited.insert(CE).second)
+ PostorderStack.emplace_back(CE, false);
+ }
+ }
+ }
}
}
// Returns all flat address expressions in function F. The elements are ordered
// ordered in postorder.
-std::vector<Value *>
+std::vector<WeakTrackingVH>
InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
// This function implements a non-recursive postorder traversal of a partial
// use-def graph of function F.
@@ -326,21 +346,25 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
PushPtrOperand(Cmp->getOperand(0));
PushPtrOperand(Cmp->getOperand(1));
}
+ } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
+ if (!ASC->getType()->isVectorTy())
+ PushPtrOperand(ASC->getPointerOperand());
}
}
- std::vector<Value *> Postorder; // The resultant postorder.
+ std::vector<WeakTrackingVH> Postorder; // The resultant postorder.
while (!PostorderStack.empty()) {
+ Value *TopVal = PostorderStack.back().first;
// If the operands of the expression on the top are already explored,
// adds that expression to the resultant postorder.
if (PostorderStack.back().second) {
- Postorder.push_back(PostorderStack.back().first);
+ Postorder.push_back(TopVal);
PostorderStack.pop_back();
continue;
}
// Otherwise, adds its operands to the stack and explores them.
PostorderStack.back().second = true;
- for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) {
+ for (Value *PtrOperand : getPointerOperands(*TopVal)) {
appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
Visited);
}
@@ -559,7 +583,7 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
return false;
// Collects all flat address expressions in postorder.
- std::vector<Value *> Postorder = collectFlatAddressExpressions(F);
+ std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F);
// Runs a data-flow analysis to refine the address spaces of every expression
// in Postorder.
@@ -571,8 +595,10 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
}
+// Constants need to be tracked through RAUW to handle cases with nested
+// constant expressions, so wrap values in WeakTrackingVH.
void InferAddressSpaces::inferAddressSpaces(
- const std::vector<Value *> &Postorder,
+ ArrayRef<WeakTrackingVH> Postorder,
ValueToAddrSpaceMapTy *InferredAddrSpace) const {
SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
// Initially, all expressions are in the uninitialized address space.
@@ -784,8 +810,8 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I,
}
bool InferAddressSpaces::rewriteWithNewAddressSpaces(
- const std::vector<Value *> &Postorder,
- const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
+ ArrayRef<WeakTrackingVH> Postorder,
+ const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
// For each address expression to be modified, creates a clone of it with its
// pointer operands converted to the new address space. Since the pointer
// operands are converted, the clone is naturally in the new address space by
@@ -812,8 +838,12 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get()));
}
+ SmallVector<Instruction *, 16> DeadInstructions;
+
// Replaces the uses of the old address expressions with the new ones.
- for (Value *V : Postorder) {
+ for (const WeakTrackingVH &WVH : Postorder) {
+ assert(WVH && "value was unexpectedly deleted");
+ Value *V = WVH;
Value *NewV = ValueWithNewAddrSpace.lookup(V);
if (NewV == nullptr)
continue;
@@ -821,6 +851,17 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
DEBUG(dbgs() << "Replacing the uses of " << *V
<< "\n with\n " << *NewV << '\n');
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ Constant *Replace = ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
+ C->getType());
+ if (C != Replace) {
+ DEBUG(dbgs() << "Inserting replacement const cast: "
+ << Replace << ": " << *Replace << '\n');
+ C->replaceAllUsesWith(Replace);
+ V = Replace;
+ }
+ }
+
Value::use_iterator I, E, Next;
for (I = V->use_begin(), E = V->use_end(); I != E; ) {
Use &U = *I;
@@ -881,6 +922,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
}
}
+ if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) {
+ unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+ if (ASC->getDestAddressSpace() == NewAS) {
+ ASC->replaceAllUsesWith(NewV);
+ DeadInstructions.push_back(ASC);
+ continue;
+ }
+ }
+
// Otherwise, replaces the use with flat(NewV).
if (Instruction *I = dyn_cast<Instruction>(V)) {
BasicBlock::iterator InsertPos = std::next(I->getIterator());
@@ -894,10 +944,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
}
}
- if (V->use_empty())
- RecursivelyDeleteTriviallyDeadInstructions(V);
+ if (V->use_empty()) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ DeadInstructions.push_back(I);
+ }
}
+ for (Instruction *I : DeadInstructions)
+ RecursivelyDeleteTriviallyDeadInstructions(I);
+
return true;
}
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index a0da81605a80..7dacaba1193e 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -557,7 +557,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
Value *LHS = PN->getIncomingValue(i);
Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
- Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL);
+ Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, {DL});
if (!Res) {
if (!isa<Constant>(RHS))
continue;
@@ -1250,37 +1250,53 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
BasicBlock *OnlyDest = nullptr;
BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
+ Constant *OnlyVal = nullptr;
+ Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
+ unsigned PredWithKnownDest = 0;
for (const auto &PredValue : PredValues) {
BasicBlock *Pred = PredValue.second;
if (!SeenPreds.insert(Pred).second)
continue; // Duplicate predecessor entry.
- // If the predecessor ends with an indirect goto, we can't change its
- // destination.
- if (isa<IndirectBrInst>(Pred->getTerminator()))
- continue;
-
Constant *Val = PredValue.first;
BasicBlock *DestBB;
if (isa<UndefValue>(Val))
DestBB = nullptr;
- else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+ else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
- else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
} else {
assert(isa<IndirectBrInst>(BB->getTerminator())
&& "Unexpected terminator");
+ assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
DestBB = cast<BlockAddress>(Val)->getBasicBlock();
}
// If we have exactly one destination, remember it for efficiency below.
- if (PredToDestList.empty())
+ if (PredToDestList.empty()) {
OnlyDest = DestBB;
- else if (OnlyDest != DestBB)
- OnlyDest = MultipleDestSentinel;
+ OnlyVal = Val;
+ } else {
+ if (OnlyDest != DestBB)
+ OnlyDest = MultipleDestSentinel;
+ // It possible we have same destination, but different value, e.g. default
+ // case in switchinst.
+ if (Val != OnlyVal)
+ OnlyVal = MultipleVal;
+ }
+
+ // We know where this predecessor is going.
+ ++PredWithKnownDest;
+
+ // If the predecessor ends with an indirect goto, we can't change its
+ // destination.
+ if (isa<IndirectBrInst>(Pred->getTerminator()))
+ continue;
PredToDestList.push_back(std::make_pair(Pred, DestBB));
}
@@ -1293,7 +1309,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
// not thread. By doing so, we do not need to duplicate the current block and
// also miss potential opportunities in case we dont/cant duplicate.
if (OnlyDest && OnlyDest != MultipleDestSentinel) {
- if (PredToDestList.size() ==
+ if (PredWithKnownDest ==
(size_t)std::distance(pred_begin(BB), pred_end(BB))) {
bool SeenFirstBranchToOnlyDest = false;
for (BasicBlock *SuccBB : successors(BB)) {
@@ -1310,11 +1326,18 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
// If the condition is now dead due to the removal of the old terminator,
// erase it.
- auto *CondInst = dyn_cast<Instruction>(Cond);
- if (CondInst && CondInst->use_empty())
- CondInst->eraseFromParent();
- // FIXME: in case this instruction is defined in the current BB and it
- // resolves to a single value from all predecessors, we can do RAUW.
+ if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
+ if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
+ CondInst->eraseFromParent();
+ else if (OnlyVal && OnlyVal != MultipleVal &&
+ CondInst->getParent() == BB) {
+ // If we just learned Cond is the same value for all uses of the
+ // condition, replace it with a constant value
+ CondInst->replaceAllUsesWith(OnlyVal);
+ if (!CondInst->mayHaveSideEffects())
+ CondInst->eraseFromParent();
+ }
+ }
return true;
}
}
@@ -1883,8 +1906,9 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
// If this instruction can be simplified after the operands are updated,
// just use the simplified value instead. This frequently happens due to
// phi translation.
- if (Value *IV =
- SimplifyInstruction(New, BB->getModule()->getDataLayout())) {
+ if (Value *IV = SimplifyInstruction(
+ New,
+ {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
ValueMapping[&*BI] = IV;
if (!New->mayHaveSideEffects()) {
delete New;
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 5042fc18d7c4..410fbb03068f 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -499,7 +499,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(
Instruction *Inst = &*I++;
// Look for memset instructions, which may be optimized to a larger memset.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
- WeakVH InstPtr(&*I);
+ WeakTrackingVH InstPtr(&*I);
if (!processLoopMemSet(MSI, BECount))
continue;
MadeChange = true;
@@ -856,7 +856,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
/// If the stored value is a strided load in the same loop with the same stride
/// this may be transformable into a memcpy. This kicks in for stuff like
-/// for (i) A[i] = B[i];
+/// for (i) A[i] = B[i];
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
const SCEV *BECount) {
assert(SI->isSimple() && "Expected only non-volatile stores.");
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 28e71ca05436..af095560cc02 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -77,7 +77,7 @@ static bool SimplifyLoopInst(Loop *L, DominatorTree *DT, LoopInfo *LI,
// Don't bother simplifying unused instructions.
if (!I->use_empty()) {
- Value *V = SimplifyInstruction(I, DL, TLI, DT, AC);
+ Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC});
if (V && LI->replacementPreservesLCSSAForm(I, V)) {
// Mark all uses for resimplification next time round the loop.
for (User *U : I->users())
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 8ce96cf1b7a6..2ba9265566a8 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -341,7 +341,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// With the operands remapped, see if the instruction constant folds or is
// otherwise simplifyable. This commonly occurs because the entry from PHI
// nodes allows icmps and other instructions to fold.
- Value *V = SimplifyInstruction(C, SQ.getWithInstruction(C));
+ Value *V = SimplifyInstruction(C, SQ);
if (V && LI->replacementPreservesLCSSAForm(C, V)) {
// If so, then delete the temporary instruction and stick the folded value
// in the map.
@@ -670,8 +670,9 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
LPMUpdater &) {
int Threshold = EnableHeaderDuplication ? DefaultRotationThreshold : 0;
const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
- const SimplifyQuery SQ(DL, &AR.TLI, &AR.DT, &AR.AC);
- LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, SQ);
+ const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL);
+ LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE,
+ SQ);
bool Changed = LR.processLoop(&L);
if (!Changed)
@@ -714,10 +715,7 @@ public:
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
auto *SE = SEWP ? &SEWP->getSE() : nullptr;
- auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr;
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- const SimplifyQuery SQ(DL, TLI, DT, AC);
+ const SimplifyQuery SQ = getBestSimplifyQuery(*this, F);
LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE, SQ);
return LR.processLoop(L);
}
diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index a5a81c33a8eb..35c05e84fd68 100644
--- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -40,7 +40,7 @@ static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI) {
bool Changed = false;
// Copy blocks into a temporary array to avoid iterator invalidation issues
// as we remove them.
- SmallVector<WeakVH, 16> Blocks(L.blocks());
+ SmallVector<WeakTrackingVH, 16> Blocks(L.blocks());
for (auto &Block : Blocks) {
// Attempt to merge blocks in the trivial case. Don't modify blocks which
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index af137f6faa63..ccedb98d7fa1 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -900,7 +900,7 @@ static bool isHighCostExpansion(const SCEV *S,
/// If any of the instructions is the specified set are trivially dead, delete
/// them and see if this makes any of their operands subsequently dead.
static bool
-DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
+DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
bool Changed = false;
while (!DeadInsts.empty()) {
@@ -1845,7 +1845,7 @@ class LSRInstance {
void FinalizeChain(IVChain &Chain);
void CollectChains();
void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts);
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts);
void CollectInterestingTypesAndFactors();
void CollectFixupsAndInitialFormulae();
@@ -1920,19 +1920,15 @@ class LSRInstance {
const LSRUse &LU,
SCEVExpander &Rewriter) const;
- Value *Expand(const LSRUse &LU, const LSRFixup &LF,
- const Formula &F,
- BasicBlock::iterator IP,
- SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts) const;
+ Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
+ BasicBlock::iterator IP, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
- const Formula &F,
- SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts) const;
- void Rewrite(const LSRUse &LU, const LSRFixup &LF,
- const Formula &F,
+ const Formula &F, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
+ void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts) const;
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
public:
@@ -3014,7 +3010,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
/// Generate an add or subtract for each IVInc in a chain to materialize the IV
/// user's operand from the previous IV user's operand.
void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts) {
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
// Find the new IVOperand for the head of the chain. It may have been replaced
// by LSR.
const IVInc &Head = Chain.Incs[0];
@@ -4759,12 +4755,10 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
/// Emit instructions for the leading candidate expression for this LSRUse (this
/// is called "expanding").
-Value *LSRInstance::Expand(const LSRUse &LU,
- const LSRFixup &LF,
- const Formula &F,
- BasicBlock::iterator IP,
+Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
+ const Formula &F, BasicBlock::iterator IP,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts) const {
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
if (LU.RigidFormula)
return LF.OperandValToReplace;
@@ -4939,12 +4933,9 @@ Value *LSRInstance::Expand(const LSRUse &LU,
/// Helper for Rewrite. PHI nodes are special because the use of their operands
/// effectively happens in their predecessor blocks, so the expression may need
/// to be expanded in multiple places.
-void LSRInstance::RewriteForPHI(PHINode *PN,
- const LSRUse &LU,
- const LSRFixup &LF,
- const Formula &F,
- SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts) const {
+void LSRInstance::RewriteForPHI(
+ PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
+ SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
DenseMap<BasicBlock *, Value *> Inserted;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
@@ -5016,11 +5007,9 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
/// Emit instructions for the leading candidate expression for this LSRUse (this
/// is called "expanding"), and update the UserInst to reference the newly
/// expanded value.
-void LSRInstance::Rewrite(const LSRUse &LU,
- const LSRFixup &LF,
- const Formula &F,
- SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts) const {
+void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
+ const Formula &F, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
// First, find an insertion point that dominates UserInst. For PHI nodes,
// find the nearest block which dominates all the relevant uses.
if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
@@ -5058,7 +5047,7 @@ void LSRInstance::ImplementSolution(
const SmallVectorImpl<const Formula *> &Solution) {
// Keep track of instructions we may have made dead, so that
// we can remove them after we are done working.
- SmallVector<WeakVH, 16> DeadInsts;
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(),
"lsr");
@@ -5308,7 +5297,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
if (EnablePhiElim && L->isLoopSimplifyForm()) {
- SmallVector<WeakVH, 16> DeadInsts;
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
SCEVExpander Rewriter(SE, DL, "lsr");
#ifndef NDEBUG
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 8fa806a7e8bc..6ef1464e9338 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -1231,11 +1231,12 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
LoopProcessWorklist.push_back(NewLoop);
redoLoop = true;
- // Keep a WeakVH holding onto LIC. If the first call to RewriteLoopBody
+ // Keep a WeakTrackingVH holding onto LIC. If the first call to
+ // RewriteLoopBody
// deletes the instruction (for example by simplifying a PHI that feeds into
// the condition that we're unswitching on), we don't rewrite the second
// iteration.
- WeakVH LICHandle(LIC);
+ WeakTrackingVH LICHandle(LIC);
// Now we rewrite the original code to know that the condition is true and the
// new code to know that the condition is false.
@@ -1262,7 +1263,7 @@ static void RemoveFromWorklist(Instruction *I,
static void ReplaceUsesOfWith(Instruction *I, Value *V,
std::vector<Instruction*> &Worklist,
Loop *L, LPPassManager *LPM) {
- DEBUG(dbgs() << "Replace with '" << *V << "': " << *I);
+ DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n");
// Add uses to the worklist, which may be dead now.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
@@ -1275,7 +1276,8 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,
LPM->deleteSimpleAnalysisValue(I, L);
RemoveFromWorklist(I, Worklist);
I->replaceAllUsesWith(V);
- I->eraseFromParent();
+ if (!I->mayHaveSideEffects())
+ I->eraseFromParent();
++NumSimplify;
}
@@ -1431,7 +1433,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// Simple DCE.
if (isInstructionTriviallyDead(I)) {
- DEBUG(dbgs() << "Remove dead instruction '" << *I);
+ DEBUG(dbgs() << "Remove dead instruction '" << *I << "\n");
// Add uses to the worklist, which may be dead now.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index a3f3f25c1e0f..21a632073da7 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1323,7 +1323,7 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {
// Get the alignment of the byval. If the call doesn't specify the alignment,
// then it is some target specific value that we can't know.
- unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
+ unsigned ByValAlign = CS.getParamAlignment(ArgNo);
if (ByValAlign == 0) return false;
// If it is greater than the memcpy, then we check to see if we can force the
diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp
index c5bf2f28d185..d0bfe3603897 100644
--- a/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -211,7 +211,8 @@ bool NaryReassociatePass::doOneIteration(Function &F) {
Changed = true;
SE->forgetValue(&*I);
I->replaceAllUsesWith(NewI);
- // If SeenExprs constains I's WeakVH, that entry will be replaced with
+ // If SeenExprs constains I's WeakTrackingVH, that entry will be
+ // replaced with
// nullptr.
RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI);
I = NewI->getIterator();
@@ -219,7 +220,7 @@ bool NaryReassociatePass::doOneIteration(Function &F) {
// Add the rewritten instruction to SeenExprs; the original instruction
// is deleted.
const SCEV *NewSCEV = SE->getSCEV(&*I);
- SeenExprs[NewSCEV].push_back(WeakVH(&*I));
+ SeenExprs[NewSCEV].push_back(WeakTrackingVH(&*I));
// Ideally, NewSCEV should equal OldSCEV because tryReassociate(I)
// is equivalent to I. However, ScalarEvolution::getSCEV may
// weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose
@@ -239,7 +240,7 @@ bool NaryReassociatePass::doOneIteration(Function &F) {
//
// This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll.
if (NewSCEV != OldSCEV)
- SeenExprs[OldSCEV].push_back(WeakVH(&*I));
+ SeenExprs[OldSCEV].push_back(WeakTrackingVH(&*I));
}
}
}
@@ -494,7 +495,8 @@ NaryReassociatePass::findClosestMatchingDominator(const SCEV *CandidateExpr,
// future instruction either. Therefore, we pop it out of the stack. This
// optimization makes the algorithm O(n).
while (!Candidates.empty()) {
- // Candidates stores WeakVHs, so a candidate can be nullptr if it's removed
+ // Candidates stores WeakTrackingVHs, so a candidate can be nullptr if it's
+ // removed
// during rewriting.
if (Value *Candidate = Candidates.back()) {
Instruction *CandidateInstruction = cast<Instruction>(Candidate);
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index a014ddd9ba0a..162d91beae76 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -395,7 +395,6 @@ namespace {
class NewGVN {
Function &F;
DominatorTree *DT;
- AssumptionCache *AC;
const TargetLibraryInfo *TLI;
AliasAnalysis *AA;
MemorySSA *MSSA;
@@ -405,6 +404,7 @@ class NewGVN {
BumpPtrAllocator ExpressionAllocator;
ArrayRecycler<Value *> ArgRecycler;
TarjanSCC SCCFinder;
+ const SimplifyQuery SQ;
// Number of function arguments, used by ranking
unsigned int NumFuncArgs;
@@ -504,8 +504,9 @@ public:
NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA,
const DataLayout &DL)
- : F(F), DT(DT), AC(AC), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL),
- PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)) {}
+ : F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL),
+ PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)), SQ(DL, TLI, DT, AC) {
+ }
bool runGVN();
private:
@@ -782,8 +783,7 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T,
E->op_push_back(lookupOperandLeader(Arg1));
E->op_push_back(lookupOperandLeader(Arg2));
- Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), DL, TLI,
- DT, AC);
+ Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), SQ);
if (const Expression *SimplifiedE = checkSimplificationResults(E, nullptr, V))
return SimplifiedE;
return E;
@@ -864,8 +864,8 @@ const Expression *NewGVN::createExpression(Instruction *I) {
"Wrong types on cmp instruction");
assert((E->getOperand(0)->getType() == I->getOperand(0)->getType() &&
E->getOperand(1)->getType() == I->getOperand(1)->getType()));
- Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1),
- DL, TLI, DT, AC);
+ Value *V =
+ SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), SQ);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
} else if (isa<SelectInst>(I)) {
@@ -874,23 +874,23 @@ const Expression *NewGVN::createExpression(Instruction *I) {
assert(E->getOperand(1)->getType() == I->getOperand(1)->getType() &&
E->getOperand(2)->getType() == I->getOperand(2)->getType());
Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1),
- E->getOperand(2), DL, TLI, DT, AC);
+ E->getOperand(2), SQ);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
}
} else if (I->isBinaryOp()) {
- Value *V = SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1),
- DL, TLI, DT, AC);
+ Value *V =
+ SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), SQ);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
} else if (auto *BI = dyn_cast<BitCastInst>(I)) {
- Value *V = SimplifyInstruction(BI, DL, TLI, DT, AC);
+ Value *V =
+ SimplifyCastInst(BI->getOpcode(), BI->getOperand(0), BI->getType(), SQ);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
} else if (isa<GetElementPtrInst>(I)) {
- Value *V = SimplifyGEPInst(E->getType(),
- ArrayRef<Value *>(E->op_begin(), E->op_end()),
- DL, TLI, DT, AC);
+ Value *V = SimplifyGEPInst(
+ E->getType(), ArrayRef<Value *>(E->op_begin(), E->op_end()), SQ);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
} else if (AllConstant) {
@@ -1628,15 +1628,15 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) {
if (PBranch->TrueEdge) {
// If we know the previous predicate is true and we are in the true
// edge then we may be implied true or false.
- if (CmpInst::isImpliedTrueByMatchingCmp(OurPredicate,
- BranchPredicate)) {
+ if (CmpInst::isImpliedTrueByMatchingCmp(BranchPredicate,
+ OurPredicate)) {
addPredicateUsers(PI, I);
return createConstantExpression(
ConstantInt::getTrue(CI->getType()));
}
- if (CmpInst::isImpliedFalseByMatchingCmp(OurPredicate,
- BranchPredicate)) {
+ if (CmpInst::isImpliedFalseByMatchingCmp(BranchPredicate,
+ OurPredicate)) {
addPredicateUsers(PI, I);
return createConstantExpression(
ConstantInt::getFalse(CI->getType()));
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 3dcab6090789..ef29d4141600 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -982,7 +982,7 @@ static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
/// Emit a tree of add instructions, summing Ops together
/// and returning the result. Insert the tree before I.
static Value *EmitAddTreeOfValues(Instruction *I,
- SmallVectorImpl<WeakVH> &Ops){
+ SmallVectorImpl<WeakTrackingVH> &Ops) {
if (Ops.size() == 1) return Ops.back();
Value *V1 = Ops.back();
@@ -1559,7 +1559,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
: BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
- SmallVector<WeakVH, 4> NewMulOps;
+ SmallVector<WeakTrackingVH, 4> NewMulOps;
for (unsigned i = 0; i != Ops.size(); ++i) {
// Only try to remove factors from expressions we're allowed to.
BinaryOperator *BOp =
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index f344eb151464..c11247c06b85 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1128,39 +1128,23 @@ normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
// Create new attribute set containing only attributes which can be transferred
// from original call to the safepoint.
-static AttributeList legalizeCallAttributes(AttributeList AS) {
- AttributeList Ret;
-
- for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) {
- unsigned Index = AS.getSlotIndex(Slot);
-
- if (Index == AttributeList::ReturnIndex ||
- Index == AttributeList::FunctionIndex) {
-
- for (Attribute Attr : make_range(AS.begin(Slot), AS.end(Slot))) {
-
- // Do not allow certain attributes - just skip them
- // Safepoint can not be read only or read none.
- if (Attr.hasAttribute(Attribute::ReadNone) ||
- Attr.hasAttribute(Attribute::ReadOnly))
- continue;
-
- // These attributes control the generation of the gc.statepoint call /
- // invoke itself; and once the gc.statepoint is in place, they're of no
- // use.
- if (isStatepointDirectiveAttr(Attr))
- continue;
-
- Ret = Ret.addAttributes(
- AS.getContext(), Index,
- AttributeList::get(AS.getContext(), Index, AttrBuilder(Attr)));
- }
- }
-
- // Just skip parameter attributes for now
- }
-
- return Ret;
+static AttributeList legalizeCallAttributes(AttributeList AL) {
+ if (AL.isEmpty())
+ return AL;
+
+ // Remove the readonly, readnone, and statepoint function attributes.
+ AttrBuilder FnAttrs = AL.getFnAttributes();
+ FnAttrs.removeAttribute(Attribute::ReadNone);
+ FnAttrs.removeAttribute(Attribute::ReadOnly);
+ for (Attribute A : AL.getFnAttributes()) {
+ if (isStatepointDirectiveAttr(A))
+ FnAttrs.remove(A);
+ }
+
+ // Just skip parameter and return attributes for now
+ LLVMContext &Ctx = AL.getContext();
+ return AttributeList::get(Ctx, AttributeList::FunctionIndex,
+ AttributeSet::get(Ctx, FnAttrs));
}
/// Helper function to place all gc relocates necessary for the given
@@ -1402,13 +1386,10 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
Call->setCallingConv(ToReplace->getCallingConv());
// Currently we will fail on parameter attributes and on certain
- // function attributes.
- AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
- // In case if we can handle this set of attributes - set up function attrs
- // directly on statepoint and return attrs later for gc_result intrinsic.
- Call->setAttributes(AttributeList::get(Call->getContext(),
- AttributeList::FunctionIndex,
- NewAttrs.getFnAttributes()));
+ // function attributes. In case if we can handle this set of attributes -
+ // set up function attrs directly on statepoint and return attrs later for
+ // gc_result intrinsic.
+ Call->setAttributes(legalizeCallAttributes(ToReplace->getAttributes()));
Token = Call;
@@ -1431,13 +1412,10 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
Invoke->setCallingConv(ToReplace->getCallingConv());
// Currently we will fail on parameter attributes and on certain
- // function attributes.
- AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
- // In case if we can handle this set of attributes - set up function attrs
- // directly on statepoint and return attrs later for gc_result intrinsic.
- Invoke->setAttributes(AttributeList::get(Invoke->getContext(),
- AttributeList::FunctionIndex,
- NewAttrs.getFnAttributes()));
+ // function attributes. In case if we can handle this set of attributes -
+ // set up function attrs directly on statepoint and return attrs later for
+ // gc_result intrinsic.
+ Invoke->setAttributes(legalizeCallAttributes(ToReplace->getAttributes()));
Token = Invoke;
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index d01e91a7f235..1d9beffaf06b 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -25,6 +25,7 @@
#include "llvm/Transforms/Scalar/SROA.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -2186,8 +2187,8 @@ class llvm::sroa::AllocaSliceRewriter
Instruction *OldPtr;
// Track post-rewrite users which are PHI nodes and Selects.
- SmallPtrSetImpl<PHINode *> &PHIUsers;
- SmallPtrSetImpl<SelectInst *> &SelectUsers;
+ SmallSetVector<PHINode *, 8> &PHIUsers;
+ SmallSetVector<SelectInst *, 8> &SelectUsers;
// Utility IR builder, whose name prefix is setup for each visited use, and
// the insertion point is set to point to the user.
@@ -2199,8 +2200,8 @@ public:
uint64_t NewAllocaBeginOffset,
uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
VectorType *PromotableVecTy,
- SmallPtrSetImpl<PHINode *> &PHIUsers,
- SmallPtrSetImpl<SelectInst *> &SelectUsers)
+ SmallSetVector<PHINode *, 8> &PHIUsers,
+ SmallSetVector<SelectInst *, 8> &SelectUsers)
: DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
NewAllocaBeginOffset(NewAllocaBeginOffset),
NewAllocaEndOffset(NewAllocaEndOffset),
@@ -3880,8 +3881,8 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
// fact scheduled for promotion.
unsigned PPWOldSize = PostPromotionWorklist.size();
unsigned NumUses = 0;
- SmallPtrSet<PHINode *, 8> PHIUsers;
- SmallPtrSet<SelectInst *, 8> SelectUsers;
+ SmallSetVector<PHINode *, 8> PHIUsers;
+ SmallSetVector<SelectInst *, 8> SelectUsers;
AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
P.endOffset(), IsIntegerPromotable, VecTy,
@@ -3902,19 +3903,16 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
// Now that we've processed all the slices in the new partition, check if any
// PHIs or Selects would block promotion.
- for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
- E = PHIUsers.end();
- I != E; ++I)
- if (!isSafePHIToSpeculate(**I)) {
+ for (PHINode *PHI : PHIUsers)
+ if (!isSafePHIToSpeculate(*PHI)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
break;
}
- for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
- E = SelectUsers.end();
- I != E; ++I)
- if (!isSafeSelectToSpeculate(**I)) {
+
+ for (SelectInst *Sel : SelectUsers)
+ if (!isSafeSelectToSpeculate(*Sel)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 00e3c95f6f06..52201d8f3e51 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
@@ -83,6 +84,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeCFGSimplifyPassPass(Registry);
initializeLateCFGSimplifyPassPass(Registry);
initializeStructurizeCFGPass(Registry);
+ initializeSimpleLoopUnswitchLegacyPassPass(Registry);
initializeSinkingLegacyPassPass(Registry);
initializeTailCallElimPass(Registry);
initializeSeparateConstOffsetFromGEPPass(Registry);
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 4d594532c365..cde659b9d189 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1138,7 +1138,7 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
// Add I to DominatingExprs if it's an add/sub that can't sign overflow.
if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||
match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
- if (isKnownNotFullPoison(I)) {
+ if (programUndefinedIfFullPoison(I)) {
const SCEV *Key =
SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
DominatingExprs[Key].push_back(I);
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
new file mode 100644
index 000000000000..fb1b47c48276
--- /dev/null
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -0,0 +1,626 @@
+//===-- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+#define DEBUG_TYPE "simple-loop-unswitch"
+
+using namespace llvm;
+
+STATISTIC(NumBranches, "Number of branches unswitched");
+STATISTIC(NumSwitches, "Number of switches unswitched");
+STATISTIC(NumTrivial, "Number of unswitches that are trivial");
+
+static void replaceLoopUsesWithConstant(Loop &L, Value &LIC,
+ Constant &Replacement) {
+ assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
+
+ // Replace uses of LIC in the loop with the given constant.
+ for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) {
+ // Grab the use and walk past it so we can clobber it in the use list.
+ Use *U = &*UI++;
+ Instruction *UserI = dyn_cast<Instruction>(U->getUser());
+ if (!UserI || !L.contains(UserI))
+ continue;
+
+ // Replace this use within the loop body.
+ *U = &Replacement;
+ }
+}
+
+/// Update the dominator tree after removing one exiting predecessor of a loop
+/// exit block.
+static void updateLoopExitIDom(BasicBlock *LoopExitBB, Loop &L,
+ DominatorTree &DT) {
+ assert(pred_begin(LoopExitBB) != pred_end(LoopExitBB) &&
+ "Cannot have empty predecessors of the loop exit block if we split "
+ "off a block to unswitch!");
+
+ BasicBlock *IDom = *pred_begin(LoopExitBB);
+ // Walk all of the other predecessors finding the nearest common dominator
+ // until all predecessors are covered or we reach the loop header. The loop
+ // header necessarily dominates all loop exit blocks in loop simplified form
+ // so we can early-exit the moment we hit that block.
+ for (auto PI = std::next(pred_begin(LoopExitBB)), PE = pred_end(LoopExitBB);
+ PI != PE && IDom != L.getHeader(); ++PI)
+ IDom = DT.findNearestCommonDominator(IDom, *PI);
+
+ DT.changeImmediateDominator(LoopExitBB, IDom);
+}
+
+/// Update the dominator tree after unswitching a particular former exit block.
+///
+/// This handles the full update of the dominator tree after hoisting a block
+/// that previously was an exit block (or split off of an exit block) up to be
+/// reached from the new immediate dominator of the preheader.
+///
+/// The common case is simple -- we just move the unswitched block to have an
+/// immediate dominator of the old preheader. But in complex cases, there may
+/// be other blocks reachable from the unswitched block that are immediately
+/// dominated by some node between the unswitched one and the old preheader.
+/// All of these also need to be hoisted in the dominator tree. We also want to
+/// minimize queries to the dominator tree because each step of this
+/// invalidates any DFS numbers that would make queries fast.
+static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH,
+ DominatorTree &DT) {
+ DomTreeNode *OldPHNode = DT[OldPH];
+ DomTreeNode *UnswitchedNode = DT[UnswitchedBB];
+ // If the dominator tree has already been updated for this unswitched node,
+ // we're done. This makes it easier to use this routine if there are multiple
+ // paths to the same unswitched destination.
+ if (UnswitchedNode->getIDom() == OldPHNode)
+ return;
+
+ // First collect the domtree nodes that we are hoisting over. These are the
+ // set of nodes which may have children that need to be hoisted as well.
+ SmallPtrSet<DomTreeNode *, 4> DomChain;
+ for (auto *IDom = UnswitchedNode->getIDom(); IDom != OldPHNode;
+ IDom = IDom->getIDom())
+ DomChain.insert(IDom);
+
+ // The unswitched block ends up immediately dominated by the old preheader --
+ // regardless of whether it is the loop exit block or split off of the loop
+ // exit block.
+ DT.changeImmediateDominator(UnswitchedNode, OldPHNode);
+
+ // Blocks reachable from the unswitched block may need to change their IDom
+ // as well.
+ SmallSetVector<BasicBlock *, 4> Worklist;
+ for (auto *SuccBB : successors(UnswitchedBB))
+ Worklist.insert(SuccBB);
+
+ // Walk the worklist. We grow the list in the loop and so must recompute size.
+ for (int i = 0; i < (int)Worklist.size(); ++i) {
+ auto *BB = Worklist[i];
+
+ DomTreeNode *Node = DT[BB];
+ assert(!DomChain.count(Node) &&
+ "Cannot be dominated by a block you can reach!");
+ // If this block doesn't have an immediate dominator somewhere in the chain
+ // we hoisted over, then its position in the domtree hasn't changed. Either
+ // it is above the region hoisted and still valid, or it is below the
+ // hoisted block and so was trivially updated. This also applies to
+ // everything reachable from this block so we're completely done with the
+ // it.
+ if (!DomChain.count(Node->getIDom()))
+ continue;
+
+ // We need to change the IDom for this node but also walk its successors
+ // which could have similar dominance position.
+ DT.changeImmediateDominator(Node, OldPHNode);
+ for (auto *SuccBB : successors(BB))
+ Worklist.insert(SuccBB);
+ }
+}
+
+/// Unswitch a trivial branch if the condition is loop invariant.
+///
+/// This routine should only be called when loop code leading to the branch has
+/// been validated as trivial (no side effects). This routine checks if the
+/// condition is invariant and one of the successors is a loop exit. This
+/// allows us to unswitch without duplicating the loop, making it trivial.
+///
+/// If this routine fails to unswitch the branch it returns false.
+///
+/// If the branch can be unswitched, this routine splits the preheader and
+/// hoists the branch above that split. Preserves loop simplified form
+/// (splitting the exit block as necessary). It simplifies the branch within
+/// the loop to an unconditional branch but doesn't remove it entirely. Further
+/// cleanup can be done with some simplify-cfg like pass.
+static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
+ LoopInfo &LI) {
+ assert(BI.isConditional() && "Can only unswitch a conditional branch!");
+ DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
+
+ Value *LoopCond = BI.getCondition();
+
+ // Need a trivial loop condition to unswitch.
+ if (!L.isLoopInvariant(LoopCond))
+ return false;
+
+ // FIXME: We should compute this once at the start and update it!
+ SmallVector<BasicBlock *, 16> ExitBlocks;
+ L.getExitBlocks(ExitBlocks);
+ SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(),
+ ExitBlocks.end());
+
+ // Check to see if a successor of the branch is guaranteed to
+ // exit through a unique exit block without having any
+ // side-effects. If so, determine the value of Cond that causes
+ // it to do this.
+ ConstantInt *CondVal = ConstantInt::getTrue(BI.getContext());
+ ConstantInt *Replacement = ConstantInt::getFalse(BI.getContext());
+ int LoopExitSuccIdx = 0;
+ auto *LoopExitBB = BI.getSuccessor(0);
+ if (!ExitBlockSet.count(LoopExitBB)) {
+ std::swap(CondVal, Replacement);
+ LoopExitSuccIdx = 1;
+ LoopExitBB = BI.getSuccessor(1);
+ if (!ExitBlockSet.count(LoopExitBB))
+ return false;
+ }
+ auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
+ assert(L.contains(ContinueBB) &&
+ "Cannot have both successors exit and still be in the loop!");
+
+ // If the loop exit block contains phi nodes, this isn't trivial.
+ // FIXME: We should examine the PHI to determine whether or not we can handle
+ // it trivially.
+ if (isa<PHINode>(LoopExitBB->begin()))
+ return false;
+
+ DEBUG(dbgs() << " unswitching trivial branch when: " << CondVal
+ << " == " << LoopCond << "\n");
+
+ // Split the preheader, so that we know that there is a safe place to insert
+ // the conditional branch. We will change the preheader to have a conditional
+ // branch on LoopCond.
+ BasicBlock *OldPH = L.getLoopPreheader();
+ BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
+
+ // Now that we have a place to insert the conditional branch, create a place
+ // to branch to: this is the exit block out of the loop that we are
+ // unswitching. We need to split this if there are other loop predecessors.
+ // Because the loop is in simplified form, *any* other predecessor is enough.
+ BasicBlock *UnswitchedBB;
+ if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) {
+ (void)PredBB;
+ assert(PredBB == BI.getParent() && "A branch's parent is't a predecessor!");
+ UnswitchedBB = LoopExitBB;
+ } else {
+ UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI);
+ }
+
+ BasicBlock *ParentBB = BI.getParent();
+
+ // Now splice the branch to gate reaching the new preheader and re-point its
+ // successors.
+ OldPH->getInstList().splice(std::prev(OldPH->end()),
+ BI.getParent()->getInstList(), BI);
+ OldPH->getTerminator()->eraseFromParent();
+ BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
+ BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
+
+ // Create a new unconditional branch that will continue the loop as a new
+ // terminator.
+ BranchInst::Create(ContinueBB, ParentBB);
+
+ // Now we need to update the dominator tree.
+ updateDTAfterUnswitch(UnswitchedBB, OldPH, DT);
+ // But if we split something off of the loop exit block then we also removed
+ // one of the predecessors for the loop exit block and may need to update its
+ // idom.
+ if (UnswitchedBB != LoopExitBB)
+ updateLoopExitIDom(LoopExitBB, L, DT);
+
+ // Since this is an i1 condition we can also trivially replace uses of it
+ // within the loop with a constant.
+ replaceLoopUsesWithConstant(L, *LoopCond, *Replacement);
+
+ ++NumTrivial;
+ ++NumBranches;
+ return true;
+}
+
+/// Unswitch a trivial switch if the condition is loop invariant.
+///
+/// This routine should only be called when loop code leading to the switch has
+/// been validated as trivial (no side effects). This routine checks if the
+/// condition is invariant and that at least one of the successors is a loop
+/// exit. This allows us to unswitch without duplicating the loop, making it
+/// trivial.
+///
+/// If this routine fails to unswitch the switch it returns false.
+///
+/// If the switch can be unswitched, this routine splits the preheader and
+/// copies the switch above that split. If the default case is one of the
+/// exiting cases, it copies the non-exiting cases and points them at the new
+/// preheader. If the default case is not exiting, it copies the exiting cases
+/// and points the default at the preheader. It preserves loop simplified form
+/// (splitting the exit blocks as necessary). It simplifies the switch within
+/// the loop by removing now-dead cases. If the default case is one of those
+/// unswitched, it replaces its destination with a new basic block containing
+/// only unreachable. Such basic blocks, while technically loop exits, are not
+/// considered for unswitching so this is a stable transform and the same
+/// switch will not be revisited. If after unswitching there is only a single
+/// in-loop successor, the switch is further simplified to an unconditional
+/// branch. Still more cleanup can be done with some simplify-cfg like pass.
+static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
+ LoopInfo &LI) {
+ DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
+ Value *LoopCond = SI.getCondition();
+
+ // If this isn't switching on an invariant condition, we can't unswitch it.
+ if (!L.isLoopInvariant(LoopCond))
+ return false;
+
+ // FIXME: We should compute this once at the start and update it!
+ SmallVector<BasicBlock *, 16> ExitBlocks;
+ L.getExitBlocks(ExitBlocks);
+ SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(),
+ ExitBlocks.end());
+
+ SmallVector<int, 4> ExitCaseIndices;
+ for (auto Case : SI.cases()) {
+ auto *SuccBB = Case.getCaseSuccessor();
+ if (ExitBlockSet.count(SuccBB) && !isa<PHINode>(SuccBB->begin()))
+ ExitCaseIndices.push_back(Case.getCaseIndex());
+ }
+ BasicBlock *DefaultExitBB = nullptr;
+ if (ExitBlockSet.count(SI.getDefaultDest()) &&
+ !isa<PHINode>(SI.getDefaultDest()->begin()) &&
+ !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator()))
+ DefaultExitBB = SI.getDefaultDest();
+ else if (ExitCaseIndices.empty())
+ return false;
+
+ DEBUG(dbgs() << " unswitching trivial cases...\n");
+
+ SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases;
+ ExitCases.reserve(ExitCaseIndices.size());
+ // We walk the case indices backwards so that we remove the last case first
+ // and don't disrupt the earlier indices.
+ for (unsigned Index : reverse(ExitCaseIndices)) {
+ auto CaseI = SI.case_begin() + Index;
+ // Save the value of this case.
+ ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()});
+ // Delete the unswitched cases.
+ SI.removeCase(CaseI);
+ }
+
+ // Check if after this all of the remaining cases point at the same
+ // successor.
+ BasicBlock *CommonSuccBB = nullptr;
+ if (SI.getNumCases() > 0 &&
+ std::all_of(std::next(SI.case_begin()), SI.case_end(),
+ [&SI](const SwitchInst::CaseHandle &Case) {
+ return Case.getCaseSuccessor() ==
+ SI.case_begin()->getCaseSuccessor();
+ }))
+ CommonSuccBB = SI.case_begin()->getCaseSuccessor();
+
+ if (DefaultExitBB) {
+ // We can't remove the default edge so replace it with an edge to either
+ // the single common remaining successor (if we have one) or an unreachable
+ // block.
+ if (CommonSuccBB) {
+ SI.setDefaultDest(CommonSuccBB);
+ } else {
+ BasicBlock *ParentBB = SI.getParent();
+ BasicBlock *UnreachableBB = BasicBlock::Create(
+ ParentBB->getContext(),
+ Twine(ParentBB->getName()) + ".unreachable_default",
+ ParentBB->getParent());
+ new UnreachableInst(ParentBB->getContext(), UnreachableBB);
+ SI.setDefaultDest(UnreachableBB);
+ DT.addNewBlock(UnreachableBB, ParentBB);
+ }
+ } else {
+ // If we're not unswitching the default, we need it to match any cases to
+ // have a common successor or if we have no cases it is the common
+ // successor.
+ if (SI.getNumCases() == 0)
+ CommonSuccBB = SI.getDefaultDest();
+ else if (SI.getDefaultDest() != CommonSuccBB)
+ CommonSuccBB = nullptr;
+ }
+
+ // Split the preheader, so that we know that there is a safe place to insert
+ // the switch.
+ BasicBlock *OldPH = L.getLoopPreheader();
+ BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI);
+ OldPH->getTerminator()->eraseFromParent();
+
+ // Now add the unswitched switch.
+ auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
+
+ // Split any exit blocks with remaining in-loop predecessors. We walk in
+ // reverse so that we split in the same order as the cases appeared. This is
+ // purely for convenience of reading the resulting IR, but it doesn't cost
+ // anything really.
+ SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
+ // Handle the default exit if necessary.
+ // FIXME: It'd be great if we could merge this with the loop below but LLVM's
+ // ranges aren't quite powerful enough yet.
+ if (DefaultExitBB && !pred_empty(DefaultExitBB)) {
+ auto *SplitBB =
+ SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI);
+ updateLoopExitIDom(DefaultExitBB, L, DT);
+ DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
+ }
+ // Note that we must use a reference in the for loop so that we update the
+ // container.
+ for (auto &CasePair : reverse(ExitCases)) {
+ // Grab a reference to the exit block in the pair so that we can update it.
+ BasicBlock *&ExitBB = CasePair.second;
+
+ // If this case is the last edge into the exit block, we can simply reuse it
+ // as it will no longer be a loop exit. No mapping necessary.
+ if (pred_empty(ExitBB))
+ continue;
+
+ // Otherwise we need to split the exit block so that we retain an exit
+ // block from the loop and a target for the unswitched condition.
+ BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
+ if (!SplitExitBB) {
+ // If this is the first time we see this, do the split and remember it.
+ SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI);
+ updateLoopExitIDom(ExitBB, L, DT);
+ }
+ ExitBB = SplitExitBB;
+ }
+
+ // Now add the unswitched cases. We do this in reverse order as we built them
+ // in reverse order.
+ for (auto CasePair : reverse(ExitCases)) {
+ ConstantInt *CaseVal = CasePair.first;
+ BasicBlock *UnswitchedBB = CasePair.second;
+
+ NewSI->addCase(CaseVal, UnswitchedBB);
+ updateDTAfterUnswitch(UnswitchedBB, OldPH, DT);
+ }
+
+ // If the default was unswitched, re-point it and add explicit cases for
+ // entering the loop.
+ if (DefaultExitBB) {
+ NewSI->setDefaultDest(DefaultExitBB);
+ updateDTAfterUnswitch(DefaultExitBB, OldPH, DT);
+
+ // We removed all the exit cases, so we just copy the cases to the
+ // unswitched switch.
+ for (auto Case : SI.cases())
+ NewSI->addCase(Case.getCaseValue(), NewPH);
+ }
+
+ // If we ended up with a common successor for every path through the switch
+ // after unswitching, rewrite it to an unconditional branch to make it easy
+ // to recognize. Otherwise we potentially have to recognize the default case
+ // pointing at unreachable and other complexity.
+ if (CommonSuccBB) {
+ BasicBlock *BB = SI.getParent();
+ SI.eraseFromParent();
+ BranchInst::Create(CommonSuccBB, BB);
+ }
+
+ DT.verifyDomTree();
+ ++NumTrivial;
+ ++NumSwitches;
+ return true;
+}
+
+/// This routine scans the loop to find a branch or switch which occurs before
+/// any side effects occur. These can potentially be unswitched without
+/// duplicating the loop. If a branch or switch is successfully unswitched the
+/// scanning continues to see if subsequent branches or switches have become
+/// trivial. Once all trivial candidates have been unswitched, this routine
+/// returns.
+///
+/// The return value indicates whether anything was unswitched (and therefore
+/// changed).
+static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
+ LoopInfo &LI) {
+ bool Changed = false;
+
+ // If loop header has only one reachable successor we should keep looking for
+ // trivial condition candidates in the successor as well. An alternative is
+ // to constant fold conditions and merge successors into loop header (then we
+ // only need to check header's terminator). The reason for not doing this in
+ // LoopUnswitch pass is that it could potentially break LoopPassManager's
+ // invariants. Folding dead branches could either eliminate the current loop
+ // or make other loops unreachable. LCSSA form might also not be preserved
+ // after deleting branches. The following code keeps traversing loop header's
+ // successors until it finds the trivial condition candidate (condition that
+ // is not a constant). Since unswitching generates branches with constant
+ // conditions, this scenario could be very common in practice.
+ BasicBlock *CurrentBB = L.getHeader();
+ SmallPtrSet<BasicBlock *, 8> Visited;
+ Visited.insert(CurrentBB);
+ do {
+ // Check if there are any side-effecting instructions (e.g. stores, calls,
+ // volatile loads) in the part of the loop that the code *would* execute
+ // without unswitching.
+ if (llvm::any_of(*CurrentBB,
+ [](Instruction &I) { return I.mayHaveSideEffects(); }))
+ return Changed;
+
+ TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
+
+ if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
+ // Don't bother trying to unswitch past a switch with a constant
+ // condition. This should be removed prior to running this pass by
+ // simplify-cfg.
+ if (isa<Constant>(SI->getCondition()))
+ return Changed;
+
+ if (!unswitchTrivialSwitch(L, *SI, DT, LI))
+ // Coludn't unswitch this one so we're done.
+ return Changed;
+
+ // Mark that we managed to unswitch something.
+ Changed = true;
+
+ // If unswitching turned the terminator into an unconditional branch then
+ // we can continue. The unswitching logic specifically works to fold any
+ // cases it can into an unconditional branch to make it easier to
+ // recognize here.
+ auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
+ if (!BI || BI->isConditional())
+ return Changed;
+
+ CurrentBB = BI->getSuccessor(0);
+ continue;
+ }
+
+ auto *BI = dyn_cast<BranchInst>(CurrentTerm);
+ if (!BI)
+ // We do not understand other terminator instructions.
+ return Changed;
+
+ // Don't bother trying to unswitch past an unconditional branch or a branch
+ // with a constant value. These should be removed by simplify-cfg prior to
+ // running this pass.
+ if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
+ return Changed;
+
+ // Found a trivial condition candidate: non-foldable conditional branch. If
+ // we fail to unswitch this, we can't do anything else that is trivial.
+ if (!unswitchTrivialBranch(L, *BI, DT, LI))
+ return Changed;
+
+ // Mark that we managed to unswitch something.
+ Changed = true;
+
+ // We unswitched the branch. This should always leave us with an
+ // unconditional branch that we can follow now.
+ BI = cast<BranchInst>(CurrentBB->getTerminator());
+ assert(!BI->isConditional() &&
+ "Cannot form a conditional branch by unswitching1");
+ CurrentBB = BI->getSuccessor(0);
+
+ // When continuing, if we exit the loop or reach a previous visited block,
+ // then we can not reach any trivial condition candidates (unfoldable
+ // branch instructions or switch instructions) and no unswitch can happen.
+ } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
+
+ return Changed;
+}
+
+/// Unswitch control flow predicated on loop invariant conditions.
+///
+/// This first hoists all branches or switches which are trivial (IE, do not
+/// require duplicating any part of the loop) out of the loop body. It then
+/// looks at other loop invariant control flows and tries to unswitch those as
+/// well by cloning the loop if the result is small enough.
+static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
+ AssumptionCache &AC) {
+ assert(L.isLCSSAForm(DT) &&
+ "Loops must be in LCSSA form before unswitching.");
+ bool Changed = false;
+
+ // Must be in loop simplified form: we need a preheader and dedicated exits.
+ if (!L.isLoopSimplifyForm())
+ return false;
+
+ // Try trivial unswitch first before loop over other basic blocks in the loop.
+ Changed |= unswitchAllTrivialConditions(L, DT, LI);
+
+ // FIXME: Add support for non-trivial unswitching by cloning the loop.
+
+ return Changed;
+}
+
+PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ Function &F = *L.getHeader()->getParent();
+ (void)F;
+
+ DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n");
+
+ if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC))
+ return PreservedAnalyses::all();
+
+#ifndef NDEBUG
+ // Historically this pass has had issues with the dominator tree so verify it
+ // in asserts builds.
+ AR.DT.verifyDomTree();
+#endif
+ return getLoopPassPreservedAnalyses();
+}
+
+namespace {
+class SimpleLoopUnswitchLegacyPass : public LoopPass {
+public:
+ static char ID; // Pass ID, replacement for typeid
+ explicit SimpleLoopUnswitchLegacyPass() : LoopPass(ID) {
+ initializeSimpleLoopUnswitchLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ getLoopAnalysisUsage(AU);
+ }
+};
+} // namespace
+
+bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipLoop(L))
+ return false;
+
+ Function &F = *L->getHeader()->getParent();
+
+ DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n");
+
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+
+ bool Changed = unswitchLoop(*L, DT, LI, AC);
+
+#ifndef NDEBUG
+ // Historically this pass has had issues with the dominator tree so verify it
+ // in asserts builds.
+ DT.verifyDomTree();
+#endif
+ return Changed;
+}
+
+char SimpleLoopUnswitchLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
+ "Simple unswitch loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",
+ "Simple unswitch loops", false, false)
+
+Pass *llvm::createSimpleLoopUnswitchLegacyPass() {
+ return new SimpleLoopUnswitchLegacyPass();
+}