aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp319
1 files changed, 197 insertions, 122 deletions
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 0535608244cc..7e08120f923d 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/GuardUtils.h"
@@ -26,6 +27,7 @@
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -61,6 +63,7 @@
#include <cassert>
#include <iterator>
#include <numeric>
+#include <optional>
#include <utility>
#define DEBUG_TYPE "simple-loop-unswitch"
@@ -115,6 +118,18 @@ static cl::opt<bool> FreezeLoopUnswitchCond(
cl::desc("If enabled, the freeze instruction will be added to condition "
"of loop unswitch to prevent miscompilation."));
+namespace {
+struct NonTrivialUnswitchCandidate {
+ Instruction *TI = nullptr;
+ TinyPtrVector<Value *> Invariants;
+ std::optional<InstructionCost> Cost;
+ NonTrivialUnswitchCandidate(
+ Instruction *TI, ArrayRef<Value *> Invariants,
+ std::optional<InstructionCost> Cost = std::nullopt)
+ : TI(TI), Invariants(Invariants), Cost(Cost){};
+};
+} // end anonymous namespace.
+
// Helper to skip (select x, true, false), which matches both a logical AND and
// OR and can confuse code that tries to determine if \p Cond is either a
// logical AND or OR but not both.
@@ -133,8 +148,8 @@ static Value *skipTrivialSelect(Value *Cond) {
/// inputs which are loop invariant. For some operations these can be
/// re-associated and unswitched out of the loop entirely.
static TinyPtrVector<Value *>
-collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root,
- LoopInfo &LI) {
+collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root,
+ const LoopInfo &LI) {
assert(!L.isLoopInvariant(&Root) &&
"Only need to walk the graph if root itself is not invariant.");
TinyPtrVector<Value *> Invariants;
@@ -175,7 +190,7 @@ collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root,
return Invariants;
}
-static void replaceLoopInvariantUses(Loop &L, Value *Invariant,
+static void replaceLoopInvariantUses(const Loop &L, Value *Invariant,
Constant &Replacement) {
assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
@@ -192,9 +207,10 @@ static void replaceLoopInvariantUses(Loop &L, Value *Invariant,
/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
/// incoming values along this edge.
-static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
- BasicBlock &ExitBB) {
- for (Instruction &I : ExitBB) {
+static bool areLoopExitPHIsLoopInvariant(const Loop &L,
+ const BasicBlock &ExitingBB,
+ const BasicBlock &ExitBB) {
+ for (const Instruction &I : ExitBB) {
auto *PN = dyn_cast<PHINode>(&I);
if (!PN)
// No more PHIs to check.
@@ -214,7 +230,7 @@ static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
static void buildPartialUnswitchConditionalBranch(
BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
- Instruction *I, AssumptionCache *AC, DominatorTree &DT) {
+ const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
IRBuilder<> IRB(&BB);
SmallVector<Value *> FrozenInvariants;
@@ -239,7 +255,7 @@ static void buildPartialInvariantUnswitchConditionalBranch(
for (auto *Val : reverse(ToDuplicate)) {
Instruction *Inst = cast<Instruction>(Val);
Instruction *NewInst = Inst->clone();
- BB.getInstList().insert(BB.end(), NewInst);
+ NewInst->insertInto(&BB, BB.end());
RemapInstruction(NewInst, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
VMap[Val] = NewInst;
@@ -418,9 +434,10 @@ static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
// Return the top-most loop containing ExitBB and having ExitBB as exiting block
// or the loop containing ExitBB, if there is no parent loop containing ExitBB
// as exiting block.
-static Loop *getTopMostExitingLoop(BasicBlock *ExitBB, LoopInfo &LI) {
- Loop *TopMost = LI.getLoopFor(ExitBB);
- Loop *Current = TopMost;
+static const Loop *getTopMostExitingLoop(const BasicBlock *ExitBB,
+ const LoopInfo &LI) {
+ const Loop *TopMost = LI.getLoopFor(ExitBB);
+ const Loop *Current = TopMost;
while (Current) {
if (Current->isLoopExiting(ExitBB))
TopMost = Current;
@@ -521,11 +538,12 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// loop, the loop containing the exit block and the topmost parent loop
// exiting via LoopExitBB.
if (SE) {
- if (Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
+ if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
SE->forgetLoop(ExitL);
else
// Forget the entire nest as this exits the entire nest.
SE->forgetTopmostLoop(&L);
+ SE->forgetBlockAndLoopDispositions();
}
if (MSSAU && VerifyMemorySSA)
@@ -562,13 +580,12 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// If fully unswitching, we can use the existing branch instruction.
// Splice it into the old PH to gate reaching the new preheader and re-point
// its successors.
- OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(),
- BI);
+ OldPH->splice(OldPH->end(), BI.getParent(), BI.getIterator());
BI.setCondition(Cond);
if (MSSAU) {
// Temporarily clone the terminator, to make MSSA update cheaper by
// separating "insert edge" updates from "remove edge" ones.
- ParentBB->getInstList().push_back(BI.clone());
+ BI.clone()->insertInto(ParentBB, ParentBB->end());
} else {
// Create a new unconditional branch that will continue the loop as a new
// terminator.
@@ -1098,7 +1115,8 @@ static BasicBlock *buildClonedLoopBlocks(
const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
ValueToValueMapTy &VMap,
SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
- DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
+ DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU,
+ ScalarEvolution *SE) {
SmallVector<BasicBlock *, 4> NewBlocks;
NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
@@ -1174,6 +1192,10 @@ static BasicBlock *buildClonedLoopBlocks(
// We should have a value map between the instruction and its clone.
assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
+ // Forget SCEVs based on exit phis in case SCEV looked through the phi.
+ if (SE && isa<PHINode>(I))
+ SE->forgetValue(&I);
+
auto *MergePN =
PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
&*MergeBB->getFirstInsertionPt());
@@ -1550,7 +1572,7 @@ static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
// We need a stable insertion order. We use the order of the original loop
// order and map into the correct parent loop.
for (auto *BB : llvm::concat<BasicBlock *const>(
- makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
+ ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
if (Loop *OuterL = ExitLoopMap.lookup(BB))
OuterL->addBasicBlockToLoop(BB, LI);
@@ -1590,7 +1612,7 @@ deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
// Find all the dead clones, and remove them from their successors.
SmallVector<BasicBlock *, 16> DeadBlocks;
for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
- for (auto &VMap : VMaps)
+ for (const auto &VMap : VMaps)
if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
if (!DT.isReachableFromEntry(ClonedBB)) {
for (BasicBlock *SuccBB : successors(ClonedBB))
@@ -1618,6 +1640,7 @@ deleteDeadBlocksFromLoop(Loop &L,
SmallVectorImpl<BasicBlock *> &ExitBlocks,
DominatorTree &DT, LoopInfo &LI,
MemorySSAUpdater *MSSAU,
+ ScalarEvolution *SE,
function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
// Find all the dead blocks tied to this loop, and remove them from their
// successors.
@@ -1669,6 +1692,8 @@ deleteDeadBlocksFromLoop(Loop &L,
"If the child loop header is dead all blocks in the child loop must "
"be dead as well!");
DestroyLoopCB(*ChildL, ChildL->getName());
+ if (SE)
+ SE->forgetBlockAndLoopDispositions();
LI.destroy(ChildL);
return true;
});
@@ -1818,7 +1843,8 @@ static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L,
/// referenced).
static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
LoopInfo &LI,
- SmallVectorImpl<Loop *> &HoistedLoops) {
+ SmallVectorImpl<Loop *> &HoistedLoops,
+ ScalarEvolution *SE) {
auto *PH = L.getLoopPreheader();
// Compute the actual parent loop from the exit blocks. Because we may have
@@ -2011,6 +2037,8 @@ static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
LI.removeLoop(llvm::find(LI, &L));
// markLoopAsDeleted for L should be triggered by the caller (it is typically
// done by using the UnswitchCB callback).
+ if (SE)
+ SE->forgetBlockAndLoopDispositions();
LI.destroy(&L);
return false;
}
@@ -2047,8 +2075,8 @@ void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
static void unswitchNontrivialInvariants(
Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
- SmallVectorImpl<BasicBlock *> &ExitBlocks, IVConditionInfo &PartialIVInfo,
- DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
+ IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI,
+ AssumptionCache &AC,
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
@@ -2129,6 +2157,8 @@ static void unswitchNontrivialInvariants(
// furthest up our loopnest which can be mutated, which we will use below to
// update things.
Loop *OuterExitL = &L;
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ L.getUniqueExitBlocks(ExitBlocks);
for (auto *ExitBB : ExitBlocks) {
Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
if (!NewOuterExitL) {
@@ -2148,6 +2178,7 @@ static void unswitchNontrivialInvariants(
SE->forgetLoop(OuterExitL);
else
SE->forgetTopmostLoop(&L);
+ SE->forgetBlockAndLoopDispositions();
}
bool InsertFreeze = false;
@@ -2157,14 +2188,26 @@ static void unswitchNontrivialInvariants(
InsertFreeze = !SafetyInfo.isGuaranteedToExecute(TI, &DT, &L);
}
+ // Perform the isGuaranteedNotToBeUndefOrPoison() query before the transform,
+ // otherwise the branch instruction will have been moved outside the loop
+ // already, and may imply that a poison condition is always UB.
+ Value *FullUnswitchCond = nullptr;
+ if (FullUnswitch) {
+ FullUnswitchCond =
+ BI ? skipTrivialSelect(BI->getCondition()) : SI->getCondition();
+ if (InsertFreeze)
+ InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
+ FullUnswitchCond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
+ }
+
// If the edge from this terminator to a successor dominates that successor,
// store a map from each block in its dominator subtree to it. This lets us
// tell when cloning for a particular successor if a block is dominated by
// some *other* successor with a single data structure. We use this to
// significantly reduce cloning.
SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc;
- for (auto *SuccBB : llvm::concat<BasicBlock *const>(
- makeArrayRef(RetainedSuccBB), UnswitchedSuccBBs))
+ for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB),
+ UnswitchedSuccBBs))
if (SuccBB->getUniquePredecessor() ||
llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
@@ -2193,7 +2236,7 @@ static void unswitchNontrivialInvariants(
VMaps.emplace_back(new ValueToValueMapTy());
ClonedPHs[SuccBB] = buildClonedLoopBlocks(
L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
- DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
+ DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE);
}
// Drop metadata if we may break its semantics by moving this instr into the
@@ -2220,23 +2263,21 @@ static void unswitchNontrivialInvariants(
if (FullUnswitch) {
// Splice the terminator from the original loop and rewrite its
// successors.
- SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
+ SplitBB->splice(SplitBB->end(), ParentBB, TI.getIterator());
// Keep a clone of the terminator for MSSA updates.
Instruction *NewTI = TI.clone();
- ParentBB->getInstList().push_back(NewTI);
+ NewTI->insertInto(ParentBB, ParentBB->end());
// First wire up the moved terminator to the preheaders.
if (BI) {
BasicBlock *ClonedPH = ClonedPHs.begin()->second;
BI->setSuccessor(ClonedSucc, ClonedPH);
BI->setSuccessor(1 - ClonedSucc, LoopPH);
- Value *Cond = skipTrivialSelect(BI->getCondition());
- if (InsertFreeze) {
- if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, BI, &DT))
- Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI);
- }
- BI->setCondition(Cond);
+ if (InsertFreeze)
+ FullUnswitchCond = new FreezeInst(
+ FullUnswitchCond, FullUnswitchCond->getName() + ".fr", BI);
+ BI->setCondition(FullUnswitchCond);
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
} else {
assert(SI && "Must either be a branch or switch!");
@@ -2245,17 +2286,16 @@ static void unswitchNontrivialInvariants(
assert(SI->getDefaultDest() == RetainedSuccBB &&
"Not retaining default successor!");
SI->setDefaultDest(LoopPH);
- for (auto &Case : SI->cases())
+ for (const auto &Case : SI->cases())
if (Case.getCaseSuccessor() == RetainedSuccBB)
Case.setSuccessor(LoopPH);
else
Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
- if (InsertFreeze) {
- auto Cond = SI->getCondition();
- if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, SI, &DT))
- SI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", SI));
- }
+ if (InsertFreeze)
+ SI->setCondition(new FreezeInst(
+ FullUnswitchCond, FullUnswitchCond->getName() + ".fr", SI));
+
// We need to use the set to populate domtree updates as even when there
// are multiple cases pointing at the same successor we only want to
// remove and insert one edge in the domtree.
@@ -2306,7 +2346,7 @@ static void unswitchNontrivialInvariants(
SwitchInst *NewSI = cast<SwitchInst>(NewTI);
assert(NewSI->getDefaultDest() == RetainedSuccBB &&
"Not retaining default successor!");
- for (auto &Case : NewSI->cases())
+ for (const auto &Case : NewSI->cases())
Case.getCaseSuccessor()->removePredecessor(
ParentBB,
/*KeepOneInputPHIs*/ true);
@@ -2372,13 +2412,14 @@ static void unswitchNontrivialInvariants(
// Now that our cloned loops have been built, we can update the original loop.
// First we delete the dead blocks from it and then we rebuild the loop
// structure taking these deletions into account.
- deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, DestroyLoopCB);
+ deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE,DestroyLoopCB);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
SmallVector<Loop *, 4> HoistedLoops;
- bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
+ bool IsStillLoop =
+ rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -2573,10 +2614,9 @@ static InstructionCost computeDomSubtreeCost(
///
/// It also makes all relevant DT and LI updates, so that all structures are in
/// valid state after this transform.
-static BranchInst *
-turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
- SmallVectorImpl<BasicBlock *> &ExitBlocks,
- DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
+static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
+ DominatorTree &DT, LoopInfo &LI,
+ MemorySSAUpdater *MSSAU) {
SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
BasicBlock *CheckBB = GI->getParent();
@@ -2603,9 +2643,6 @@ turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
CheckBI->getSuccessor(1)->setName("deopt");
BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
- // We now have a new exit block.
- ExitBlocks.push_back(CheckBI->getSuccessor(1));
-
if (MSSAU)
MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
@@ -2651,19 +2688,19 @@ turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
/// That requires knowing not just the number of "remaining" candidates but
/// also costs of unswitching for each of these candidates.
static int CalculateUnswitchCostMultiplier(
- Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT,
- ArrayRef<std::pair<Instruction *, TinyPtrVector<Value *>>>
- UnswitchCandidates) {
+ const Instruction &TI, const Loop &L, const LoopInfo &LI,
+ const DominatorTree &DT,
+ ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) {
// Guards and other exiting conditions do not contribute to exponential
// explosion as soon as they dominate the latch (otherwise there might be
// another path to the latch remaining that does not allow to eliminate the
// loop copy on unswitch).
- BasicBlock *Latch = L.getLoopLatch();
- BasicBlock *CondBlock = TI.getParent();
+ const BasicBlock *Latch = L.getLoopLatch();
+ const BasicBlock *CondBlock = TI.getParent();
if (DT.dominates(CondBlock, Latch) &&
(isGuard(&TI) ||
- llvm::count_if(successors(&TI), [&L](BasicBlock *SuccBB) {
+ llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
return L.contains(SuccBB);
}) <= 1)) {
NumCostMultiplierSkipped++;
@@ -2677,16 +2714,17 @@ static int CalculateUnswitchCostMultiplier(
// unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
int UnswitchedClones = 0;
for (auto Candidate : UnswitchCandidates) {
- Instruction *CI = Candidate.first;
- BasicBlock *CondBlock = CI->getParent();
+ const Instruction *CI = Candidate.TI;
+ const BasicBlock *CondBlock = CI->getParent();
bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
if (isGuard(CI)) {
if (!SkipExitingSuccessors)
UnswitchedClones++;
continue;
}
- int NonExitingSuccessors = llvm::count_if(
- successors(CondBlock), [SkipExitingSuccessors, &L](BasicBlock *SuccBB) {
+ int NonExitingSuccessors =
+ llvm::count_if(successors(CondBlock),
+ [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) {
return !SkipExitingSuccessors || L.contains(SuccBB);
});
UnswitchedClones += Log2_32(NonExitingSuccessors);
@@ -2722,17 +2760,12 @@ static int CalculateUnswitchCostMultiplier(
return CostMultiplier;
}
-static bool unswitchBestCondition(
- Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
- AAResults &AA, TargetTransformInfo &TTI,
- function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
- function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
- // Collect all invariant conditions within this loop (as opposed to an inner
- // loop which would be handled when visiting that inner loop).
- SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4>
- UnswitchCandidates;
-
+static bool collectUnswitchCandidates(
+ SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
+ IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch,
+ const Loop &L, const LoopInfo &LI, AAResults &AA,
+ const MemorySSAUpdater *MSSAU) {
+ assert(UnswitchCandidates.empty() && "Should be!");
// Whether or not we should also collect guards in the loop.
bool CollectGuards = false;
if (UnswitchGuards) {
@@ -2742,7 +2775,6 @@ static bool unswitchBestCondition(
CollectGuards = true;
}
- IVConditionInfo PartialIVInfo;
for (auto *BB : L.blocks()) {
if (LI.getLoopFor(BB) != &L)
continue;
@@ -2750,7 +2782,8 @@ static bool unswitchBestCondition(
if (CollectGuards)
for (auto &I : *BB)
if (isGuard(&I)) {
- auto *Cond = cast<IntrinsicInst>(&I)->getArgOperand(0);
+ auto *Cond =
+ skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
// TODO: Support AND, OR conditions and partial unswitching.
if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
UnswitchCandidates.push_back({&I, {Cond}});
@@ -2791,11 +2824,10 @@ static bool unswitchBestCondition(
}
}
- Instruction *PartialIVCondBranch = nullptr;
if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
!any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
- return TerminatorAndInvariants.first == L.getHeader()->getTerminator();
- })) {
+ return TerminatorAndInvariants.TI == L.getHeader()->getTerminator();
+ })) {
MemorySSA *MSSA = MSSAU->getMemorySSA();
if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
LLVM_DEBUG(
@@ -2809,10 +2841,22 @@ static bool unswitchBestCondition(
{L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
}
}
+ return !UnswitchCandidates.empty();
+}
- // If we didn't find any candidates, we're done.
- if (UnswitchCandidates.empty())
+static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) {
+ if (!L.isSafeToClone())
return false;
+ for (auto *BB : L.blocks())
+ for (auto &I : *BB) {
+ if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
+ return false;
+ if (auto *CB = dyn_cast<CallBase>(&I)) {
+ assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone().");
+ if (CB->isConvergent())
+ return false;
+ }
+ }
// Check if there are irreducible CFG cycles in this loop. If so, we cannot
// easily unswitch non-trivial edges out of the loop. Doing so might turn the
@@ -2827,7 +2871,6 @@ static bool unswitchBestCondition(
SmallVector<BasicBlock *, 4> ExitBlocks;
L.getUniqueExitBlocks(ExitBlocks);
-
// We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
// instruction as we don't know how to split those exit blocks.
// FIXME: We should teach SplitBlock to handle this and remove this
@@ -2841,10 +2884,13 @@ static bool unswitchBestCondition(
}
}
- LLVM_DEBUG(
- dbgs() << "Considering " << UnswitchCandidates.size()
- << " non-trivial loop invariant conditions for unswitching.\n");
+ return true;
+}
+static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
+ ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L,
+ const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC,
+ const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) {
// Given that unswitching these terminators will require duplicating parts of
// the loop, so we need to be able to model that cost. Compute the ephemeral
// values and set up a data structure to hold per-BB costs. We cache each
@@ -2869,14 +2915,7 @@ static bool unswitchBestCondition(
for (auto &I : *BB) {
if (EphValues.count(&I))
continue;
-
- if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
- return false;
- if (auto *CB = dyn_cast<CallBase>(&I))
- if (CB->isConvergent() || CB->cannotDuplicate())
- return false;
-
- Cost += TTI.getUserCost(&I, CostKind);
+ Cost += TTI.getInstructionCost(&I, CostKind);
}
assert(Cost >= 0 && "Must not have negative costs!");
LoopCost += Cost;
@@ -2958,12 +2997,11 @@ static bool unswitchBestCondition(
"Cannot unswitch a condition without multiple distinct successors!");
return (LoopCost - Cost) * (SuccessorsCount - 1);
};
- Instruction *BestUnswitchTI = nullptr;
- InstructionCost BestUnswitchCost = 0;
- ArrayRef<Value *> BestUnswitchInvariants;
- for (auto &TerminatorAndInvariants : UnswitchCandidates) {
- Instruction &TI = *TerminatorAndInvariants.first;
- ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
+
+ std::optional<NonTrivialUnswitchCandidate> Best;
+ for (auto &Candidate : UnswitchCandidates) {
+ Instruction &TI = *Candidate.TI;
+ ArrayRef<Value *> Invariants = Candidate.Invariants;
BranchInst *BI = dyn_cast<BranchInst>(&TI);
InstructionCost CandidateCost = ComputeUnswitchedCost(
TI, /*FullUnswitch*/ !BI ||
@@ -2986,34 +3024,59 @@ static bool unswitchBestCondition(
<< " for unswitch candidate: " << TI << "\n");
}
- if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
- BestUnswitchTI = &TI;
- BestUnswitchCost = CandidateCost;
- BestUnswitchInvariants = Invariants;
+ if (!Best || CandidateCost < Best->Cost) {
+ Best = Candidate;
+ Best->Cost = CandidateCost;
}
}
- assert(BestUnswitchTI && "Failed to find loop unswitch candidate");
+ assert(Best && "Must be!");
+ return *Best;
+}
- if (BestUnswitchCost >= UnswitchThreshold) {
- LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: "
- << BestUnswitchCost << "\n");
+static bool unswitchBestCondition(
+ Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
+ AAResults &AA, TargetTransformInfo &TTI,
+ function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
+ // Collect all invariant conditions within this loop (as opposed to an inner
+ // loop which would be handled when visiting that inner loop).
+ SmallVector<NonTrivialUnswitchCandidate, 4> UnswitchCandidates;
+ IVConditionInfo PartialIVInfo;
+ Instruction *PartialIVCondBranch = nullptr;
+ // If we didn't find any candidates, we're done.
+ if (!collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
+ PartialIVCondBranch, L, LI, AA, MSSAU))
+ return false;
+
+ LLVM_DEBUG(
+ dbgs() << "Considering " << UnswitchCandidates.size()
+ << " non-trivial loop invariant conditions for unswitching.\n");
+
+ NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate(
+ UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo);
+
+ assert(Best.TI && "Failed to find loop unswitch candidate");
+ assert(Best.Cost && "Failed to compute cost");
+
+ if (*Best.Cost >= UnswitchThreshold) {
+ LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost
+ << "\n");
return false;
}
- if (BestUnswitchTI != PartialIVCondBranch)
+ if (Best.TI != PartialIVCondBranch)
PartialIVInfo.InstToDuplicate.clear();
// If the best candidate is a guard, turn it into a branch.
- if (isGuard(BestUnswitchTI))
- BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L,
- ExitBlocks, DT, LI, MSSAU);
-
- LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = "
- << BestUnswitchCost << ") terminator: " << *BestUnswitchTI
- << "\n");
- unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
- ExitBlocks, PartialIVInfo, DT, LI, AC,
- UnswitchCB, SE, MSSAU, DestroyLoopCB);
+ if (isGuard(Best.TI))
+ Best.TI =
+ turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
+
+ LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
+ << ") terminator: " << *Best.TI << "\n");
+ unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
+ LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB);
return true;
}
@@ -3044,6 +3107,7 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
bool NonTrivial,
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
assert(L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.");
@@ -3080,8 +3144,16 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
if (L.getHeader()->getParent()->hasOptSize())
return false;
- // Skip non-trivial unswitching for loops that cannot be cloned.
- if (!L.isSafeToClone())
+ // Skip cold loops, as unswitching them brings little benefit
+ // but increases the code size
+ if (PSI && PSI->hasProfileSummary() && BFI &&
+ PSI->isFunctionColdInCallGraph(L.getHeader()->getParent(), *BFI)) {
+ LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
+ return false;
+ }
+
+ // Perform legality checks.
+ if (!isSafeForNoNTrivialUnswitching(L, LI))
return false;
// For non-trivial unswitching, because it often creates new loops, we rely on
@@ -3105,7 +3177,11 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
LPMUpdater &U) {
Function &F = *L.getHeader()->getParent();
(void)F;
-
+ ProfileSummaryInfo *PSI = nullptr;
+ if (auto OuterProxy =
+ AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
+ .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
+ PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
<< "\n");
@@ -3144,14 +3220,14 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
U.markLoopAsDeleted(L, Name);
};
- Optional<MemorySSAUpdater> MSSAU;
+ std::optional<MemorySSAUpdater> MSSAU;
if (AR.MSSA) {
MSSAU = MemorySSAUpdater(AR.MSSA);
if (VerifyMemorySSA)
AR.MSSA->verifyMemorySSA();
}
if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
- UnswitchCB, &AR.SE, MSSAU ? MSSAU.getPointer() : nullptr,
+ UnswitchCB, &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI,
DestroyLoopCB))
return PreservedAnalyses::all();
@@ -3214,7 +3290,6 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
<< "\n");
-
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -3251,9 +3326,9 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
-
- bool Changed = unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial,
- UnswitchCB, SE, &MSSAU, DestroyLoopCB);
+ bool Changed =
+ unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
+ &MSSAU, nullptr, nullptr, DestroyLoopCB);
if (VerifyMemorySSA)
MSSA->verifyMemorySSA();