aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp695
1 files changed, 574 insertions, 121 deletions
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 7e08120f923d..ad7d34b61470 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -42,6 +43,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
@@ -73,11 +75,14 @@ using namespace llvm::PatternMatch;
STATISTIC(NumBranches, "Number of branches unswitched");
STATISTIC(NumSwitches, "Number of switches unswitched");
+STATISTIC(NumSelects, "Number of selects turned into branches for unswitching");
STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
STATISTIC(NumTrivial, "Number of unswitches that are trivial");
STATISTIC(
NumCostMultiplierSkipped,
"Number of unswitch candidates that had their cost multiplier skipped");
+STATISTIC(NumInvariantConditionsInjected,
+ "Number of invariant conditions injected and unswitched");
static cl::opt<bool> EnableNonTrivialUnswitch(
"enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
@@ -118,15 +123,53 @@ static cl::opt<bool> FreezeLoopUnswitchCond(
cl::desc("If enabled, the freeze instruction will be added to condition "
"of loop unswitch to prevent miscompilation."));
+static cl::opt<bool> InjectInvariantConditions(
+ "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden,
+ cl::desc("Whether we should inject new invariants and unswitch them to "
+ "eliminate some existing (non-invariant) conditions."),
+ cl::init(true));
+
+static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
+ "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
+ cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
+ "unswitch on them to eliminate branches that are "
+ "not-taken 1/<this option> times or less."),
+ cl::init(16));
+
namespace {
+struct CompareDesc {
+ BranchInst *Term;
+ Value *Invariant;
+ BasicBlock *InLoopSucc;
+
+ CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
+ : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
+};
+
+struct InjectedInvariant {
+ ICmpInst::Predicate Pred;
+ Value *LHS;
+ Value *RHS;
+ BasicBlock *InLoopSucc;
+
+ InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
+ BasicBlock *InLoopSucc)
+ : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
+};
+
struct NonTrivialUnswitchCandidate {
Instruction *TI = nullptr;
TinyPtrVector<Value *> Invariants;
std::optional<InstructionCost> Cost;
+ std::optional<InjectedInvariant> PendingInjection;
NonTrivialUnswitchCandidate(
Instruction *TI, ArrayRef<Value *> Invariants,
- std::optional<InstructionCost> Cost = std::nullopt)
- : TI(TI), Invariants(Invariants), Cost(Cost){};
+ std::optional<InstructionCost> Cost = std::nullopt,
+ std::optional<InjectedInvariant> PendingInjection = std::nullopt)
+ : TI(TI), Invariants(Invariants), Cost(Cost),
+ PendingInjection(PendingInjection) {};
+
+ bool hasPendingInjection() const { return PendingInjection.has_value(); }
};
} // end anonymous namespace.
@@ -434,10 +477,10 @@ static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
// Return the top-most loop containing ExitBB and having ExitBB as exiting block
// or the loop containing ExitBB, if there is no parent loop containing ExitBB
// as exiting block.
-static const Loop *getTopMostExitingLoop(const BasicBlock *ExitBB,
- const LoopInfo &LI) {
- const Loop *TopMost = LI.getLoopFor(ExitBB);
- const Loop *Current = TopMost;
+static Loop *getTopMostExitingLoop(const BasicBlock *ExitBB,
+ const LoopInfo &LI) {
+ Loop *TopMost = LI.getLoopFor(ExitBB);
+ Loop *Current = TopMost;
while (Current) {
if (Current->isLoopExiting(ExitBB))
TopMost = Current;
@@ -750,15 +793,32 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
Loop *OuterL = &L;
if (DefaultExitBB) {
- // Clear out the default destination temporarily to allow accurate
- // predecessor lists to be examined below.
- SI.setDefaultDest(nullptr);
// Check the loop containing this exit.
- Loop *ExitL = LI.getLoopFor(DefaultExitBB);
+ Loop *ExitL = getTopMostExitingLoop(DefaultExitBB, LI);
+ if (!ExitL || ExitL->contains(OuterL))
+ OuterL = ExitL;
+ }
+ for (unsigned Index : ExitCaseIndices) {
+ auto CaseI = SI.case_begin() + Index;
+ // Compute the outer loop from this exit.
+ Loop *ExitL = getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI);
if (!ExitL || ExitL->contains(OuterL))
OuterL = ExitL;
}
+ if (SE) {
+ if (OuterL)
+ SE->forgetLoop(OuterL);
+ else
+ SE->forgetTopmostLoop(&L);
+ }
+
+ if (DefaultExitBB) {
+ // Clear out the default destination temporarily to allow accurate
+ // predecessor lists to be examined below.
+ SI.setDefaultDest(nullptr);
+ }
+
// Store the exit cases into a separate data structure and remove them from
// the switch.
SmallVector<std::tuple<ConstantInt *, BasicBlock *,
@@ -770,10 +830,6 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
// and don't disrupt the earlier indices.
for (unsigned Index : reverse(ExitCaseIndices)) {
auto CaseI = SI.case_begin() + Index;
- // Compute the outer loop from this exit.
- Loop *ExitL = LI.getLoopFor(CaseI->getCaseSuccessor());
- if (!ExitL || ExitL->contains(OuterL))
- OuterL = ExitL;
// Save the value of this case.
auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
@@ -781,13 +837,6 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
SIW.removeCase(CaseI);
}
- if (SE) {
- if (OuterL)
- SE->forgetLoop(OuterL);
- else
- SE->forgetTopmostLoop(&L);
- }
-
// Check if after this all of the remaining cases point at the same
// successor.
BasicBlock *CommonSuccBB = nullptr;
@@ -2079,7 +2128,7 @@ static void unswitchNontrivialInvariants(
AssumptionCache &AC,
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
- function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
+ function_ref<void(Loop &, StringRef)> DestroyLoopCB, bool InsertFreeze) {
auto *ParentBB = TI.getParent();
BranchInst *BI = dyn_cast<BranchInst>(&TI);
SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
@@ -2160,7 +2209,9 @@ static void unswitchNontrivialInvariants(
SmallVector<BasicBlock *, 4> ExitBlocks;
L.getUniqueExitBlocks(ExitBlocks);
for (auto *ExitBB : ExitBlocks) {
- Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
+ // ExitBB can be an exit block for several levels in the loop nest. Make
+ // sure we find the top most.
+ Loop *NewOuterExitL = getTopMostExitingLoop(ExitBB, LI);
if (!NewOuterExitL) {
// We exited the entire nest with this block, so we're done.
OuterExitL = nullptr;
@@ -2181,25 +2232,6 @@ static void unswitchNontrivialInvariants(
SE->forgetBlockAndLoopDispositions();
}
- bool InsertFreeze = false;
- if (FreezeLoopUnswitchCond) {
- ICFLoopSafetyInfo SafetyInfo;
- SafetyInfo.computeLoopSafetyInfo(&L);
- InsertFreeze = !SafetyInfo.isGuaranteedToExecute(TI, &DT, &L);
- }
-
- // Perform the isGuaranteedNotToBeUndefOrPoison() query before the transform,
- // otherwise the branch instruction will have been moved outside the loop
- // already, and may imply that a poison condition is always UB.
- Value *FullUnswitchCond = nullptr;
- if (FullUnswitch) {
- FullUnswitchCond =
- BI ? skipTrivialSelect(BI->getCondition()) : SI->getCondition();
- if (InsertFreeze)
- InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
- FullUnswitchCond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
- }
-
// If the edge from this terminator to a successor dominates that successor,
// store a map from each block in its dominator subtree to it. This lets us
// tell when cloning for a particular successor if a block is dominated by
@@ -2274,10 +2306,11 @@ static void unswitchNontrivialInvariants(
BasicBlock *ClonedPH = ClonedPHs.begin()->second;
BI->setSuccessor(ClonedSucc, ClonedPH);
BI->setSuccessor(1 - ClonedSucc, LoopPH);
+ Value *Cond = skipTrivialSelect(BI->getCondition());
if (InsertFreeze)
- FullUnswitchCond = new FreezeInst(
- FullUnswitchCond, FullUnswitchCond->getName() + ".fr", BI);
- BI->setCondition(FullUnswitchCond);
+ Cond = new FreezeInst(
+ Cond, Cond->getName() + ".fr", BI);
+ BI->setCondition(Cond);
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
} else {
assert(SI && "Must either be a branch or switch!");
@@ -2294,7 +2327,7 @@ static void unswitchNontrivialInvariants(
if (InsertFreeze)
SI->setCondition(new FreezeInst(
- FullUnswitchCond, FullUnswitchCond->getName() + ".fr", SI));
+ SI->getCondition(), SI->getCondition()->getName() + ".fr", SI));
// We need to use the set to populate domtree updates as even when there
// are multiple cases pointing at the same successor we only want to
@@ -2593,6 +2626,57 @@ static InstructionCost computeDomSubtreeCost(
return Cost;
}
+/// Turns a select instruction into implicit control flow branch,
+/// making the following replacement:
+///
+/// head:
+/// --code before select--
+/// select %cond, %trueval, %falseval
+/// --code after select--
+///
+/// into
+///
+/// head:
+/// --code before select--
+/// br i1 %cond, label %then, label %tail
+///
+/// then:
+/// br %tail
+///
+/// tail:
+/// phi [ %trueval, %then ], [ %falseval, %head]
+/// unreachable
+///
+/// It also makes all relevant DT and LI updates, so that all structures are in
+/// valid state after this transform.
+static BranchInst *turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT,
+ LoopInfo &LI, MemorySSAUpdater *MSSAU,
+ AssumptionCache *AC) {
+ LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n");
+ BasicBlock *HeadBB = SI->getParent();
+
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ SplitBlockAndInsertIfThen(SI->getCondition(), SI, false,
+ SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
+ auto *CondBr = cast<BranchInst>(HeadBB->getTerminator());
+ BasicBlock *ThenBB = CondBr->getSuccessor(0),
+ *TailBB = CondBr->getSuccessor(1);
+ if (MSSAU)
+ MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
+
+ PHINode *Phi = PHINode::Create(SI->getType(), 2, "unswitched.select", SI);
+ Phi->addIncoming(SI->getTrueValue(), ThenBB);
+ Phi->addIncoming(SI->getFalseValue(), HeadBB);
+ SI->replaceAllUsesWith(Phi);
+ SI->eraseFromParent();
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
+ ++NumSelects;
+ return CondBr;
+}
+
/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
/// making the following replacement:
///
@@ -2624,15 +2708,10 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
- // Remove all CheckBB's successors from DomTree. A block can be seen among
- // successors more than once, but for DomTree it should be added only once.
- SmallPtrSet<BasicBlock *, 4> Successors;
- for (auto *Succ : successors(CheckBB))
- if (Successors.insert(Succ).second)
- DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
-
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
Instruction *DeoptBlockTerm =
- SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
+ SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true,
+ GI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
// SplitBlockAndInsertIfThen inserts control flow that branches to
// DeoptBlockTerm if the condition is true. We want the opposite.
@@ -2649,20 +2728,6 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
GI->moveBefore(DeoptBlockTerm);
GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));
- // Add new successors of CheckBB into DomTree.
- for (auto *Succ : successors(CheckBB))
- DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
-
- // Now the blocks that used to be CheckBB's successors are GuardedBlock's
- // successors.
- for (auto *Succ : Successors)
- DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
-
- // Make proper changes to DT.
- DT.applyUpdates(DTUpdates);
- // Inform LI of a new loop block.
- L.addBasicBlockToLoop(GuardedBlock, LI);
-
if (MSSAU) {
MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
@@ -2670,6 +2735,8 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
MSSAU->getMemorySSA()->verifyMemorySSA();
}
+ if (VerifyLoopInfo)
+ LI.verify(DT);
++NumGuards;
return CheckBI;
}
@@ -2700,9 +2767,10 @@ static int CalculateUnswitchCostMultiplier(
const BasicBlock *CondBlock = TI.getParent();
if (DT.dominates(CondBlock, Latch) &&
(isGuard(&TI) ||
- llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
- return L.contains(SuccBB);
- }) <= 1)) {
+ (TI.isTerminator() &&
+ llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
+ return L.contains(SuccBB);
+ }) <= 1))) {
NumCostMultiplierSkipped++;
return 1;
}
@@ -2711,12 +2779,17 @@ static int CalculateUnswitchCostMultiplier(
int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
: std::distance(LI.begin(), LI.end()));
// Count amount of clones that all the candidates might cause during
- // unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
+ // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
+ // cases.
int UnswitchedClones = 0;
- for (auto Candidate : UnswitchCandidates) {
+ for (const auto &Candidate : UnswitchCandidates) {
const Instruction *CI = Candidate.TI;
const BasicBlock *CondBlock = CI->getParent();
bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
+ if (isa<SelectInst>(CI)) {
+ UnswitchedClones++;
+ continue;
+ }
if (isGuard(CI)) {
if (!SkipExitingSuccessors)
UnswitchedClones++;
@@ -2766,6 +2839,24 @@ static bool collectUnswitchCandidates(
const Loop &L, const LoopInfo &LI, AAResults &AA,
const MemorySSAUpdater *MSSAU) {
assert(UnswitchCandidates.empty() && "Should be!");
+
+ auto AddUnswitchCandidatesForInst = [&](Instruction *I, Value *Cond) {
+ Cond = skipTrivialSelect(Cond);
+ if (isa<Constant>(Cond))
+ return;
+ if (L.isLoopInvariant(Cond)) {
+ UnswitchCandidates.push_back({I, {Cond}});
+ return;
+ }
+ if (match(Cond, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
+ TinyPtrVector<Value *> Invariants =
+ collectHomogenousInstGraphLoopInvariants(
+ L, *static_cast<Instruction *>(Cond), LI);
+ if (!Invariants.empty())
+ UnswitchCandidates.push_back({I, std::move(Invariants)});
+ }
+ };
+
// Whether or not we should also collect guards in the loop.
bool CollectGuards = false;
if (UnswitchGuards) {
@@ -2779,15 +2870,20 @@ static bool collectUnswitchCandidates(
if (LI.getLoopFor(BB) != &L)
continue;
- if (CollectGuards)
- for (auto &I : *BB)
- if (isGuard(&I)) {
- auto *Cond =
- skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
- // TODO: Support AND, OR conditions and partial unswitching.
- if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
- UnswitchCandidates.push_back({&I, {Cond}});
- }
+ for (auto &I : *BB) {
+ if (auto *SI = dyn_cast<SelectInst>(&I)) {
+ auto *Cond = SI->getCondition();
+ // Do not unswitch vector selects and logical and/or selects
+ if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
+ AddUnswitchCandidatesForInst(SI, Cond);
+ } else if (CollectGuards && isGuard(&I)) {
+ auto *Cond =
+ skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
+ // TODO: Support AND, OR conditions and partial unswitching.
+ if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
+ UnswitchCandidates.push_back({&I, {Cond}});
+ }
+ }
if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
// We can only consider fully loop-invariant switch conditions as we need
@@ -2799,29 +2895,11 @@ static bool collectUnswitchCandidates(
}
auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
- if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
+ if (!BI || !BI->isConditional() ||
BI->getSuccessor(0) == BI->getSuccessor(1))
continue;
- Value *Cond = skipTrivialSelect(BI->getCondition());
- if (isa<Constant>(Cond))
- continue;
-
- if (L.isLoopInvariant(Cond)) {
- UnswitchCandidates.push_back({BI, {Cond}});
- continue;
- }
-
- Instruction &CondI = *cast<Instruction>(Cond);
- if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
- TinyPtrVector<Value *> Invariants =
- collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
- if (Invariants.empty())
- continue;
-
- UnswitchCandidates.push_back({BI, std::move(Invariants)});
- continue;
- }
+ AddUnswitchCandidatesForInst(BI, BI->getCondition());
}
if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
@@ -2844,6 +2922,303 @@ static bool collectUnswitchCandidates(
return !UnswitchCandidates.empty();
}
+/// Tries to canonicalize condition described by:
+///
+/// br (LHS pred RHS), label IfTrue, label IfFalse
+///
+/// into its equivalent where `Pred` is something that we support for injected
+/// invariants (so far it is limited to ult), LHS in canonicalized form is
+/// non-invariant and RHS is an invariant.
+static void canonicalizeForInvariantConditionInjection(
+ ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue,
+ BasicBlock *&IfFalse, const Loop &L) {
+ if (!L.contains(IfTrue)) {
+ Pred = ICmpInst::getInversePredicate(Pred);
+ std::swap(IfTrue, IfFalse);
+ }
+
+ // Move loop-invariant argument to RHS position.
+ if (L.isLoopInvariant(LHS)) {
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ std::swap(LHS, RHS);
+ }
+
+ if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) {
+ // Turn "x >=s 0" into "x <u UMIN_INT"
+ Pred = ICmpInst::ICMP_ULT;
+ RHS = ConstantInt::get(
+ RHS->getContext(),
+ APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth()));
+ }
+}
+
+/// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS )
+/// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by
+/// injecting a loop-invariant condition.
+static bool shouldTryInjectInvariantCondition(
+ const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS,
+ const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) {
+ if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
+ return false;
+ // TODO: Support other predicates.
+ if (Pred != ICmpInst::ICMP_ULT)
+ return false;
+ // TODO: Support non-loop-exiting branches?
+ if (!L.contains(IfTrue) || L.contains(IfFalse))
+ return false;
+ // FIXME: For some reason this causes problems with MSSA updates, need to
+ // investigate why. So far, just don't unswitch latch.
+ if (L.getHeader() == IfTrue)
+ return false;
+ return true;
+}
+
+/// Returns true, if metadata on \p BI allows us to optimize branching into \p
+/// TakenSucc via injection of invariant conditions. The branch should be not
+/// enough and not previously unswitched, the information about this comes from
+/// the metadata.
+bool shouldTryInjectBasingOnMetadata(const BranchInst *BI,
+ const BasicBlock *TakenSucc) {
+ // Skip branches that have already been unswithed this way. After successful
+ // unswitching of injected condition, we will still have a copy of this loop
+ // which looks exactly the same as original one. To prevent the 2nd attempt
+ // of unswitching it in the same pass, mark this branch as "nothing to do
+ // here".
+ if (BI->hasMetadata("llvm.invariant.condition.injection.disabled"))
+ return false;
+ SmallVector<uint32_t> Weights;
+ if (!extractBranchWeights(*BI, Weights))
+ return false;
+ unsigned T = InjectInvariantConditionHotnesThreshold;
+ BranchProbability LikelyTaken(T - 1, T);
+
+ assert(Weights.size() == 2 && "Unexpected profile data!");
+ size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;
+ auto Num = Weights[Idx];
+ auto Denom = Weights[0] + Weights[1];
+ // Degenerate or overflowed metadata.
+ if (Denom == 0 || Num > Denom)
+ return false;
+ BranchProbability ActualTaken(Num, Denom);
+ if (LikelyTaken > ActualTaken)
+ return false;
+ return true;
+}
+
+/// Materialize pending invariant condition of the given candidate into IR. The
+/// injected loop-invariant condition implies the original loop-variant branch
+/// condition, so the materialization turns
+///
+/// loop_block:
+/// ...
+/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
+///
+/// into
+///
+/// preheader:
+/// %invariant_cond = LHS pred RHS
+/// ...
+/// loop_block:
+/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
+/// OriginalCheck:
+/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
+/// ...
+static NonTrivialUnswitchCandidate
+injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
+ DominatorTree &DT, LoopInfo &LI,
+ AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
+ assert(Candidate.hasPendingInjection() && "Nothing to inject!");
+ BasicBlock *Preheader = L.getLoopPreheader();
+ assert(Preheader && "Loop is not in simplified form?");
+ assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&
+ "Unswitching branch of inner loop!");
+
+ auto Pred = Candidate.PendingInjection->Pred;
+ auto *LHS = Candidate.PendingInjection->LHS;
+ auto *RHS = Candidate.PendingInjection->RHS;
+ auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
+ auto *TI = cast<BranchInst>(Candidate.TI);
+ auto *BB = Candidate.TI->getParent();
+ auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
+ : TI->getSuccessor(0);
+ // FIXME: Remove this once limitation on successors is lifted.
+ assert(L.contains(InLoopSucc) && "Not supported yet!");
+ assert(!L.contains(OutOfLoopSucc) && "Not supported yet!");
+ auto &Ctx = BB->getContext();
+
+ IRBuilder<> Builder(Preheader->getTerminator());
+ assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!");
+ if (LHS->getType() != RHS->getType()) {
+ if (LHS->getType()->getIntegerBitWidth() <
+ RHS->getType()->getIntegerBitWidth())
+ LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide");
+ else
+ RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide");
+ }
+ // Do not use builder here: CreateICmp may simplify this into a constant and
+ // unswitching will break. Better optimize it away later.
+ auto *InjectedCond =
+ ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",
+ Preheader->getTerminator());
+ auto *OldCond = TI->getCondition();
+
+ BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check",
+ BB->getParent(), InLoopSucc);
+ Builder.SetInsertPoint(TI);
+ auto *InvariantBr =
+ Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
+
+ Builder.SetInsertPoint(CheckBlock);
+ auto *NewTerm = Builder.CreateCondBr(OldCond, InLoopSucc, OutOfLoopSucc);
+
+ TI->eraseFromParent();
+ // Prevent infinite unswitching.
+ NewTerm->setMetadata("llvm.invariant.condition.injection.disabled",
+ MDNode::get(BB->getContext(), {}));
+
+ // Fixup phis.
+ for (auto &I : *InLoopSucc) {
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
+ break;
+ auto *Inc = PN->getIncomingValueForBlock(BB);
+ PN->addIncoming(Inc, CheckBlock);
+ }
+ OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
+
+ SmallVector<DominatorTree::UpdateType, 4> DTUpdates = {
+ { DominatorTree::Insert, BB, CheckBlock },
+ { DominatorTree::Insert, CheckBlock, InLoopSucc },
+ { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
+ { DominatorTree::Delete, BB, OutOfLoopSucc }
+ };
+
+ DT.applyUpdates(DTUpdates);
+ if (MSSAU)
+ MSSAU->applyUpdates(DTUpdates, DT);
+ L.addBasicBlockToLoop(CheckBlock, LI);
+
+#ifndef NDEBUG
+ DT.verify();
+ LI.verify(DT);
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+#endif
+
+ // TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
+ // higher because we have just inserted a new block. Need to think how to
+ // adjust the cost of injected candidates when it was first computed.
+ LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBr
+ << " and considering it for unswitching.");
+ ++NumInvariantConditionsInjected;
+ return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
+ Candidate.Cost);
+}
+
+/// Given chain of loop branch conditions looking like:
+/// br (Variant < Invariant1)
+/// br (Variant < Invariant2)
+/// br (Variant < Invariant3)
+/// ...
+/// collect set of invariant conditions on which we want to unswitch, which
+/// look like:
+/// Invariant1 <= Invariant2
+/// Invariant2 <= Invariant3
+/// ...
+/// Though they might not immediately exist in the IR, we can still inject them.
+static bool insertCandidatesWithPendingInjections(
+ SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
+ ICmpInst::Predicate Pred, ArrayRef<CompareDesc> Compares,
+ const DominatorTree &DT) {
+
+ assert(ICmpInst::isRelational(Pred));
+ assert(ICmpInst::isStrictPredicate(Pred));
+ if (Compares.size() < 2)
+ return false;
+ ICmpInst::Predicate NonStrictPred = ICmpInst::getNonStrictPredicate(Pred);
+ for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
+ Next != Compares.end(); ++Prev, ++Next) {
+ Value *LHS = Next->Invariant;
+ Value *RHS = Prev->Invariant;
+ BasicBlock *InLoopSucc = Prev->InLoopSucc;
+ InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc);
+ NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
+ std::nullopt, std::move(ToInject));
+ UnswitchCandidates.push_back(std::move(Candidate));
+ }
+ return true;
+}
+
+/// Collect unswitch candidates by invariant conditions that are not immediately
+/// present in the loop. However, they can be injected into the code if we
+/// decide it's profitable.
+/// An example of such conditions is following:
+///
+/// for (...) {
+/// x = load ...
+/// if (! x <u C1) break;
+/// if (! x <u C2) break;
+/// <do something>
+/// }
+///
+/// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <=
+/// C2" automatically implies "x <u C2", so we can get rid of one of
+/// loop-variant checks in unswitched loop version.
+static bool collectUnswitchCandidatesWithInjections(
+ SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
+ IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
+ const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
+ const MemorySSAUpdater *MSSAU) {
+ if (!InjectInvariantConditions)
+ return false;
+
+ if (!DT.isReachableFromEntry(L.getHeader()))
+ return false;
+ auto *Latch = L.getLoopLatch();
+ // Need to have a single latch and a preheader.
+ if (!Latch)
+ return false;
+ assert(L.getLoopPreheader() && "Must have a preheader!");
+
+ DenseMap<Value *, SmallVector<CompareDesc, 4> > CandidatesULT;
+ // Traverse the conditions that dominate latch (and therefore dominate each
+ // other).
+ for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
+ DTN = DTN->getIDom()) {
+ ICmpInst::Predicate Pred;
+ Value *LHS = nullptr, *RHS = nullptr;
+ BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
+ auto *BB = DTN->getBlock();
+ // Ignore inner loops.
+ if (LI.getLoopFor(BB) != &L)
+ continue;
+ auto *Term = BB->getTerminator();
+ if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
+ m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))))
+ continue;
+ if (!LHS->getType()->isIntegerTy())
+ continue;
+ canonicalizeForInvariantConditionInjection(Pred, LHS, RHS, IfTrue, IfFalse,
+ L);
+ if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L))
+ continue;
+ if (!shouldTryInjectBasingOnMetadata(cast<BranchInst>(Term), IfTrue))
+ continue;
+ // Strip ZEXT for unsigned predicate.
+ // TODO: once signed predicates are supported, also strip SEXT.
+ CompareDesc Desc(cast<BranchInst>(Term), RHS, IfTrue);
+ while (auto *Zext = dyn_cast<ZExtInst>(LHS))
+ LHS = Zext->getOperand(0);
+ CandidatesULT[LHS].push_back(Desc);
+ }
+
+ bool Found = false;
+ for (auto &It : CandidatesULT)
+ Found |= insertCandidatesWithPendingInjections(
+ UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
+ return Found;
+}
+
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) {
if (!L.isSafeToClone())
return false;
@@ -2943,6 +3318,10 @@ static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
// cost for that terminator.
auto ComputeUnswitchedCost = [&](Instruction &TI,
bool FullUnswitch) -> InstructionCost {
+ // Unswitching selects unswitches the entire loop.
+ if (isa<SelectInst>(TI))
+ return LoopCost;
+
BasicBlock &BB = *TI.getParent();
SmallPtrSet<BasicBlock *, 4> Visited;
@@ -3003,10 +3382,11 @@ static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
Instruction &TI = *Candidate.TI;
ArrayRef<Value *> Invariants = Candidate.Invariants;
BranchInst *BI = dyn_cast<BranchInst>(&TI);
- InstructionCost CandidateCost = ComputeUnswitchedCost(
- TI, /*FullUnswitch*/ !BI ||
- (Invariants.size() == 1 &&
- Invariants[0] == skipTrivialSelect(BI->getCondition())));
+ bool FullUnswitch =
+ !BI || Candidate.hasPendingInjection() ||
+ (Invariants.size() == 1 &&
+ Invariants[0] == skipTrivialSelect(BI->getCondition()));
+ InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
// Calculate cost multiplier which is a tool to limit potentially
// exponential behavior of loop-unswitch.
if (EnableUnswitchCostMultiplier) {
@@ -3033,6 +3413,32 @@ static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
return *Best;
}
+// Insert a freeze on an unswitched branch if all is true:
+// 1. freeze-loop-unswitch-cond option is true
+// 2. The branch may not execute in the loop pre-transformation. If a branch may
+// not execute and could cause UB, it would always cause UB if it is hoisted outside
+// of the loop. Insert a freeze to prevent this case.
+// 3. The branch condition may be poison or undef
+static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT,
+ AssumptionCache &AC) {
+ assert(isa<BranchInst>(TI) || isa<SwitchInst>(TI));
+ if (!FreezeLoopUnswitchCond)
+ return false;
+
+ ICFLoopSafetyInfo SafetyInfo;
+ SafetyInfo.computeLoopSafetyInfo(&L);
+ if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
+ return false;
+
+ Value *Cond;
+ if (BranchInst *BI = dyn_cast<BranchInst>(&TI))
+ Cond = skipTrivialSelect(BI->getCondition());
+ else
+ Cond = skipTrivialSelect(cast<SwitchInst>(&TI)->getCondition());
+ return !isGuaranteedNotToBeUndefOrPoison(
+ Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
+}
+
static bool unswitchBestCondition(
Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
AAResults &AA, TargetTransformInfo &TTI,
@@ -3044,9 +3450,13 @@ static bool unswitchBestCondition(
SmallVector<NonTrivialUnswitchCandidate, 4> UnswitchCandidates;
IVConditionInfo PartialIVInfo;
Instruction *PartialIVCondBranch = nullptr;
+ collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
+ PartialIVCondBranch, L, LI, AA, MSSAU);
+ collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
+ PartialIVCondBranch, L, DT, LI, AA,
+ MSSAU);
// If we didn't find any candidates, we're done.
- if (!collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
- PartialIVCondBranch, L, LI, AA, MSSAU))
+ if (UnswitchCandidates.empty())
return false;
LLVM_DEBUG(
@@ -3065,18 +3475,36 @@ static bool unswitchBestCondition(
return false;
}
+ if (Best.hasPendingInjection())
+ Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);
+ assert(!Best.hasPendingInjection() &&
+ "All injections should have been done by now!");
+
if (Best.TI != PartialIVCondBranch)
PartialIVInfo.InstToDuplicate.clear();
- // If the best candidate is a guard, turn it into a branch.
- if (isGuard(Best.TI))
- Best.TI =
- turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
+ bool InsertFreeze;
+ if (auto *SI = dyn_cast<SelectInst>(Best.TI)) {
+ // If the best candidate is a select, turn it into a branch. Select
+ // instructions with a poison conditional do not propagate poison, but
+ // branching on poison causes UB. Insert a freeze on the select
+ // conditional to prevent UB after turning the select into a branch.
+ InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
+ SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
+ Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);
+ } else {
+ // If the best candidate is a guard, turn it into a branch.
+ if (isGuard(Best.TI))
+ Best.TI =
+ turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
+ InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC);
+ }
LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
<< ") terminator: " << *Best.TI << "\n");
unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
- LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB);
+ LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB,
+ InsertFreeze);
return true;
}
@@ -3124,6 +3552,8 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
return true;
}
+ const Function *F = L.getHeader()->getParent();
+
// Check whether we should continue with non-trivial conditions.
// EnableNonTrivialUnswitch: Global variable that forces non-trivial
// unswitching for testing and debugging.
@@ -3136,18 +3566,41 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
// branches even on targets that have divergence.
// https://bugs.llvm.org/show_bug.cgi?id=48819
bool ContinueWithNonTrivial =
- EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence());
+ EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence(F));
if (!ContinueWithNonTrivial)
return false;
// Skip non-trivial unswitching for optsize functions.
- if (L.getHeader()->getParent()->hasOptSize())
+ if (F->hasOptSize())
return false;
- // Skip cold loops, as unswitching them brings little benefit
- // but increases the code size
- if (PSI && PSI->hasProfileSummary() && BFI &&
- PSI->isFunctionColdInCallGraph(L.getHeader()->getParent(), *BFI)) {
+ // Returns true if Loop L's loop nest is cold, i.e. if the headers of L,
+ // of the loops L is nested in, and of the loops nested in L are all cold.
+ auto IsLoopNestCold = [&](const Loop *L) {
+ // Check L and all of its parent loops.
+ auto *Parent = L;
+ while (Parent) {
+ if (!PSI->isColdBlock(Parent->getHeader(), BFI))
+ return false;
+ Parent = Parent->getParentLoop();
+ }
+ // Next check all loops nested within L.
+ SmallVector<const Loop *, 4> Worklist;
+ Worklist.insert(Worklist.end(), L->getSubLoops().begin(),
+ L->getSubLoops().end());
+ while (!Worklist.empty()) {
+ auto *CurLoop = Worklist.pop_back_val();
+ if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))
+ return false;
+ Worklist.insert(Worklist.end(), CurLoop->getSubLoops().begin(),
+ CurLoop->getSubLoops().end());
+ }
+ return true;
+ };
+
+ // Skip cold loops in cold loop nests, as unswitching them brings little
+ // benefit but increases the code size
+ if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {
LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
return false;
}
@@ -3249,10 +3702,10 @@ void SimpleLoopUnswitchPass::printPipeline(
static_cast<PassInfoMixin<SimpleLoopUnswitchPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
- OS << "<";
+ OS << '<';
OS << (NonTrivial ? "" : "no-") << "nontrivial;";
OS << (Trivial ? "" : "no-") << "trivial";
- OS << ">";
+ OS << '>';
}
namespace {