aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/LoopUnswitch.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnswitch.cpp321
1 files changed, 277 insertions, 44 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 645a89bbd0ff..18717394d384 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -32,6 +32,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -98,6 +99,12 @@ static cl::opt<unsigned>
Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
cl::init(100), cl::Hidden);
+static cl::opt<unsigned>
+ MSSAThreshold("loop-unswitch-memoryssa-threshold",
+ cl::desc("Max number of memory uses to explore during "
+ "partial unswitching analysis"),
+ cl::init(100), cl::Hidden);
+
namespace {
class LUAnalysisCache {
@@ -184,6 +191,7 @@ namespace {
Loop *CurrentLoop = nullptr;
DominatorTree *DT = nullptr;
MemorySSA *MSSA = nullptr;
+ AAResults *AA = nullptr;
std::unique_ptr<MemorySSAUpdater> MSSAU;
BasicBlock *LoopHeader = nullptr;
BasicBlock *LoopPreheader = nullptr;
@@ -217,6 +225,10 @@ namespace {
/// loop preheaders be inserted into the CFG.
///
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ // Lazy BFI and BPI are marked as preserved here so Loop Unswitching
+ // can remain part of the same loop pass as LICM
+ AU.addPreserved<LazyBlockFrequencyInfoPass>();
+ AU.addPreserved<LazyBranchProbabilityInfoPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
if (EnableMSSALoopDependency) {
@@ -244,19 +256,22 @@ namespace {
bool tryTrivialLoopUnswitch(bool &Changed);
bool unswitchIfProfitable(Value *LoopCond, Constant *Val,
- Instruction *TI = nullptr);
+ Instruction *TI = nullptr,
+ ArrayRef<Instruction *> ToDuplicate = {});
void unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
BasicBlock *ExitBlock, Instruction *TI);
void unswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L,
- Instruction *TI);
+ Instruction *TI,
+ ArrayRef<Instruction *> ToDuplicate = {});
void rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Constant *Val, bool IsEqual);
- void emitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
- BasicBlock *TrueDest,
- BasicBlock *FalseDest,
- BranchInst *OldBranch, Instruction *TI);
+ void
+ emitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
+ BasicBlock *TrueDest, BasicBlock *FalseDest,
+ BranchInst *OldBranch, Instruction *TI,
+ ArrayRef<Instruction *> ToDuplicate = {});
void simplifyCode(std::vector<Instruction *> &Worklist, Loop *L);
@@ -523,6 +538,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
LPM = &LPMRef;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
if (EnableMSSALoopDependency) {
MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
@@ -624,6 +640,145 @@ static bool equalityPropUnSafe(Value &LoopCond) {
return false;
}
+/// Check if the loop header has a conditional branch that is not
+/// loop-invariant, because it involves load instructions. If all paths from
+/// either the true or false successor to the header or loop exists do not
+/// modify the memory feeding the condition, perform 'partial unswitching'. That
+/// is, duplicate the instructions feeding the condition in the pre-header. Then
+/// unswitch on the duplicated condition. The condition is now known in the
+/// unswitched version for the 'invariant' path through the original loop.
+///
+/// If the branch condition of the header is partially invariant, return a pair
+/// containing the instructions to duplicate and a boolean Constant to update
+/// the condition in the loops created for the true or false successors.
+static std::pair<SmallVector<Instruction *, 4>, Constant *>
+hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
+ SmallVector<Instruction *, 4> ToDuplicate;
+
+ auto *TI = dyn_cast<BranchInst>(L->getHeader()->getTerminator());
+ if (!TI || !TI->isConditional())
+ return {};
+
+ auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
+ // The case with the condition outside the loop should already be handled
+ // earlier.
+ if (!CondI || !L->contains(CondI))
+ return {};
+
+ ToDuplicate.push_back(CondI);
+
+ SmallVector<Value *, 4> WorkList;
+ WorkList.append(CondI->op_begin(), CondI->op_end());
+
+ SmallVector<MemoryAccess *, 4> AccessesToCheck;
+ SmallVector<MemoryLocation, 4> AccessedLocs;
+ while (!WorkList.empty()) {
+ Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
+ if (!I || !L->contains(I))
+ continue;
+
+ // TODO: support additional instructions.
+ if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
+ return {};
+
+ // Do not duplicate volatile and atomic loads.
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ if (LI->isVolatile() || LI->isAtomic())
+ return {};
+
+ ToDuplicate.push_back(I);
+ if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
+ if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {
+ // Queue the defining access to check for alias checks.
+ AccessesToCheck.push_back(MemUse->getDefiningAccess());
+ AccessedLocs.push_back(MemoryLocation::get(I));
+ } else {
+ // MemoryDefs may clobber the location or may be atomic memory
+ // operations. Bail out.
+ return {};
+ }
+ }
+ WorkList.append(I->op_begin(), I->op_end());
+ }
+
+ if (ToDuplicate.size() <= 1)
+ return {};
+
+ auto HasNoClobbersOnPath =
+ [L, AA, &AccessedLocs](BasicBlock *Succ, BasicBlock *Header,
+ SmallVector<MemoryAccess *, 4> AccessesToCheck) {
+ // First, collect all blocks in the loop that are on a patch from Succ
+ // to the header.
+ SmallVector<BasicBlock *, 4> WorkList;
+ WorkList.push_back(Succ);
+ WorkList.push_back(Header);
+ SmallPtrSet<BasicBlock *, 4> Seen;
+ Seen.insert(Header);
+ while (!WorkList.empty()) {
+ BasicBlock *Current = WorkList.pop_back_val();
+ if (!L->contains(Current))
+ continue;
+ const auto &SeenIns = Seen.insert(Current);
+ if (!SeenIns.second)
+ continue;
+
+ WorkList.append(succ_begin(Current), succ_end(Current));
+ }
+
+ // Require at least 2 blocks on a path through the loop. This skips
+ // paths that directly exit the loop.
+ if (Seen.size() < 2)
+ return false;
+
+ // Next, check if there are any MemoryDefs that are on the path through
+ // the loop (in the Seen set) and they may-alias any of the locations in
+ // AccessedLocs. If that is the case, they may modify the condition and
+ // partial unswitching is not possible.
+ SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
+ while (!AccessesToCheck.empty()) {
+ MemoryAccess *Current = AccessesToCheck.pop_back_val();
+ auto SeenI = SeenAccesses.insert(Current);
+ if (!SeenI.second || !Seen.contains(Current->getBlock()))
+ continue;
+
+ // Bail out if exceeded the threshold.
+ if (SeenAccesses.size() >= MSSAThreshold)
+ return false;
+
+ // MemoryUse are read-only accesses.
+ if (isa<MemoryUse>(Current))
+ continue;
+
+ // For a MemoryDef, check if is aliases any of the location feeding
+ // the original condition.
+ if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
+ if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) {
+ return isModSet(
+ AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc));
+ }))
+ return false;
+ }
+
+ for (Use &U : Current->uses())
+ AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
+ }
+
+ return true;
+ };
+
+ // If we branch to the same successor, partial unswitching will not be
+ // beneficial.
+ if (TI->getSuccessor(0) == TI->getSuccessor(1))
+ return {};
+
+ if (HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(), AccessesToCheck))
+ return {ToDuplicate, ConstantInt::getTrue(TI->getContext())};
+ if (HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(), AccessesToCheck))
+ return {ToDuplicate, ConstantInt::getFalse(TI->getContext())};
+
+ return {};
+}
+
/// Do actual work and unswitch loop if possible and profitable.
bool LoopUnswitch::processCurrentLoop() {
bool Changed = false;
@@ -661,7 +816,7 @@ bool LoopUnswitch::processCurrentLoop() {
// FIXME: Use Function::hasOptSize().
if (OptimizeForSize ||
LoopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
- return false;
+ return Changed;
// Run through the instructions in the loop, keeping track of three things:
//
@@ -685,10 +840,10 @@ bool LoopUnswitch::processCurrentLoop() {
if (!CB)
continue;
if (CB->isConvergent())
- return false;
+ return Changed;
if (auto *II = dyn_cast<InvokeInst>(&I))
if (!II->getUnwindDest()->canSplitPredecessors())
- return false;
+ return Changed;
if (auto *II = dyn_cast<IntrinsicInst>(&I))
if (II->getIntrinsicID() == Intrinsic::experimental_guard)
Guards.push_back(II);
@@ -823,6 +978,28 @@ bool LoopUnswitch::processCurrentLoop() {
}
}
}
+
+ // Check if there is a header condition that is invariant along the patch from
+ // either the true or false successors to the header. This allows unswitching
+ // conditions depending on memory accesses, if there's a path not clobbering
+ // the memory locations. Check if this transform has been disabled using
+ // metadata, to avoid unswitching the same loop multiple times.
+ if (MSSA &&
+ !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) {
+ auto ToDuplicate = hasPartialIVCondition(CurrentLoop, *MSSA, AA);
+ if (!ToDuplicate.first.empty()) {
+ LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition "
+ << *ToDuplicate.first[0] << "\n");
+ ++NumBranches;
+ unswitchIfProfitable(ToDuplicate.first[0], ToDuplicate.second,
+ CurrentLoop->getHeader()->getTerminator(),
+ ToDuplicate.first);
+
+ RedoLoop = false;
+ return true;
+ }
+ }
+
return Changed;
}
@@ -880,7 +1057,8 @@ static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
/// simplify the loop. If we decide that this is profitable,
/// unswitch the loop, reprocess the pieces, then return true.
bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val,
- Instruction *TI) {
+ Instruction *TI,
+ ArrayRef<Instruction *> ToDuplicate) {
// Check to see if it would be profitable to unswitch current loop.
if (!BranchesInfo.costAllowsUnswitching()) {
LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
@@ -900,31 +1078,65 @@ bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val,
return false;
}
- unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI);
+ unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI, ToDuplicate);
return true;
}
/// Emit a conditional branch on two values if LIC == Val, branch to TrueDst,
/// otherwise branch to FalseDest. Insert the code immediately before OldBranch
/// and remove (but not erase!) it from the function.
-void LoopUnswitch::emitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
- BasicBlock *TrueDest,
- BasicBlock *FalseDest,
- BranchInst *OldBranch,
- Instruction *TI) {
+void LoopUnswitch::emitPreheaderBranchOnCondition(
+ Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest,
+ BranchInst *OldBranch, Instruction *TI,
+ ArrayRef<Instruction *> ToDuplicate) {
assert(OldBranch->isUnconditional() && "Preheader is not split correctly");
assert(TrueDest != FalseDest && "Branch targets should be different");
+
// Insert a conditional branch on LIC to the two preheaders. The original
// code is the true version and the new code is the false version.
Value *BranchVal = LIC;
bool Swapped = false;
- if (!isa<ConstantInt>(Val) ||
- Val->getType() != Type::getInt1Ty(LIC->getContext()))
- BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val);
- else if (Val != ConstantInt::getTrue(Val->getContext())) {
- // We want to enter the new loop when the condition is true.
- std::swap(TrueDest, FalseDest);
- Swapped = true;
+
+ if (!ToDuplicate.empty()) {
+ ValueToValueMapTy Old2New;
+ for (Instruction *I : reverse(ToDuplicate)) {
+ auto *New = I->clone();
+ New->insertBefore(OldBranch);
+ RemapInstruction(New, Old2New,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ Old2New[I] = New;
+
+ if (MSSAU) {
+ MemorySSA *MSSA = MSSAU->getMemorySSA();
+ auto *MemA = dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(I));
+ if (!MemA)
+ continue;
+
+ Loop *L = LI->getLoopFor(I->getParent());
+ auto *DefiningAccess = MemA->getDefiningAccess();
+ // If the defining access is a MemoryPhi in the header, get the incoming
+ // value for the pre-header as defining access.
+ if (DefiningAccess->getBlock() == I->getParent()) {
+ if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) {
+ DefiningAccess =
+ MemPhi->getIncomingValueForBlock(L->getLoopPreheader());
+ }
+ }
+ MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(),
+ MemorySSA::BeforeTerminator);
+ }
+ }
+ BranchVal = Old2New[ToDuplicate[0]];
+ } else {
+
+ if (!isa<ConstantInt>(Val) ||
+ Val->getType() != Type::getInt1Ty(LIC->getContext()))
+ BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val);
+ else if (Val != ConstantInt::getTrue(Val->getContext())) {
+ // We want to enter the new loop when the condition is true.
+ std::swap(TrueDest, FalseDest);
+ Swapped = true;
+ }
}
// Old branch will be removed, so save its parent and successor to update the
@@ -955,10 +1167,11 @@ void LoopUnswitch::emitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
if (OldBranchSucc != TrueDest && OldBranchSucc != FalseDest) {
Updates.push_back({DominatorTree::Delete, OldBranchParent, OldBranchSucc});
}
- DT->applyUpdates(Updates);
if (MSSAU)
- MSSAU->applyUpdates(Updates, *DT);
+ MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true);
+ else
+ DT->applyUpdates(Updates);
}
// If either edge is critical, split it. This helps preserve LoopSimplify
@@ -1207,8 +1420,9 @@ void LoopUnswitch::splitExitEdges(
/// We determined that the loop is profitable to unswitch when LIC equal Val.
/// Split it into loop versions and test the condition outside of either loop.
/// Return the loops created as Out1/Out2.
-void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val,
- Loop *L, Instruction *TI) {
+void LoopUnswitch::unswitchNontrivialCondition(
+ Value *LIC, Constant *Val, Loop *L, Instruction *TI,
+ ArrayRef<Instruction *> ToDuplicate) {
Function *F = LoopHeader->getParent();
LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
<< LoopHeader->getName() << " [" << L->getBlocks().size()
@@ -1233,7 +1447,7 @@ void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val,
LoopBlocks.push_back(NewPreheader);
// We want the loop to come after the preheader, but before the exit blocks.
- LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
+ llvm::append_range(LoopBlocks, L->blocks());
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -1247,7 +1461,7 @@ void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val,
L->getUniqueExitBlocks(ExitBlocks);
// Add exit blocks to the loop blocks.
- LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end());
+ llvm::append_range(LoopBlocks, ExitBlocks);
// Next step, clone all of the basic blocks that make up the loop (including
// the loop preheader and exit blocks), keeping track of the mapping between
@@ -1340,7 +1554,7 @@ void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val,
// Emit the new branch that selects between the two versions of this loop.
emitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR,
- TI);
+ TI, ToDuplicate);
if (MSSAU) {
// Update MemoryPhis in Exit blocks.
MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT);
@@ -1362,17 +1576,38 @@ void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val,
// iteration.
WeakTrackingVH LICHandle(LIC);
- // Now we rewrite the original code to know that the condition is true and the
- // new code to know that the condition is false.
- rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false);
-
- // It's possible that simplifying one loop could cause the other to be
- // changed to another value or a constant. If its a constant, don't simplify
- // it.
- if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
- LICHandle && !isa<Constant>(LICHandle))
- rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val,
- /*IsEqual=*/true);
+ if (ToDuplicate.empty()) {
+ // Now we rewrite the original code to know that the condition is true and
+ // the new code to know that the condition is false.
+ rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false);
+
+ // It's possible that simplifying one loop could cause the other to be
+ // changed to another value or a constant. If its a constant, don't
+ // simplify it.
+ if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
+ LICHandle && !isa<Constant>(LICHandle))
+ rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val,
+ /*IsEqual=*/true);
+ } else {
+ // Partial unswitching. Update the condition in the right loop with the
+ // constant.
+ auto *CC = cast<ConstantInt>(Val);
+ if (CC->isOneValue()) {
+ rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val,
+ /*IsEqual=*/true);
+ } else
+ rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true);
+
+ // Mark the new loop as partially unswitched, to avoid unswitching on the
+ // same condition again.
+ auto &Context = NewLoop->getHeader()->getContext();
+ MDNode *DisableUnswitchMD = MDNode::get(
+ Context, MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
+ MDNode *NewLoopID = makePostTransformationMetadata(
+ Context, L->getLoopID(), {"llvm.loop.unswitch.partial"},
+ {DisableUnswitchMD});
+ NewLoop->setLoopID(NewLoopID);
+ }
if (MSSA && VerifyMemorySSA)
MSSA->verifyMemorySSA();
@@ -1381,9 +1616,7 @@ void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val,
/// Remove all instances of I from the worklist vector specified.
static void removeFromWorklist(Instruction *I,
std::vector<Instruction *> &Worklist) {
-
- Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), I),
- Worklist.end());
+ llvm::erase_value(Worklist, I);
}
/// When we find that I really equals V, remove I from the