author     Dimitry Andric <dim@FreeBSD.org>    2022-03-20 11:40:34 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2022-06-04 11:58:51 +0000
commit     4b6eb0e63c698094db5506763df44cc83c19f643 (patch)
tree       f1d30b8c10bc6db323b91538745ae8ab8b593910 /contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
parent     76886853f03395abb680824bcc74e98f83bd477a (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp  |  211
1 file changed, 179 insertions, 32 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 9ee2a2d0bf08..ae2fe2767074 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -89,6 +89,7 @@
#include <utility>
using namespace llvm;
+using namespace PatternMatch;
#define DEBUG_TYPE "indvars"
@@ -155,6 +156,10 @@ class IndVarSimplify {
bool rewriteNonIntegerIVs(Loop *L);
bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
+ /// Try to improve our exit conditions by converting the condition from
+ /// signed to unsigned or rotating computation out of the loop.
+ /// (See inline comment about why this is duplicated from simplifyAndExtend)
+ bool canonicalizeExitCondition(Loop *L);
/// Try to eliminate loop exits based on analyzeable exit counts
bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
/// Try to form loop invariant tests for loop exits by changing how many
@@ -494,6 +499,7 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
MadeAnyChanges = true;
PN.setIncomingValue(IncomingValIdx,
ExitVal->getIncomingValue(PreheaderIdx));
+ SE->forgetValue(&PN);
}
}
}
@@ -541,18 +547,18 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
return;
}
- if (!WI.WidestNativeType) {
+ if (!WI.WidestNativeType ||
+ Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
WI.IsSigned = IsSigned;
return;
}
- // We extend the IV to satisfy the sign of its first user, arbitrarily.
- if (WI.IsSigned != IsSigned)
- return;
-
- if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
- WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+ // We extend the IV to satisfy the sign of its user(s), or 'signed'
+ // if there are multiple users with both sign- and zero-extensions,
+ // in order not to introduce nondeterministic behaviour based on the
+ // unspecified order of a PHI node's users-iterator.
+ WI.IsSigned |= IsSigned;
}
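
A minimal standalone sketch (plain C++, not part of the patch; the struct and function names below are made up for illustration) of why the OR-merge above removes the order dependence: for extend users of the same width, the widening ends up signed as soon as any of them sign-extends, no matter which order the users-iterator visits them in.

#include <cassert>
#include <utility>
#include <vector>

struct SketchWideIVInfo {
  unsigned WidestWidth = 0; // stand-in for the bit width of WI.WidestNativeType
  bool IsSigned = false;    // stand-in for WI.IsSigned
};

// Mirrors the patched visitIVCast logic for a user extending to `Width` bits.
static void visitCast(SketchWideIVInfo &WI, unsigned Width, bool IsSigned) {
  if (WI.WidestWidth == 0 || Width > WI.WidestWidth) {
    WI.WidestWidth = Width;
    WI.IsSigned = IsSigned;
    return;
  }
  WI.IsSigned |= IsSigned; // order-independent merge for same-width users
}

int main() {
  // The same set of users, visited in two different orders.
  std::vector<std::pair<unsigned, bool>> Order1 = {{64, false}, {64, true}};
  std::vector<std::pair<unsigned, bool>> Order2 = {{64, true}, {64, false}};
  SketchWideIVInfo A, B;
  for (auto &U : Order1) visitCast(A, U.first, U.second);
  for (auto &U : Order2) visitCast(B, U.first, U.second);
  assert(A.IsSigned && B.IsSigned && A.WidestWidth == B.WidestWidth);
  return 0;
}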
//===----------------------------------------------------------------------===//
@@ -1274,9 +1280,9 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
// Skip debug info intrinsics.
do {
--I;
- } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
+ } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
- if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
+ if (I->isDebugOrPseudoInst() && I == Preheader->begin())
Done = true;
} else {
Done = true;
@@ -1309,6 +1315,18 @@ static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
replaceExitCond(BI, NewCond, DeadInsts);
}
+static void replaceLoopPHINodesWithPreheaderValues(
+ Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+ assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
+ auto *LoopPreheader = L->getLoopPreheader();
+ auto *LoopHeader = L->getHeader();
+ for (auto &PN : LoopHeader->phis()) {
+ auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
+ PN.replaceAllUsesWith(PreheaderIncoming);
+ DeadInsts.emplace_back(&PN);
+ }
+}
+
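
A source-level analogue of what this helper enables later in optimizeLoopExits (a hedged sketch, not the pass's actual output): once an exit is known to be taken on the first iteration, the loop body can never execute, so every header PHI can only ever observe its preheader incoming value and all of its uses can be rewritten to that value.

#include <cassert>

// Before: the header PHIs for `s` and `i` have a preheader incoming value (0)
// and a latch incoming value that is provably never produced, because the
// exit condition `i < 0` is taken on the very first iteration.
static int sumBefore(const int *a, int n) {
  int s = 0;
  for (int i = 0; i < n && i < 0; ++i) // ExitCount for the "i < 0" exit is zero
    s += a[i];
  return s;
}

// After foldExit + replaceLoopPHINodesWithPreheaderValues, uses of the PHIs
// are replaced with the preheader values, so the function reduces to this.
static int sumAfter(const int *, int) { return 0; }

int main() {
  int data[3] = {1, 2, 3};
  assert(sumBefore(data, 3) == sumAfter(data, 3));
  return 0;
}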
static void replaceWithInvariantCond(
const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
@@ -1333,7 +1351,6 @@ static bool optimizeLoopExitWithUnknownExitCount(
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
ICmpInst::Predicate Pred;
Value *LHS, *RHS;
- using namespace PatternMatch;
BasicBlock *TrueSucc, *FalseSucc;
if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
@@ -1394,6 +1411,140 @@ static bool optimizeLoopExitWithUnknownExitCount(
return true;
}
+bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
+ // Note: This is duplicating a particular part of SimplifyIndVars reasoning.
+ // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
+ // never reaches the icmp since the zext doesn't fold to an AddRec unless
+ // it already has flags. The alternative would be extending the set of
+ // "interesting" IV users to include the icmp, but doing that regresses
+ // results in practice: SCEVs get queried before the trip counts which rely
+ // on them, which results in SCEV caching sub-optimal answers. The concern
+ // about caching sub-optimal results is why we only query SCEVs of the
+ // loop invariant RHS here.
+ SmallVector<BasicBlock*, 16> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ bool Changed = false;
+ for (auto *ExitingBB : ExitingBlocks) {
+ auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ continue;
+ assert(BI->isConditional() && "exit branch must be conditional");
+
+ auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!ICmp || !ICmp->hasOneUse())
+ continue;
+
+ auto *LHS = ICmp->getOperand(0);
+ auto *RHS = ICmp->getOperand(1);
+ // For the range reasoning, avoid computing SCEVs in the loop to avoid
+ // poisoning the cache with sub-optimal results. For the must-execute case,
+ // this is a necessary precondition for correctness.
+ if (!L->isLoopInvariant(RHS)) {
+ if (!L->isLoopInvariant(LHS))
+ continue;
+ // Same logic applies for the inverse case
+ std::swap(LHS, RHS);
+ }
+
+ // Match (icmp signed-cond zext, RHS)
+ Value *LHSOp = nullptr;
+ if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
+ continue;
+
+ const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
+ const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
+ const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
+ auto FullCR = ConstantRange::getFull(InnerBitWidth);
+ FullCR = FullCR.zeroExtend(OuterBitWidth);
+ auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
+ if (FullCR.contains(RHSCR)) {
+ // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
+ // replace the signed condition with the unsigned version.
+ ICmp->setPredicate(ICmp->getUnsignedPredicate());
+ Changed = true;
+ // Note: No SCEV invalidation needed. We've changed the predicate, but
+ // have not changed exit counts, or the values produced by the compare.
+ continue;
+ }
+ }
+
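
The range argument used above can be checked with a small standalone sketch (plain C++ with concrete widths assumed to be InnerBitWidth = 32 and OuterBitWidth = 64, not the ConstantRange machinery): if the loop-invariant RHS is known to lie in [0, 2^InnerBitWidth) — the zero-extended full range of the narrow type — then both compare operands are non-negative in the wide signed interpretation, so the signed and unsigned predicates agree.

#include <cassert>
#include <cstdint>
#include <initializer_list>

static bool signedLess(uint64_t A, uint64_t B) {
  return (int64_t)A < (int64_t)B; // icmp slt on the wide type
}
static bool unsignedLess(uint64_t A, uint64_t B) {
  return A < B;                   // icmp ult on the wide type
}

int main() {
  const uint64_t InnerMax = 0xffffffffULL; // largest value a zext'd i32 can take
  // LHS is always zext(i32), so it lies in [0, 2^32); the transform fires only
  // when SCEV proves RHS lies in that same range.
  for (uint64_t LHS : {uint64_t(0), uint64_t(7), InnerMax})
    for (uint64_t RHS : {uint64_t(0), uint64_t(42), InnerMax})
      assert(signedLess(LHS, RHS) == unsignedLess(LHS, RHS));
  return 0;
}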
+ // Now that we've canonicalized the condition to match the extend,
+ // see if we can rotate the extend out of the loop.
+ for (auto *ExitingBB : ExitingBlocks) {
+ auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ continue;
+ assert(BI->isConditional() && "exit branch must be conditional");
+
+ auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
+ continue;
+
+ bool Swapped = false;
+ auto *LHS = ICmp->getOperand(0);
+ auto *RHS = ICmp->getOperand(1);
+ if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
+ // Nothing to rotate
+ continue;
+ if (L->isLoopInvariant(LHS)) {
+ // Same logic applies for the inverse case until we actually pick
+ // which operand of the compare to update.
+ Swapped = true;
+ std::swap(LHS, RHS);
+ }
+ assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
+
+ // Match (icmp unsigned-cond zext, RHS)
+ // TODO: Extend to handle corresponding sext/signed-cmp case
+ // TODO: Extend to other invertible functions
+ Value *LHSOp = nullptr;
+ if (!match(LHS, m_ZExt(m_Value(LHSOp))))
+ continue;
+
+ // In general, we only rotate if we can do so without increasing the number
+ // of instructions. The exception is when we have a zext(add-rec). The
+ // reason for allowing this exception is that we know we need to get rid
+ // of the zext for SCEV to be able to compute a trip count for said loops;
+ // we consider the new trip count valuable enough to increase instruction
+ // count by one.
+ if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
+ continue;
+
+ // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS,
+ // replace it with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
+ // when zext is loop varying and RHS is loop invariant. This converts
+ // loop varying work to loop-invariant work.
+ auto doRotateTransform = [&]() {
+ assert(ICmp->isUnsigned() && "must have proven unsigned already");
+ auto *NewRHS =
+ CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
+ L->getLoopPreheader()->getTerminator());
+ ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
+ ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
+ if (LHS->use_empty())
+ DeadInsts.push_back(LHS);
+ };
+
+
+ const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
+ const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
+ const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
+ auto FullCR = ConstantRange::getFull(InnerBitWidth);
+ FullCR = FullCR.zeroExtend(OuterBitWidth);
+ auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
+ if (FullCR.contains(RHSCR)) {
+ doRotateTransform();
+ Changed = true;
+ // Note, we are leaving SCEV in an unfortunately imprecise case here
+ // as rotation tends to reveal information about trip counts not
+ // previously visible.
+ continue;
+ }
+ }
+
+ return Changed;
+}
+
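
As a hedged source-level illustration of the rotation (again plain C++, not the pass's actual output; the function names and the 32/64-bit widths are made up): when the wide, loop-invariant bound is known to fit in the narrow type, the per-iteration zext of the IV can be replaced by a single trunc of the bound in the preheader, turning loop-varying work into loop-invariant work and letting SCEV see a narrow AddRec it can compute a trip count for.

#include <cassert>
#include <cstdint>

// Before rotation: the loop-varying IV is widened on every iteration to be
// compared against the wide, loop-invariant bound N (assume N < 2^32, which
// is what the ConstantRange check above establishes).
static uint64_t countBefore(uint32_t Start, uint64_t N) {
  uint64_t Count = 0;
  for (uint32_t I = Start; (uint64_t)I < N; ++I) // zext inside the loop
    ++Count;
  return Count;
}

// After rotation: the invariant bound is truncated once (conceptually in the
// preheader), and the compare stays in the narrow type.
static uint64_t countAfter(uint32_t Start, uint64_t N) {
  const uint32_t NarrowN = (uint32_t)N; // trunc hoisted out of the loop
  uint64_t Count = 0;
  for (uint32_t I = Start; I < NarrowN; ++I)
    ++Count;
  return Count;
}

int main() {
  assert(countBefore(5, 100) == countAfter(5, 100));
  assert(countBefore(0, 0) == countAfter(0, 0));
  return 0;
}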
bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -1499,20 +1650,18 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
// If we know we'd exit on the first iteration, rewrite the exit to
// reflect this. This does not imply the loop must exit through this
// exit; there may be an earlier one taken on the first iteration.
- // TODO: Given we know the backedge can't be taken, we should go ahead
- // and break it. Or at least, kill all the header phis and simplify.
+ // We know that the backedge can't be taken, so we replace all
+ // the header PHIs with values coming from the preheader.
if (ExitCount->isZero()) {
foldExit(L, ExitingBB, true, DeadInsts);
+ replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);
Changed = true;
continue;
}
- // If we end up with a pointer exit count, bail. Note that we can end up
- // with a pointer exit count for one exiting block, and not for another in
- // the same loop.
- if (!ExitCount->getType()->isIntegerTy() ||
- !MaxExitCount->getType()->isIntegerTy())
- continue;
+ assert(ExitCount->getType()->isIntegerTy() &&
+ MaxExitCount->getType()->isIntegerTy() &&
+ "Exit counts must be integers");
Type *WiderType =
SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
@@ -1569,14 +1718,11 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// through *explicit* control flow. We have to eliminate the possibility of
// implicit exits (see below) before we know it's truly exact.
const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(ExactBTC) ||
- !SE->isLoopInvariant(ExactBTC, L) ||
- !isSafeToExpand(ExactBTC, *SE))
+ if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
return false;
- // If we end up with a pointer exit count, bail. It may be unsized.
- if (!ExactBTC->getType()->isIntegerTy())
- return false;
+ assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
+ assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
auto BadExit = [&](BasicBlock *ExitingBB) {
// If our exiting block exits multiple loops, we can only rewrite the
@@ -1603,15 +1749,12 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
return true;
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
- if (isa<SCEVCouldNotCompute>(ExitCount) ||
- !SE->isLoopInvariant(ExitCount, L) ||
- !isSafeToExpand(ExitCount, *SE))
- return true;
-
- // If we end up with a pointer exit count, bail. It may be unsized.
- if (!ExitCount->getType()->isIntegerTy())
+ if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
return true;
+ assert(SE->isLoopInvariant(ExitCount, L) &&
+ "Exit count must be loop invariant");
+ assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
return false;
};
@@ -1781,7 +1924,11 @@ bool IndVarSimplify::run(Loop *L) {
}
// Eliminate redundant IV cycles.
- NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
+ NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
+
+ // Try to convert exit conditions to unsigned and rotate computation
+ // out of the loop. Note: Handles invalidation internally if needed.
+ Changed |= canonicalizeExitCondition(L);
// Try to eliminate loop exits based on analyzeable exit counts
if (optimizeLoopExits(L, Rewriter)) {