src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2022-03-20 11:40:34 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2022-06-04 11:58:51 +0000
commit	4b6eb0e63c698094db5506763df44cc83c19f643 (patch)
tree	f1d30b8c10bc6db323b91538745ae8ab8b593910 /contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
parent	76886853f03395abb680824bcc74e98f83bd477a (diff)

Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp')

-rw-r--r--

contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp

211

1 files changed, 179 insertions, 32 deletions

diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 9ee2a2d0bf08..ae2fe2767074 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp

@@ -89,6 +89,7 @@

#include <utility>

using namespace llvm;

+using namespace PatternMatch;

#define DEBUG_TYPE "indvars"

@@ -155,6 +156,10 @@ class IndVarSimplify {

bool rewriteNonIntegerIVs(Loop *L);

bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);

+ /// Try to improve our exit conditions by converting condition from signed

+ /// to unsigned or rotating computation out of the loop.

+ /// (See inline comment about why this is duplicated from simplifyAndExtend)

+ bool canonicalizeExitCondition(Loop *L);

/// Try to eliminate loop exits based on analyzeable exit counts

bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);

/// Try to form loop invariant tests for loop exits by changing how many

@@ -494,6 +499,7 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {

MadeAnyChanges = true;

PN.setIncomingValue(IncomingValIdx,

ExitVal->getIncomingValue(PreheaderIdx));

+ SE->forgetValue(&PN);

}

@@ -541,18 +547,18 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI,

return;

}

- if (!WI.WidestNativeType) {

+ if (!WI.WidestNativeType ||

+ Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {

WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);

WI.IsSigned = IsSigned;

return;

}

- // We extend the IV to satisfy the sign of its first user, arbitrarily.

- if (WI.IsSigned != IsSigned)

- return;

- if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))

- WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);

+ // We extend the IV to satisfy the sign of its user(s), or 'signed'

+ // if there are multiple users with both sign- and zero extensions,

+ // in order not to introduce nondeterministic behaviour based on the

+ // unspecified order of a PHI nodes' users-iterator.

+ WI.IsSigned |= IsSigned;

}

//===----------------------------------------------------------------------===//

@@ -1274,9 +1280,9 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {

// Skip debug info intrinsics.

do {

--I;

- } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());

+ } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

- if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())

+ if (I->isDebugOrPseudoInst() && I == Preheader->begin())

Done = true;

} else {

Done = true;

@@ -1309,6 +1315,18 @@ static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,

replaceExitCond(BI, NewCond, DeadInsts);

}

+static void replaceLoopPHINodesWithPreheaderValues(

+ Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {

+ assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");

+ auto *LoopPreheader = L->getLoopPreheader();

+ auto *LoopHeader = L->getHeader();

+ for (auto &PN : LoopHeader->phis()) {

+ auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);

+ PN.replaceAllUsesWith(PreheaderIncoming);

+ DeadInsts.emplace_back(&PN);

+ }

static void replaceWithInvariantCond(

const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,

const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,

@@ -1333,7 +1351,6 @@ static bool optimizeLoopExitWithUnknownExitCount(

SmallVectorImpl<WeakTrackingVH> &DeadInsts) {

ICmpInst::Predicate Pred;

Value *LHS, *RHS;

- using namespace PatternMatch;

BasicBlock *TrueSucc, *FalseSucc;

if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),

m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))

@@ -1394,6 +1411,140 @@ static bool optimizeLoopExitWithUnknownExitCount(

return true;

}

+bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {

+ // Note: This is duplicating a particular part on SimplifyIndVars reasoning.

+ // We need to duplicate it because given icmp zext(small-iv), C, IVUsers

+ // never reaches the icmp since the zext doesn't fold to an AddRec unless

+ // it already has flags. The alternative to this would be to extending the

+ // set of "interesting" IV users to include the icmp, but doing that

+ // regresses results in practice by querying SCEVs before trip counts which

+ // rely on them which results in SCEV caching sub-optimal answers. The

+ // concern about caching sub-optimal results is why we only query SCEVs of

+ // the loop invariant RHS here.

+ SmallVector<BasicBlock*, 16> ExitingBlocks;

+ L->getExitingBlocks(ExitingBlocks);

+ bool Changed = false;

+ for (auto *ExitingBB : ExitingBlocks) {

+ auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());

+ if (!BI)

+ continue;

+ assert(BI->isConditional() && "exit branch must be conditional");

+ auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());

+ if (!ICmp || !ICmp->hasOneUse())

+ continue;

+ auto *LHS = ICmp->getOperand(0);

+ auto *RHS = ICmp->getOperand(1);

+ // For the range reasoning, avoid computing SCEVs in the loop to avoid

+ // poisoning cache with sub-optimal results. For the must-execute case,

+ // this is a neccessary precondition for correctness.

+ if (!L->isLoopInvariant(RHS)) {

+ if (!L->isLoopInvariant(LHS))

+ continue;

+ // Same logic applies for the inverse case

+ std::swap(LHS, RHS);

+ }

+ // Match (icmp signed-cond zext, RHS)

+ Value *LHSOp = nullptr;

+ if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())

+ continue;

+ const DataLayout &DL = ExitingBB->getModule()->getDataLayout();

+ const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());

+ const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());

+ auto FullCR = ConstantRange::getFull(InnerBitWidth);

+ FullCR = FullCR.zeroExtend(OuterBitWidth);

+ auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));

+ if (FullCR.contains(RHSCR)) {

+ // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus

+ // replace the signed condition with the unsigned version.

+ ICmp->setPredicate(ICmp->getUnsignedPredicate());

+ Changed = true;

+ // Note: No SCEV invalidation needed. We've changed the predicate, but

+ // have not changed exit counts, or the values produced by the compare.

+ continue;

+ }

+ // Now that we've canonicalized the condition to match the extend,

+ // see if we can rotate the extend out of the loop.

+ for (auto *ExitingBB : ExitingBlocks) {

+ auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());

+ if (!BI)

+ continue;

+ assert(BI->isConditional() && "exit branch must be conditional");

+ auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());

+ if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())

+ continue;

+ bool Swapped = false;

+ auto *LHS = ICmp->getOperand(0);

+ auto *RHS = ICmp->getOperand(1);

+ if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))

+ // Nothing to rotate

+ continue;

+ if (L->isLoopInvariant(LHS)) {

+ // Same logic applies for the inverse case until we actually pick

+ // which operand of the compare to update.

+ Swapped = true;

+ std::swap(LHS, RHS);

+ }

+ assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

+ // Match (icmp unsigned-cond zext, RHS)

+ // TODO: Extend to handle corresponding sext/signed-cmp case

+ // TODO: Extend to other invertible functions

+ Value *LHSOp = nullptr;

+ if (!match(LHS, m_ZExt(m_Value(LHSOp))))

+ continue;

+ // In general, we only rotate if we can do so without increasing the number

+ // of instructions. The exception is when we have an zext(add-rec). The

+ // reason for allowing this exception is that we know we need to get rid

+ // of the zext for SCEV to be able to compute a trip count for said loops;

+ // we consider the new trip count valuable enough to increase instruction

+ // count by one.

+ if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))

+ continue;

+ // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS

+ // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)

+ // when zext is loop varying and RHS is loop invariant. This converts

+ // loop varying work to loop-invariant work.

+ auto doRotateTransform = [&]() {

+ assert(ICmp->isUnsigned() && "must have proven unsigned already");

+ auto *NewRHS =

+ CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",

+ L->getLoopPreheader()->getTerminator());

+ ICmp->setOperand(Swapped ? 1 : 0, LHSOp);

+ ICmp->setOperand(Swapped ? 0 : 1, NewRHS);

+ if (LHS->use_empty())

+ DeadInsts.push_back(LHS);

+ };

+ const DataLayout &DL = ExitingBB->getModule()->getDataLayout();

+ const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());

+ const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());

+ auto FullCR = ConstantRange::getFull(InnerBitWidth);

+ FullCR = FullCR.zeroExtend(OuterBitWidth);

+ auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));

+ if (FullCR.contains(RHSCR)) {

+ doRotateTransform();

+ Changed = true;

+ // Note, we are leaving SCEV in an unfortunately imprecise case here

+ // as rotation tends to reveal information about trip counts not

+ // previously visible.

+ continue;

+ }

+ return Changed;

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {

SmallVector<BasicBlock*, 16> ExitingBlocks;

L->getExitingBlocks(ExitingBlocks);

@@ -1499,20 +1650,18 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {

// If we know we'd exit on the first iteration, rewrite the exit to

// reflect this. This does not imply the loop must exit through this

// exit; there may be an earlier one taken on the first iteration.

- // TODO: Given we know the backedge can't be taken, we should go ahead

- // and break it. Or at least, kill all the header phis and simplify.

+ // We know that the backedge can't be taken, so we replace all

+ // the header PHIs with values coming from the preheader.

if (ExitCount->isZero()) {

foldExit(L, ExitingBB, true, DeadInsts);

+ replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);

Changed = true;

continue;

}

- // If we end up with a pointer exit count, bail. Note that we can end up

- // with a pointer exit count for one exiting block, and not for another in

- // the same loop.

- if (!ExitCount->getType()->isIntegerTy() ||

- !MaxExitCount->getType()->isIntegerTy())

- continue;

+ assert(ExitCount->getType()->isIntegerTy() &&

+ MaxExitCount->getType()->isIntegerTy() &&

+ "Exit counts must be integers");

Type *WiderType =

SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());

@@ -1569,14 +1718,11 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {

// through *explicit* control flow. We have to eliminate the possibility of

// implicit exits (see below) before we know it's truly exact.

const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);

- if (isa<SCEVCouldNotCompute>(ExactBTC) ||

- !SE->isLoopInvariant(ExactBTC, L) ||

- !isSafeToExpand(ExactBTC, *SE))

+ if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))

return false;

- // If we end up with a pointer exit count, bail. It may be unsized.

- if (!ExactBTC->getType()->isIntegerTy())

- return false;

+ assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");

+ assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

auto BadExit = [&](BasicBlock *ExitingBB) {

// If our exiting block exits multiple loops, we can only rewrite the

@@ -1603,15 +1749,12 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {

return true;

const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

- if (isa<SCEVCouldNotCompute>(ExitCount) ||

- !SE->isLoopInvariant(ExitCount, L) ||

- !isSafeToExpand(ExitCount, *SE))

- return true;

- // If we end up with a pointer exit count, bail. It may be unsized.

- if (!ExitCount->getType()->isIntegerTy())

+ if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))

return true;

+ assert(SE->isLoopInvariant(ExitCount, L) &&

+ "Exit count must be loop invariant");

+ assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");

return false;

};

@@ -1781,7 +1924,11 @@ bool IndVarSimplify::run(Loop *L) {

}

// Eliminate redundant IV cycles.

- NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

+ NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

+ // Try to convert exit conditions to unsigned and rotate computation

+ // out of the loop. Note: Handles invalidation internally if needed.

+ Changed |= canonicalizeExitCondition(L);

// Try to eliminate loop exits based on analyzeable exit counts

if (optimizeLoopExits(L, Rewriter)) {