diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2022-03-20 11:40:34 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2022-06-04 11:58:51 +0000 |
| commit | 4b6eb0e63c698094db5506763df44cc83c19f643 (patch) | |
| tree | f1d30b8c10bc6db323b91538745ae8ab8b593910 /contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | |
| parent | 76886853f03395abb680824bcc74e98f83bd477a (diff) | |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 211 |
1 files changed, 179 insertions, 32 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 9ee2a2d0bf08..ae2fe2767074 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -89,6 +89,7 @@ #include <utility> using namespace llvm; +using namespace PatternMatch; #define DEBUG_TYPE "indvars" @@ -155,6 +156,10 @@ class IndVarSimplify { bool rewriteNonIntegerIVs(Loop *L); bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); + /// Try to improve our exit conditions by converting condition from signed + /// to unsigned or rotating computation out of the loop. + /// (See inline comment about why this is duplicated from simplifyAndExtend) + bool canonicalizeExitCondition(Loop *L); /// Try to eliminate loop exits based on analyzeable exit counts bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); /// Try to form loop invariant tests for loop exits by changing how many @@ -494,6 +499,7 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) { MadeAnyChanges = true; PN.setIncomingValue(IncomingValIdx, ExitVal->getIncomingValue(PreheaderIdx)); + SE->forgetValue(&PN); } } } @@ -541,18 +547,18 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI, return; } - if (!WI.WidestNativeType) { + if (!WI.WidestNativeType || + Width > SE->getTypeSizeInBits(WI.WidestNativeType)) { WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); WI.IsSigned = IsSigned; return; } - // We extend the IV to satisfy the sign of its first user, arbitrarily. - if (WI.IsSigned != IsSigned) - return; - - if (Width > SE->getTypeSizeInBits(WI.WidestNativeType)) - WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); + // We extend the IV to satisfy the sign of its user(s), or 'signed' + // if there are multiple users with both sign- and zero extensions, + // in order not to introduce nondeterministic behaviour based on the + // unspecified order of a PHI nodes' users-iterator. + WI.IsSigned |= IsSigned; } //===----------------------------------------------------------------------===// @@ -1274,9 +1280,9 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { // Skip debug info intrinsics. do { --I; - } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); + } while (I->isDebugOrPseudoInst() && I != Preheader->begin()); - if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) + if (I->isDebugOrPseudoInst() && I == Preheader->begin()) Done = true; } else { Done = true; @@ -1309,6 +1315,18 @@ static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, replaceExitCond(BI, NewCond, DeadInsts); } +static void replaceLoopPHINodesWithPreheaderValues( + Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) { + assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!"); + auto *LoopPreheader = L->getLoopPreheader(); + auto *LoopHeader = L->getHeader(); + for (auto &PN : LoopHeader->phis()) { + auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader); + PN.replaceAllUsesWith(PreheaderIncoming); + DeadInsts.emplace_back(&PN); + } +} + static void replaceWithInvariantCond( const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred, const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter, @@ -1333,7 +1351,6 @@ static bool optimizeLoopExitWithUnknownExitCount( SmallVectorImpl<WeakTrackingVH> &DeadInsts) { ICmpInst::Predicate Pred; Value *LHS, *RHS; - using namespace PatternMatch; BasicBlock *TrueSucc, *FalseSucc; if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) @@ -1394,6 +1411,140 @@ static bool optimizeLoopExitWithUnknownExitCount( return true; } +bool IndVarSimplify::canonicalizeExitCondition(Loop *L) { + // Note: This is duplicating a particular part on SimplifyIndVars reasoning. + // We need to duplicate it because given icmp zext(small-iv), C, IVUsers + // never reaches the icmp since the zext doesn't fold to an AddRec unless + // it already has flags. The alternative to this would be to extending the + // set of "interesting" IV users to include the icmp, but doing that + // regresses results in practice by querying SCEVs before trip counts which + // rely on them which results in SCEV caching sub-optimal answers. The + // concern about caching sub-optimal results is why we only query SCEVs of + // the loop invariant RHS here. + SmallVector<BasicBlock*, 16> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + bool Changed = false; + for (auto *ExitingBB : ExitingBlocks) { + auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); + if (!BI) + continue; + assert(BI->isConditional() && "exit branch must be conditional"); + + auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); + if (!ICmp || !ICmp->hasOneUse()) + continue; + + auto *LHS = ICmp->getOperand(0); + auto *RHS = ICmp->getOperand(1); + // For the range reasoning, avoid computing SCEVs in the loop to avoid + // poisoning cache with sub-optimal results. For the must-execute case, + // this is a neccessary precondition for correctness. + if (!L->isLoopInvariant(RHS)) { + if (!L->isLoopInvariant(LHS)) + continue; + // Same logic applies for the inverse case + std::swap(LHS, RHS); + } + + // Match (icmp signed-cond zext, RHS) + Value *LHSOp = nullptr; + if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned()) + continue; + + const DataLayout &DL = ExitingBB->getModule()->getDataLayout(); + const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType()); + const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType()); + auto FullCR = ConstantRange::getFull(InnerBitWidth); + FullCR = FullCR.zeroExtend(OuterBitWidth); + auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L)); + if (FullCR.contains(RHSCR)) { + // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus + // replace the signed condition with the unsigned version. + ICmp->setPredicate(ICmp->getUnsignedPredicate()); + Changed = true; + // Note: No SCEV invalidation needed. We've changed the predicate, but + // have not changed exit counts, or the values produced by the compare. + continue; + } + } + + // Now that we've canonicalized the condition to match the extend, + // see if we can rotate the extend out of the loop. + for (auto *ExitingBB : ExitingBlocks) { + auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); + if (!BI) + continue; + assert(BI->isConditional() && "exit branch must be conditional"); + + auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); + if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned()) + continue; + + bool Swapped = false; + auto *LHS = ICmp->getOperand(0); + auto *RHS = ICmp->getOperand(1); + if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS)) + // Nothing to rotate + continue; + if (L->isLoopInvariant(LHS)) { + // Same logic applies for the inverse case until we actually pick + // which operand of the compare to update. + Swapped = true; + std::swap(LHS, RHS); + } + assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS)); + + // Match (icmp unsigned-cond zext, RHS) + // TODO: Extend to handle corresponding sext/signed-cmp case + // TODO: Extend to other invertible functions + Value *LHSOp = nullptr; + if (!match(LHS, m_ZExt(m_Value(LHSOp)))) + continue; + + // In general, we only rotate if we can do so without increasing the number + // of instructions. The exception is when we have an zext(add-rec). The + // reason for allowing this exception is that we know we need to get rid + // of the zext for SCEV to be able to compute a trip count for said loops; + // we consider the new trip count valuable enough to increase instruction + // count by one. + if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp))) + continue; + + // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS + // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS) + // when zext is loop varying and RHS is loop invariant. This converts + // loop varying work to loop-invariant work. + auto doRotateTransform = [&]() { + assert(ICmp->isUnsigned() && "must have proven unsigned already"); + auto *NewRHS = + CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "", + L->getLoopPreheader()->getTerminator()); + ICmp->setOperand(Swapped ? 1 : 0, LHSOp); + ICmp->setOperand(Swapped ? 0 : 1, NewRHS); + if (LHS->use_empty()) + DeadInsts.push_back(LHS); + }; + + + const DataLayout &DL = ExitingBB->getModule()->getDataLayout(); + const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType()); + const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType()); + auto FullCR = ConstantRange::getFull(InnerBitWidth); + FullCR = FullCR.zeroExtend(OuterBitWidth); + auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L)); + if (FullCR.contains(RHSCR)) { + doRotateTransform(); + Changed = true; + // Note, we are leaving SCEV in an unfortunately imprecise case here + // as rotation tends to reveal information about trip counts not + // previously visible. + continue; + } + } + + return Changed; +} + bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { SmallVector<BasicBlock*, 16> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -1499,20 +1650,18 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { // If we know we'd exit on the first iteration, rewrite the exit to // reflect this. This does not imply the loop must exit through this // exit; there may be an earlier one taken on the first iteration. - // TODO: Given we know the backedge can't be taken, we should go ahead - // and break it. Or at least, kill all the header phis and simplify. + // We know that the backedge can't be taken, so we replace all + // the header PHIs with values coming from the preheader. if (ExitCount->isZero()) { foldExit(L, ExitingBB, true, DeadInsts); + replaceLoopPHINodesWithPreheaderValues(L, DeadInsts); Changed = true; continue; } - // If we end up with a pointer exit count, bail. Note that we can end up - // with a pointer exit count for one exiting block, and not for another in - // the same loop. - if (!ExitCount->getType()->isIntegerTy() || - !MaxExitCount->getType()->isIntegerTy()) - continue; + assert(ExitCount->getType()->isIntegerTy() && + MaxExitCount->getType()->isIntegerTy() && + "Exit counts must be integers"); Type *WiderType = SE->getWiderType(MaxExitCount->getType(), ExitCount->getType()); @@ -1569,14 +1718,11 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { // through *explicit* control flow. We have to eliminate the possibility of // implicit exits (see below) before we know it's truly exact. const SCEV *ExactBTC = SE->getBackedgeTakenCount(L); - if (isa<SCEVCouldNotCompute>(ExactBTC) || - !SE->isLoopInvariant(ExactBTC, L) || - !isSafeToExpand(ExactBTC, *SE)) + if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE)) return false; - // If we end up with a pointer exit count, bail. It may be unsized. - if (!ExactBTC->getType()->isIntegerTy()) - return false; + assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant"); + assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer"); auto BadExit = [&](BasicBlock *ExitingBB) { // If our exiting block exits multiple loops, we can only rewrite the @@ -1603,15 +1749,12 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { return true; const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); - if (isa<SCEVCouldNotCompute>(ExitCount) || - !SE->isLoopInvariant(ExitCount, L) || - !isSafeToExpand(ExitCount, *SE)) - return true; - - // If we end up with a pointer exit count, bail. It may be unsized. - if (!ExitCount->getType()->isIntegerTy()) + if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE)) return true; + assert(SE->isLoopInvariant(ExitCount, L) && + "Exit count must be loop invariant"); + assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer"); return false; }; @@ -1781,7 +1924,11 @@ bool IndVarSimplify::run(Loop *L) { } // Eliminate redundant IV cycles. - NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); + NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI); + + // Try to convert exit conditions to unsigned and rotate computation + // out of the loop. Note: Handles invalidation internally if needed. + Changed |= canonicalizeExitCondition(L); // Try to eliminate loop exits based on analyzeable exit counts if (optimizeLoopExits(L, Rewriter)) { |
