diff options
Diffstat (limited to 'llvm/lib/Transforms/Scalar/IndVarSimplify.cpp')
-rw-r--r-- | llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 595 |
1 files changed, 143 insertions, 452 deletions
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index d8d7acae5c9fd..0f36c3f772e65 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -38,8 +38,9 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -81,6 +82,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" #include <cassert> #include <cstdint> @@ -100,10 +102,10 @@ STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); // implement a strong expression equivalence checker in SCEV. Until then, we // use the verify-indvars flag, which may assert in some cases. static cl::opt<bool> VerifyIndvars( - "verify-indvars", cl::Hidden, - cl::desc("Verify the ScalarEvolution result after running indvars")); - -enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl }; + "verify-indvars", cl::Hidden, + cl::desc("Verify the ScalarEvolution result after running indvars. Has no " + "effect in release builds. (Note: this adds additional SCEV " + "queries potentially changing the analysis result)")); static cl::opt<ReplaceExitVal> ReplaceExitValue( "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), @@ -140,11 +142,10 @@ class IndVarSimplify { const DataLayout &DL; TargetLibraryInfo *TLI; const TargetTransformInfo *TTI; + std::unique_ptr<MemorySSAUpdater> MSSAU; SmallVector<WeakTrackingVH, 16> DeadInsts; - bool isValidRewrite(Value *FromVal, Value *ToVal); - bool handleFloatingPointIV(Loop *L, PHINode *PH); bool rewriteNonIntegerIVs(Loop *L); @@ -155,10 +156,7 @@ class IndVarSimplify { /// iterations of the loop run when that is unobservable. bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter); - bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet); - bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); bool rewriteFirstIterationLoopExitValues(Loop *L); - bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const; bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, const SCEV *ExitCount, @@ -169,66 +167,17 @@ class IndVarSimplify { public: IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, const DataLayout &DL, TargetLibraryInfo *TLI, - TargetTransformInfo *TTI) - : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {} + TargetTransformInfo *TTI, MemorySSA *MSSA) + : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) { + if (MSSA) + MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); + } bool run(Loop *L); }; } // end anonymous namespace -/// Return true if the SCEV expansion generated by the rewriter can replace the -/// original value. SCEV guarantees that it produces the same value, but the way -/// it is produced may be illegal IR. Ideally, this function will only be -/// called for verification. -bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { - // If an SCEV expression subsumed multiple pointers, its expansion could - // reassociate the GEP changing the base pointer. This is illegal because the - // final address produced by a GEP chain must be inbounds relative to its - // underlying object. Otherwise basic alias analysis, among other things, - // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid - // producing an expression involving multiple pointers. Until then, we must - // bail out here. - // - // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject - // because it understands lcssa phis while SCEV does not. - Value *FromPtr = FromVal; - Value *ToPtr = ToVal; - if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) { - FromPtr = GEP->getPointerOperand(); - } - if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) { - ToPtr = GEP->getPointerOperand(); - } - if (FromPtr != FromVal || ToPtr != ToVal) { - // Quickly check the common case - if (FromPtr == ToPtr) - return true; - - // SCEV may have rewritten an expression that produces the GEP's pointer - // operand. That's ok as long as the pointer operand has the same base - // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the - // base of a recurrence. This handles the case in which SCEV expansion - // converts a pointer type recurrence into a nonrecurrent pointer base - // indexed by an integer recurrence. - - // If the GEP base pointer is a vector of pointers, abort. - if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy()) - return false; - - const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr)); - const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr)); - if (FromBase == ToBase) - return true; - - LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase - << " != " << *ToBase << "\n"); - - return false; - } - return true; -} - /// Determine the insertion point for this user. By default, insert immediately /// before the user. SCEVExpander or LICM will hoist loop invariants out of the /// loop. For PHI nodes, there may be multiple uses, so compute the nearest @@ -477,11 +426,11 @@ bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { // new comparison. NewCompare->takeName(Compare); Compare->replaceAllUsesWith(NewCompare); - RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI); + RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get()); // Delete the old floating point increment. Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); - RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI); + RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get()); // If the FP induction variable still has uses, this is because something else // in the loop uses its value. In order to canonicalize the induction @@ -494,7 +443,7 @@ bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", &*PN->getParent()->getFirstInsertionPt()); PN->replaceAllUsesWith(Conv); - RecursivelyDeleteTriviallyDeadInstructions(PN, TLI); + RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get()); } return true; } @@ -522,222 +471,6 @@ bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { return Changed; } -namespace { - -// Collect information about PHI nodes which can be transformed in -// rewriteLoopExitValues. -struct RewritePhi { - PHINode *PN; - - // Ith incoming value. - unsigned Ith; - - // Exit value after expansion. - Value *Val; - - // High Cost when expansion. - bool HighCost; - - RewritePhi(PHINode *P, unsigned I, Value *V, bool H) - : PN(P), Ith(I), Val(V), HighCost(H) {} -}; - -} // end anonymous namespace - -//===----------------------------------------------------------------------===// -// rewriteLoopExitValues - Optimize IV users outside the loop. -// As a side effect, reduces the amount of IV processing within the loop. -//===----------------------------------------------------------------------===// - -bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const { - SmallPtrSet<const Instruction *, 8> Visited; - SmallVector<const Instruction *, 8> WorkList; - Visited.insert(I); - WorkList.push_back(I); - while (!WorkList.empty()) { - const Instruction *Curr = WorkList.pop_back_val(); - // This use is outside the loop, nothing to do. - if (!L->contains(Curr)) - continue; - // Do we assume it is a "hard" use which will not be eliminated easily? - if (Curr->mayHaveSideEffects()) - return true; - // Otherwise, add all its users to worklist. - for (auto U : Curr->users()) { - auto *UI = cast<Instruction>(U); - if (Visited.insert(UI).second) - WorkList.push_back(UI); - } - } - return false; -} - -/// Check to see if this loop has a computable loop-invariant execution count. -/// If so, this means that we can compute the final value of any expressions -/// that are recurrent in the loop, and substitute the exit values from the loop -/// into any instructions outside of the loop that use the final values of the -/// current expressions. -/// -/// This is mostly redundant with the regular IndVarSimplify activities that -/// happen later, except that it's more powerful in some cases, because it's -/// able to brute-force evaluate arbitrary instructions as long as they have -/// constant operands at the beginning of the loop. -bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { - // Check a pre-condition. - assert(L->isRecursivelyLCSSAForm(*DT, *LI) && - "Indvars did not preserve LCSSA!"); - - SmallVector<BasicBlock*, 8> ExitBlocks; - L->getUniqueExitBlocks(ExitBlocks); - - SmallVector<RewritePhi, 8> RewritePhiSet; - // Find all values that are computed inside the loop, but used outside of it. - // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan - // the exit blocks of the loop to find them. - for (BasicBlock *ExitBB : ExitBlocks) { - // If there are no PHI nodes in this exit block, then no values defined - // inside the loop are used on this path, skip it. - PHINode *PN = dyn_cast<PHINode>(ExitBB->begin()); - if (!PN) continue; - - unsigned NumPreds = PN->getNumIncomingValues(); - - // Iterate over all of the PHI nodes. - BasicBlock::iterator BBI = ExitBB->begin(); - while ((PN = dyn_cast<PHINode>(BBI++))) { - if (PN->use_empty()) - continue; // dead use, don't replace it - - if (!SE->isSCEVable(PN->getType())) - continue; - - // It's necessary to tell ScalarEvolution about this explicitly so that - // it can walk the def-use list and forget all SCEVs, as it may not be - // watching the PHI itself. Once the new exit value is in place, there - // may not be a def-use connection between the loop and every instruction - // which got a SCEVAddRecExpr for that loop. - SE->forgetValue(PN); - - // Iterate over all of the values in all the PHI nodes. - for (unsigned i = 0; i != NumPreds; ++i) { - // If the value being merged in is not integer or is not defined - // in the loop, skip it. - Value *InVal = PN->getIncomingValue(i); - if (!isa<Instruction>(InVal)) - continue; - - // If this pred is for a subloop, not L itself, skip it. - if (LI->getLoopFor(PN->getIncomingBlock(i)) != L) - continue; // The Block is in a subloop, skip it. - - // Check that InVal is defined in the loop. - Instruction *Inst = cast<Instruction>(InVal); - if (!L->contains(Inst)) - continue; - - // Okay, this instruction has a user outside of the current loop - // and varies predictably *inside* the loop. Evaluate the value it - // contains when the loop exits, if possible. We prefer to start with - // expressions which are true for all exits (so as to maximize - // expression reuse by the SCEVExpander), but resort to per-exit - // evaluation if that fails. - const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); - if (isa<SCEVCouldNotCompute>(ExitValue) || - !SE->isLoopInvariant(ExitValue, L) || - !isSafeToExpand(ExitValue, *SE)) { - // TODO: This should probably be sunk into SCEV in some way; maybe a - // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for - // most SCEV expressions and other recurrence types (e.g. shift - // recurrences). Is there existing code we can reuse? - const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i)); - if (isa<SCEVCouldNotCompute>(ExitCount)) - continue; - if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst))) - if (AddRec->getLoop() == L) - ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE); - if (isa<SCEVCouldNotCompute>(ExitValue) || - !SE->isLoopInvariant(ExitValue, L) || - !isSafeToExpand(ExitValue, *SE)) - continue; - } - - // Computing the value outside of the loop brings no benefit if it is - // definitely used inside the loop in a way which can not be optimized - // away. Avoid doing so unless we know we have a value which computes - // the ExitValue already. TODO: This should be merged into SCEV - // expander to leverage its knowledge of existing expressions. - if (ReplaceExitValue != AlwaysRepl && - !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) && - hasHardUserWithinLoop(L, Inst)) - continue; - - bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst); - Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); - - LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal - << '\n' - << " LoopVal = " << *Inst << "\n"); - - if (!isValidRewrite(Inst, ExitVal)) { - DeadInsts.push_back(ExitVal); - continue; - } - -#ifndef NDEBUG - // If we reuse an instruction from a loop which is neither L nor one of - // its containing loops, we end up breaking LCSSA form for this loop by - // creating a new use of its instruction. - if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal)) - if (auto *EVL = LI->getLoopFor(ExitInsn->getParent())) - if (EVL != L) - assert(EVL->contains(L) && "LCSSA breach detected!"); -#endif - - // Collect all the candidate PHINodes to be rewritten. - RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost); - } - } - } - - bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet); - - bool Changed = false; - // Transformation. - for (const RewritePhi &Phi : RewritePhiSet) { - PHINode *PN = Phi.PN; - Value *ExitVal = Phi.Val; - - // Only do the rewrite when the ExitValue can be expanded cheaply. - // If LoopCanBeDel is true, rewrite exit value aggressively. - if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) { - DeadInsts.push_back(ExitVal); - continue; - } - - Changed = true; - ++NumReplaced; - Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith)); - PN->setIncomingValue(Phi.Ith, ExitVal); - - // If this instruction is dead now, delete it. Don't do it now to avoid - // invalidating iterators. - if (isInstructionTriviallyDead(Inst, TLI)) - DeadInsts.push_back(Inst); - - // Replace PN with ExitVal if that is legal and does not break LCSSA. - if (PN->getNumIncomingValues() == 1 && - LI->replacementPreservesLCSSAForm(PN, ExitVal)) { - PN->replaceAllUsesWith(ExitVal); - PN->eraseFromParent(); - } - } - - // The insertion point instruction may have been deleted; clear it out - // so that the rewriter doesn't trip over it later. - Rewriter.clearInsertPoint(); - return Changed; -} - //===---------------------------------------------------------------------===// // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know // they will exit at the first iteration. @@ -813,61 +546,6 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) { return MadeAnyChanges; } -/// Check whether it is possible to delete the loop after rewriting exit -/// value. If it is possible, ignore ReplaceExitValue and do rewriting -/// aggressively. -bool IndVarSimplify::canLoopBeDeleted( - Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) { - BasicBlock *Preheader = L->getLoopPreheader(); - // If there is no preheader, the loop will not be deleted. - if (!Preheader) - return false; - - // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1. - // We obviate multiple ExitingBlocks case for simplicity. - // TODO: If we see testcase with multiple ExitingBlocks can be deleted - // after exit value rewriting, we can enhance the logic here. - SmallVector<BasicBlock *, 4> ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - SmallVector<BasicBlock *, 8> ExitBlocks; - L->getUniqueExitBlocks(ExitBlocks); - if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1) - return false; - - BasicBlock *ExitBlock = ExitBlocks[0]; - BasicBlock::iterator BI = ExitBlock->begin(); - while (PHINode *P = dyn_cast<PHINode>(BI)) { - Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]); - - // If the Incoming value of P is found in RewritePhiSet, we know it - // could be rewritten to use a loop invariant value in transformation - // phase later. Skip it in the loop invariant check below. - bool found = false; - for (const RewritePhi &Phi : RewritePhiSet) { - unsigned i = Phi.Ith; - if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) { - found = true; - break; - } - } - - Instruction *I; - if (!found && (I = dyn_cast<Instruction>(Incoming))) - if (!L->hasLoopInvariantOperands(I)) - return false; - - ++BI; - } - - for (auto *BB : L->blocks()) - if (llvm::any_of(*BB, [](Instruction &I) { - return I.mayHaveSideEffects(); - })) - return false; - - return true; -} - //===----------------------------------------------------------------------===// // IV Widening - Extend the width of an IV to cover its widest uses. //===----------------------------------------------------------------------===// @@ -1060,8 +738,8 @@ protected: Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter); bool widenLoopCompare(NarrowIVDefUse DU); - bool widenWithVariantLoadUse(NarrowIVDefUse DU); - void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU); + bool widenWithVariantUse(NarrowIVDefUse DU); + void widenWithVariantUseCodegen(NarrowIVDefUse DU); void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); }; @@ -1399,20 +1077,27 @@ bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) { return true; } -/// If the narrow use is an instruction whose two operands are the defining -/// instruction of DU and a load instruction, then we have the following: -/// if the load is hoisted outside the loop, then we do not reach this function -/// as scalar evolution analysis works fine in widenIVUse with variables -/// hoisted outside the loop and efficient code is subsequently generated by -/// not emitting truncate instructions. But when the load is not hoisted -/// (whether due to limitation in alias analysis or due to a true legality), -/// then scalar evolution can not proceed with loop variant values and -/// inefficient code is generated. This function handles the non-hoisted load -/// special case by making the optimization generate the same type of code for -/// hoisted and non-hoisted load (widen use and eliminate sign extend -/// instruction). This special case is important especially when the induction -/// variables are affecting addressing mode in code generation. -bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) { +// The widenIVUse avoids generating trunc by evaluating the use as AddRec, this +// will not work when: +// 1) SCEV traces back to an instruction inside the loop that SCEV can not +// expand, eg. add %indvar, (load %addr) +// 2) SCEV finds a loop variant, eg. add %indvar, %loopvariant +// While SCEV fails to avoid trunc, we can still try to use instruction +// combining approach to prove trunc is not required. This can be further +// extended with other instruction combining checks, but for now we handle the +// following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext") +// +// Src: +// %c = sub nsw %b, %indvar +// %d = sext %c to i64 +// Dst: +// %indvar.ext1 = sext %indvar to i64 +// %m = sext %b to i64 +// %d = sub nsw i64 %m, %indvar.ext1 +// Therefore, as long as the result of add/sub/mul is extended to wide type, no +// trunc is required regardless of how %b is generated. This pattern is common +// when calculating address in 64 bit architecture +bool WidenIV::widenWithVariantUse(NarrowIVDefUse DU) { Instruction *NarrowUse = DU.NarrowUse; Instruction *NarrowDef = DU.NarrowDef; Instruction *WideDef = DU.WideDef; @@ -1443,12 +1128,6 @@ bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) { else return false; - // We are interested in the other operand being a load instruction. - // But, we should look into relaxing this restriction later on. - auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx)); - if (I && I->getOpcode() != Instruction::Load) - return false; - // Verifying that Defining operand is an AddRec const SCEV *Op1 = SE->getSCEV(WideDef); const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1); @@ -1480,9 +1159,9 @@ bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) { return true; } -/// Special Case for widening with variant Loads (see -/// WidenIV::widenWithVariantLoadUse). This is the code generation part. -void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) { +/// Special Case for widening with loop variant (see +/// WidenIV::widenWithVariant). This is the code generation part. +void WidenIV::widenWithVariantUseCodegen(NarrowIVDefUse DU) { Instruction *NarrowUse = DU.NarrowUse; Instruction *NarrowDef = DU.NarrowDef; Instruction *WideDef = DU.WideDef; @@ -1508,33 +1187,22 @@ void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) { Builder.Insert(WideBO); WideBO->copyIRFlags(NarrowBO); - if (ExtKind == SignExtended) - ExtendKindMap[NarrowUse] = SignExtended; - else - ExtendKindMap[NarrowUse] = ZeroExtended; + assert(ExtKind != Unknown && "Unknown ExtKind not handled"); - // Update the Use. - if (ExtKind == SignExtended) { - for (Use &U : NarrowUse->uses()) { - SExtInst *User = dyn_cast<SExtInst>(U.getUser()); - if (User && User->getType() == WideType) { - LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " - << *WideBO << "\n"); - ++NumElimExt; - User->replaceAllUsesWith(WideBO); - DeadInsts.emplace_back(User); - } - } - } else { // ExtKind == ZeroExtended - for (Use &U : NarrowUse->uses()) { - ZExtInst *User = dyn_cast<ZExtInst>(U.getUser()); - if (User && User->getType() == WideType) { - LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " - << *WideBO << "\n"); - ++NumElimExt; - User->replaceAllUsesWith(WideBO); - DeadInsts.emplace_back(User); - } + ExtendKindMap[NarrowUse] = ExtKind; + + for (Use &U : NarrowUse->uses()) { + Instruction *User = nullptr; + if (ExtKind == SignExtended) + User = dyn_cast<SExtInst>(U.getUser()); + else + User = dyn_cast<ZExtInst>(U.getUser()); + if (User && User->getType() == WideType) { + LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " + << *WideBO << "\n"); + ++NumElimExt; + User->replaceAllUsesWith(WideBO); + DeadInsts.emplace_back(User); } } } @@ -1641,8 +1309,8 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // in WideAddRec.first does not indicate a polynomial induction expression. // In that case, look at the operands of the use instruction to determine // if we can still widen the use instead of truncating its operand. - if (widenWithVariantLoadUse(DU)) { - widenWithVariantLoadUseCodegen(DU); + if (widenWithVariantUse(DU)) { + widenWithVariantUseCodegen(DU); return nullptr; } @@ -1992,8 +1660,8 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L, // Information about sign/zero extensions of CurrIV. IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); - Changed |= - simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor); + Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter, + &Visitor); if (Visitor.WI.WidestNativeType) { WideIVs.push_back(Visitor.WI); @@ -2017,7 +1685,7 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L, /// Given an Value which is hoped to be part of an add recurance in the given /// loop, return the associated Phi node if so. Otherwise, return null. Note -/// that this is less general than SCEVs AddRec checking. +/// that this is less general than SCEVs AddRec checking. static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) { Instruction *IncI = dyn_cast<Instruction>(IncV); if (!IncI) @@ -2079,7 +1747,7 @@ static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); if (L->isLoopInvariant(BI->getCondition())) return false; - + // Do LFTR to simplify the exit condition to an ICMP. ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); if (!Cond) @@ -2122,9 +1790,9 @@ static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { /// actually poison. This can be used to assess whether a new use of Root can /// be added at a location which is control equivalent with OnPathTo (such as /// immediately before it) without introducing UB which didn't previously -/// exist. Note that a false result conveys no information. +/// exist. Note that a false result conveys no information. static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, - Instruction *OnPathTo, + Instruction *OnPathTo, DominatorTree *DT) { // Basic approach is to assume Root is poison, propagate poison forward // through all users we can easily track, and then check whether any of those @@ -2142,10 +1810,10 @@ static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, // If we know this must trigger UB on a path leading our target. if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo)) return true; - + // If we can't analyze propagation through this instruction, just skip it // and transitive users. Safe as false is a conservative result. - if (!propagatesFullPoison(I) && I != Root) + if (!propagatesPoison(I) && I != Root) continue; if (KnownPoison.insert(I).second) @@ -2154,7 +1822,7 @@ static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, } // Might be non-UB, or might have a path we couldn't prove must execute on - // way to exiting bb. + // way to exiting bb. return false; } @@ -2221,7 +1889,7 @@ static bool isLoopCounter(PHINode* Phi, Loop *L, ScalarEvolution *SE) { assert(Phi->getParent() == L->getHeader()); assert(L->getLoopLatch()); - + if (!SE->isSCEVable(Phi->getType())) return false; @@ -2282,7 +1950,7 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB, if (!hasConcreteDef(Phi)) { // We explicitly allow unknown phis as long as they are already used by // the loop exit test. This is legal since performing LFTR could not - // increase the number of undef users. + // increase the number of undef users. Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock); if (!isLoopExitTestBasedOn(Phi, ExitingBB) && !isLoopExitTestBasedOn(IncPhi, ExitingBB)) @@ -2300,7 +1968,7 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB, if (!Phi->getType()->isIntegerTy() && !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) continue; - + const SCEV *Init = AR->getStart(); if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { @@ -2506,14 +2174,14 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend // the other side of the comparison instead. We still evaluate the limit // in the narrower bitwidth, we just prefer a zext/sext outside the loop to - // a truncate within in. + // a truncate within in. bool Extended = false; const SCEV *IV = SE->getSCEV(CmpIndVar); const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), ExitCnt->getType()); const SCEV *ZExtTrunc = SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); - + if (ZExtTrunc == IV) { Extended = true; ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), @@ -2531,7 +2199,7 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, if (Extended) { bool Discard; L->makeLoopInvariant(ExitCnt, Discard); - } else + } else CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), "lftr.wideiv"); } @@ -2551,7 +2219,7 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, // update the branch to use the new comparison; in the common case this // will make old comparison dead. BI->setCondition(Cond); - DeadInsts.push_back(OrigCond); + DeadInsts.emplace_back(OrigCond); ++NumLFTR; return true; @@ -2685,11 +2353,10 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { L->getExitingBlocks(ExitingBlocks); // Remove all exits which aren't both rewriteable and analyzeable. - auto NewEnd = llvm::remove_if(ExitingBlocks, - [&](BasicBlock *ExitingBB) { + auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) { // If our exitting block exits multiple loops, we can only rewrite the // innermost one. Otherwise, we're changing how many times the innermost - // loop runs before it exits. + // loop runs before it exits. if (LI->getLoopFor(ExitingBB) != L) return true; @@ -2701,18 +2368,18 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { // If already constant, nothing to do. if (isa<Constant>(BI->getCondition())) return true; - + const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); if (isa<SCEVCouldNotCompute>(ExitCount)) return true; return false; - }); + }); ExitingBlocks.erase(NewEnd, ExitingBlocks.end()); if (ExitingBlocks.empty()) return false; - - // Get a symbolic upper bound on the loop backedge taken count. + + // Get a symbolic upper bound on the loop backedge taken count. const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L); if (isa<SCEVCouldNotCompute>(MaxExitCount)) return false; @@ -2720,11 +2387,12 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { // Visit our exit blocks in order of dominance. We know from the fact that // all exits (left) are analyzeable that the must be a total dominance order // between them as each must dominate the latch. The visit order only - // matters for the provably equal case. + // matters for the provably equal case. llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) { // std::sort sorts in ascending order, so we want the inverse of // the normal dominance relation. + if (A == B) return false; if (DT->properlyDominates(A, B)) return true; if (DT->properlyDominates(B, A)) return false; llvm_unreachable("expected total dominance order!"); @@ -2734,7 +2402,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])); } #endif - + auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) { BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); @@ -2743,7 +2411,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { IsTaken ? ExitIfTrue : !ExitIfTrue); BI->setCondition(NewCond); if (OldCond->use_empty()) - DeadInsts.push_back(OldCond); + DeadInsts.emplace_back(OldCond); }; bool Changed = false; @@ -2751,7 +2419,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { for (BasicBlock *ExitingBB : ExitingBlocks) { const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above"); - + // If we know we'd exit on the first iteration, rewrite the exit to // reflect this. This does not imply the loop must exit through this // exit; there may be an earlier one taken on the first iteration. @@ -2769,13 +2437,13 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { if (!ExitCount->getType()->isIntegerTy() || !MaxExitCount->getType()->isIntegerTy()) continue; - + Type *WiderType = SE->getWiderType(MaxExitCount->getType(), ExitCount->getType()); ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType); MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType); assert(MaxExitCount->getType() == ExitCount->getType()); - + // Can we prove that some other exit must be taken strictly before this // one? if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, @@ -2788,7 +2456,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { // As we run, keep track of which exit counts we've encountered. If we // find a duplicate, we've found an exit which would have exited on the // exiting iteration, but (from the visit order) strictly follows another - // which does the same and is thus dead. + // which does the same and is thus dead. if (!DominatingExitCounts.insert(ExitCount).second) { FoldExit(ExitingBB, false); Changed = true; @@ -2809,22 +2477,20 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { SmallVector<BasicBlock*, 16> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - bool Changed = false; - // Finally, see if we can rewrite our exit conditions into a loop invariant - // form. If we have a read-only loop, and we can tell that we must exit down + // form. If we have a read-only loop, and we can tell that we must exit down // a path which does not need any of the values computed within the loop, we // can rewrite the loop to exit on the first iteration. Note that this // doesn't either a) tell us the loop exits on the first iteration (unless // *all* exits are predicateable) or b) tell us *which* exit might be taken. // This transformation looks a lot like a restricted form of dead loop // elimination, but restricted to read-only loops and without neccesssarily - // needing to kill the loop entirely. + // needing to kill the loop entirely. if (!LoopPredication) - return Changed; + return false; if (!SE->hasLoopInvariantBackedgeTakenCount(L)) - return Changed; + return false; // Note: ExactBTC is the exact backedge taken count *iff* the loop exits // through *explicit* control flow. We have to eliminate the possibility of @@ -2833,16 +2499,16 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { if (isa<SCEVCouldNotCompute>(ExactBTC) || !SE->isLoopInvariant(ExactBTC, L) || !isSafeToExpand(ExactBTC, *SE)) - return Changed; + return false; // If we end up with a pointer exit count, bail. It may be unsized. if (!ExactBTC->getType()->isIntegerTy()) - return Changed; + return false; auto BadExit = [&](BasicBlock *ExitingBB) { // If our exiting block exits multiple loops, we can only rewrite the // innermost one. Otherwise, we're changing how many times the innermost - // loop runs before it exits. + // loop runs before it exits. if (LI->getLoopFor(ExitingBB) != L) return true; @@ -2897,18 +2563,18 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { // is complicated and we choose not to for now. for (unsigned i = 1; i < ExitingBlocks.size(); i++) if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])) - return Changed; + return false; // Given our sorted total order, we know that exit[j] must be evaluated // after all exit[i] such j > i. for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++) if (BadExit(ExitingBlocks[i])) { - ExitingBlocks.resize(i); + ExitingBlocks.resize(i); break; } if (ExitingBlocks.empty()) - return Changed; + return false; // We rely on not being able to reach an exiting block on a later iteration // then it's statically compute exit count. The implementaton of @@ -2930,8 +2596,9 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { for (auto &I : *BB) // TODO:isGuaranteedToTransfer if (I.mayHaveSideEffects() || I.mayThrow()) - return Changed; + return false; + bool Changed = false; // Finally, do the actual predication for all predicatable blocks. A couple // of notes here: // 1) We don't bother to constant fold dominated exits with identical exit @@ -2970,7 +2637,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { Value *OldCond = BI->getCondition(); BI->setCondition(NewCond); if (OldCond->use_empty()) - DeadInsts.push_back(OldCond); + DeadInsts.emplace_back(OldCond); Changed = true; } @@ -2985,7 +2652,6 @@ bool IndVarSimplify::run(Loop *L) { // We need (and expect!) the incoming loop to be in LCSSA. assert(L->isRecursivelyLCSSAForm(*DT, *LI) && "LCSSA required to run indvars!"); - bool Changed = false; // If LoopSimplify form is not available, stay out of trouble. Some notes: // - LSR currently only supports LoopSimplify-form loops. Indvars' @@ -3001,9 +2667,15 @@ bool IndVarSimplify::run(Loop *L) { #ifndef NDEBUG // Used below for a consistency check only - const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); + // Note: Since the result returned by ScalarEvolution may depend on the order + // in which previous results are added to its cache, the call to + // getBackedgeTakenCount() may change following SCEV queries. + const SCEV *BackedgeTakenCount; + if (VerifyIndvars) + BackedgeTakenCount = SE->getBackedgeTakenCount(L); #endif + bool Changed = false; // If there are any floating-point recurrences, attempt to // transform them to use integer recurrences. Changed |= rewriteNonIntegerIVs(L); @@ -3027,8 +2699,13 @@ bool IndVarSimplify::run(Loop *L) { // that are recurrent in the loop, and substitute the exit values from the // loop into any instructions outside of the loop that use the final values // of the current expressions. - if (ReplaceExitValue != NeverRepl) - Changed |= rewriteLoopExitValues(L, Rewriter); + if (ReplaceExitValue != NeverRepl) { + if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT, + ReplaceExitValue, DeadInsts)) { + NumReplaced += Rewrites; + Changed = true; + } + } // Eliminate redundant IV cycles. NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); @@ -3039,7 +2716,7 @@ bool IndVarSimplify::run(Loop *L) { // Given we've changed exit counts, notify SCEV SE->forgetLoop(L); } - + // Try to form loop invariant tests for loop exits by changing how many // iterations of the loop run when that is unobservable. if (predicateLoopExits(L, Rewriter)) { @@ -3049,8 +2726,11 @@ bool IndVarSimplify::run(Loop *L) { } // If we have a trip count expression, rewrite the loop's exit condition - // using it. + // using it. if (!DisableLFTR) { + BasicBlock *PreHeader = L->getLoopPreheader(); + BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); + SmallVector<BasicBlock*, 16> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); for (BasicBlock *ExitingBB : ExitingBlocks) { @@ -3060,10 +2740,10 @@ bool IndVarSimplify::run(Loop *L) { // If our exitting block exits multiple loops, we can only rewrite the // innermost one. Otherwise, we're changing how many times the innermost - // loop runs before it exits. + // loop runs before it exits. if (LI->getLoopFor(ExitingBB) != L) continue; - + if (!needsLFTR(L, ExitingBB)) continue; @@ -3077,14 +2757,15 @@ bool IndVarSimplify::run(Loop *L) { // until stable to handle cases like this better. if (ExitCount->isZero()) continue; - + PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); if (!IndVar) continue; - + // Avoid high cost expansions. Note: This heuristic is questionable in - // that our definition of "high cost" is not exactly principled. - if (Rewriter.isHighCostExpansion(ExitCount, L)) + // that our definition of "high cost" is not exactly principled. + if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget, + TTI, PreHeaderBR)) continue; // Check preconditions for proper SCEVExpander operation. SCEV does not @@ -3092,7 +2773,7 @@ bool IndVarSimplify::run(Loop *L) { // any pass that uses the SCEVExpander must do it. This does not work // well for loop passes because SCEVExpander makes assumptions about // all loops, while LoopPassManager only forces the current loop to be - // simplified. + // simplified. // // FIXME: SCEV expansion has no way to bail out, so the caller must // explicitly check any assumptions made by SCEV. Brittle. @@ -3113,7 +2794,8 @@ bool IndVarSimplify::run(Loop *L) { while (!DeadInsts.empty()) if (Instruction *Inst = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) - Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); + Changed |= + RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get()); // The Rewriter may not be used from this point on. @@ -3127,7 +2809,7 @@ bool IndVarSimplify::run(Loop *L) { Changed |= rewriteFirstIterationLoopExitValues(L); // Clean up dead instructions. - Changed |= DeleteDeadPHIs(L->getHeader(), TLI); + Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get()); // Check a post-condition. assert(L->isRecursivelyLCSSAForm(*DT, *LI) && @@ -3150,6 +2832,8 @@ bool IndVarSimplify::run(Loop *L) { assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount, NewBECount) && "indvars must preserve SCEV"); } + if (VerifyMemorySSA && MSSAU) + MSSAU->getMemorySSA()->verifyMemorySSA(); #endif return Changed; @@ -3161,12 +2845,14 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, Function *F = L.getHeader()->getParent(); const DataLayout &DL = F->getParent()->getDataLayout(); - IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI); + IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA); if (!IVS.run(&L)) return PreservedAnalyses::all(); auto PA = getLoopPassPreservedAnalyses(); PA.preserveSet<CFGAnalyses>(); + if (AR.MSSA) + PA.preserve<MemorySSAAnalysis>(); return PA; } @@ -3191,13 +2877,18 @@ struct IndVarSimplifyLegacyPass : public LoopPass { auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>(); + MemorySSA *MSSA = nullptr; + if (MSSAAnalysis) + MSSA = &MSSAAnalysis->getMSSA(); - IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI); + IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA); return IVS.run(L); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + AU.addPreserved<MemorySSAWrapperPass>(); getLoopAnalysisUsage(AU); } }; |