aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/IndVarSimplify.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/IndVarSimplify.cpp595
1 files changed, 143 insertions, 452 deletions
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index d8d7acae5c9f..0f36c3f772e6 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -38,8 +38,9 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -81,6 +82,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
@@ -100,10 +102,10 @@ STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
- "verify-indvars", cl::Hidden,
- cl::desc("Verify the ScalarEvolution result after running indvars"));
-
-enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };
+ "verify-indvars", cl::Hidden,
+ cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
+ "effect in release builds. (Note: this adds additional SCEV "
+ "queries potentially changing the analysis result)"));
static cl::opt<ReplaceExitVal> ReplaceExitValue(
"replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
@@ -140,11 +142,10 @@ class IndVarSimplify {
const DataLayout &DL;
TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
SmallVector<WeakTrackingVH, 16> DeadInsts;
- bool isValidRewrite(Value *FromVal, Value *ToVal);
-
bool handleFloatingPointIV(Loop *L, PHINode *PH);
bool rewriteNonIntegerIVs(Loop *L);
@@ -155,10 +156,7 @@ class IndVarSimplify {
/// iterations of the loop run when that is unobservable.
bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
- bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
- bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
bool rewriteFirstIterationLoopExitValues(Loop *L);
- bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
const SCEV *ExitCount,
@@ -169,66 +167,17 @@ class IndVarSimplify {
public:
IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
const DataLayout &DL, TargetLibraryInfo *TLI,
- TargetTransformInfo *TTI)
- : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}
+ TargetTransformInfo *TTI, MemorySSA *MSSA)
+ : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {
+ if (MSSA)
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+ }
bool run(Loop *L);
};
} // end anonymous namespace
-/// Return true if the SCEV expansion generated by the rewriter can replace the
-/// original value. SCEV guarantees that it produces the same value, but the way
-/// it is produced may be illegal IR. Ideally, this function will only be
-/// called for verification.
-bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
- // If an SCEV expression subsumed multiple pointers, its expansion could
- // reassociate the GEP changing the base pointer. This is illegal because the
- // final address produced by a GEP chain must be inbounds relative to its
- // underlying object. Otherwise basic alias analysis, among other things,
- // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
- // producing an expression involving multiple pointers. Until then, we must
- // bail out here.
- //
- // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
- // because it understands lcssa phis while SCEV does not.
- Value *FromPtr = FromVal;
- Value *ToPtr = ToVal;
- if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
- FromPtr = GEP->getPointerOperand();
- }
- if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
- ToPtr = GEP->getPointerOperand();
- }
- if (FromPtr != FromVal || ToPtr != ToVal) {
- // Quickly check the common case
- if (FromPtr == ToPtr)
- return true;
-
- // SCEV may have rewritten an expression that produces the GEP's pointer
- // operand. That's ok as long as the pointer operand has the same base
- // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
- // base of a recurrence. This handles the case in which SCEV expansion
- // converts a pointer type recurrence into a nonrecurrent pointer base
- // indexed by an integer recurrence.
-
- // If the GEP base pointer is a vector of pointers, abort.
- if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
- return false;
-
- const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
- const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
- if (FromBase == ToBase)
- return true;
-
- LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
- << " != " << *ToBase << "\n");
-
- return false;
- }
- return true;
-}
-
/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
@@ -477,11 +426,11 @@ bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
// new comparison.
NewCompare->takeName(Compare);
Compare->replaceAllUsesWith(NewCompare);
- RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);
+ RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
// Delete the old floating point increment.
Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
- RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);
+ RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
// If the FP induction variable still has uses, this is because something else
// in the loop uses its value. In order to canonicalize the induction
@@ -494,7 +443,7 @@ bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
&*PN->getParent()->getFirstInsertionPt());
PN->replaceAllUsesWith(Conv);
- RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
+ RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
}
return true;
}
@@ -522,222 +471,6 @@ bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
return Changed;
}
-namespace {
-
-// Collect information about PHI nodes which can be transformed in
-// rewriteLoopExitValues.
-struct RewritePhi {
- PHINode *PN;
-
- // Ith incoming value.
- unsigned Ith;
-
- // Exit value after expansion.
- Value *Val;
-
- // High Cost when expansion.
- bool HighCost;
-
- RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
- : PN(P), Ith(I), Val(V), HighCost(H) {}
-};
-
-} // end anonymous namespace
-
-//===----------------------------------------------------------------------===//
-// rewriteLoopExitValues - Optimize IV users outside the loop.
-// As a side effect, reduces the amount of IV processing within the loop.
-//===----------------------------------------------------------------------===//
-
-bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
- SmallPtrSet<const Instruction *, 8> Visited;
- SmallVector<const Instruction *, 8> WorkList;
- Visited.insert(I);
- WorkList.push_back(I);
- while (!WorkList.empty()) {
- const Instruction *Curr = WorkList.pop_back_val();
- // This use is outside the loop, nothing to do.
- if (!L->contains(Curr))
- continue;
- // Do we assume it is a "hard" use which will not be eliminated easily?
- if (Curr->mayHaveSideEffects())
- return true;
- // Otherwise, add all its users to worklist.
- for (auto U : Curr->users()) {
- auto *UI = cast<Instruction>(U);
- if (Visited.insert(UI).second)
- WorkList.push_back(UI);
- }
- }
- return false;
-}
-
-/// Check to see if this loop has a computable loop-invariant execution count.
-/// If so, this means that we can compute the final value of any expressions
-/// that are recurrent in the loop, and substitute the exit values from the loop
-/// into any instructions outside of the loop that use the final values of the
-/// current expressions.
-///
-/// This is mostly redundant with the regular IndVarSimplify activities that
-/// happen later, except that it's more powerful in some cases, because it's
-/// able to brute-force evaluate arbitrary instructions as long as they have
-/// constant operands at the beginning of the loop.
-bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
- // Check a pre-condition.
- assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
- "Indvars did not preserve LCSSA!");
-
- SmallVector<BasicBlock*, 8> ExitBlocks;
- L->getUniqueExitBlocks(ExitBlocks);
-
- SmallVector<RewritePhi, 8> RewritePhiSet;
- // Find all values that are computed inside the loop, but used outside of it.
- // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
- // the exit blocks of the loop to find them.
- for (BasicBlock *ExitBB : ExitBlocks) {
- // If there are no PHI nodes in this exit block, then no values defined
- // inside the loop are used on this path, skip it.
- PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
- if (!PN) continue;
-
- unsigned NumPreds = PN->getNumIncomingValues();
-
- // Iterate over all of the PHI nodes.
- BasicBlock::iterator BBI = ExitBB->begin();
- while ((PN = dyn_cast<PHINode>(BBI++))) {
- if (PN->use_empty())
- continue; // dead use, don't replace it
-
- if (!SE->isSCEVable(PN->getType()))
- continue;
-
- // It's necessary to tell ScalarEvolution about this explicitly so that
- // it can walk the def-use list and forget all SCEVs, as it may not be
- // watching the PHI itself. Once the new exit value is in place, there
- // may not be a def-use connection between the loop and every instruction
- // which got a SCEVAddRecExpr for that loop.
- SE->forgetValue(PN);
-
- // Iterate over all of the values in all the PHI nodes.
- for (unsigned i = 0; i != NumPreds; ++i) {
- // If the value being merged in is not integer or is not defined
- // in the loop, skip it.
- Value *InVal = PN->getIncomingValue(i);
- if (!isa<Instruction>(InVal))
- continue;
-
- // If this pred is for a subloop, not L itself, skip it.
- if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
- continue; // The Block is in a subloop, skip it.
-
- // Check that InVal is defined in the loop.
- Instruction *Inst = cast<Instruction>(InVal);
- if (!L->contains(Inst))
- continue;
-
- // Okay, this instruction has a user outside of the current loop
- // and varies predictably *inside* the loop. Evaluate the value it
- // contains when the loop exits, if possible. We prefer to start with
- // expressions which are true for all exits (so as to maximize
- // expression reuse by the SCEVExpander), but resort to per-exit
- // evaluation if that fails.
- const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
- if (isa<SCEVCouldNotCompute>(ExitValue) ||
- !SE->isLoopInvariant(ExitValue, L) ||
- !isSafeToExpand(ExitValue, *SE)) {
- // TODO: This should probably be sunk into SCEV in some way; maybe a
- // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for
- // most SCEV expressions and other recurrence types (e.g. shift
- // recurrences). Is there existing code we can reuse?
- const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
- if (isa<SCEVCouldNotCompute>(ExitCount))
- continue;
- if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
- if (AddRec->getLoop() == L)
- ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
- if (isa<SCEVCouldNotCompute>(ExitValue) ||
- !SE->isLoopInvariant(ExitValue, L) ||
- !isSafeToExpand(ExitValue, *SE))
- continue;
- }
-
- // Computing the value outside of the loop brings no benefit if it is
- // definitely used inside the loop in a way which can not be optimized
- // away. Avoid doing so unless we know we have a value which computes
- // the ExitValue already. TODO: This should be merged into SCEV
- // expander to leverage its knowledge of existing expressions.
- if (ReplaceExitValue != AlwaysRepl &&
- !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
- hasHardUserWithinLoop(L, Inst))
- continue;
-
- bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
- Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
-
- LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
- << '\n'
- << " LoopVal = " << *Inst << "\n");
-
- if (!isValidRewrite(Inst, ExitVal)) {
- DeadInsts.push_back(ExitVal);
- continue;
- }
-
-#ifndef NDEBUG
- // If we reuse an instruction from a loop which is neither L nor one of
- // its containing loops, we end up breaking LCSSA form for this loop by
- // creating a new use of its instruction.
- if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
- if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
- if (EVL != L)
- assert(EVL->contains(L) && "LCSSA breach detected!");
-#endif
-
- // Collect all the candidate PHINodes to be rewritten.
- RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
- }
- }
- }
-
- bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
-
- bool Changed = false;
- // Transformation.
- for (const RewritePhi &Phi : RewritePhiSet) {
- PHINode *PN = Phi.PN;
- Value *ExitVal = Phi.Val;
-
- // Only do the rewrite when the ExitValue can be expanded cheaply.
- // If LoopCanBeDel is true, rewrite exit value aggressively.
- if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
- DeadInsts.push_back(ExitVal);
- continue;
- }
-
- Changed = true;
- ++NumReplaced;
- Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
- PN->setIncomingValue(Phi.Ith, ExitVal);
-
- // If this instruction is dead now, delete it. Don't do it now to avoid
- // invalidating iterators.
- if (isInstructionTriviallyDead(Inst, TLI))
- DeadInsts.push_back(Inst);
-
- // Replace PN with ExitVal if that is legal and does not break LCSSA.
- if (PN->getNumIncomingValues() == 1 &&
- LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
- PN->replaceAllUsesWith(ExitVal);
- PN->eraseFromParent();
- }
- }
-
- // The insertion point instruction may have been deleted; clear it out
- // so that the rewriter doesn't trip over it later.
- Rewriter.clearInsertPoint();
- return Changed;
-}
-
//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
@@ -813,61 +546,6 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
return MadeAnyChanges;
}
-/// Check whether it is possible to delete the loop after rewriting exit
-/// value. If it is possible, ignore ReplaceExitValue and do rewriting
-/// aggressively.
-bool IndVarSimplify::canLoopBeDeleted(
- Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
- BasicBlock *Preheader = L->getLoopPreheader();
- // If there is no preheader, the loop will not be deleted.
- if (!Preheader)
- return false;
-
- // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
- // We obviate multiple ExitingBlocks case for simplicity.
- // TODO: If we see testcase with multiple ExitingBlocks can be deleted
- // after exit value rewriting, we can enhance the logic here.
- SmallVector<BasicBlock *, 4> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
- SmallVector<BasicBlock *, 8> ExitBlocks;
- L->getUniqueExitBlocks(ExitBlocks);
- if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
- return false;
-
- BasicBlock *ExitBlock = ExitBlocks[0];
- BasicBlock::iterator BI = ExitBlock->begin();
- while (PHINode *P = dyn_cast<PHINode>(BI)) {
- Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
-
- // If the Incoming value of P is found in RewritePhiSet, we know it
- // could be rewritten to use a loop invariant value in transformation
- // phase later. Skip it in the loop invariant check below.
- bool found = false;
- for (const RewritePhi &Phi : RewritePhiSet) {
- unsigned i = Phi.Ith;
- if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
- found = true;
- break;
- }
- }
-
- Instruction *I;
- if (!found && (I = dyn_cast<Instruction>(Incoming)))
- if (!L->hasLoopInvariantOperands(I))
- return false;
-
- ++BI;
- }
-
- for (auto *BB : L->blocks())
- if (llvm::any_of(*BB, [](Instruction &I) {
- return I.mayHaveSideEffects();
- }))
- return false;
-
- return true;
-}
-
//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//
@@ -1060,8 +738,8 @@ protected:
Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
bool widenLoopCompare(NarrowIVDefUse DU);
- bool widenWithVariantLoadUse(NarrowIVDefUse DU);
- void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);
+ bool widenWithVariantUse(NarrowIVDefUse DU);
+ void widenWithVariantUseCodegen(NarrowIVDefUse DU);
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
@@ -1399,20 +1077,27 @@ bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
return true;
}
-/// If the narrow use is an instruction whose two operands are the defining
-/// instruction of DU and a load instruction, then we have the following:
-/// if the load is hoisted outside the loop, then we do not reach this function
-/// as scalar evolution analysis works fine in widenIVUse with variables
-/// hoisted outside the loop and efficient code is subsequently generated by
-/// not emitting truncate instructions. But when the load is not hoisted
-/// (whether due to limitation in alias analysis or due to a true legality),
-/// then scalar evolution can not proceed with loop variant values and
-/// inefficient code is generated. This function handles the non-hoisted load
-/// special case by making the optimization generate the same type of code for
-/// hoisted and non-hoisted load (widen use and eliminate sign extend
-/// instruction). This special case is important especially when the induction
-/// variables are affecting addressing mode in code generation.
-bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
+// The widenIVUse avoids generating trunc by evaluating the use as AddRec, this
+// will not work when:
+// 1) SCEV traces back to an instruction inside the loop that SCEV can not
+// expand, eg. add %indvar, (load %addr)
+// 2) SCEV finds a loop variant, eg. add %indvar, %loopvariant
+// While SCEV fails to avoid trunc, we can still try to use instruction
+// combining approach to prove trunc is not required. This can be further
+// extended with other instruction combining checks, but for now we handle the
+// following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext")
+//
+// Src:
+// %c = sub nsw %b, %indvar
+// %d = sext %c to i64
+// Dst:
+// %indvar.ext1 = sext %indvar to i64
+// %m = sext %b to i64
+// %d = sub nsw i64 %m, %indvar.ext1
+// Therefore, as long as the result of add/sub/mul is extended to wide type, no
+// trunc is required regardless of how %b is generated. This pattern is common
+// when calculating address in 64 bit architecture
+bool WidenIV::widenWithVariantUse(NarrowIVDefUse DU) {
Instruction *NarrowUse = DU.NarrowUse;
Instruction *NarrowDef = DU.NarrowDef;
Instruction *WideDef = DU.WideDef;
@@ -1443,12 +1128,6 @@ bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
else
return false;
- // We are interested in the other operand being a load instruction.
- // But, we should look into relaxing this restriction later on.
- auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
- if (I && I->getOpcode() != Instruction::Load)
- return false;
-
// Verifying that Defining operand is an AddRec
const SCEV *Op1 = SE->getSCEV(WideDef);
const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
@@ -1480,9 +1159,9 @@ bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
return true;
}
-/// Special Case for widening with variant Loads (see
-/// WidenIV::widenWithVariantLoadUse). This is the code generation part.
-void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
+/// Special Case for widening with loop variant (see
+/// WidenIV::widenWithVariant). This is the code generation part.
+void WidenIV::widenWithVariantUseCodegen(NarrowIVDefUse DU) {
Instruction *NarrowUse = DU.NarrowUse;
Instruction *NarrowDef = DU.NarrowDef;
Instruction *WideDef = DU.WideDef;
@@ -1508,33 +1187,22 @@ void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
Builder.Insert(WideBO);
WideBO->copyIRFlags(NarrowBO);
- if (ExtKind == SignExtended)
- ExtendKindMap[NarrowUse] = SignExtended;
- else
- ExtendKindMap[NarrowUse] = ZeroExtended;
+ assert(ExtKind != Unknown && "Unknown ExtKind not handled");
- // Update the Use.
- if (ExtKind == SignExtended) {
- for (Use &U : NarrowUse->uses()) {
- SExtInst *User = dyn_cast<SExtInst>(U.getUser());
- if (User && User->getType() == WideType) {
- LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
- << *WideBO << "\n");
- ++NumElimExt;
- User->replaceAllUsesWith(WideBO);
- DeadInsts.emplace_back(User);
- }
- }
- } else { // ExtKind == ZeroExtended
- for (Use &U : NarrowUse->uses()) {
- ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
- if (User && User->getType() == WideType) {
- LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
- << *WideBO << "\n");
- ++NumElimExt;
- User->replaceAllUsesWith(WideBO);
- DeadInsts.emplace_back(User);
- }
+ ExtendKindMap[NarrowUse] = ExtKind;
+
+ for (Use &U : NarrowUse->uses()) {
+ Instruction *User = nullptr;
+ if (ExtKind == SignExtended)
+ User = dyn_cast<SExtInst>(U.getUser());
+ else
+ User = dyn_cast<ZExtInst>(U.getUser());
+ if (User && User->getType() == WideType) {
+ LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
+ << *WideBO << "\n");
+ ++NumElimExt;
+ User->replaceAllUsesWith(WideBO);
+ DeadInsts.emplace_back(User);
}
}
}
@@ -1641,8 +1309,8 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// in WideAddRec.first does not indicate a polynomial induction expression.
// In that case, look at the operands of the use instruction to determine
// if we can still widen the use instead of truncating its operand.
- if (widenWithVariantLoadUse(DU)) {
- widenWithVariantLoadUseCodegen(DU);
+ if (widenWithVariantUse(DU)) {
+ widenWithVariantUseCodegen(DU);
return nullptr;
}
@@ -1992,8 +1660,8 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L,
// Information about sign/zero extensions of CurrIV.
IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
- Changed |=
- simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
+ Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
+ &Visitor);
if (Visitor.WI.WidestNativeType) {
WideIVs.push_back(Visitor.WI);
@@ -2017,7 +1685,7 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L,
/// Given an Value which is hoped to be part of an add recurance in the given
/// loop, return the associated Phi node if so. Otherwise, return null. Note
-/// that this is less general than SCEVs AddRec checking.
+/// that this is less general than SCEVs AddRec checking.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
Instruction *IncI = dyn_cast<Instruction>(IncV);
if (!IncI)
@@ -2079,7 +1747,7 @@ static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
if (L->isLoopInvariant(BI->getCondition()))
return false;
-
+
// Do LFTR to simplify the exit condition to an ICMP.
ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
if (!Cond)
@@ -2122,9 +1790,9 @@ static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
/// actually poison. This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
-/// exist. Note that a false result conveys no information.
+/// exist. Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
- Instruction *OnPathTo,
+ Instruction *OnPathTo,
DominatorTree *DT) {
// Basic approach is to assume Root is poison, propagate poison forward
// through all users we can easily track, and then check whether any of those
@@ -2142,10 +1810,10 @@ static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
// If we know this must trigger UB on a path leading our target.
if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
return true;
-
+
// If we can't analyze propagation through this instruction, just skip it
// and transitive users. Safe as false is a conservative result.
- if (!propagatesFullPoison(I) && I != Root)
+ if (!propagatesPoison(I) && I != Root)
continue;
if (KnownPoison.insert(I).second)
@@ -2154,7 +1822,7 @@ static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
}
// Might be non-UB, or might have a path we couldn't prove must execute on
- // way to exiting bb.
+ // way to exiting bb.
return false;
}
@@ -2221,7 +1889,7 @@ static bool isLoopCounter(PHINode* Phi, Loop *L,
ScalarEvolution *SE) {
assert(Phi->getParent() == L->getHeader());
assert(L->getLoopLatch());
-
+
if (!SE->isSCEVable(Phi->getType()))
return false;
@@ -2282,7 +1950,7 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
if (!hasConcreteDef(Phi)) {
// We explicitly allow unknown phis as long as they are already used by
// the loop exit test. This is legal since performing LFTR could not
- // increase the number of undef users.
+ // increase the number of undef users.
Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
!isLoopExitTestBasedOn(IncPhi, ExitingBB))
@@ -2300,7 +1968,7 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
if (!Phi->getType()->isIntegerTy() &&
!mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
continue;
-
+
const SCEV *Init = AR->getStart();
if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
@@ -2506,14 +2174,14 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
// reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
// the other side of the comparison instead. We still evaluate the limit
// in the narrower bitwidth, we just prefer a zext/sext outside the loop to
- // a truncate within in.
+ // a truncate within in.
bool Extended = false;
const SCEV *IV = SE->getSCEV(CmpIndVar);
const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
ExitCnt->getType());
const SCEV *ZExtTrunc =
SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
-
+
if (ZExtTrunc == IV) {
Extended = true;
ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
@@ -2531,7 +2199,7 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
if (Extended) {
bool Discard;
L->makeLoopInvariant(ExitCnt, Discard);
- } else
+ } else
CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
"lftr.wideiv");
}
@@ -2551,7 +2219,7 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
// update the branch to use the new comparison; in the common case this
// will make old comparison dead.
BI->setCondition(Cond);
- DeadInsts.push_back(OrigCond);
+ DeadInsts.emplace_back(OrigCond);
++NumLFTR;
return true;
@@ -2685,11 +2353,10 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
L->getExitingBlocks(ExitingBlocks);
// Remove all exits which aren't both rewriteable and analyzeable.
- auto NewEnd = llvm::remove_if(ExitingBlocks,
- [&](BasicBlock *ExitingBB) {
+ auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
// If our exitting block exits multiple loops, we can only rewrite the
// innermost one. Otherwise, we're changing how many times the innermost
- // loop runs before it exits.
+ // loop runs before it exits.
if (LI->getLoopFor(ExitingBB) != L)
return true;
@@ -2701,18 +2368,18 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
// If already constant, nothing to do.
if (isa<Constant>(BI->getCondition()))
return true;
-
+
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
if (isa<SCEVCouldNotCompute>(ExitCount))
return true;
return false;
- });
+ });
ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
if (ExitingBlocks.empty())
return false;
-
- // Get a symbolic upper bound on the loop backedge taken count.
+
+ // Get a symbolic upper bound on the loop backedge taken count.
const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
if (isa<SCEVCouldNotCompute>(MaxExitCount))
return false;
@@ -2720,11 +2387,12 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
// Visit our exit blocks in order of dominance. We know from the fact that
// all exits (left) are analyzeable that the must be a total dominance order
// between them as each must dominate the latch. The visit order only
- // matters for the provably equal case.
+ // matters for the provably equal case.
llvm::sort(ExitingBlocks,
[&](BasicBlock *A, BasicBlock *B) {
// std::sort sorts in ascending order, so we want the inverse of
// the normal dominance relation.
+ if (A == B) return false;
if (DT->properlyDominates(A, B)) return true;
if (DT->properlyDominates(B, A)) return false;
llvm_unreachable("expected total dominance order!");
@@ -2734,7 +2402,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
}
#endif
-
+
auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
@@ -2743,7 +2411,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
IsTaken ? ExitIfTrue : !ExitIfTrue);
BI->setCondition(NewCond);
if (OldCond->use_empty())
- DeadInsts.push_back(OldCond);
+ DeadInsts.emplace_back(OldCond);
};
bool Changed = false;
@@ -2751,7 +2419,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
for (BasicBlock *ExitingBB : ExitingBlocks) {
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
-
+
// If we know we'd exit on the first iteration, rewrite the exit to
// reflect this. This does not imply the loop must exit through this
// exit; there may be an earlier one taken on the first iteration.
@@ -2769,13 +2437,13 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
if (!ExitCount->getType()->isIntegerTy() ||
!MaxExitCount->getType()->isIntegerTy())
continue;
-
+
Type *WiderType =
SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
assert(MaxExitCount->getType() == ExitCount->getType());
-
+
// Can we prove that some other exit must be taken strictly before this
// one?
if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
@@ -2788,7 +2456,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
// As we run, keep track of which exit counts we've encountered. If we
// find a duplicate, we've found an exit which would have exited on the
// exiting iteration, but (from the visit order) strictly follows another
- // which does the same and is thus dead.
+ // which does the same and is thus dead.
if (!DominatingExitCounts.insert(ExitCount).second) {
FoldExit(ExitingBB, false);
Changed = true;
@@ -2809,22 +2477,20 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- bool Changed = false;
-
// Finally, see if we can rewrite our exit conditions into a loop invariant
- // form. If we have a read-only loop, and we can tell that we must exit down
+ // form. If we have a read-only loop, and we can tell that we must exit down
// a path which does not need any of the values computed within the loop, we
// can rewrite the loop to exit on the first iteration. Note that this
// doesn't either a) tell us the loop exits on the first iteration (unless
// *all* exits are predicateable) or b) tell us *which* exit might be taken.
// This transformation looks a lot like a restricted form of dead loop
// elimination, but restricted to read-only loops and without neccesssarily
- // needing to kill the loop entirely.
+ // needing to kill the loop entirely.
if (!LoopPredication)
- return Changed;
+ return false;
if (!SE->hasLoopInvariantBackedgeTakenCount(L))
- return Changed;
+ return false;
// Note: ExactBTC is the exact backedge taken count *iff* the loop exits
// through *explicit* control flow. We have to eliminate the possibility of
@@ -2833,16 +2499,16 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
if (isa<SCEVCouldNotCompute>(ExactBTC) ||
!SE->isLoopInvariant(ExactBTC, L) ||
!isSafeToExpand(ExactBTC, *SE))
- return Changed;
+ return false;
// If we end up with a pointer exit count, bail. It may be unsized.
if (!ExactBTC->getType()->isIntegerTy())
- return Changed;
+ return false;
auto BadExit = [&](BasicBlock *ExitingBB) {
// If our exiting block exits multiple loops, we can only rewrite the
// innermost one. Otherwise, we're changing how many times the innermost
- // loop runs before it exits.
+ // loop runs before it exits.
if (LI->getLoopFor(ExitingBB) != L)
return true;
@@ -2897,18 +2563,18 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// is complicated and we choose not to for now.
for (unsigned i = 1; i < ExitingBlocks.size(); i++)
if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
- return Changed;
+ return false;
// Given our sorted total order, we know that exit[j] must be evaluated
// after all exit[i] such j > i.
for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
if (BadExit(ExitingBlocks[i])) {
- ExitingBlocks.resize(i);
+ ExitingBlocks.resize(i);
break;
}
if (ExitingBlocks.empty())
- return Changed;
+ return false;
// We rely on not being able to reach an exiting block on a later iteration
// then it's statically compute exit count. The implementaton of
@@ -2930,8 +2596,9 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
for (auto &I : *BB)
// TODO:isGuaranteedToTransfer
if (I.mayHaveSideEffects() || I.mayThrow())
- return Changed;
+ return false;
+ bool Changed = false;
// Finally, do the actual predication for all predicatable blocks. A couple
// of notes here:
// 1) We don't bother to constant fold dominated exits with identical exit
@@ -2970,7 +2637,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
Value *OldCond = BI->getCondition();
BI->setCondition(NewCond);
if (OldCond->use_empty())
- DeadInsts.push_back(OldCond);
+ DeadInsts.emplace_back(OldCond);
Changed = true;
}
@@ -2985,7 +2652,6 @@ bool IndVarSimplify::run(Loop *L) {
// We need (and expect!) the incoming loop to be in LCSSA.
assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
"LCSSA required to run indvars!");
- bool Changed = false;
// If LoopSimplify form is not available, stay out of trouble. Some notes:
// - LSR currently only supports LoopSimplify-form loops. Indvars'
@@ -3001,9 +2667,15 @@ bool IndVarSimplify::run(Loop *L) {
#ifndef NDEBUG
// Used below for a consistency check only
- const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ // Note: Since the result returned by ScalarEvolution may depend on the order
+ // in which previous results are added to its cache, the call to
+ // getBackedgeTakenCount() may change following SCEV queries.
+ const SCEV *BackedgeTakenCount;
+ if (VerifyIndvars)
+ BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif
+ bool Changed = false;
// If there are any floating-point recurrences, attempt to
// transform them to use integer recurrences.
Changed |= rewriteNonIntegerIVs(L);
@@ -3027,8 +2699,13 @@ bool IndVarSimplify::run(Loop *L) {
// that are recurrent in the loop, and substitute the exit values from the
// loop into any instructions outside of the loop that use the final values
// of the current expressions.
- if (ReplaceExitValue != NeverRepl)
- Changed |= rewriteLoopExitValues(L, Rewriter);
+ if (ReplaceExitValue != NeverRepl) {
+ if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
+ ReplaceExitValue, DeadInsts)) {
+ NumReplaced += Rewrites;
+ Changed = true;
+ }
+ }
// Eliminate redundant IV cycles.
NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
@@ -3039,7 +2716,7 @@ bool IndVarSimplify::run(Loop *L) {
// Given we've changed exit counts, notify SCEV
SE->forgetLoop(L);
}
-
+
// Try to form loop invariant tests for loop exits by changing how many
// iterations of the loop run when that is unobservable.
if (predicateLoopExits(L, Rewriter)) {
@@ -3049,8 +2726,11 @@ bool IndVarSimplify::run(Loop *L) {
}
// If we have a trip count expression, rewrite the loop's exit condition
- // using it.
+ // using it.
if (!DisableLFTR) {
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
+
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
for (BasicBlock *ExitingBB : ExitingBlocks) {
@@ -3060,10 +2740,10 @@ bool IndVarSimplify::run(Loop *L) {
// If our exitting block exits multiple loops, we can only rewrite the
// innermost one. Otherwise, we're changing how many times the innermost
- // loop runs before it exits.
+ // loop runs before it exits.
if (LI->getLoopFor(ExitingBB) != L)
continue;
-
+
if (!needsLFTR(L, ExitingBB))
continue;
@@ -3077,14 +2757,15 @@ bool IndVarSimplify::run(Loop *L) {
// until stable to handle cases like this better.
if (ExitCount->isZero())
continue;
-
+
PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
if (!IndVar)
continue;
-
+
// Avoid high cost expansions. Note: This heuristic is questionable in
- // that our definition of "high cost" is not exactly principled.
- if (Rewriter.isHighCostExpansion(ExitCount, L))
+ // that our definition of "high cost" is not exactly principled.
+ if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
+ TTI, PreHeaderBR))
continue;
// Check preconditions for proper SCEVExpander operation. SCEV does not
@@ -3092,7 +2773,7 @@ bool IndVarSimplify::run(Loop *L) {
// any pass that uses the SCEVExpander must do it. This does not work
// well for loop passes because SCEVExpander makes assumptions about
// all loops, while LoopPassManager only forces the current loop to be
- // simplified.
+ // simplified.
//
// FIXME: SCEV expansion has no way to bail out, so the caller must
// explicitly check any assumptions made by SCEV. Brittle.
@@ -3113,7 +2794,8 @@ bool IndVarSimplify::run(Loop *L) {
while (!DeadInsts.empty())
if (Instruction *Inst =
dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
- Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
+ Changed |=
+ RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
// The Rewriter may not be used from this point on.
@@ -3127,7 +2809,7 @@ bool IndVarSimplify::run(Loop *L) {
Changed |= rewriteFirstIterationLoopExitValues(L);
// Clean up dead instructions.
- Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
+ Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
// Check a post-condition.
assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
@@ -3150,6 +2832,8 @@ bool IndVarSimplify::run(Loop *L) {
assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
NewBECount) && "indvars must preserve SCEV");
}
+ if (VerifyMemorySSA && MSSAU)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
#endif
return Changed;
@@ -3161,12 +2845,14 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
Function *F = L.getHeader()->getParent();
const DataLayout &DL = F->getParent()->getDataLayout();
- IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
+ IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA);
if (!IVS.run(&L))
return PreservedAnalyses::all();
auto PA = getLoopPassPreservedAnalyses();
PA.preserveSet<CFGAnalyses>();
+ if (AR.MSSA)
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
@@ -3191,13 +2877,18 @@ struct IndVarSimplifyLegacyPass : public LoopPass {
auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ MemorySSA *MSSA = nullptr;
+ if (MSSAAnalysis)
+ MSSA = &MSSAAnalysis->getMSSA();
- IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
+ IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA);
return IVS.run(L);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addPreserved<MemorySSAWrapperPass>();
getLoopAnalysisUsage(AU);
}
};