Diffstat (limited to 'contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp | 1716
1 file changed, 807 insertions, 909 deletions
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp index 2d980e6935b3..f7c22cfb0310 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp @@ -139,8 +139,6 @@ using namespace PatternMatch; #define DEBUG_TYPE "scalar-evolution" -STATISTIC(NumArrayLenItCounts, - "Number of trip counts computed with array length"); STATISTIC(NumTripCountsComputed, "Number of loops with predictable loop counts"); STATISTIC(NumTripCountsNotComputed, @@ -1100,7 +1098,7 @@ const SCEV *ScalarEvolution::getLosslessPtrToIntExpr(const SCEV *Op, SCEV *S = new (SCEVAllocator) SCEVPtrToIntExpr(ID.Intern(SCEVAllocator), Op, IntPtrTy); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); + registerUser(S, Op); return S; } @@ -1220,7 +1218,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty, SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); + registerUser(S, Op); return S; } @@ -1274,7 +1272,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty, SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); + registerUser(S, Op); return S; } @@ -1603,7 +1601,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); + registerUser(S, Op); return S; } @@ -1872,7 +1870,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); + registerUser(S, Op); return S; } @@ -1911,7 +1909,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); + registerUser(S, Op); return S; } @@ -2108,7 +2106,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); + registerUser(S, { Op }); return S; } @@ -2390,6 +2388,24 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, } } + // <0,+,nonnegative><nw> is also nuw + // TODO: Add corresponding nsw case + if (Type == scAddRecExpr && ScalarEvolution::hasFlags(Flags, SCEV::FlagNW) && + !ScalarEvolution::hasFlags(Flags, SCEV::FlagNUW) && Ops.size() == 2 && + Ops[0]->isZero() && IsKnownNonNegative(Ops[1])) + Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); + + // both (udiv X, Y) * Y and Y * (udiv X, Y) are always NUW + if (Type == scMulExpr && !ScalarEvolution::hasFlags(Flags, SCEV::FlagNUW) && + Ops.size() == 2) { + if (auto *UDiv = dyn_cast<SCEVUDivExpr>(Ops[0])) + if (UDiv->getOperand(1) == Ops[1]) + Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); + if (auto *UDiv = dyn_cast<SCEVUDivExpr>(Ops[1])) + if (UDiv->getOperand(1) == Ops[0]) + Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); + } + return Flags; } @@ -2449,7 +2465,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, if (Depth > MaxArithDepth || hasHugeExpression(Ops)) 
return getOrCreateAddExpr(Ops, ComputeFlags(Ops)); - if (SCEV *S = std::get<0>(findExistingSCEVInCache(scAddExpr, Ops))) { + if (SCEV *S = findExistingSCEVInCache(scAddExpr, Ops)) { // Don't strengthen flags if we have no new information. SCEVAddExpr *Add = static_cast<SCEVAddExpr *>(S); if (Add->getNoWrapFlags(OrigFlags) != OrigFlags) @@ -2562,8 +2578,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, APInt ConstAdd = C1 + C2; auto AddFlags = AddExpr->getNoWrapFlags(); // Adding a smaller constant is NUW if the original AddExpr was NUW. - if (ScalarEvolution::maskFlags(AddFlags, SCEV::FlagNUW) == - SCEV::FlagNUW && + if (ScalarEvolution::hasFlags(AddFlags, SCEV::FlagNUW) && ConstAdd.ule(C1)) { PreservedFlags = ScalarEvolution::setFlags(PreservedFlags, SCEV::FlagNUW); @@ -2571,8 +2586,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // Adding a constant with the same sign and small magnitude is NSW, if the // original AddExpr was NSW. - if (ScalarEvolution::maskFlags(AddFlags, SCEV::FlagNSW) == - SCEV::FlagNSW && + if (ScalarEvolution::hasFlags(AddFlags, SCEV::FlagNSW) && C1.isSignBitSet() == ConstAdd.isSignBitSet() && ConstAdd.abs().ule(C1.abs())) { PreservedFlags = @@ -2580,14 +2594,26 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, } if (PreservedFlags != SCEV::FlagAnyWrap) { - SmallVector<const SCEV *, 4> NewOps(AddExpr->op_begin(), - AddExpr->op_end()); + SmallVector<const SCEV *, 4> NewOps(AddExpr->operands()); NewOps[0] = getConstant(ConstAdd); return getAddExpr(NewOps, PreservedFlags); } } } + // Canonicalize (-1 * urem X, Y) + X --> (Y * X/Y) + if (Ops.size() == 2) { + const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[0]); + if (Mul && Mul->getNumOperands() == 2 && + Mul->getOperand(0)->isAllOnesValue()) { + const SCEV *X; + const SCEV *Y; + if (matchURem(Mul->getOperand(1), X, Y) && X == Ops[1]) { + return getMulExpr(Y, getUDivExpr(X, Y)); + } + } + } + // Skip past any other cast SCEVs. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr) ++Idx; @@ -2766,7 +2792,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // If we found some loop invariants, fold them into the recurrence. if (!LIOps.empty()) { // Compute nowrap flags for the addition of the loop-invariant ops and - // the addrec. Temporarily push it as an operand for that purpose. + // the addrec. Temporarily push it as an operand for that purpose. These + // flags are valid in the scope of the addrec only. LIOps.push_back(AddRec); SCEV::NoWrapFlags Flags = ComputeFlags(LIOps); LIOps.pop_back(); @@ -2775,10 +2802,25 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, LIOps.push_back(AddRec->getStart()); SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands()); - // This follows from the fact that the no-wrap flags on the outer add - // expression are applicable on the 0th iteration, when the add recurrence - // will be equal to its start value. - AddRecOps[0] = getAddExpr(LIOps, Flags, Depth + 1); + + // It is not in general safe to propagate flags valid on an add within + // the addrec scope to one outside it. We must prove that the inner + // scope is guaranteed to execute if the outer one does to be able to + // safely propagate. We know the program is undefined if poison is + // produced on the inner scoped addrec. 
We also know that *for this use* + // the outer scoped add can't overflow (because of the flags we just + // computed for the inner scoped add) without the program being undefined. + // Proving that entry to the outer scope neccesitates entry to the inner + // scope, thus proves the program undefined if the flags would be violated + // in the outer scope. + SCEV::NoWrapFlags AddFlags = Flags; + if (AddFlags != SCEV::FlagAnyWrap) { + auto *DefI = getDefiningScopeBound(LIOps); + auto *ReachI = &*AddRecLoop->getHeader()->begin(); + if (!isGuaranteedToTransferExecutionTo(DefI, ReachI)) + AddFlags = SCEV::FlagAnyWrap; + } + AddRecOps[0] = getAddExpr(LIOps, AddFlags, Depth + 1); // Build the new addrec. Propagate the NUW and NSW flags if both the // outer add and the inner addrec are guaranteed to have no overflow. @@ -2862,7 +2904,7 @@ ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops, S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); + registerUser(S, Ops); } S->setNoWrapFlags(Flags); return S; @@ -2885,7 +2927,8 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops, S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Ops.size(), L); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); + LoopUsers[L].push_back(S); + registerUser(S, Ops); } setNoWrapFlags(S, Flags); return S; @@ -2907,7 +2950,7 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops, S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); + registerUser(S, Ops); } S->setNoWrapFlags(Flags); return S; @@ -3022,7 +3065,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, if (Depth > MaxArithDepth || hasHugeExpression(Ops)) return getOrCreateMulExpr(Ops, ComputeFlags(Ops)); - if (SCEV *S = std::get<0>(findExistingSCEVInCache(scMulExpr, Ops))) { + if (SCEV *S = findExistingSCEVInCache(scMulExpr, Ops)) { // Don't strengthen flags if we have no new information. SCEVMulExpr *Mul = static_cast<SCEVMulExpr *>(S); if (Mul->getNoWrapFlags(OrigFlags) != OrigFlags) @@ -3416,7 +3459,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), LHS, RHS); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); + registerUser(S, {LHS, RHS}); return S; } @@ -3593,13 +3636,21 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, // getSCEV(Base)->getType() has the same address space as Base->getType() // because SCEV::getType() preserves the address space. Type *IntIdxTy = getEffectiveSCEVType(BaseExpr->getType()); - // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP - // instruction to its SCEV, because the Instruction may be guarded by control - // flow and the no-overflow bits may not be valid for the expression in any - // context. This can be fixed similarly to how these flags are handled for - // adds. + const bool AssumeInBoundsFlags = [&]() { + if (!GEP->isInBounds()) + return false; + + // We'd like to propagate flags from the IR to the corresponding SCEV nodes, + // but to do that, we have to ensure that said flag is valid in the entire + // defined scope of the SCEV. + auto *GEPI = dyn_cast<Instruction>(GEP); + // TODO: non-instructions have global scope. 
We might be able to prove + // some global scope cases + return GEPI && isSCEVExprNeverPoison(GEPI); + }(); + SCEV::NoWrapFlags OffsetWrap = - GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap; + AssumeInBoundsFlags ? SCEV::FlagNSW : SCEV::FlagAnyWrap; Type *CurTy = GEP->getType(); bool FirstIter = true; @@ -3645,21 +3696,22 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, // Add the base address and the offset. We cannot use the nsw flag, as the // base address is unsigned. However, if we know that the offset is // non-negative, we can use nuw. - SCEV::NoWrapFlags BaseWrap = GEP->isInBounds() && isKnownNonNegative(Offset) + SCEV::NoWrapFlags BaseWrap = AssumeInBoundsFlags && isKnownNonNegative(Offset) ? SCEV::FlagNUW : SCEV::FlagAnyWrap; - return getAddExpr(BaseExpr, Offset, BaseWrap); + auto *GEPExpr = getAddExpr(BaseExpr, Offset, BaseWrap); + assert(BaseExpr->getType() == GEPExpr->getType() && + "GEP should not change type mid-flight."); + return GEPExpr; } -std::tuple<SCEV *, FoldingSetNodeID, void *> -ScalarEvolution::findExistingSCEVInCache(SCEVTypes SCEVType, - ArrayRef<const SCEV *> Ops) { +SCEV *ScalarEvolution::findExistingSCEVInCache(SCEVTypes SCEVType, + ArrayRef<const SCEV *> Ops) { FoldingSetNodeID ID; - void *IP = nullptr; ID.AddInteger(SCEVType); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); - return std::tuple<SCEV *, FoldingSetNodeID, void *>( - UniqueSCEVs.FindNodeOrInsertPos(ID, IP), std::move(ID), IP); + void *IP = nullptr; + return UniqueSCEVs.FindNodeOrInsertPos(ID, IP); } const SCEV *ScalarEvolution::getAbsExpr(const SCEV *Op, bool IsNSW) { @@ -3689,7 +3741,7 @@ const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind, GroupByComplexity(Ops, &LI, DT); // Check if we have created the same expression before. - if (const SCEV *S = std::get<0>(findExistingSCEVInCache(Kind, Ops))) { + if (const SCEV *S = findExistingSCEVInCache(Kind, Ops)) { return S; } @@ -3787,10 +3839,12 @@ const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind, // Okay, it looks like we really DO need an expr. Check to see if we // already have one, otherwise create a new one. - const SCEV *ExistingSCEV; FoldingSetNodeID ID; - void *IP; - std::tie(ExistingSCEV, ID, IP) = findExistingSCEVInCache(Kind, Ops); + ID.AddInteger(Kind); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = nullptr; + const SCEV *ExistingSCEV = UniqueSCEVs.FindNodeOrInsertPos(ID, IP); if (ExistingSCEV) return ExistingSCEV; const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); @@ -3799,7 +3853,7 @@ const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind, SCEVMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); + registerUser(S, Ops); return S; } @@ -3943,6 +3997,21 @@ Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const { return getTypeSizeInBits(T1) >= getTypeSizeInBits(T2) ? T1 : T2; } +bool ScalarEvolution::instructionCouldExistWitthOperands(const SCEV *A, + const SCEV *B) { + /// For a valid use point to exist, the defining scope of one operand + /// must dominate the other. + bool PreciseA, PreciseB; + auto *ScopeA = getDefiningScopeBound({A}, PreciseA); + auto *ScopeB = getDefiningScopeBound({B}, PreciseB); + if (!PreciseA || !PreciseB) + // Can't tell. 
+ return false; + return (ScopeA == ScopeB) || DT.dominates(ScopeA, ScopeB) || + DT.dominates(ScopeB, ScopeA); +} + + const SCEV *ScalarEvolution::getCouldNotCompute() { return CouldNotCompute.get(); } @@ -4025,24 +4094,6 @@ void ScalarEvolution::eraseValueFromMap(Value *V) { } } -/// Check whether value has nuw/nsw/exact set but SCEV does not. -/// TODO: In reality it is better to check the poison recursively -/// but this is better than nothing. -static bool SCEVLostPoisonFlags(const SCEV *S, const Value *V) { - if (auto *I = dyn_cast<Instruction>(V)) { - if (isa<OverflowingBinaryOperator>(I)) { - if (auto *NS = dyn_cast<SCEVNAryExpr>(S)) { - if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap()) - return true; - if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap()) - return true; - } - } else if (isa<PossiblyExactOperator>(I) && I->isExact()) - return true; - } - return false; -} - /// Return an existing SCEV if it exists, otherwise analyze the expression and /// create a new one. const SCEV *ScalarEvolution::getSCEV(Value *V) { @@ -4056,7 +4107,7 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) { // ValueExprMap before insert S->{V, 0} into ExprValueMap. std::pair<ValueExprMapType::iterator, bool> Pair = ValueExprMap.insert({SCEVCallbackVH(V, this), S}); - if (Pair.second && !SCEVLostPoisonFlags(S, V)) { + if (Pair.second) { ExprValueMap[S].insert({V, nullptr}); // If S == Stripped + Offset, add Stripped -> {V, Offset} into @@ -4120,6 +4171,8 @@ static const SCEV *MatchNotExpr(const SCEV *Expr) { /// Return a SCEV corresponding to ~V = -1-V const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { + assert(!V->getType()->isPointerTy() && "Can't negate pointer"); + if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) return getConstant( cast<ConstantInt>(ConstantExpr::getNot(VC->getValue()))); @@ -4146,17 +4199,16 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { return getMinusSCEV(getMinusOne(Ty), V); } -/// Compute an expression equivalent to S - getPointerBase(S). -static const SCEV *removePointerBase(ScalarEvolution *SE, const SCEV *P) { +const SCEV *ScalarEvolution::removePointerBase(const SCEV *P) { assert(P->getType()->isPointerTy()); if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(P)) { // The base of an AddRec is the first operand. SmallVector<const SCEV *> Ops{AddRec->operands()}; - Ops[0] = removePointerBase(SE, Ops[0]); + Ops[0] = removePointerBase(Ops[0]); // Don't try to transfer nowrap flags for now. We could in some cases // (for example, if pointer operand of the AddRec is a SCEVUnknown). - return SE->getAddRecExpr(Ops, AddRec->getLoop(), SCEV::FlagAnyWrap); + return getAddRecExpr(Ops, AddRec->getLoop(), SCEV::FlagAnyWrap); } if (auto *Add = dyn_cast<SCEVAddExpr>(P)) { // The base of an Add is the pointer operand. @@ -4164,21 +4216,17 @@ static const SCEV *removePointerBase(ScalarEvolution *SE, const SCEV *P) { const SCEV **PtrOp = nullptr; for (const SCEV *&AddOp : Ops) { if (AddOp->getType()->isPointerTy()) { - // If we find an Add with multiple pointer operands, treat it as a - // pointer base to be consistent with getPointerBase. Eventually - // we should be able to assert this is impossible. - if (PtrOp) - return SE->getZero(P->getType()); + assert(!PtrOp && "Cannot have multiple pointer ops"); PtrOp = &AddOp; } } - *PtrOp = removePointerBase(SE, *PtrOp); + *PtrOp = removePointerBase(*PtrOp); // Don't try to transfer nowrap flags for now. We could in some cases // (for example, if the pointer operand of the Add is a SCEVUnknown). 
- return SE->getAddExpr(Ops); + return getAddExpr(Ops); } // Any other expression must be a pointer base. - return SE->getZero(P->getType()); + return getZero(P->getType()); } const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, @@ -4195,8 +4243,8 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, if (!LHS->getType()->isPointerTy() || getPointerBase(LHS) != getPointerBase(RHS)) return getCouldNotCompute(); - LHS = removePointerBase(this, LHS); - RHS = removePointerBase(this, RHS); + LHS = removePointerBase(LHS); + RHS = removePointerBase(RHS); } // We represent LHS - RHS as LHS + (-1)*RHS. This transformation @@ -4204,7 +4252,7 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, auto AddFlags = SCEV::FlagAnyWrap; const bool RHSIsNotMinSigned = !getSignedRangeMin(RHS).isMinSignedValue(); - if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) { + if (hasFlags(Flags, SCEV::FlagNSW)) { // Let M be the minimum representable signed value. Then (-1)*RHS // signed-wraps if and only if RHS is M. That can happen even for // a NSW subtraction because e.g. (-1)*M signed-wraps even though @@ -4359,14 +4407,11 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { const SCEV *PtrOp = nullptr; for (const SCEV *AddOp : Add->operands()) { if (AddOp->getType()->isPointerTy()) { - // Cannot find the base of an expression with multiple pointer ops. - if (PtrOp) - return V; + assert(!PtrOp && "Cannot have multiple pointer ops"); PtrOp = AddOp; } } - if (!PtrOp) // All operands were non-pointer. - return V; + assert(PtrOp && "Must have pointer op"); V = PtrOp; } else // Not something we can look further into. return V; @@ -4374,24 +4419,25 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { } /// Push users of the given Instruction onto the given Worklist. -static void -PushDefUseChildren(Instruction *I, - SmallVectorImpl<Instruction *> &Worklist) { +static void PushDefUseChildren(Instruction *I, + SmallVectorImpl<Instruction *> &Worklist, + SmallPtrSetImpl<Instruction *> &Visited) { // Push the def-use children onto the Worklist stack. - for (User *U : I->users()) - Worklist.push_back(cast<Instruction>(U)); + for (User *U : I->users()) { + auto *UserInsn = cast<Instruction>(U); + if (Visited.insert(UserInsn).second) + Worklist.push_back(UserInsn); + } } void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) { SmallVector<Instruction *, 16> Worklist; - PushDefUseChildren(PN, Worklist); - SmallPtrSet<Instruction *, 8> Visited; + SmallVector<const SCEV *, 8> ToForget; Visited.insert(PN); + Worklist.push_back(PN); while (!Worklist.empty()) { Instruction *I = Worklist.pop_back_val(); - if (!Visited.insert(I).second) - continue; auto It = ValueExprMap.find_as(static_cast<Value *>(I)); if (It != ValueExprMap.end()) { @@ -4413,12 +4459,13 @@ void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) { !isa<SCEVUnknown>(Old) || (I != PN && Old == SymName)) { eraseValueFromMap(It->first); - forgetMemoizedResults(Old); + ToForget.push_back(Old); } } - PushDefUseChildren(I, Worklist); + PushDefUseChildren(I, Worklist, Visited); } + forgetMemoizedResults(ToForget); } namespace { @@ -6109,7 +6156,7 @@ ScalarEvolution::getRangeRef(const SCEV *S, // initial value. 
if (AddRec->hasNoUnsignedWrap()) { APInt UnsignedMinValue = getUnsignedRangeMin(AddRec->getStart()); - if (!UnsignedMinValue.isNullValue()) + if (!UnsignedMinValue.isZero()) ConservativeResult = ConservativeResult.intersectWith( ConstantRange(UnsignedMinValue, APInt(BitWidth, 0)), RangeType); } @@ -6211,9 +6258,9 @@ ScalarEvolution::getRangeRef(const SCEV *S, if (NS > 1) { // If we know any of the sign bits, we know all of the sign bits. - if (!Known.Zero.getHiBits(NS).isNullValue()) + if (!Known.Zero.getHiBits(NS).isZero()) Known.Zero.setHighBits(NS); - if (!Known.One.getHiBits(NS).isNullValue()) + if (!Known.One.getHiBits(NS).isZero()) Known.One.setHighBits(NS); } @@ -6549,17 +6596,99 @@ SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) { return isSCEVExprNeverPoison(BinOp) ? Flags : SCEV::FlagAnyWrap; } -bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) { - // Here we check that I is in the header of the innermost loop containing I, - // since we only deal with instructions in the loop header. The actual loop we - // need to check later will come from an add recurrence, but getting that - // requires computing the SCEV of the operands, which can be expensive. This - // check we can do cheaply to rule out some cases early. - Loop *InnermostContainingLoop = LI.getLoopFor(I->getParent()); - if (InnermostContainingLoop == nullptr || - InnermostContainingLoop->getHeader() != I->getParent()) - return false; +const Instruction * +ScalarEvolution::getNonTrivialDefiningScopeBound(const SCEV *S) { + if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(S)) + return &*AddRec->getLoop()->getHeader()->begin(); + if (auto *U = dyn_cast<SCEVUnknown>(S)) + if (auto *I = dyn_cast<Instruction>(U->getValue())) + return I; + return nullptr; +} +/// Fills \p Ops with unique operands of \p S, if it has operands. If not, +/// \p Ops remains unmodified. +static void collectUniqueOps(const SCEV *S, + SmallVectorImpl<const SCEV *> &Ops) { + SmallPtrSet<const SCEV *, 4> Unique; + auto InsertUnique = [&](const SCEV *S) { + if (Unique.insert(S).second) + Ops.push_back(S); + }; + if (auto *S2 = dyn_cast<SCEVCastExpr>(S)) + for (auto *Op : S2->operands()) + InsertUnique(Op); + else if (auto *S2 = dyn_cast<SCEVNAryExpr>(S)) + for (auto *Op : S2->operands()) + InsertUnique(Op); + else if (auto *S2 = dyn_cast<SCEVUDivExpr>(S)) + for (auto *Op : S2->operands()) + InsertUnique(Op); +} + +const Instruction * +ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops, + bool &Precise) { + Precise = true; + // Do a bounded search of the def relation of the requested SCEVs. + SmallSet<const SCEV *, 16> Visited; + SmallVector<const SCEV *> Worklist; + auto pushOp = [&](const SCEV *S) { + if (!Visited.insert(S).second) + return; + // Threshold of 30 here is arbitrary. + if (Visited.size() > 30) { + Precise = false; + return; + } + Worklist.push_back(S); + }; + + for (auto *S : Ops) + pushOp(S); + + const Instruction *Bound = nullptr; + while (!Worklist.empty()) { + auto *S = Worklist.pop_back_val(); + if (auto *DefI = getNonTrivialDefiningScopeBound(S)) { + if (!Bound || DT.dominates(Bound, DefI)) + Bound = DefI; + } else { + SmallVector<const SCEV *, 4> Ops; + collectUniqueOps(S, Ops); + for (auto *Op : Ops) + pushOp(Op); + } + } + return Bound ? 
Bound : &*F.getEntryBlock().begin(); +} + +const Instruction * +ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops) { + bool Discard; + return getDefiningScopeBound(Ops, Discard); +} + +bool ScalarEvolution::isGuaranteedToTransferExecutionTo(const Instruction *A, + const Instruction *B) { + if (A->getParent() == B->getParent() && + isGuaranteedToTransferExecutionToSuccessor(A->getIterator(), + B->getIterator())) + return true; + + auto *BLoop = LI.getLoopFor(B->getParent()); + if (BLoop && BLoop->getHeader() == B->getParent() && + BLoop->getLoopPreheader() == A->getParent() && + isGuaranteedToTransferExecutionToSuccessor(A->getIterator(), + A->getParent()->end()) && + isGuaranteedToTransferExecutionToSuccessor(B->getParent()->begin(), + B->getIterator())) + return true; + return false; +} + + +bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) { // Only proceed if we can prove that I does not yield poison. if (!programUndefinedIfPoison(I)) return false; @@ -6570,39 +6699,20 @@ bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) { // instructions can map to the same SCEV. If we apply NSW or NUW from I to // the SCEV, we must guarantee no wrapping for that SCEV also when it is // derived from other instructions that map to the same SCEV. We cannot make - // that guarantee for cases where I is not executed. So we need to find the - // loop that I is considered in relation to and prove that I is executed for - // every iteration of that loop. That implies that the value that I - // calculates does not wrap anywhere in the loop, so then we can apply the - // flags to the SCEV. - // - // We check isLoopInvariant to disambiguate in case we are adding recurrences - // from different loops, so that we know which loop to prove that I is - // executed in. - for (unsigned OpIndex = 0; OpIndex < I->getNumOperands(); ++OpIndex) { + // that guarantee for cases where I is not executed. So we need to find a + // upper bound on the defining scope for the SCEV, and prove that I is + // executed every time we enter that scope. When the bounding scope is a + // loop (the common case), this is equivalent to proving I executes on every + // iteration of that loop. + SmallVector<const SCEV *> SCEVOps; + for (const Use &Op : I->operands()) { // I could be an extractvalue from a call to an overflow intrinsic. // TODO: We can do better here in some cases. 
- if (!isSCEVable(I->getOperand(OpIndex)->getType())) - return false; - const SCEV *Op = getSCEV(I->getOperand(OpIndex)); - if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) { - bool AllOtherOpsLoopInvariant = true; - for (unsigned OtherOpIndex = 0; OtherOpIndex < I->getNumOperands(); - ++OtherOpIndex) { - if (OtherOpIndex != OpIndex) { - const SCEV *OtherOp = getSCEV(I->getOperand(OtherOpIndex)); - if (!isLoopInvariant(OtherOp, AddRec->getLoop())) { - AllOtherOpsLoopInvariant = false; - break; - } - } - } - if (AllOtherOpsLoopInvariant && - isGuaranteedToExecuteForEveryIteration(I, AddRec->getLoop())) - return true; - } + if (isSCEVable(Op->getType())) + SCEVOps.push_back(getSCEV(Op)); } - return false; + auto *DefI = getDefiningScopeBound(SCEVOps); + return isGuaranteedToTransferExecutionTo(DefI, I); } bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) { @@ -7144,10 +7254,21 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // Iteration Count Computation Code // -const SCEV *ScalarEvolution::getTripCountFromExitCount(const SCEV *ExitCount) { - // Get the trip count from the BE count by adding 1. Overflow, results - // in zero which means "unknown". - return getAddExpr(ExitCount, getOne(ExitCount->getType())); +const SCEV *ScalarEvolution::getTripCountFromExitCount(const SCEV *ExitCount, + bool Extend) { + if (isa<SCEVCouldNotCompute>(ExitCount)) + return getCouldNotCompute(); + + auto *ExitCountType = ExitCount->getType(); + assert(ExitCountType->isIntegerTy()); + + if (!Extend) + return getAddExpr(ExitCount, getOne(ExitCountType)); + + auto *WiderType = Type::getIntNTy(ExitCountType->getContext(), + 1 + ExitCountType->getScalarSizeInBits()); + return getAddExpr(getNoopOrZeroExtend(ExitCount, WiderType), + getOne(WiderType)); } static unsigned getConstantTripCount(const SCEVConstant *ExitCount) { @@ -7186,6 +7307,131 @@ unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) { return getConstantTripCount(MaxExitCount); } +const SCEV *ScalarEvolution::getConstantMaxTripCountFromArray(const Loop *L) { + // We can't infer from Array in Irregular Loop. + // FIXME: It's hard to infer loop bound from array operated in Nested Loop. + if (!L->isLoopSimplifyForm() || !L->isInnermost()) + return getCouldNotCompute(); + + // FIXME: To make the scene more typical, we only analysis loops that have + // one exiting block and that block must be the latch. To make it easier to + // capture loops that have memory access and memory access will be executed + // in each iteration. + const BasicBlock *LoopLatch = L->getLoopLatch(); + assert(LoopLatch && "See defination of simplify form loop."); + if (L->getExitingBlock() != LoopLatch) + return getCouldNotCompute(); + + const DataLayout &DL = getDataLayout(); + SmallVector<const SCEV *> InferCountColl; + for (auto *BB : L->getBlocks()) { + // Go here, we can know that Loop is a single exiting and simplified form + // loop. Make sure that infer from Memory Operation in those BBs must be + // executed in loop. First step, we can make sure that max execution time + // of MemAccessBB in loop represents latch max excution time. + // If MemAccessBB does not dom Latch, skip. 
+ // Entry + // │ + // ┌─────▼─────┐ + // │Loop Header◄─────┐ + // └──┬──────┬─┘ │ + // │ │ │ + // ┌────────▼──┐ ┌─▼─────┐ │ + // │MemAccessBB│ │OtherBB│ │ + // └────────┬──┘ └─┬─────┘ │ + // │ │ │ + // ┌─▼──────▼─┐ │ + // │Loop Latch├─────┘ + // └────┬─────┘ + // ▼ + // Exit + if (!DT.dominates(BB, LoopLatch)) + continue; + + for (Instruction &Inst : *BB) { + // Find Memory Operation Instruction. + auto *GEP = getLoadStorePointerOperand(&Inst); + if (!GEP) + continue; + + auto *ElemSize = dyn_cast<SCEVConstant>(getElementSize(&Inst)); + // Do not infer from scalar type, eg."ElemSize = sizeof()". + if (!ElemSize) + continue; + + // Use a existing polynomial recurrence on the trip count. + auto *AddRec = dyn_cast<SCEVAddRecExpr>(getSCEV(GEP)); + if (!AddRec) + continue; + auto *ArrBase = dyn_cast<SCEVUnknown>(getPointerBase(AddRec)); + auto *Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*this)); + if (!ArrBase || !Step) + continue; + assert(isLoopInvariant(ArrBase, L) && "See addrec definition"); + + // Only handle { %array + step }, + // FIXME: {(SCEVAddRecExpr) + step } could not be analysed here. + if (AddRec->getStart() != ArrBase) + continue; + + // Memory operation pattern which have gaps. + // Or repeat memory opreation. + // And index of GEP wraps arround. + if (Step->getAPInt().getActiveBits() > 32 || + Step->getAPInt().getZExtValue() != + ElemSize->getAPInt().getZExtValue() || + Step->isZero() || Step->getAPInt().isNegative()) + continue; + + // Only infer from stack array which has certain size. + // Make sure alloca instruction is not excuted in loop. + AllocaInst *AllocateInst = dyn_cast<AllocaInst>(ArrBase->getValue()); + if (!AllocateInst || L->contains(AllocateInst->getParent())) + continue; + + // Make sure only handle normal array. + auto *Ty = dyn_cast<ArrayType>(AllocateInst->getAllocatedType()); + auto *ArrSize = dyn_cast<ConstantInt>(AllocateInst->getArraySize()); + if (!Ty || !ArrSize || !ArrSize->isOne()) + continue; + // Also make sure step was increased the same with sizeof allocated + // element type. + const PointerType *GEPT = dyn_cast<PointerType>(GEP->getType()); + if (Ty->getElementType() != GEPT->getElementType()) + continue; + + // FIXME: Since gep indices are silently zext to the indexing type, + // we will have a narrow gep index which wraps around rather than + // increasing strictly, we shoule ensure that step is increasing + // strictly by the loop iteration. + // Now we can infer a max execution time by MemLength/StepLength. + const SCEV *MemSize = + getConstant(Step->getType(), DL.getTypeAllocSize(Ty)); + auto *MaxExeCount = + dyn_cast<SCEVConstant>(getUDivCeilSCEV(MemSize, Step)); + if (!MaxExeCount || MaxExeCount->getAPInt().getActiveBits() > 32) + continue; + + // If the loop reaches the maximum number of executions, we can not + // access bytes starting outside the statically allocated size without + // being immediate UB. But it is allowed to enter loop header one more + // time. + auto *InferCount = dyn_cast<SCEVConstant>( + getAddExpr(MaxExeCount, getOne(MaxExeCount->getType()))); + // Discard the maximum number of execution times under 32bits. 
+ if (!InferCount || InferCount->getAPInt().getActiveBits() > 32) + continue; + + InferCountColl.push_back(InferCount); + } + } + + if (InferCountColl.size() == 0) + return getCouldNotCompute(); + + return getUMinFromMismatchedTypes(InferCountColl); +} + unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) { SmallVector<BasicBlock *, 8> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -7287,13 +7533,15 @@ bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) { } /// Push PHI nodes in the header of the given loop onto the given Worklist. -static void -PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) { +static void PushLoopPHIs(const Loop *L, + SmallVectorImpl<Instruction *> &Worklist, + SmallPtrSetImpl<Instruction *> &Visited) { BasicBlock *Header = L->getHeader(); // Push all Loop-header PHIs onto the Worklist stack. for (PHINode &PN : Header->phis()) - Worklist.push_back(&PN); + if (Visited.insert(&PN).second) + Worklist.push_back(&PN); } const ScalarEvolution::BackedgeTakenInfo & @@ -7354,9 +7602,9 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // it handles SCEVUnknown PHI nodes specially. if (Result.hasAnyInfo()) { SmallVector<Instruction *, 16> Worklist; - PushLoopPHIs(L, Worklist); - SmallPtrSet<Instruction *, 8> Discovered; + SmallVector<const SCEV *, 8> ToForget; + PushLoopPHIs(L, Worklist, Discovered); while (!Worklist.empty()) { Instruction *I = Worklist.pop_back_val(); @@ -7373,7 +7621,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // own when it gets to that point. if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) { eraseValueFromMap(It->first); - forgetMemoizedResults(Old); + ToForget.push_back(Old); } if (PHINode *PN = dyn_cast<PHINode>(I)) ConstantEvolutionLoopExitValue.erase(PN); @@ -7405,6 +7653,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { Worklist.push_back(I); } } + forgetMemoizedResults(ToForget); } // Re-lookup the insert position, since the call to @@ -7441,6 +7690,7 @@ void ScalarEvolution::forgetLoop(const Loop *L) { SmallVector<const Loop *, 16> LoopWorklist(1, L); SmallVector<Instruction *, 32> Worklist; SmallPtrSet<Instruction *, 16> Visited; + SmallVector<const SCEV *, 16> ToForget; // Iterate over all the loops and sub-loops to drop SCEV information. while (!LoopWorklist.empty()) { @@ -7462,29 +7712,27 @@ void ScalarEvolution::forgetLoop(const Loop *L) { auto LoopUsersItr = LoopUsers.find(CurrL); if (LoopUsersItr != LoopUsers.end()) { - for (auto *S : LoopUsersItr->second) - forgetMemoizedResults(S); + ToForget.insert(ToForget.end(), LoopUsersItr->second.begin(), + LoopUsersItr->second.end()); LoopUsers.erase(LoopUsersItr); } // Drop information about expressions based on loop-header PHIs. - PushLoopPHIs(CurrL, Worklist); + PushLoopPHIs(CurrL, Worklist, Visited); while (!Worklist.empty()) { Instruction *I = Worklist.pop_back_val(); - if (!Visited.insert(I).second) - continue; ValueExprMapType::iterator It = ValueExprMap.find_as(static_cast<Value *>(I)); if (It != ValueExprMap.end()) { eraseValueFromMap(It->first); - forgetMemoizedResults(It->second); + ToForget.push_back(It->second); if (PHINode *PN = dyn_cast<PHINode>(I)) ConstantEvolutionLoopExitValue.erase(PN); } - PushDefUseChildren(I, Worklist); + PushDefUseChildren(I, Worklist, Visited); } LoopPropertiesCache.erase(CurrL); @@ -7492,6 +7740,7 @@ void ScalarEvolution::forgetLoop(const Loop *L) { // ValuesAtScopes map. 
LoopWorklist.append(CurrL->begin(), CurrL->end()); } + forgetMemoizedResults(ToForget); } void ScalarEvolution::forgetTopmostLoop(const Loop *L) { @@ -7506,25 +7755,25 @@ void ScalarEvolution::forgetValue(Value *V) { // Drop information about expressions based on loop-header PHIs. SmallVector<Instruction *, 16> Worklist; + SmallPtrSet<Instruction *, 8> Visited; + SmallVector<const SCEV *, 8> ToForget; Worklist.push_back(I); + Visited.insert(I); - SmallPtrSet<Instruction *, 8> Visited; while (!Worklist.empty()) { I = Worklist.pop_back_val(); - if (!Visited.insert(I).second) - continue; - ValueExprMapType::iterator It = ValueExprMap.find_as(static_cast<Value *>(I)); if (It != ValueExprMap.end()) { eraseValueFromMap(It->first); - forgetMemoizedResults(It->second); + ToForget.push_back(It->second); if (PHINode *PN = dyn_cast<PHINode>(I)) ConstantEvolutionLoopExitValue.erase(PN); } - PushDefUseChildren(I, Worklist); + PushDefUseChildren(I, Worklist, Visited); } + forgetMemoizedResults(ToForget); } void ScalarEvolution::forgetLoopDispositions(const Loop *L) { @@ -7598,7 +7847,7 @@ ScalarEvolution::BackedgeTakenInfo::getConstantMax(ScalarEvolution *SE) const { return !ENT.hasAlwaysTruePredicate(); }; - if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getConstantMax()) + if (!getConstantMax() || any_of(ExitNotTaken, PredicateNotAlwaysTrue)) return SE->getCouldNotCompute(); assert((isa<SCEVCouldNotCompute>(getConstantMax()) || @@ -7635,6 +7884,12 @@ ScalarEvolution::ExitLimit::ExitLimit( const SCEV *E, const SCEV *M, bool MaxOrZero, ArrayRef<const SmallPtrSetImpl<const SCEVPredicate *> *> PredSetList) : ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) { + // If we prove the max count is zero, so is the symbolic bound. This happens + // in practice due to differences in a) how context sensitive we've chosen + // to be and b) how we reason about bounds impied by UB. + if (MaxNotTaken->isZero()) + ExactNotTaken = MaxNotTaken; + assert((isa<SCEVCouldNotCompute>(ExactNotTaken) || !isa<SCEVCouldNotCompute>(MaxNotTaken)) && "Exact is not allowed to be less precise than Max"); @@ -7740,7 +7995,7 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L, if (auto *BI = dyn_cast<BranchInst>(ExitBB->getTerminator())) if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { bool ExitIfTrue = !L->contains(BI->getSuccessor(0)); - if ((ExitIfTrue && CI->isZero()) || (!ExitIfTrue && CI->isOne())) + if (ExitIfTrue == CI->isZero()) continue; } @@ -8030,15 +8285,6 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, Pred = ExitCond->getInversePredicate(); const ICmpInst::Predicate OriginalPred = Pred; - // Handle common loops like: for (X = "string"; *X; ++X) - if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0))) - if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) { - ExitLimit ItCnt = - computeLoadConstantCompareExitLimit(LI, RHS, L, Pred); - if (ItCnt.hasAnyInfo()) - return ItCnt; - } - const SCEV *LHS = getSCEV(ExitCond->getOperand(0)); const SCEV *RHS = getSCEV(ExitCond->getOperand(1)); @@ -8070,6 +8316,32 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, if (!isa<SCEVCouldNotCompute>(Ret)) return Ret; } + // If this loop must exit based on this condition (or execute undefined + // behaviour), and we can prove the test sequence produced must repeat + // the same values on self-wrap of the IV, then we can infer that IV + // doesn't self wrap because if it did, we'd have an infinite (undefined) + // loop. 
+ if (ControlsExit && isLoopInvariant(RHS, L) && loopHasNoAbnormalExits(L) && + loopIsFiniteByAssumption(L)) { + + // TODO: We can peel off any functions which are invertible *in L*. Loop + // invariant terms are effectively constants for our purposes here. + auto *InnerLHS = LHS; + if (auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS)) + InnerLHS = ZExt->getOperand(); + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(InnerLHS)) { + auto *StrideC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this)); + if (!AR->hasNoSelfWrap() && AR->getLoop() == L && AR->isAffine() && + StrideC && StrideC->getAPInt().isPowerOf2()) { + auto Flags = AR->getNoWrapFlags(); + Flags = setFlags(Flags, SCEV::FlagNW); + SmallVector<const SCEV*> Operands{AR->operands()}; + Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags); + setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags); + } + } + } + switch (Pred) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) @@ -8169,85 +8441,6 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, return cast<SCEVConstant>(Val)->getValue(); } -/// Given an exit condition of 'icmp op load X, cst', try to see if we can -/// compute the backedge execution count. -ScalarEvolution::ExitLimit -ScalarEvolution::computeLoadConstantCompareExitLimit( - LoadInst *LI, - Constant *RHS, - const Loop *L, - ICmpInst::Predicate predicate) { - if (LI->isVolatile()) return getCouldNotCompute(); - - // Check to see if the loaded pointer is a getelementptr of a global. - // TODO: Use SCEV instead of manually grubbing with GEPs. - GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)); - if (!GEP) return getCouldNotCompute(); - - // Make sure that it is really a constant global we are gepping, with an - // initializer, and make sure the first IDX is really 0. - GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)); - if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || - GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) || - !cast<Constant>(GEP->getOperand(1))->isNullValue()) - return getCouldNotCompute(); - - // Okay, we allow one non-constant index into the GEP instruction. - Value *VarIdx = nullptr; - std::vector<Constant*> Indexes; - unsigned VarIdxNum = 0; - for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) - if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { - Indexes.push_back(CI); - } else if (!isa<ConstantInt>(GEP->getOperand(i))) { - if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's. - VarIdx = GEP->getOperand(i); - VarIdxNum = i-2; - Indexes.push_back(nullptr); - } - - // Loop-invariant loads may be a byproduct of loop optimization. Skip them. - if (!VarIdx) - return getCouldNotCompute(); - - // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant. - // Check to see if X is a loop variant variable value now. - const SCEV *Idx = getSCEV(VarIdx); - Idx = getSCEVAtScope(Idx, L); - - // We can only recognize very limited forms of loop index expressions, in - // particular, only affine AddRec's like {C1,+,C2}<L>. 
- const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx); - if (!IdxExpr || IdxExpr->getLoop() != L || !IdxExpr->isAffine() || - isLoopInvariant(IdxExpr, L) || - !isa<SCEVConstant>(IdxExpr->getOperand(0)) || - !isa<SCEVConstant>(IdxExpr->getOperand(1))) - return getCouldNotCompute(); - - unsigned MaxSteps = MaxBruteForceIterations; - for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) { - ConstantInt *ItCst = ConstantInt::get( - cast<IntegerType>(IdxExpr->getType()), IterationNum); - ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this); - - // Form the GEP offset. - Indexes[VarIdxNum] = Val; - - Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(), - Indexes); - if (!Result) break; // Cannot compute! - - // Evaluate the condition for this iteration. - Result = ConstantExpr::getICmp(predicate, Result, RHS); - if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure - if (cast<ConstantInt>(Result)->getValue().isMinValue()) { - ++NumArrayLenItCounts; - return getConstant(ItCst); // Found terminating iteration! - } - } - return getCouldNotCompute(); -} - ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) { ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV); @@ -9160,7 +9353,7 @@ GetQuadraticEquation(const SCEVAddRecExpr *AddRec) { APInt L = LC->getAPInt(); APInt M = MC->getAPInt(); APInt N = NC->getAPInt(); - assert(!N.isNullValue() && "This is not a quadratic addrec"); + assert(!N.isZero() && "This is not a quadratic addrec"); unsigned BitWidth = LC->getAPInt().getBitWidth(); unsigned NewWidth = BitWidth + 1; @@ -9486,9 +9679,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, // N = Distance (as unsigned) if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) { APInt MaxBECount = getUnsignedRangeMax(applyLoopGuards(Distance, L)); - APInt MaxBECountBase = getUnsignedRangeMax(Distance); - if (MaxBECountBase.ult(MaxBECount)) - MaxBECount = MaxBECountBase; + MaxBECount = APIntOps::umin(MaxBECount, getUnsignedRangeMax(Distance)); // When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated, // we end up with a loop whose backedge-taken count is n - 1. Detect this @@ -9521,11 +9712,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, const SCEV *Max = getCouldNotCompute(); if (Exact != getCouldNotCompute()) { APInt MaxInt = getUnsignedRangeMax(applyLoopGuards(Exact, L)); - APInt BaseMaxInt = getUnsignedRangeMax(Exact); - if (BaseMaxInt.ult(MaxInt)) - Max = getConstant(BaseMaxInt); - else - Max = getConstant(MaxInt); + Max = getConstant(APIntOps::umin(MaxInt, getUnsignedRangeMax(Exact))); } return ExitLimit(Exact, Max, false, Predicates); } @@ -9533,9 +9720,12 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, // Solve the general equation. const SCEV *E = SolveLinEquationWithOverflow(StepC->getAPInt(), getNegativeSCEV(Start), *this); - const SCEV *M = E == getCouldNotCompute() - ? 
E - : getConstant(getUnsignedRangeMax(E)); + + const SCEV *M = E; + if (E != getCouldNotCompute()) { + APInt MaxWithGuards = getUnsignedRangeMax(applyLoopGuards(E, L)); + M = getConstant(APIntOps::umin(MaxWithGuards, getUnsignedRangeMax(E))); + } return ExitLimit(E, M, false, Predicates); } @@ -9911,23 +10101,23 @@ Optional<bool> ScalarEvolution::evaluatePredicate(ICmpInst::Predicate Pred, bool ScalarEvolution::isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - const Instruction *Context) { + const Instruction *CtxI) { // TODO: Analyze guards and assumes from Context's block. return isKnownPredicate(Pred, LHS, RHS) || - isBasicBlockEntryGuardedByCond(Context->getParent(), Pred, LHS, RHS); + isBasicBlockEntryGuardedByCond(CtxI->getParent(), Pred, LHS, RHS); } -Optional<bool> -ScalarEvolution::evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, - const SCEV *RHS, - const Instruction *Context) { +Optional<bool> ScalarEvolution::evaluatePredicateAt(ICmpInst::Predicate Pred, + const SCEV *LHS, + const SCEV *RHS, + const Instruction *CtxI) { Optional<bool> KnownWithoutContext = evaluatePredicate(Pred, LHS, RHS); if (KnownWithoutContext) return KnownWithoutContext; - if (isBasicBlockEntryGuardedByCond(Context->getParent(), Pred, LHS, RHS)) + if (isBasicBlockEntryGuardedByCond(CtxI->getParent(), Pred, LHS, RHS)) return true; - else if (isBasicBlockEntryGuardedByCond(Context->getParent(), + else if (isBasicBlockEntryGuardedByCond(CtxI->getParent(), ICmpInst::getInversePredicate(Pred), LHS, RHS)) return false; @@ -10057,7 +10247,7 @@ ScalarEvolution::getLoopInvariantPredicate(ICmpInst::Predicate Pred, Optional<ScalarEvolution::LoopInvariantPredicate> ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations( ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, - const Instruction *Context, const SCEV *MaxIter) { + const Instruction *CtxI, const SCEV *MaxIter) { // Try to prove the following set of facts: // - The predicate is monotonic in the iteration space. // - If the check does not fail on the 1st iteration: @@ -10111,7 +10301,7 @@ ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations( if (Step == MinusOne) NoOverflowPred = CmpInst::getSwappedPredicate(NoOverflowPred); const SCEV *Start = AR->getStart(); - if (!isKnownPredicateAt(NoOverflowPred, Start, Last, Context)) + if (!isKnownPredicateAt(NoOverflowPred, Start, Last, CtxI)) return None; // Everything is fine. @@ -10448,12 +10638,12 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB, // Try to prove (Pred, LHS, RHS) using isImpliedCond. auto ProveViaCond = [&](const Value *Condition, bool Inverse) { - const Instruction *Context = &BB->front(); - if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse, Context)) + const Instruction *CtxI = &BB->front(); + if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse, CtxI)) return true; if (ProvingStrictComparison) { auto ProofFn = [&](ICmpInst::Predicate P) { - return isImpliedCond(P, LHS, RHS, Condition, Inverse, Context); + return isImpliedCond(P, LHS, RHS, Condition, Inverse, CtxI); }; if (SplitAndProve(ProofFn)) return true; @@ -10525,7 +10715,7 @@ bool ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Value *FoundCondValue, bool Inverse, - const Instruction *Context) { + const Instruction *CtxI) { // False conditions implies anything. Do not bother analyzing it further. 
if (FoundCondValue == ConstantInt::getBool(FoundCondValue->getContext(), Inverse)) @@ -10541,12 +10731,12 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const Value *Op0, *Op1; if (match(FoundCondValue, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) { if (!Inverse) - return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, Context) || - isImpliedCond(Pred, LHS, RHS, Op1, Inverse, Context); + return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, CtxI) || + isImpliedCond(Pred, LHS, RHS, Op1, Inverse, CtxI); } else if (match(FoundCondValue, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) { if (Inverse) - return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, Context) || - isImpliedCond(Pred, LHS, RHS, Op1, Inverse, Context); + return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, CtxI) || + isImpliedCond(Pred, LHS, RHS, Op1, Inverse, CtxI); } const ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue); @@ -10563,14 +10753,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *FoundLHS = getSCEV(ICI->getOperand(0)); const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); - return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, Context); + return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, CtxI); } bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, const SCEV *FoundRHS, - const Instruction *Context) { + const Instruction *CtxI) { // Balance the types. if (getTypeSizeInBits(LHS->getType()) < getTypeSizeInBits(FoundLHS->getType())) { @@ -10583,12 +10773,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, auto BitWidth = getTypeSizeInBits(NarrowType); const SCEV *MaxValue = getZeroExtendExpr( getConstant(APInt::getMaxValue(BitWidth)), WideType); - if (isKnownPredicate(ICmpInst::ICMP_ULE, FoundLHS, MaxValue) && - isKnownPredicate(ICmpInst::ICMP_ULE, FoundRHS, MaxValue)) { + if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, FoundLHS, + MaxValue) && + isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, FoundRHS, + MaxValue)) { const SCEV *TruncFoundLHS = getTruncateExpr(FoundLHS, NarrowType); const SCEV *TruncFoundRHS = getTruncateExpr(FoundRHS, NarrowType); if (isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, TruncFoundLHS, - TruncFoundRHS, Context)) + TruncFoundRHS, CtxI)) return true; } } @@ -10615,13 +10807,13 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, } } return isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, FoundLHS, - FoundRHS, Context); + FoundRHS, CtxI); } bool ScalarEvolution::isImpliedCondBalancedTypes( ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, const SCEV *FoundRHS, - const Instruction *Context) { + const Instruction *CtxI) { assert(getTypeSizeInBits(LHS->getType()) == getTypeSizeInBits(FoundLHS->getType()) && "Types should be balanced!"); @@ -10647,7 +10839,7 @@ bool ScalarEvolution::isImpliedCondBalancedTypes( // Check whether the found predicate is the same as the desired predicate. if (FoundPred == Pred) - return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context); + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI); // Check whether swapping the found predicate makes it the same as the // desired predicate. 
@@ -10663,27 +10855,70 @@ bool ScalarEvolution::isImpliedCondBalancedTypes( // do this if it would break canonical constant/addrec ordering. if (!isa<SCEVConstant>(RHS) && !isa<SCEVAddRecExpr>(LHS)) return isImpliedCondOperands(FoundPred, RHS, LHS, FoundLHS, FoundRHS, - Context); + CtxI); if (!isa<SCEVConstant>(FoundRHS) && !isa<SCEVAddRecExpr>(FoundLHS)) - return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS, Context); + return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS, CtxI); + + // There's no clear preference between forms 3. and 4., try both. Avoid + // forming getNotSCEV of pointer values as the resulting subtract is + // not legal. + if (!LHS->getType()->isPointerTy() && !RHS->getType()->isPointerTy() && + isImpliedCondOperands(FoundPred, getNotSCEV(LHS), getNotSCEV(RHS), + FoundLHS, FoundRHS, CtxI)) + return true; - // Don't try to getNotSCEV pointers. - if (LHS->getType()->isPointerTy() || FoundLHS->getType()->isPointerTy()) - return false; + if (!FoundLHS->getType()->isPointerTy() && + !FoundRHS->getType()->isPointerTy() && + isImpliedCondOperands(Pred, LHS, RHS, getNotSCEV(FoundLHS), + getNotSCEV(FoundRHS), CtxI)) + return true; - // There's no clear preference between forms 3. and 4., try both. - return isImpliedCondOperands(FoundPred, getNotSCEV(LHS), getNotSCEV(RHS), - FoundLHS, FoundRHS, Context) || - isImpliedCondOperands(Pred, LHS, RHS, getNotSCEV(FoundLHS), - getNotSCEV(FoundRHS), Context); + return false; } - // Unsigned comparison is the same as signed comparison when both the operands - // are non-negative. - if (CmpInst::isUnsigned(FoundPred) && - CmpInst::getSignedPredicate(FoundPred) == Pred && - isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) - return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context); + auto IsSignFlippedPredicate = [](CmpInst::Predicate P1, + CmpInst::Predicate P2) { + assert(P1 != P2 && "Handled earlier!"); + return CmpInst::isRelational(P2) && + P1 == CmpInst::getFlippedSignednessPredicate(P2); + }; + if (IsSignFlippedPredicate(Pred, FoundPred)) { + // Unsigned comparison is the same as signed comparison when both the + // operands are non-negative or negative. + if ((isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) || + (isKnownNegative(FoundLHS) && isKnownNegative(FoundRHS))) + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI); + // Create local copies that we can freely swap and canonicalize our + // conditions to "le/lt". + ICmpInst::Predicate CanonicalPred = Pred, CanonicalFoundPred = FoundPred; + const SCEV *CanonicalLHS = LHS, *CanonicalRHS = RHS, + *CanonicalFoundLHS = FoundLHS, *CanonicalFoundRHS = FoundRHS; + if (ICmpInst::isGT(CanonicalPred) || ICmpInst::isGE(CanonicalPred)) { + CanonicalPred = ICmpInst::getSwappedPredicate(CanonicalPred); + CanonicalFoundPred = ICmpInst::getSwappedPredicate(CanonicalFoundPred); + std::swap(CanonicalLHS, CanonicalRHS); + std::swap(CanonicalFoundLHS, CanonicalFoundRHS); + } + assert((ICmpInst::isLT(CanonicalPred) || ICmpInst::isLE(CanonicalPred)) && + "Must be!"); + assert((ICmpInst::isLT(CanonicalFoundPred) || + ICmpInst::isLE(CanonicalFoundPred)) && + "Must be!"); + if (ICmpInst::isSigned(CanonicalPred) && isKnownNonNegative(CanonicalRHS)) + // Use implication: + // x <u y && y >=s 0 --> x <s y. + // If we can prove the left part, the right part is also proven. 
+ return isImpliedCondOperands(CanonicalFoundPred, CanonicalLHS, + CanonicalRHS, CanonicalFoundLHS, + CanonicalFoundRHS); + if (ICmpInst::isUnsigned(CanonicalPred) && isKnownNegative(CanonicalRHS)) + // Use implication: + // x <s y && y <s 0 --> x <u y. + // If we can prove the left part, the right part is also proven. + return isImpliedCondOperands(CanonicalFoundPred, CanonicalLHS, + CanonicalRHS, CanonicalFoundLHS, + CanonicalFoundRHS); + } // Check if we can make progress by sharpening ranges. if (FoundPred == ICmpInst::ICMP_NE && @@ -10721,7 +10956,7 @@ bool ScalarEvolution::isImpliedCondBalancedTypes( // We know V `Pred` SharperMin. If this implies LHS `Pred` // RHS, we're done. if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(SharperMin), - Context)) + CtxI)) return true; LLVM_FALLTHROUGH; @@ -10736,8 +10971,7 @@ bool ScalarEvolution::isImpliedCondBalancedTypes( // // If V `Pred` Min implies LHS `Pred` RHS, we're done. - if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min), - Context)) + if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min), CtxI)) return true; break; @@ -10745,14 +10979,14 @@ bool ScalarEvolution::isImpliedCondBalancedTypes( case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_ULE: if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS, - LHS, V, getConstant(SharperMin), Context)) + LHS, V, getConstant(SharperMin), CtxI)) return true; LLVM_FALLTHROUGH; case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_ULT: if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS, - LHS, V, getConstant(Min), Context)) + LHS, V, getConstant(Min), CtxI)) return true; break; @@ -10766,12 +11000,11 @@ bool ScalarEvolution::isImpliedCondBalancedTypes( // Check whether the actual condition is beyond sufficient. if (FoundPred == ICmpInst::ICMP_EQ) if (ICmpInst::isTrueWhenEqual(Pred)) - if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context)) + if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI)) return true; if (Pred == ICmpInst::ICMP_NE) if (!ICmpInst::isTrueWhenEqual(FoundPred)) - if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS, - Context)) + if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS, CtxI)) return true; // Otherwise assume the worst. @@ -10852,7 +11085,7 @@ Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More, bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart( ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *Context) { + const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *CtxI) { // Try to recognize the following pattern: // // FoundRHS = ... @@ -10866,9 +11099,9 @@ bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart( // each iteration of this loop, including the first iteration. Therefore, in // this case, `FoundLHS Pred FoundRHS` implies `Start Pred FoundRHS`. Try to // prove the original pred using this fact. - if (!Context) + if (!CtxI) return false; - const BasicBlock *ContextBB = Context->getParent(); + const BasicBlock *ContextBB = CtxI->getParent(); // Make sure AR varies in the context block. 
if (auto *AR = dyn_cast<SCEVAddRecExpr>(FoundLHS)) { const Loop *L = AR->getLoop(); @@ -11090,7 +11323,7 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS, - const Instruction *Context) { + const Instruction *CtxI) { if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS)) return true; @@ -11098,7 +11331,7 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, return true; if (isImpliedCondOperandsViaAddRecStart(Pred, LHS, RHS, FoundLHS, FoundRHS, - Context)) + CtxI)) return true; return isImpliedCondOperandsHelper(Pred, LHS, RHS, @@ -11534,6 +11767,12 @@ const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start, if (IsSigned && BitWidth == 1) return getZero(Stride->getType()); + // This code has only been closely audited for negative strides in the + // unsigned comparison case, it may be correct for signed comparison, but + // that needs to be established. + assert((!IsSigned || !isKnownNonPositive(Stride)) && + "Stride is expected strictly positive for signed case!"); + // Calculate the maximum backedge count based on the range of values // permitted by Start, End, and Stride. APInt MinStart = @@ -11576,6 +11815,80 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS); bool PredicatedIV = false; + auto canAssumeNoSelfWrap = [&](const SCEVAddRecExpr *AR) { + // Can we prove this loop *must* be UB if overflow of IV occurs? + // Reasoning goes as follows: + // * Suppose the IV did self wrap. + // * If Stride evenly divides the iteration space, then once wrap + // occurs, the loop must revisit the same values. + // * We know that RHS is invariant, and that none of those values + // caused this exit to be taken previously. Thus, this exit is + // dynamically dead. + // * If this is the sole exit, then a dead exit implies the loop + // must be infinite if there are no abnormal exits. + // * If the loop were infinite, then it must either not be mustprogress + // or have side effects. Otherwise, it must be UB. + // * It can't (by assumption), be UB so we have contradicted our + // premise and can conclude the IV did not in fact self-wrap. + if (!isLoopInvariant(RHS, L)) + return false; + + auto *StrideC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this)); + if (!StrideC || !StrideC->getAPInt().isPowerOf2()) + return false; + + if (!ControlsExit || !loopHasNoAbnormalExits(L)) + return false; + + return loopIsFiniteByAssumption(L); + }; + + if (!IV) { + if (auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS)) { + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ZExt->getOperand()); + if (AR && AR->getLoop() == L && AR->isAffine()) { + auto canProveNUW = [&]() { + if (!isLoopInvariant(RHS, L)) + return false; + + if (!isKnownNonZero(AR->getStepRecurrence(*this))) + // We need the sequence defined by AR to strictly increase in the + // unsigned integer domain for the logic below to hold. + return false; + + const unsigned InnerBitWidth = getTypeSizeInBits(AR->getType()); + const unsigned OuterBitWidth = getTypeSizeInBits(RHS->getType()); + // If RHS <=u Limit, then there must exist a value V in the sequence + // defined by AR (e.g. {Start,+,Step}) such that V >u RHS, and + // V <=u UINT_MAX. Thus, we must exit the loop before unsigned + // overflow occurs. 
This limit also implies that a signed comparison + // (in the wide bitwidth) is equivalent to an unsigned comparison as + // the high bits on both sides must be zero. + APInt StrideMax = getUnsignedRangeMax(AR->getStepRecurrence(*this)); + APInt Limit = APInt::getMaxValue(InnerBitWidth) - (StrideMax - 1); + Limit = Limit.zext(OuterBitWidth); + return getUnsignedRangeMax(applyLoopGuards(RHS, L)).ule(Limit); + }; + auto Flags = AR->getNoWrapFlags(); + if (!hasFlags(Flags, SCEV::FlagNUW) && canProveNUW()) + Flags = setFlags(Flags, SCEV::FlagNUW); + + setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags); + if (AR->hasNoUnsignedWrap()) { + // Emulate what getZeroExtendExpr would have done during construction + // if we'd been able to infer the fact just above at that time. + const SCEV *Step = AR->getStepRecurrence(*this); + Type *Ty = ZExt->getType(); + auto *S = getAddRecExpr( + getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, 0), + getZeroExtendExpr(Step, Ty, 0), L, AR->getNoWrapFlags()); + IV = dyn_cast<SCEVAddRecExpr>(S); + } + } + } + } + + if (!IV && AllowPredicates) { // Try to make this an AddRec using runtime tests, in the first X // iterations of this loop, where X is the SCEV expression found by the @@ -11626,32 +11939,29 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, // // a) IV is either nuw or nsw depending upon signedness (indicated by the // NoWrap flag). - // b) loop is single exit with no side effects. - // + // b) the loop is guaranteed to be finite (e.g. is mustprogress and has + // no side effects within the loop) + // c) loop has a single static exit (with no abnormal exits) // // Precondition a) implies that if the stride is negative, this is a single // trip loop. The backedge taken count formula reduces to zero in this case. // - // Precondition b) implies that if rhs is invariant in L, then unknown - // stride being zero means the backedge can't be taken without UB. + // Precondition b) and c) combine to imply that if rhs is invariant in L, + // then a zero stride means the backedge can't be taken without executing + // undefined behavior. // // The positive stride case is the same as isKnownPositive(Stride) returning // true (original behavior of the function). // - // We want to make sure that the stride is truly unknown as there are edge - // cases where ScalarEvolution propagates no wrap flags to the - // post-increment/decrement IV even though the increment/decrement operation - // itself is wrapping. The computed backedge taken count may be wrong in - // such cases. This is prevented by checking that the stride is not known to - // be either positive or non-positive. For example, no wrap flags are - // propagated to the post-increment IV of this loop with a trip count of 2 - - // - // unsigned char i; - // for(i=127; i<128; i+=129) - // A[i] = i; - // - if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) || - !loopIsFiniteByAssumption(L)) + if (PredicatedIV || !NoWrap || !loopIsFiniteByAssumption(L) || + !loopHasNoAbnormalExits(L)) + return getCouldNotCompute(); + + // This bailout is protecting the logic in computeMaxBECountForLT which + // has not yet been sufficiently auditted or tested with negative strides. + // We used to filter out all known-non-positive cases here, we're in the + // process of being less restrictive bit by bit. 
+ if (IsSigned && isKnownNonPositive(Stride)) return getCouldNotCompute(); if (!isKnownNonZero(Stride)) { @@ -11687,37 +11997,12 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, } } else if (!Stride->isOne() && !NoWrap) { auto isUBOnWrap = [&]() { - // Can we prove this loop *must* be UB if overflow of IV occurs? - // Reasoning goes as follows: - // * Suppose the IV did self wrap. - // * If Stride evenly divides the iteration space, then once wrap - // occurs, the loop must revisit the same values. - // * We know that RHS is invariant, and that none of those values - // caused this exit to be taken previously. Thus, this exit is - // dynamically dead. - // * If this is the sole exit, then a dead exit implies the loop - // must be infinite if there are no abnormal exits. - // * If the loop were infinite, then it must either not be mustprogress - // or have side effects. Otherwise, it must be UB. - // * It can't (by assumption), be UB so we have contradicted our - // premise and can conclude the IV did not in fact self-wrap. // From no-self-wrap, we need to then prove no-(un)signed-wrap. This // follows trivially from the fact that every (un)signed-wrapped, but // not self-wrapped value must be LT than the last value before // (un)signed wrap. Since we know that last value didn't exit, nor // will any smaller one. - - if (!isLoopInvariant(RHS, L)) - return false; - - auto *StrideC = dyn_cast<SCEVConstant>(Stride); - if (!StrideC || !StrideC->getAPInt().isPowerOf2()) - return false; - - if (!ControlsExit || !loopHasNoAbnormalExits(L)) - return false; - - return loopIsFiniteByAssumption(L); + return canAssumeNoSelfWrap(IV); }; // Avoid proven overflow cases: this will ensure that the backedge taken @@ -11740,7 +12025,9 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, const SCEV *Start = IV->getStart(); // Preserve pointer-typed Start/RHS to pass to isLoopEntryGuardedByCond. - // Use integer-typed versions for actual computation. + // If we convert to integers, isLoopEntryGuardedByCond will miss some cases. + // Use integer-typed versions for actual computation; we can't subtract + // pointers in general. const SCEV *OrigStart = Start; const SCEV *OrigRHS = RHS; if (Start->getType()->isPointerTy()) { @@ -11771,10 +12058,13 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, // is End and so the result is as above, and if not max(End,Start) is Start // so we get a backedge count of zero. const SCEV *BECount = nullptr; - auto *StartMinusStride = getMinusSCEV(OrigStart, Stride); + auto *OrigStartMinusStride = getMinusSCEV(OrigStart, Stride); + assert(isAvailableAtLoopEntry(OrigStartMinusStride, L) && "Must be!"); + assert(isAvailableAtLoopEntry(OrigStart, L) && "Must be!"); + assert(isAvailableAtLoopEntry(OrigRHS, L) && "Must be!"); // Can we prove (max(RHS,Start) > Start - Stride? - if (isLoopEntryGuardedByCond(L, Cond, StartMinusStride, Start) && - isLoopEntryGuardedByCond(L, Cond, StartMinusStride, RHS)) { + if (isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigStart) && + isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigRHS)) { // In this case, we can use a refined formula for computing backedge taken // count. The general formula remains: // "End-Start /uceiling Stride" where "End = max(RHS,Start)" @@ -11795,10 +12085,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, // Our preconditions trivially imply no overflow in that form. 
const SCEV *MinusOne = getMinusOne(Stride->getType()); const SCEV *Numerator = - getMinusSCEV(getAddExpr(RHS, MinusOne), StartMinusStride); - if (!isa<SCEVCouldNotCompute>(Numerator)) { - BECount = getUDivExpr(Numerator, Stride); - } + getMinusSCEV(getAddExpr(RHS, MinusOne), getMinusSCEV(Start, Stride)); + BECount = getUDivExpr(Numerator, Stride); } const SCEV *BECountIfBackedgeTaken = nullptr; @@ -12141,7 +12429,7 @@ SCEVAddRecExpr::getPostIncExpr(ScalarEvolution &SE) const { } // Return true when S contains at least an undef value. -static inline bool containsUndefs(const SCEV *S) { +bool ScalarEvolution::containsUndefs(const SCEV *S) const { return SCEVExprContains(S, [](const SCEV *S) { if (const auto *SU = dyn_cast<SCEVUnknown>(S)) return isa<UndefValue>(SU->getValue()); @@ -12149,237 +12437,6 @@ static inline bool containsUndefs(const SCEV *S) { }); } -namespace { - -// Collect all steps of SCEV expressions. -struct SCEVCollectStrides { - ScalarEvolution &SE; - SmallVectorImpl<const SCEV *> &Strides; - - SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S) - : SE(SE), Strides(S) {} - - bool follow(const SCEV *S) { - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) - Strides.push_back(AR->getStepRecurrence(SE)); - return true; - } - - bool isDone() const { return false; } -}; - -// Collect all SCEVUnknown and SCEVMulExpr expressions. -struct SCEVCollectTerms { - SmallVectorImpl<const SCEV *> &Terms; - - SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) : Terms(T) {} - - bool follow(const SCEV *S) { - if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S) || - isa<SCEVSignExtendExpr>(S)) { - if (!containsUndefs(S)) - Terms.push_back(S); - - // Stop recursion: once we collected a term, do not walk its operands. - return false; - } - - // Keep looking. - return true; - } - - bool isDone() const { return false; } -}; - -// Check if a SCEV contains an AddRecExpr. -struct SCEVHasAddRec { - bool &ContainsAddRec; - - SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) { - ContainsAddRec = false; - } - - bool follow(const SCEV *S) { - if (isa<SCEVAddRecExpr>(S)) { - ContainsAddRec = true; - - // Stop recursion: once we collected a term, do not walk its operands. - return false; - } - - // Keep looking. - return true; - } - - bool isDone() const { return false; } -}; - -// Find factors that are multiplied with an expression that (possibly as a -// subexpression) contains an AddRecExpr. In the expression: -// -// 8 * (100 + %p * %q * (%a + {0, +, 1}_loop)) -// -// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)" -// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size -// parameters as they form a product with an induction variable. -// -// This collector expects all array size parameters to be in the same MulExpr. -// It might be necessary to later add support for collecting parameters that are -// spread over different nested MulExpr. 
-struct SCEVCollectAddRecMultiplies { - SmallVectorImpl<const SCEV *> &Terms; - ScalarEvolution &SE; - - SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T, ScalarEvolution &SE) - : Terms(T), SE(SE) {} - - bool follow(const SCEV *S) { - if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) { - bool HasAddRec = false; - SmallVector<const SCEV *, 0> Operands; - for (auto Op : Mul->operands()) { - const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Op); - if (Unknown && !isa<CallInst>(Unknown->getValue())) { - Operands.push_back(Op); - } else if (Unknown) { - HasAddRec = true; - } else { - bool ContainsAddRec = false; - SCEVHasAddRec ContiansAddRec(ContainsAddRec); - visitAll(Op, ContiansAddRec); - HasAddRec |= ContainsAddRec; - } - } - if (Operands.size() == 0) - return true; - - if (!HasAddRec) - return false; - - Terms.push_back(SE.getMulExpr(Operands)); - // Stop recursion: once we collected a term, do not walk its operands. - return false; - } - - // Keep looking. - return true; - } - - bool isDone() const { return false; } -}; - -} // end anonymous namespace - -/// Find parametric terms in this SCEVAddRecExpr. We first for parameters in -/// two places: -/// 1) The strides of AddRec expressions. -/// 2) Unknowns that are multiplied with AddRec expressions. -void ScalarEvolution::collectParametricTerms(const SCEV *Expr, - SmallVectorImpl<const SCEV *> &Terms) { - SmallVector<const SCEV *, 4> Strides; - SCEVCollectStrides StrideCollector(*this, Strides); - visitAll(Expr, StrideCollector); - - LLVM_DEBUG({ - dbgs() << "Strides:\n"; - for (const SCEV *S : Strides) - dbgs() << *S << "\n"; - }); - - for (const SCEV *S : Strides) { - SCEVCollectTerms TermCollector(Terms); - visitAll(S, TermCollector); - } - - LLVM_DEBUG({ - dbgs() << "Terms:\n"; - for (const SCEV *T : Terms) - dbgs() << *T << "\n"; - }); - - SCEVCollectAddRecMultiplies MulCollector(Terms, *this); - visitAll(Expr, MulCollector); -} - -static bool findArrayDimensionsRec(ScalarEvolution &SE, - SmallVectorImpl<const SCEV *> &Terms, - SmallVectorImpl<const SCEV *> &Sizes) { - int Last = Terms.size() - 1; - const SCEV *Step = Terms[Last]; - - // End of recursion. - if (Last == 0) { - if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) { - SmallVector<const SCEV *, 2> Qs; - for (const SCEV *Op : M->operands()) - if (!isa<SCEVConstant>(Op)) - Qs.push_back(Op); - - Step = SE.getMulExpr(Qs); - } - - Sizes.push_back(Step); - return true; - } - - for (const SCEV *&Term : Terms) { - // Normalize the terms before the next call to findArrayDimensionsRec. - const SCEV *Q, *R; - SCEVDivision::divide(SE, Term, Step, &Q, &R); - - // Bail out when GCD does not evenly divide one of the terms. - if (!R->isZero()) - return false; - - Term = Q; - } - - // Remove all SCEVConstants. - erase_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); }); - - if (Terms.size() > 0) - if (!findArrayDimensionsRec(SE, Terms, Sizes)) - return false; - - Sizes.push_back(Step); - return true; -} - -// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter. -static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) { - for (const SCEV *T : Terms) - if (SCEVExprContains(T, [](const SCEV *S) { return isa<SCEVUnknown>(S); })) - return true; - - return false; -} - -// Return the number of product terms in S. 
-static inline int numberOfTerms(const SCEV *S) { - if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S)) - return Expr->getNumOperands(); - return 1; -} - -static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) { - if (isa<SCEVConstant>(T)) - return nullptr; - - if (isa<SCEVUnknown>(T)) - return T; - - if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) { - SmallVector<const SCEV *, 2> Factors; - for (const SCEV *Op : M->operands()) - if (!isa<SCEVConstant>(Op)) - Factors.push_back(Op); - - return SE.getMulExpr(Factors); - } - - return T; -} - /// Return the size of an element read or written by Inst. const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) { Type *Ty; @@ -12394,248 +12451,6 @@ const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) { return getSizeOfExpr(ETy, Ty); } -void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms, - SmallVectorImpl<const SCEV *> &Sizes, - const SCEV *ElementSize) { - if (Terms.size() < 1 || !ElementSize) - return; - - // Early return when Terms do not contain parameters: we do not delinearize - // non parametric SCEVs. - if (!containsParameters(Terms)) - return; - - LLVM_DEBUG({ - dbgs() << "Terms:\n"; - for (const SCEV *T : Terms) - dbgs() << *T << "\n"; - }); - - // Remove duplicates. - array_pod_sort(Terms.begin(), Terms.end()); - Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end()); - - // Put larger terms first. - llvm::sort(Terms, [](const SCEV *LHS, const SCEV *RHS) { - return numberOfTerms(LHS) > numberOfTerms(RHS); - }); - - // Try to divide all terms by the element size. If term is not divisible by - // element size, proceed with the original term. - for (const SCEV *&Term : Terms) { - const SCEV *Q, *R; - SCEVDivision::divide(*this, Term, ElementSize, &Q, &R); - if (!Q->isZero()) - Term = Q; - } - - SmallVector<const SCEV *, 4> NewTerms; - - // Remove constant factors. - for (const SCEV *T : Terms) - if (const SCEV *NewT = removeConstantFactors(*this, T)) - NewTerms.push_back(NewT); - - LLVM_DEBUG({ - dbgs() << "Terms after sorting:\n"; - for (const SCEV *T : NewTerms) - dbgs() << *T << "\n"; - }); - - if (NewTerms.empty() || !findArrayDimensionsRec(*this, NewTerms, Sizes)) { - Sizes.clear(); - return; - } - - // The last element to be pushed into Sizes is the size of an element. - Sizes.push_back(ElementSize); - - LLVM_DEBUG({ - dbgs() << "Sizes:\n"; - for (const SCEV *S : Sizes) - dbgs() << *S << "\n"; - }); -} - -void ScalarEvolution::computeAccessFunctions( - const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts, - SmallVectorImpl<const SCEV *> &Sizes) { - // Early exit in case this SCEV is not an affine multivariate function. - if (Sizes.empty()) - return; - - if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr)) - if (!AR->isAffine()) - return; - - const SCEV *Res = Expr; - int Last = Sizes.size() - 1; - for (int i = Last; i >= 0; i--) { - const SCEV *Q, *R; - SCEVDivision::divide(*this, Res, Sizes[i], &Q, &R); - - LLVM_DEBUG({ - dbgs() << "Res: " << *Res << "\n"; - dbgs() << "Sizes[i]: " << *Sizes[i] << "\n"; - dbgs() << "Res divided by Sizes[i]:\n"; - dbgs() << "Quotient: " << *Q << "\n"; - dbgs() << "Remainder: " << *R << "\n"; - }); - - Res = Q; - - // Do not record the last subscript corresponding to the size of elements in - // the array. - if (i == Last) { - - // Bail out if the remainder is too complex. 
- if (isa<SCEVAddRecExpr>(R)) { - Subscripts.clear(); - Sizes.clear(); - return; - } - - continue; - } - - // Record the access function for the current subscript. - Subscripts.push_back(R); - } - - // Also push in last position the remainder of the last division: it will be - // the access function of the innermost dimension. - Subscripts.push_back(Res); - - std::reverse(Subscripts.begin(), Subscripts.end()); - - LLVM_DEBUG({ - dbgs() << "Subscripts:\n"; - for (const SCEV *S : Subscripts) - dbgs() << *S << "\n"; - }); -} - -/// Splits the SCEV into two vectors of SCEVs representing the subscripts and -/// sizes of an array access. Returns the remainder of the delinearization that -/// is the offset start of the array. The SCEV->delinearize algorithm computes -/// the multiples of SCEV coefficients: that is a pattern matching of sub -/// expressions in the stride and base of a SCEV corresponding to the -/// computation of a GCD (greatest common divisor) of base and stride. When -/// SCEV->delinearize fails, it returns the SCEV unchanged. -/// -/// For example: when analyzing the memory access A[i][j][k] in this loop nest -/// -/// void foo(long n, long m, long o, double A[n][m][o]) { -/// -/// for (long i = 0; i < n; i++) -/// for (long j = 0; j < m; j++) -/// for (long k = 0; k < o; k++) -/// A[i][j][k] = 1.0; -/// } -/// -/// the delinearization input is the following AddRec SCEV: -/// -/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> -/// -/// From this SCEV, we are able to say that the base offset of the access is %A -/// because it appears as an offset that does not divide any of the strides in -/// the loops: -/// -/// CHECK: Base offset: %A -/// -/// and then SCEV->delinearize determines the size of some of the dimensions of -/// the array as these are the multiples by which the strides are happening: -/// -/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes. -/// -/// Note that the outermost dimension remains of UnknownSize because there are -/// no strides that would help identifying the size of the last dimension: when -/// the array has been statically allocated, one could compute the size of that -/// dimension by dividing the overall size of the array by the size of the known -/// dimensions: %m * %o * 8. -/// -/// Finally delinearize provides the access functions for the array reference -/// that does correspond to A[i][j][k] of the above C testcase: -/// -/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] -/// -/// The testcases are checking the output of a function pass: -/// DelinearizationPass that walks through all loads and stores of a function -/// asking for the SCEV of the memory access with respect to all enclosing -/// loops, calling SCEV->delinearize on that and printing the results. -void ScalarEvolution::delinearize(const SCEV *Expr, - SmallVectorImpl<const SCEV *> &Subscripts, - SmallVectorImpl<const SCEV *> &Sizes, - const SCEV *ElementSize) { - // First step: collect parametric terms. - SmallVector<const SCEV *, 4> Terms; - collectParametricTerms(Expr, Terms); - - if (Terms.empty()) - return; - - // Second step: find subscript sizes. - findArrayDimensions(Terms, Sizes, ElementSize); - - if (Sizes.empty()) - return; - - // Third step: compute the access functions for each subscript. 
- computeAccessFunctions(Expr, Subscripts, Sizes); - - if (Subscripts.empty()) - return; - - LLVM_DEBUG({ - dbgs() << "succeeded to delinearize " << *Expr << "\n"; - dbgs() << "ArrayDecl[UnknownSize]"; - for (const SCEV *S : Sizes) - dbgs() << "[" << *S << "]"; - - dbgs() << "\nArrayRef"; - for (const SCEV *S : Subscripts) - dbgs() << "[" << *S << "]"; - dbgs() << "\n"; - }); -} - -bool ScalarEvolution::getIndexExpressionsFromGEP( - const GetElementPtrInst *GEP, SmallVectorImpl<const SCEV *> &Subscripts, - SmallVectorImpl<int> &Sizes) { - assert(Subscripts.empty() && Sizes.empty() && - "Expected output lists to be empty on entry to this function."); - assert(GEP && "getIndexExpressionsFromGEP called with a null GEP"); - Type *Ty = nullptr; - bool DroppedFirstDim = false; - for (unsigned i = 1; i < GEP->getNumOperands(); i++) { - const SCEV *Expr = getSCEV(GEP->getOperand(i)); - if (i == 1) { - Ty = GEP->getSourceElementType(); - if (auto *Const = dyn_cast<SCEVConstant>(Expr)) - if (Const->getValue()->isZero()) { - DroppedFirstDim = true; - continue; - } - Subscripts.push_back(Expr); - continue; - } - - auto *ArrayTy = dyn_cast<ArrayType>(Ty); - if (!ArrayTy) { - Subscripts.clear(); - Sizes.clear(); - return false; - } - - Subscripts.push_back(Expr); - if (!(DroppedFirstDim && i == 2)) - Sizes.push_back(ArrayTy->getNumElements()); - - Ty = ArrayTy->getElementType(); - } - return !Subscripts.empty(); -} - //===----------------------------------------------------------------------===// // SCEVCallbackVH Class Implementation //===----------------------------------------------------------------------===// @@ -12722,6 +12537,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) LoopDispositions(std::move(Arg.LoopDispositions)), LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)), BlockDispositions(std::move(Arg.BlockDispositions)), + SCEVUsers(std::move(Arg.SCEVUsers)), UnsignedRanges(std::move(Arg.UnsignedRanges)), SignedRanges(std::move(Arg.SignedRanges)), UniqueSCEVs(std::move(Arg.UniqueSCEVs)), @@ -12934,7 +12750,7 @@ ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) { Values.emplace_back(L, LoopVariant); LoopDisposition D = computeLoopDisposition(S, L); auto &Values2 = LoopDispositions[S]; - for (auto &V : make_range(Values2.rbegin(), Values2.rend())) { + for (auto &V : llvm::reverse(Values2)) { if (V.getPointer() == L) { V.setInt(D); break; @@ -13042,7 +12858,7 @@ ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) { Values.emplace_back(BB, DoesNotDominateBlock); BlockDisposition D = computeBlockDisposition(S, BB); auto &Values2 = BlockDispositions[S]; - for (auto &V : make_range(Values2.rbegin(), Values2.rend())) { + for (auto &V : llvm::reverse(Values2)) { if (V.getPointer() == BB) { V.setInt(D); break; @@ -13130,41 +12946,58 @@ bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; }); } -void -ScalarEvolution::forgetMemoizedResults(const SCEV *S) { - ValuesAtScopes.erase(S); - LoopDispositions.erase(S); - BlockDispositions.erase(S); - UnsignedRanges.erase(S); - SignedRanges.erase(S); - ExprValueMap.erase(S); - HasRecMap.erase(S); - MinTrailingZerosCache.erase(S); +void ScalarEvolution::forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs) { + SmallPtrSet<const SCEV *, 8> ToForget(SCEVs.begin(), SCEVs.end()); + SmallVector<const SCEV *, 8> Worklist(ToForget.begin(), ToForget.end()); + + while (!Worklist.empty()) { + const SCEV *Curr = 
Worklist.pop_back_val(); auto Users = SCEVUsers.find(Curr); if (Users != SCEVUsers.end()) for (auto *User : Users->second) if (ToForget.insert(User).second) Worklist.push_back(User); } + + for (auto *S : ToForget) + forgetMemoizedResultsImpl(S); for (auto I = PredicatedSCEVRewrites.begin(); I != PredicatedSCEVRewrites.end();) { std::pair<const SCEV *, const Loop *> Entry = I->first; - if (Entry.first == S) + if (ToForget.count(Entry.first)) PredicatedSCEVRewrites.erase(I++); else ++I; } - auto RemoveSCEVFromBackedgeMap = - [S](DenseMap<const Loop *, BackedgeTakenInfo> &Map) { + auto RemoveSCEVFromBackedgeMap = [&ToForget]( + DenseMap<const Loop *, BackedgeTakenInfo> &Map) { for (auto I = Map.begin(), E = Map.end(); I != E;) { BackedgeTakenInfo &BEInfo = I->second; - if (BEInfo.hasOperand(S)) + if (any_of(ToForget, + [&BEInfo](const SCEV *S) { return BEInfo.hasOperand(S); })) Map.erase(I++); else ++I; } - }; + }; RemoveSCEVFromBackedgeMap(BackedgeTakenCounts); RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts); } +void ScalarEvolution::forgetMemoizedResultsImpl(const SCEV *S) { + ValuesAtScopes.erase(S); + LoopDispositions.erase(S); + BlockDispositions.erase(S); + UnsignedRanges.erase(S); + SignedRanges.erase(S); + ExprValueMap.erase(S); + HasRecMap.erase(S); + MinTrailingZerosCache.erase(S); +} + void ScalarEvolution::getUsedLoops(const SCEV *S, SmallPtrSetImpl<const Loop *> &LoopsUsed) { @@ -13185,13 +13018,6 @@ ScalarEvolution::getUsedLoops(const SCEV *S, SCEVTraversal<FindUsedLoops>(F).visitAll(S); } -void ScalarEvolution::addToLoopUseLists(const SCEV *S) { - SmallPtrSet<const Loop *, 8> LoopsUsed; - getUsedLoops(S, LoopsUsed); - for (auto *L : LoopsUsed) - LoopUsers[L].push_back(S); -} - void ScalarEvolution::verify() const { ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); ScalarEvolution SE2(F, TLI, AC, DT, LI); @@ -13282,6 +13108,23 @@ void ScalarEvolution::verify() const { assert(ValidLoops.contains(AR->getLoop()) && "AddRec references invalid loop"); } + + // Verify integrity of SCEV users. + for (const auto &S : UniqueSCEVs) { + SmallVector<const SCEV *, 4> Ops; + collectUniqueOps(&S, Ops); + for (const auto *Op : Ops) { + // We do not store dependencies of constants. + if (isa<SCEVConstant>(Op)) + continue; + auto It = SCEVUsers.find(Op); + if (It != SCEVUsers.end() && It->second.count(&S)) + continue; + dbgs() << "Use of operand " << *Op << " by user " << S + << " is not being tracked!\n"; + std::abort(); + } + } } bool ScalarEvolution::invalidate( @@ -13685,6 +13528,16 @@ PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L) : SE(SE), L(L) {} +void ScalarEvolution::registerUser(const SCEV *User, + ArrayRef<const SCEV *> Ops) { + for (auto *Op : Ops) + // We do not expect that forgetting cached data for SCEVConstants will ever + // open any prospects for sharpening or introduce any correctness issues, + // so we don't bother storing their dependencies.
+ if (!isa<SCEVConstant>(Op)) + SCEVUsers[Op].insert(User); +} + const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { const SCEV *Expr = SE.getSCEV(V); RewriteEntry &Entry = RewriteMap[Expr]; @@ -13897,52 +13750,51 @@ ScalarEvolution::computeSymbolicMaxBackedgeTakenCount(const Loop *L) { return getUMinFromMismatchedTypes(ExitCounts); } -/// This rewriter is similar to SCEVParameterRewriter (it replaces SCEVUnknown -/// components following the Map (Value -> SCEV)), but skips AddRecExpr because -/// we cannot guarantee that the replacement is loop invariant in the loop of -/// the AddRec. +/// A rewriter to replace SCEV expressions in Map with the corresponding entry +/// in the map. It skips AddRecExpr because we cannot guarantee that the +/// replacement is loop invariant in the loop of the AddRec. +/// +/// At the moment only rewriting SCEVUnknown and SCEVZeroExtendExpr is +/// supported. class SCEVLoopGuardRewriter : public SCEVRewriteVisitor<SCEVLoopGuardRewriter> { - ValueToSCEVMapTy ⤅ + const DenseMap<const SCEV *, const SCEV *> ⤅ public: - SCEVLoopGuardRewriter(ScalarEvolution &SE, ValueToSCEVMapTy &M) + SCEVLoopGuardRewriter(ScalarEvolution &SE, + DenseMap<const SCEV *, const SCEV *> &M) : SCEVRewriteVisitor(SE), Map(M) {} const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { return Expr; } const SCEV *visitUnknown(const SCEVUnknown *Expr) { - auto I = Map.find(Expr->getValue()); + auto I = Map.find(Expr); if (I == Map.end()) return Expr; return I->second; } + + const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { + auto I = Map.find(Expr); + if (I == Map.end()) + return SCEVRewriteVisitor<SCEVLoopGuardRewriter>::visitZeroExtendExpr( + Expr); + return I->second; + } }; const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) { + SmallVector<const SCEV *> ExprsToRewrite; auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS, - const SCEV *RHS, ValueToSCEVMapTy &RewriteMap) { - // If we have LHS == 0, check if LHS is computing a property of some unknown - // SCEV %v which we can rewrite %v to express explicitly. - const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS); - if (Predicate == CmpInst::ICMP_EQ && RHSC && - RHSC->getValue()->isNullValue()) { - // If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to - // explicitly express that. - const SCEV *URemLHS = nullptr; - const SCEV *URemRHS = nullptr; - if (matchURem(LHS, URemLHS, URemRHS)) { - if (const SCEVUnknown *LHSUnknown = dyn_cast<SCEVUnknown>(URemLHS)) { - Value *V = LHSUnknown->getValue(); - auto Multiple = - getMulExpr(getUDivExpr(URemLHS, URemRHS), URemRHS, - (SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNSW)); - RewriteMap[V] = Multiple; - return; - } - } - } - - if (!isa<SCEVUnknown>(LHS) && isa<SCEVUnknown>(RHS)) { + const SCEV *RHS, + DenseMap<const SCEV *, const SCEV *> + &RewriteMap) { + // WARNING: It is generally unsound to apply any wrap flags to the proposed + // replacement SCEV which isn't directly implied by the structure of that + // SCEV. In particular, using contextual facts to imply flags is *NOT* + // legal. See the scoping rules for flags in the header to understand why. + + // If LHS is a constant, apply information to the other expression. + if (isa<SCEVConstant>(LHS)) { std::swap(LHS, RHS); Predicate = CmpInst::getSwappedPredicate(Predicate); } @@ -13950,7 +13802,8 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) { // Check for a condition of the form (-C1 + X < C2). 
InstCombine will // create this form when combining two checks of the form (X u< C2 + C1) and // (X >=u C1). - auto MatchRangeCheckIdiom = [this, Predicate, LHS, RHS, &RewriteMap]() { + auto MatchRangeCheckIdiom = [this, Predicate, LHS, RHS, &RewriteMap, + &ExprsToRewrite]() { auto *AddExpr = dyn_cast<SCEVAddExpr>(LHS); if (!AddExpr || AddExpr->getNumOperands() != 2) return false; @@ -13968,26 +13821,55 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) { // Bail out, unless we have a non-wrapping, monotonic range. if (ExactRegion.isWrappedSet() || ExactRegion.isFullSet()) return false; - auto I = RewriteMap.find(LHSUnknown->getValue()); + auto I = RewriteMap.find(LHSUnknown); const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHSUnknown; - RewriteMap[LHSUnknown->getValue()] = getUMaxExpr( + RewriteMap[LHSUnknown] = getUMaxExpr( getConstant(ExactRegion.getUnsignedMin()), getUMinExpr(RewrittenLHS, getConstant(ExactRegion.getUnsignedMax()))); + ExprsToRewrite.push_back(LHSUnknown); return true; }; if (MatchRangeCheckIdiom()) return; - // For now, limit to conditions that provide information about unknown - // expressions. RHS also cannot contain add recurrences. - auto *LHSUnknown = dyn_cast<SCEVUnknown>(LHS); - if (!LHSUnknown || containsAddRecurrence(RHS)) + // If we have LHS == 0, check if LHS is computing a property of some unknown + // SCEV %v which we can rewrite %v to express explicitly. + const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS); + if (Predicate == CmpInst::ICMP_EQ && RHSC && + RHSC->getValue()->isNullValue()) { + // If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to + // explicitly express that. + const SCEV *URemLHS = nullptr; + const SCEV *URemRHS = nullptr; + if (matchURem(LHS, URemLHS, URemRHS)) { + if (const SCEVUnknown *LHSUnknown = dyn_cast<SCEVUnknown>(URemLHS)) { + auto Multiple = getMulExpr(getUDivExpr(URemLHS, URemRHS), URemRHS); + RewriteMap[LHSUnknown] = Multiple; + ExprsToRewrite.push_back(LHSUnknown); + return; + } + } + } + + // Do not apply information for constants or if RHS contains an AddRec. + if (isa<SCEVConstant>(LHS) || containsAddRecurrence(RHS)) + return; + + // If RHS is SCEVUnknown, make sure the information is applied to it. + if (!isa<SCEVUnknown>(LHS) && isa<SCEVUnknown>(RHS)) { + std::swap(LHS, RHS); + Predicate = CmpInst::getSwappedPredicate(Predicate); + } + + // Limit to expressions that can be rewritten. + if (!isa<SCEVUnknown>(LHS) && !isa<SCEVZeroExtendExpr>(LHS)) return; // Check whether LHS has already been rewritten. In that case we want to // chain further rewrites onto the already rewritten value. - auto I = RewriteMap.find(LHSUnknown->getValue()); + auto I = RewriteMap.find(LHS); const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHS; + const SCEV *RewrittenRHS = nullptr; switch (Predicate) { case CmpInst::ICMP_ULT: @@ -14031,14 +13913,17 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) { break; } - if (RewrittenRHS) - RewriteMap[LHSUnknown->getValue()] = RewrittenRHS; + if (RewrittenRHS) { + RewriteMap[LHS] = RewrittenRHS; + if (LHS == RewrittenLHS) + ExprsToRewrite.push_back(LHS); + } }; // Starting at the loop predecessor, climb up the predecessor chain, as long // as there are predecessors that can be found that have unique successors // leading to the original header. // TODO: share this logic with isLoopEntryGuardedByCond. 
- ValueToSCEVMapTy RewriteMap; + DenseMap<const SCEV *, const SCEV *> RewriteMap; for (std::pair<const BasicBlock *, const BasicBlock *> Pair( L->getLoopPredecessor(), L->getHeader()); Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { @@ -14088,6 +13973,19 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) { if (RewriteMap.empty()) return Expr; + + // Now that all rewrite information is collect, rewrite the collected + // expressions with the information in the map. This applies information to + // sub-expressions. + if (ExprsToRewrite.size() > 1) { + for (const SCEV *Expr : ExprsToRewrite) { + const SCEV *RewriteTo = RewriteMap[Expr]; + RewriteMap.erase(Expr); + SCEVLoopGuardRewriter Rewriter(*this, RewriteMap); + RewriteMap.insert({Expr, Rewriter.visit(RewriteTo)}); + } + } + SCEVLoopGuardRewriter Rewriter(*this, RewriteMap); return Rewriter.visit(Expr); } |
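
The sign-flipped predicate handling added to isImpliedCondBalancedTypes (IsSignFlippedPredicate plus the canonicalization to lt/le) rests on two implications stated in its comments: x <u y together with y >=s 0 gives x <s y, and x <s y together with y <s 0 gives x <u y. Both are easy to confirm exhaustively at a small bit width. The following standalone check is my own verification harness, not code from the patch; it assumes two's-complement narrowing when reinterpreting the 8-bit values.

  #include <cassert>
  #include <cstdint>

  int main() {
    for (int X = 0; X < 256; ++X)
      for (int Y = 0; Y < 256; ++Y) {
        uint8_t UX = (uint8_t)X, UY = (uint8_t)Y;  // unsigned views
        int8_t SX = (int8_t)UX, SY = (int8_t)UY;   // signed views (two's complement)
        // x <u y && y >=s 0  ==>  x <s y
        if (UX < UY && SY >= 0)
          assert(SX < SY);
        // x <s y && y <s 0  ==>  x <u y
        if (SX < SY && SY < 0)
          assert(UX < UY);
      }
    return 0;
  }

The same reasoning scales to any bit width: y >=s 0 forces both operands below the signed/unsigned split point, and y <s 0 forces both above it, so the two orderings coincide.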
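
The canAssumeNoSelfWrap lambda in howManyLessThans leans on the fact that a power-of-two stride evenly divides the 2^N-value space of an N-bit IV, so once the IV self-wraps it can only revisit values it already produced; if none of those values took the exit before, the exit is dynamically dead, and for a finite loop with no abnormal exits that yields the contradiction described in the comment. The divisibility fact itself can be confirmed exhaustively for 8-bit IVs; this check is my own illustration, not part of the patch.

  #include <cassert>
  #include <cstdint>
  #include <set>

  int main() {
    for (unsigned Stride = 1; Stride <= 128; Stride *= 2) {  // power-of-two strides
      for (unsigned Start = 0; Start <= 255; ++Start) {
        std::set<uint8_t> Visited;
        uint8_t IV = (uint8_t)Start;
        unsigned Period = 256 / Stride;        // Stride divides the 256-value space
        for (unsigned K = 0; K < Period; ++K) {
          Visited.insert(IV);
          IV = (uint8_t)(IV + Stride);         // wrapping 8-bit add
        }
        // After 256/Stride steps the IV is back at Start, so any further steps
        // only revisit the same Period values.
        assert(IV == (uint8_t)Start);
        assert(Visited.size() == Period);
      }
    }
    return 0;
  }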
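
The canProveNUW limit in the new zero-extend handling can also be illustrated concretely: for an 8-bit IV whose step has unsigned maximum StrideMax, Limit = (2^8 - 1) - (StrideMax - 1), and whenever the (wider-typed) exit bound RHS is <=u Limit the inner IV must take the exit before it can wrap. The brute-force demonstration below is my own; Start, Step, and RHS values are made-up examples, not values from the patch.

  #include <cassert>
  #include <cstdint>

  int main() {
    const unsigned StrideMax = 3;                   // assumed known max of the step
    const unsigned Limit = 255u - (StrideMax - 1);  // == 253 for an 8-bit IV
    const unsigned RHS = 200;                       // example exit bound, RHS <=u Limit
    assert(RHS <= Limit);

    for (unsigned Start = 0; Start <= 255; ++Start)
      for (unsigned Step = 1; Step <= StrideMax; ++Step) {
        unsigned IV = Start;                        // models the 8-bit IV, widened
        // "while (zext(IV) <u RHS) IV += Step" never wraps the 8-bit value:
        while (IV < RHS) {
          assert(IV + Step <= 255 && "IV would have wrapped before exiting");
          IV += Step;
        }
      }
    return 0;
  }

The assertion holds because IV < RHS <= 255 - (StrideMax - 1) implies IV + Step <= 255, which is exactly the bound the Limit computation encodes before zero-extending it to the outer bit width.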
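
The refined backedge-taken count used once the entry guard max(RHS, Start) > Start - Stride is established is ((RHS - 1) - (Start - Stride)) /u Stride, which should agree with "(End - Start) /uceiling Stride" for End = max(RHS, Start). The consistency check below is my own harness over small unsigned values; bruteForceCount is a made-up reference implementation that simply counts how many IV values Start, Start+Stride, ... satisfy IV <u RHS.

  #include <cassert>

  // Reference: number of IV values Start + k*Stride (k >= 0) with IV <u RHS,
  // i.e. ceil((max(RHS, Start) - Start) / Stride) computed the slow way.
  static unsigned bruteForceCount(unsigned Start, unsigned RHS, unsigned Stride) {
    unsigned Count = 0;
    for (unsigned IV = Start; IV < RHS; IV += Stride)
      ++Count;
    return Count;
  }

  int main() {
    for (unsigned Start = 0; Start <= 255; ++Start)
      for (unsigned RHS = 0; RHS <= 255; ++RHS)
        for (unsigned Stride = 1; Stride <= 8; ++Stride) {
          if (Start < Stride)
            continue;                        // guard: Start - Stride must not wrap
          unsigned StartMinusStride = Start - Stride;
          if (RHS <= StartMinusStride)
            continue;                        // guard: Start - Stride <u RHS
          unsigned BECount = ((RHS - 1) - StartMinusStride) / Stride;
          assert(BECount == bruteForceCount(Start, RHS, Stride));
        }
    return 0;
  }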
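
The SCEVUsers map maintained by registerUser, together with the worklist in forgetMemoizedResults, replaces the old per-loop use lists with an explicit reverse-dependency graph: every non-constant operand records the expressions built on top of it, and invalidation walks that graph transitively before dropping per-expression caches. The sketch below is a minimal standalone rendering of that pattern, assuming std::unordered_map/std::unordered_set in place of LLVM's DenseMap/SmallPtrSet; Expr, UserTracker, and Cached are placeholder names, not ScalarEvolution members.

  #include <unordered_map>
  #include <unordered_set>
  #include <vector>

  // Placeholder expression handle; ScalarEvolution uses interned const SCEV *.
  using Expr = const void *;

  struct UserTracker {
    // Reverse edges: operand -> expressions that were built from it.
    std::unordered_map<Expr, std::unordered_set<Expr>> Users;
    // Stand-in for the per-expression caches (ranges, dispositions, ...).
    std::unordered_set<Expr> Cached;

    // Called whenever a new expression is created from its operands.
    void registerUser(Expr User, const std::vector<Expr> &Ops) {
      for (Expr Op : Ops)
        Users[Op].insert(User);   // the real code skips SCEVConstant operands
    }

    // Forget an expression and, transitively, everything built on top of it.
    void forgetMemoizedResults(Expr Root) {
      std::unordered_set<Expr> ToForget{Root};
      std::vector<Expr> Worklist{Root};
      while (!Worklist.empty()) {
        Expr Curr = Worklist.back();
        Worklist.pop_back();
        auto It = Users.find(Curr);
        if (It == Users.end())
          continue;
        for (Expr User : It->second)
          if (ToForget.insert(User).second)
            Worklist.push_back(User);
      }
      for (Expr E : ToForget)
        Cached.erase(E);          // analogue of forgetMemoizedResultsImpl
    }
  };

The verify() addition in the patch checks the invariant this structure depends on: every non-constant operand of every unique SCEV must list that SCEV among its users, otherwise a stale cache entry could survive invalidation.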