Diffstat (limited to 'contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp')
| mode | path | lines changed |
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp | 2010 |
1 file changed, 1073 insertions, 937 deletions
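Much of this diff migrates ValueTracking's entry points from long DataLayout/AssumptionCache/DominatorTree parameter lists to a single SimplifyQuery argument, and exports the SimplifyQuery-based computeKnownBits overloads from the llvm namespace. The following is only a rough caller-side sketch of how those refactored entry points are typically invoked; it is not part of this diff, and the helper name, variable names, and exact include paths are assumptions.

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h" // SimplifyQuery (include path is an assumption)
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;

// Hypothetical helper: bundle the analysis context once in a SimplifyQuery and
// reuse it across ValueTracking calls, instead of threading DL/DT/AC/CxtI
// through every query (mirrors the SimplifyQuery construction in this diff).
static bool valueKnownNonNegative(const Value *V, const DataLayout &DL,
                                  const DominatorTree *DT, AssumptionCache *AC,
                                  const Instruction *CxtI) {
  SimplifyQuery Q(DL, DT, AC, CxtI, /*UseInstrInfo=*/true);
  KnownBits Known = computeKnownBits(V, /*Depth=*/0, Q); // overload exported by this change
  return Known.isNonNegative(); // what the new isKnownNonNegative(V, Q) wrapper checks
}

Callers that issue many queries against the same value can additionally wrap it in WithCache<const Value *> (whose header this diff starts including) so the KnownBits are computed once and reused, as the reworked haveNoCommonBitsSet does via getKnownBits(SQ).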
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp index 410f93b1c215..9ae05a4b5ccc 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/DomConditionCache.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" @@ -33,6 +34,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/Analysis/WithCache.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -148,117 +150,105 @@ static void computeKnownBits(const Value *V, const APInt &DemandedElts, KnownBits &Known, unsigned Depth, const SimplifyQuery &Q); -static void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, - const SimplifyQuery &Q) { +void llvm::computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, + const SimplifyQuery &Q) { // Since the number of lanes in a scalable vector is unknown at compile time, // we track one bit which is implicitly broadcast to all lanes. This means // that all lanes in a scalable vector are considered demanded. auto *FVTy = dyn_cast<FixedVectorType>(V->getType()); APInt DemandedElts = FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1); - computeKnownBits(V, DemandedElts, Known, Depth, Q); + ::computeKnownBits(V, DemandedElts, Known, Depth, Q); } void llvm::computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) { - ::computeKnownBits(V, Known, Depth, - SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC, - safeCxtI(V, CxtI), UseInstrInfo)); -} - -void llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, - KnownBits &Known, const DataLayout &DL, - unsigned Depth, AssumptionCache *AC, - const Instruction *CxtI, const DominatorTree *DT, - bool UseInstrInfo) { - ::computeKnownBits(V, DemandedElts, Known, Depth, - SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC, - safeCxtI(V, CxtI), UseInstrInfo)); + computeKnownBits( + V, Known, Depth, + SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); } -static KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, - unsigned Depth, const SimplifyQuery &Q); - -static KnownBits computeKnownBits(const Value *V, unsigned Depth, - const SimplifyQuery &Q); - KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) { - return ::computeKnownBits(V, Depth, - SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC, - safeCxtI(V, CxtI), UseInstrInfo)); + return computeKnownBits( + V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); } KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) { - return ::computeKnownBits(V, DemandedElts, Depth, - SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC, - safeCxtI(V, CxtI), UseInstrInfo)); + return computeKnownBits( + V, DemandedElts, Depth, + SimplifyQuery(DL, DT, 
AC, safeCxtI(V, CxtI), UseInstrInfo)); } -bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, - const DataLayout &DL, AssumptionCache *AC, - const Instruction *CxtI, const DominatorTree *DT, - bool UseInstrInfo) { - assert(LHS->getType() == RHS->getType() && - "LHS and RHS should have the same type"); - assert(LHS->getType()->isIntOrIntVectorTy() && - "LHS and RHS should be integers"); +static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS, + const SimplifyQuery &SQ) { // Look for an inverted mask: (X & ~M) op (Y & M). { Value *M; if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) && - match(RHS, m_c_And(m_Specific(M), m_Value()))) - return true; - if (match(RHS, m_c_And(m_Not(m_Value(M)), m_Value())) && - match(LHS, m_c_And(m_Specific(M), m_Value()))) + match(RHS, m_c_And(m_Specific(M), m_Value())) && + isGuaranteedNotToBeUndef(M, SQ.AC, SQ.CxtI, SQ.DT)) return true; } // X op (Y & ~X) - if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) || - match(LHS, m_c_And(m_Not(m_Specific(RHS)), m_Value()))) + if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) && + isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT)) return true; // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern // for constant Y. Value *Y; if (match(RHS, - m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) || - match(LHS, m_c_Xor(m_c_And(m_Specific(RHS), m_Value(Y)), m_Deferred(Y)))) + m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) && + isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT) && + isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT)) return true; // Peek through extends to find a 'not' of the other side: // (ext Y) op ext(~Y) - // (ext ~Y) op ext(Y) - if ((match(LHS, m_ZExtOrSExt(m_Value(Y))) && - match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y))))) || - (match(RHS, m_ZExtOrSExt(m_Value(Y))) && - match(LHS, m_ZExtOrSExt(m_Not(m_Specific(Y)))))) + if (match(LHS, m_ZExtOrSExt(m_Value(Y))) && + match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y)))) && + isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT)) return true; // Look for: (A & B) op ~(A | B) { Value *A, *B; if (match(LHS, m_And(m_Value(A), m_Value(B))) && - match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) - return true; - if (match(RHS, m_And(m_Value(A), m_Value(B))) && - match(LHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) + match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))) && + isGuaranteedNotToBeUndef(A, SQ.AC, SQ.CxtI, SQ.DT) && + isGuaranteedNotToBeUndef(B, SQ.AC, SQ.CxtI, SQ.DT)) return true; } - IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType()); - KnownBits LHSKnown(IT->getBitWidth()); - KnownBits RHSKnown(IT->getBitWidth()); - computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, UseInstrInfo); - computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, UseInstrInfo); - return KnownBits::haveNoCommonBitsSet(LHSKnown, RHSKnown); + + return false; +} + +bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache, + const WithCache<const Value *> &RHSCache, + const SimplifyQuery &SQ) { + const Value *LHS = LHSCache.getValue(); + const Value *RHS = RHSCache.getValue(); + + assert(LHS->getType() == RHS->getType() && + "LHS and RHS should have the same type"); + assert(LHS->getType()->isIntOrIntVectorTy() && + "LHS and RHS should be integers"); + + if (haveNoCommonBitsSetSpecialCases(LHS, RHS, SQ) || + haveNoCommonBitsSetSpecialCases(RHS, LHS, SQ)) + return true; + + return 
KnownBits::haveNoCommonBitsSet(LHSCache.getKnownBits(SQ), + RHSCache.getKnownBits(SQ)); } bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) { @@ -275,10 +265,9 @@ bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) { - return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth, - SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC, - safeCxtI(V, CxtI), - UseInstrInfo)); + return ::isKnownToBeAPowerOfTwo( + V, OrZero, Depth, + SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); } static bool isKnownNonZero(const Value *V, const APInt &DemandedElts, @@ -290,36 +279,28 @@ static bool isKnownNonZero(const Value *V, unsigned Depth, bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) { - return ::isKnownNonZero(V, Depth, - SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC, - safeCxtI(V, CxtI), UseInstrInfo)); + return ::isKnownNonZero( + V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); } -bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL, - unsigned Depth, AssumptionCache *AC, - const Instruction *CxtI, const DominatorTree *DT, - bool UseInstrInfo) { - KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT, UseInstrInfo); - return Known.isNonNegative(); +bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, + unsigned Depth) { + return computeKnownBits(V, Depth, SQ).isNonNegative(); } -bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth, - AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT, bool UseInstrInfo) { +bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ, + unsigned Depth) { if (auto *CI = dyn_cast<ConstantInt>(V)) return CI->getValue().isStrictlyPositive(); // TODO: We'd doing two recursive queries here. We should factor this such // that only a single query is needed. 
- return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT, UseInstrInfo) && - isKnownNonZero(V, DL, Depth, AC, CxtI, DT, UseInstrInfo); + return isKnownNonNegative(V, SQ, Depth) && ::isKnownNonZero(V, Depth, SQ); } -bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth, - AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT, bool UseInstrInfo) { - KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT, UseInstrInfo); - return Known.isNegative(); +bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ, + unsigned Depth) { + return computeKnownBits(V, Depth, SQ).isNegative(); } static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth, @@ -329,21 +310,16 @@ bool llvm::isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) { - return ::isKnownNonEqual(V1, V2, 0, - SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC, - safeCxtI(V2, V1, CxtI), UseInstrInfo)); + return ::isKnownNonEqual( + V1, V2, 0, + SimplifyQuery(DL, DT, AC, safeCxtI(V2, V1, CxtI), UseInstrInfo)); } -static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth, - const SimplifyQuery &Q); - bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask, - const DataLayout &DL, unsigned Depth, - AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT, bool UseInstrInfo) { - return ::MaskedValueIsZero(V, Mask, Depth, - SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC, - safeCxtI(V, CxtI), UseInstrInfo)); + const SimplifyQuery &SQ, unsigned Depth) { + KnownBits Known(Mask.getBitWidth()); + computeKnownBits(V, Known, Depth, SQ); + return Mask.isSubsetOf(Known.Zero); } static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts, @@ -361,9 +337,8 @@ unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) { - return ::ComputeNumSignBits(V, Depth, - SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC, - safeCxtI(V, CxtI), UseInstrInfo)); + return ::ComputeNumSignBits( + V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo)); } unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL, @@ -422,10 +397,9 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, } bool SelfMultiply = Op0 == Op1; - // TODO: SelfMultiply can be poison, but not undef. if (SelfMultiply) SelfMultiply &= - isGuaranteedNotToBeUndefOrPoison(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1); + isGuaranteedNotToBeUndef(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1); Known = KnownBits::mul(Known, Known2, SelfMultiply); // Only make use of no-wrap flags if we failed to compute the sign bit @@ -573,11 +547,24 @@ static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) { // All other predicates - rely on generic ConstantRange handling. 
const APInt *C; - if (!match(RHS, m_APInt(C))) + auto Zero = APInt::getZero(RHS->getType()->getScalarSizeInBits()); + if (match(RHS, m_APInt(C))) { + ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C); + return !TrueValues.contains(Zero); + } + + auto *VC = dyn_cast<ConstantDataVector>(RHS); + if (VC == nullptr) return false; - ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C); - return !TrueValues.contains(APInt::getZero(C->getBitWidth())); + for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem; + ++ElemIdx) { + ConstantRange TrueValues = ConstantRange::makeExactICmpRegion( + Pred, VC->getElementAsAPInt(ElemIdx)); + if (TrueValues.contains(Zero)) + return false; + } + return true; } static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) { @@ -586,30 +573,34 @@ static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) { if (!Q.AC || !Q.CxtI) return false; - if (Q.CxtI && V->getType()->isPointerTy()) { - SmallVector<Attribute::AttrKind, 2> AttrKinds{Attribute::NonNull}; - if (!NullPointerIsDefined(Q.CxtI->getFunction(), - V->getType()->getPointerAddressSpace())) - AttrKinds.push_back(Attribute::Dereferenceable); - - if (getKnowledgeValidInContext(V, AttrKinds, Q.CxtI, Q.DT, Q.AC)) - return true; - } - - for (auto &AssumeVH : Q.AC->assumptionsFor(V)) { - if (!AssumeVH) + for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) { + if (!Elem.Assume) continue; - CallInst *I = cast<CallInst>(AssumeVH); + + AssumeInst *I = cast<AssumeInst>(Elem.Assume); assert(I->getFunction() == Q.CxtI->getFunction() && "Got assumption for the wrong function!"); + if (Elem.Index != AssumptionCache::ExprResultIdx) { + if (!V->getType()->isPointerTy()) + continue; + if (RetainedKnowledge RK = getKnowledgeFromBundle( + *I, I->bundle_op_info_begin()[Elem.Index])) { + if (RK.WasOn == V && + (RK.AttrKind == Attribute::NonNull || + (RK.AttrKind == Attribute::Dereferenceable && + !NullPointerIsDefined(Q.CxtI->getFunction(), + V->getType()->getPointerAddressSpace()))) && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) + return true; + } + continue; + } + // Warning: This loop can end up being somewhat performance sensitive. // We're running this loop for once for each value queried resulting in a // runtime of ~O(#assumes * #values). - assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && - "must be an assume intrinsic"); - Value *RHS; CmpInst::Predicate Pred; auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V))); @@ -623,157 +614,89 @@ static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) { return false; } -static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp, - KnownBits &Known, unsigned Depth, +static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred, + Value *LHS, Value *RHS, KnownBits &Known, const SimplifyQuery &Q) { + if (RHS->getType()->isPointerTy()) { + // Handle comparison of pointer to null explicitly, as it will not be + // covered by the m_APInt() logic below. + if (LHS == V && match(RHS, m_Zero())) { + switch (Pred) { + case ICmpInst::ICMP_EQ: + Known.setAllZero(); + break; + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_SGT: + Known.makeNonNegative(); + break; + case ICmpInst::ICMP_SLT: + Known.makeNegative(); + break; + default: + break; + } + } + return; + } + unsigned BitWidth = Known.getBitWidth(); - // We are attempting to compute known bits for the operands of an assume. 
- // Do not try to use other assumptions for those recursive calls because - // that can lead to mutual recursion and a compile-time explosion. - // An example of the mutual recursion: computeKnownBits can call - // isKnownNonZero which calls computeKnownBitsFromAssume (this function) - // and so on. - SimplifyQuery QueryNoAC = Q; - QueryNoAC.AC = nullptr; - - // Note that ptrtoint may change the bitwidth. - Value *A, *B; auto m_V = m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V))); - CmpInst::Predicate Pred; - uint64_t C; - switch (Cmp->getPredicate()) { + const APInt *Mask, *C; + uint64_t ShAmt; + switch (Pred) { case ICmpInst::ICMP_EQ: - // assume(v = a) - if (match(Cmp, m_c_ICmp(Pred, m_V, m_Value(A)))) { - KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC); - Known = Known.unionWith(RHSKnown); - // assume(v & b = a) - } else if (match(Cmp, - m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A)))) { - KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC); - KnownBits MaskKnown = computeKnownBits(B, Depth + 1, QueryNoAC); - - // For those bits in the mask that are known to be one, we can propagate - // known bits from the RHS to V. - Known.Zero |= RHSKnown.Zero & MaskKnown.One; - Known.One |= RHSKnown.One & MaskKnown.One; - // assume(~(v & b) = a) - } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), - m_Value(A)))) { - KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC); - KnownBits MaskKnown = computeKnownBits(B, Depth + 1, QueryNoAC); - - // For those bits in the mask that are known to be one, we can propagate - // inverted known bits from the RHS to V. - Known.Zero |= RHSKnown.One & MaskKnown.One; - Known.One |= RHSKnown.Zero & MaskKnown.One; - // assume(v | b = a) - } else if (match(Cmp, - m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A)))) { - KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC); - KnownBits BKnown = computeKnownBits(B, Depth + 1, QueryNoAC); - - // For those bits in B that are known to be zero, we can propagate known - // bits from the RHS to V. - Known.Zero |= RHSKnown.Zero & BKnown.Zero; - Known.One |= RHSKnown.One & BKnown.Zero; - // assume(~(v | b) = a) - } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), - m_Value(A)))) { - KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC); - KnownBits BKnown = computeKnownBits(B, Depth + 1, QueryNoAC); - - // For those bits in B that are known to be zero, we can propagate - // inverted known bits from the RHS to V. - Known.Zero |= RHSKnown.One & BKnown.Zero; - Known.One |= RHSKnown.Zero & BKnown.Zero; - // assume(v ^ b = a) - } else if (match(Cmp, - m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A)))) { - KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC); - KnownBits BKnown = computeKnownBits(B, Depth + 1, QueryNoAC); - - // For those bits in B that are known to be zero, we can propagate known - // bits from the RHS to V. For those bits in B that are known to be one, - // we can propagate inverted known bits from the RHS to V. 
- Known.Zero |= RHSKnown.Zero & BKnown.Zero; - Known.One |= RHSKnown.One & BKnown.Zero; - Known.Zero |= RHSKnown.One & BKnown.One; - Known.One |= RHSKnown.Zero & BKnown.One; - // assume(~(v ^ b) = a) - } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))), - m_Value(A)))) { - KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC); - KnownBits BKnown = computeKnownBits(B, Depth + 1, QueryNoAC); - - // For those bits in B that are known to be zero, we can propagate - // inverted known bits from the RHS to V. For those bits in B that are - // known to be one, we can propagate known bits from the RHS to V. - Known.Zero |= RHSKnown.One & BKnown.Zero; - Known.One |= RHSKnown.Zero & BKnown.Zero; - Known.Zero |= RHSKnown.Zero & BKnown.One; - Known.One |= RHSKnown.One & BKnown.One; - // assume(v << c = a) - } else if (match(Cmp, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), - m_Value(A))) && - C < BitWidth) { - KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC); - - // For those bits in RHS that are known, we can propagate them to known - // bits in V shifted to the right by C. - RHSKnown.Zero.lshrInPlace(C); - RHSKnown.One.lshrInPlace(C); + // assume(V = C) + if (match(LHS, m_V) && match(RHS, m_APInt(C))) { + Known = Known.unionWith(KnownBits::makeConstant(*C)); + // assume(V & Mask = C) + } else if (match(LHS, m_And(m_V, m_APInt(Mask))) && + match(RHS, m_APInt(C))) { + // For one bits in Mask, we can propagate bits from C to V. + Known.Zero |= ~*C & *Mask; + Known.One |= *C & *Mask; + // assume(V | Mask = C) + } else if (match(LHS, m_Or(m_V, m_APInt(Mask))) && match(RHS, m_APInt(C))) { + // For zero bits in Mask, we can propagate bits from C to V. + Known.Zero |= ~*C & ~*Mask; + Known.One |= *C & ~*Mask; + // assume(V ^ Mask = C) + } else if (match(LHS, m_Xor(m_V, m_APInt(Mask))) && + match(RHS, m_APInt(C))) { + // Equivalent to assume(V == Mask ^ C) + Known = Known.unionWith(KnownBits::makeConstant(*C ^ *Mask)); + // assume(V << ShAmt = C) + } else if (match(LHS, m_Shl(m_V, m_ConstantInt(ShAmt))) && + match(RHS, m_APInt(C)) && ShAmt < BitWidth) { + // For those bits in C that are known, we can propagate them to known + // bits in V shifted to the right by ShAmt. + KnownBits RHSKnown = KnownBits::makeConstant(*C); + RHSKnown.Zero.lshrInPlace(ShAmt); + RHSKnown.One.lshrInPlace(ShAmt); Known = Known.unionWith(RHSKnown); - // assume(~(v << c) = a) - } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), - m_Value(A))) && - C < BitWidth) { - KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC); - // For those bits in RHS that are known, we can propagate them inverted - // to known bits in V shifted to the right by C. - RHSKnown.One.lshrInPlace(C); - Known.Zero |= RHSKnown.One; - RHSKnown.Zero.lshrInPlace(C); - Known.One |= RHSKnown.Zero; - // assume(v >> c = a) - } else if (match(Cmp, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)), - m_Value(A))) && - C < BitWidth) { - KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC); + // assume(V >> ShAmt = C) + } else if (match(LHS, m_Shr(m_V, m_ConstantInt(ShAmt))) && + match(RHS, m_APInt(C)) && ShAmt < BitWidth) { + KnownBits RHSKnown = KnownBits::makeConstant(*C); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. 
- Known.Zero |= RHSKnown.Zero << C; - Known.One |= RHSKnown.One << C; - // assume(~(v >> c) = a) - } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))), - m_Value(A))) && - C < BitWidth) { - KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC); - // For those bits in RHS that are known, we can propagate them inverted - // to known bits in V shifted to the right by C. - Known.Zero |= RHSKnown.One << C; - Known.One |= RHSKnown.Zero << C; + Known.Zero |= RHSKnown.Zero << ShAmt; + Known.One |= RHSKnown.One << ShAmt; } break; case ICmpInst::ICMP_NE: { - // assume (v & b != 0) where b is a power of 2 + // assume (V & B != 0) where B is a power of 2 const APInt *BPow2; - if (match(Cmp, m_ICmp(Pred, m_c_And(m_V, m_Power2(BPow2)), m_Zero()))) { + if (match(LHS, m_And(m_V, m_Power2(BPow2))) && match(RHS, m_Zero())) Known.One |= *BPow2; - } break; } default: const APInt *Offset = nullptr; - if (match(Cmp, m_ICmp(Pred, m_CombineOr(m_V, m_Add(m_V, m_APInt(Offset))), - m_Value(A)))) { - KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC); - ConstantRange RHSRange = - ConstantRange::fromKnownBits(RHSKnown, Cmp->isSigned()); - ConstantRange LHSRange = - ConstantRange::makeAllowedICmpRegion(Pred, RHSRange); + if (match(LHS, m_CombineOr(m_V, m_Add(m_V, m_APInt(Offset)))) && + match(RHS, m_APInt(C))) { + ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, *C); if (Offset) LHSRange = LHSRange.sub(*Offset); Known = Known.unionWith(LHSRange.toKnownBits()); @@ -782,41 +705,67 @@ static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp, } } -void llvm::computeKnownBitsFromAssume(const Value *V, KnownBits &Known, +void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known, unsigned Depth, const SimplifyQuery &Q) { - // Use of assumptions is context-sensitive. If we don't have a context, we - // cannot use them! - if (!Q.AC || !Q.CxtI) + if (!Q.CxtI) return; - unsigned BitWidth = Known.getBitWidth(); + if (Q.DC && Q.DT) { + // Handle dominating conditions. + for (BranchInst *BI : Q.DC->conditionsFor(V)) { + auto *Cmp = dyn_cast<ICmpInst>(BI->getCondition()); + if (!Cmp) + continue; - // Refine Known set if the pointer alignment is set by assume bundles. - if (V->getType()->isPointerTy()) { - if (RetainedKnowledge RK = getKnowledgeValidInContext( - V, { Attribute::Alignment }, Q.CxtI, Q.DT, Q.AC)) { - if (isPowerOf2_64(RK.ArgValue)) - Known.Zero.setLowBits(Log2_64(RK.ArgValue)); + BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0)); + if (Q.DT->dominates(Edge0, Q.CxtI->getParent())) + computeKnownBitsFromCmp(V, Cmp->getPredicate(), Cmp->getOperand(0), + Cmp->getOperand(1), Known, Q); + + BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1)); + if (Q.DT->dominates(Edge1, Q.CxtI->getParent())) + computeKnownBitsFromCmp(V, Cmp->getInversePredicate(), + Cmp->getOperand(0), Cmp->getOperand(1), Known, + Q); } + + if (Known.hasConflict()) + Known.resetAll(); } + if (!Q.AC) + return; + + unsigned BitWidth = Known.getBitWidth(); + // Note that the patterns below need to be kept in sync with the code // in AssumptionCache::updateAffectedValues. 
- for (auto &AssumeVH : Q.AC->assumptionsFor(V)) { - if (!AssumeVH) + for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) { + if (!Elem.Assume) continue; - CallInst *I = cast<CallInst>(AssumeVH); + + AssumeInst *I = cast<AssumeInst>(Elem.Assume); assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() && "Got assumption for the wrong function!"); + if (Elem.Index != AssumptionCache::ExprResultIdx) { + if (!V->getType()->isPointerTy()) + continue; + if (RetainedKnowledge RK = getKnowledgeFromBundle( + *I, I->bundle_op_info_begin()[Elem.Index])) { + if (RK.WasOn == V && RK.AttrKind == Attribute::Alignment && + isPowerOf2_64(RK.ArgValue) && + isValidAssumeForContext(I, Q.CxtI, Q.DT)) + Known.Zero.setLowBits(Log2_64(RK.ArgValue)); + } + continue; + } + // Warning: This loop can end up being somewhat performance sensitive. // We're running this loop for once for each value queried resulting in a // runtime of ~O(#assumes * #values). - assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && - "must be an assume intrinsic"); - Value *Arg = I->getArgOperand(0); if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { @@ -844,7 +793,8 @@ void llvm::computeKnownBitsFromAssume(const Value *V, KnownBits &Known, if (!isValidAssumeForContext(I, Q.CxtI, Q.DT)) continue; - computeKnownBitsFromCmp(V, Cmp, Known, Depth, Q); + computeKnownBitsFromCmp(V, Cmp->getPredicate(), Cmp->getOperand(0), + Cmp->getOperand(1), Known, Q); } // Conflicting assumption: Undefined behavior will occur on this execution @@ -948,18 +898,17 @@ getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts, } // Public so this can be used in `SimplifyDemandedUseBits`. -KnownBits llvm::analyzeKnownBitsFromAndXorOr( - const Operator *I, const KnownBits &KnownLHS, const KnownBits &KnownRHS, - unsigned Depth, const DataLayout &DL, AssumptionCache *AC, - const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) { +KnownBits llvm::analyzeKnownBitsFromAndXorOr(const Operator *I, + const KnownBits &KnownLHS, + const KnownBits &KnownRHS, + unsigned Depth, + const SimplifyQuery &SQ) { auto *FVTy = dyn_cast<FixedVectorType>(I->getType()); APInt DemandedElts = FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1); return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, Depth, - SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC, - safeCxtI(I, CxtI), - UseInstrInfo)); + SQ); } ConstantRange llvm::getVScaleRange(const Function *F, unsigned BitWidth) { @@ -1101,6 +1050,9 @@ static void computeKnownBitsFromOperator(const Operator *I, assert(SrcBitWidth && "SrcBitWidth can't be zero"); Known = Known.anyextOrTrunc(SrcBitWidth); computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + if (auto *Inst = dyn_cast<PossiblyNonNegInst>(I); + Inst && Inst->hasNonNeg() && !Known.isNegative()) + Known.makeNonNegative(); Known = Known.zextOrTrunc(BitWidth); break; } @@ -1459,11 +1411,13 @@ static void computeKnownBitsFromOperator(const Operator *I, // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. + // TODO: See if we can base recursion limiter on number of incoming phi + // edges so we don't overly clamp analysis. computeKnownBits(IncValue, Known2, MaxAnalysisRecursionDepth - 1, RecQ); - // If this failed, see if we can use a conditional branch into the phi + // See if we can further use a conditional branch into the phi // to help us determine the range of the value. 
- if (Known2.isUnknown()) { + if (!Known2.isConstant()) { ICmpInst::Predicate Pred; const APInt *RHSC; BasicBlock *TrueSucc, *FalseSucc; @@ -1476,21 +1430,19 @@ static void computeKnownBitsFromOperator(const Operator *I, // If we're using the false successor, invert the predicate. if (FalseSucc == P->getParent()) Pred = CmpInst::getInversePredicate(Pred); - - switch (Pred) { - case CmpInst::Predicate::ICMP_EQ: - Known2 = KnownBits::makeConstant(*RHSC); - break; - case CmpInst::Predicate::ICMP_ULE: - Known2.Zero.setHighBits(RHSC->countl_zero()); - break; - case CmpInst::Predicate::ICMP_ULT: - Known2.Zero.setHighBits((*RHSC - 1).countl_zero()); - break; - default: - // TODO - add additional integer predicate handling. + // Get the knownbits implied by the incoming phi condition. + auto CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC); + KnownBits KnownUnion = Known2.unionWith(CR.toKnownBits()); + // We can have conflicts here if we are analyzing deadcode (its + // impossible for us reach this BB based the icmp). + if (KnownUnion.hasConflict()) { + // No reason to continue analyzing in a known dead region, so + // just resetAll and break. This will cause us to also exit the + // outer loop. + Known.resetAll(); break; } + Known2 = KnownUnion; } } } @@ -1513,8 +1465,10 @@ static void computeKnownBitsFromOperator(const Operator *I, Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range)) computeKnownBitsFromRangeMetadata(*MD, Known); if (const Value *RV = cast<CallBase>(I)->getReturnedArgOperand()) { - computeKnownBits(RV, Known2, Depth + 1, Q); - Known = Known.unionWith(Known2); + if (RV->getType() == I->getType()) { + computeKnownBits(RV, Known2, Depth + 1, Q); + Known = Known.unionWith(Known2); + } } if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { @@ -1635,8 +1589,8 @@ static void computeKnownBitsFromOperator(const Operator *I, const Value *Mask = I->getOperand(1); Known2 = KnownBits(Mask->getType()->getScalarSizeInBits()); computeKnownBits(Mask, Known2, Depth + 1, Q); - // This is basically a pointer typed and. - Known &= Known2.zextOrTrunc(Known.getBitWidth()); + // TODO: 1-extend would be more precise. + Known &= Known2.anyextOrTrunc(BitWidth); break; } case Intrinsic::x86_sse42_crc32_64_64: @@ -1644,7 +1598,7 @@ static void computeKnownBitsFromOperator(const Operator *I, break; case Intrinsic::riscv_vsetvli: case Intrinsic::riscv_vsetvlimax: - // Assume that VL output is >= 65536. + // Assume that VL output is <= 65536. // TODO: Take SEW and LMUL into account. if (BitWidth > 17) Known.Zero.setBitsFrom(17); @@ -1778,17 +1732,17 @@ static void computeKnownBitsFromOperator(const Operator *I, /// Determine which bits of V are known to be either zero or one and return /// them. -KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts, - unsigned Depth, const SimplifyQuery &Q) { +KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts, + unsigned Depth, const SimplifyQuery &Q) { KnownBits Known(getBitWidth(V->getType(), Q.DL)); - computeKnownBits(V, DemandedElts, Known, Depth, Q); + ::computeKnownBits(V, DemandedElts, Known, Depth, Q); return Known; } /// Determine which bits of V are known to be either zero or one and return /// them. 
-KnownBits computeKnownBits(const Value *V, unsigned Depth, - const SimplifyQuery &Q) { +KnownBits llvm::computeKnownBits(const Value *V, unsigned Depth, + const SimplifyQuery &Q) { KnownBits Known(getBitWidth(V->getType(), Q.DL)); computeKnownBits(V, Known, Depth, Q); return Known; @@ -1884,6 +1838,8 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts, if (!DemandedElts[i]) continue; Constant *Element = CV->getAggregateElement(i); + if (isa<PoisonValue>(Element)) + continue; auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element); if (!ElementCI) { Known.resetAll(); @@ -1932,11 +1888,11 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts, Known.Zero.setLowBits(Log2(Alignment)); } - // computeKnownBitsFromAssume strictly refines Known. + // computeKnownBitsFromContext strictly refines Known. // Therefore, we run them after computeKnownBitsFromOperator. - // Check whether a nearby assume intrinsic can determine some known bits. - computeKnownBitsFromAssume(V, Known, Depth, Q); + // Check whether we can determine known bits from context such as assumes. + computeKnownBitsFromContext(V, Known, Depth, Q); assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); } @@ -2006,11 +1962,17 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, const SimplifyQuery &Q) { assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); - // Attempt to match against constants. - if (OrZero && match(V, m_Power2OrZero())) - return true; - if (match(V, m_Power2())) - return true; + if (isa<Constant>(V)) + return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2()); + + // i1 is by definition a power of 2 or zero. + if (OrZero && V->getType()->getScalarSizeInBits() == 1) + return true; + + auto *I = dyn_cast<Instruction>(V); + if (!I) + return false; + if (Q.CxtI && match(V, m_VScale())) { const Function *F = Q.CxtI->getFunction(); // The vscale_range indicates vscale is a power-of-two. @@ -2019,70 +1981,71 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, // 1 << X is clearly a power of two if the one is not shifted off the end. If // it is shifted off the end then the result is undefined. - if (match(V, m_Shl(m_One(), m_Value()))) + if (match(I, m_Shl(m_One(), m_Value()))) return true; // (signmask) >>l X is clearly a power of two if the one is not shifted off // the bottom. If it is shifted off the bottom then the result is undefined. - if (match(V, m_LShr(m_SignMask(), m_Value()))) + if (match(I, m_LShr(m_SignMask(), m_Value()))) return true; // The remaining tests are all recursive, so bail out if we hit the limit. if (Depth++ == MaxAnalysisRecursionDepth) return false; - Value *X = nullptr, *Y = nullptr; - // A shift left or a logical shift right of a power of two is a power of two - // or zero. - if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || - match(V, m_LShr(m_Value(X), m_Value())))) - return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q); - - if (const ZExtInst *ZI = dyn_cast<ZExtInst>(V)) - return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q); - - if (const SelectInst *SI = dyn_cast<SelectInst>(V)) - return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) && - isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q); - - // Peek through min/max. 
- if (match(V, m_MaxOrMin(m_Value(X), m_Value(Y)))) { - return isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q) && - isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q); - } - - if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) { + switch (I->getOpcode()) { + case Instruction::ZExt: + return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); + case Instruction::Trunc: + return OrZero && isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); + case Instruction::Shl: + if (OrZero || Q.IIQ.hasNoUnsignedWrap(I) || Q.IIQ.hasNoSignedWrap(I)) + return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); + return false; + case Instruction::LShr: + if (OrZero || Q.IIQ.isExact(cast<BinaryOperator>(I))) + return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); + return false; + case Instruction::UDiv: + if (Q.IIQ.isExact(cast<BinaryOperator>(I))) + return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q); + return false; + case Instruction::Mul: + return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) && + isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q) && + (OrZero || isKnownNonZero(I, Depth, Q)); + case Instruction::And: // A power of two and'd with anything is a power of two or zero. - if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q) || - isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q)) + if (OrZero && + (isKnownToBeAPowerOfTwo(I->getOperand(1), /*OrZero*/ true, Depth, Q) || + isKnownToBeAPowerOfTwo(I->getOperand(0), /*OrZero*/ true, Depth, Q))) return true; // X & (-X) is always a power of two or zero. - if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X)))) - return true; + if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) || + match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0))))) + return OrZero || isKnownNonZero(I->getOperand(0), Depth, Q); return false; - } - - // Adding a power-of-two or zero to the same power-of-two or zero yields - // either the original power-of-two, a larger power-of-two or zero. - if (match(V, m_Add(m_Value(X), m_Value(Y)))) { + case Instruction::Add: { + // Adding a power-of-two or zero to the same power-of-two or zero yields + // either the original power-of-two, a larger power-of-two or zero. 
const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V); if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) || Q.IIQ.hasNoSignedWrap(VOBO)) { - if (match(X, m_And(m_Specific(Y), m_Value())) || - match(X, m_And(m_Value(), m_Specific(Y)))) - if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q)) - return true; - if (match(Y, m_And(m_Specific(X), m_Value())) || - match(Y, m_And(m_Value(), m_Specific(X)))) - if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q)) - return true; + if (match(I->getOperand(0), + m_c_And(m_Specific(I->getOperand(1)), m_Value())) && + isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q)) + return true; + if (match(I->getOperand(1), + m_c_And(m_Specific(I->getOperand(0)), m_Value())) && + isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q)) + return true; unsigned BitWidth = V->getType()->getScalarSizeInBits(); KnownBits LHSBits(BitWidth); - computeKnownBits(X, LHSBits, Depth, Q); + computeKnownBits(I->getOperand(0), LHSBits, Depth, Q); KnownBits RHSBits(BitWidth); - computeKnownBits(Y, RHSBits, Depth, Q); + computeKnownBits(I->getOperand(1), RHSBits, Depth, Q); // If i8 V is a power of two or zero: // ZeroBits: 1 1 1 0 1 1 1 1 // ~ZeroBits: 0 0 0 1 0 0 0 0 @@ -2092,11 +2055,16 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue()) return true; } + return false; } - - // A PHI node is power of two if all incoming values are power of two, or if - // it is an induction variable where in each step its value is a power of two. - if (const PHINode *PN = dyn_cast<PHINode>(V)) { + case Instruction::Select: + return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) && + isKnownToBeAPowerOfTwo(I->getOperand(2), OrZero, Depth, Q); + case Instruction::PHI: { + // A PHI node is power of two if all incoming values are power of two, or if + // it is an induction variable where in each step its value is a power of + // two. + auto *PN = cast<PHINode>(I); SimplifyQuery RecQ = Q; // Check if it is an induction variable and always power of two. @@ -2117,17 +2085,36 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ); }); } - - // An exact divide or right shift can only shift off zero bits, so the result - // is a power of two only if the first operand is a power of two and not - // copying a sign bit (sdiv int_min, 2). - if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) || - match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) { - return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero, - Depth, Q); + case Instruction::Invoke: + case Instruction::Call: { + if (auto *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::umax: + case Intrinsic::smax: + case Intrinsic::umin: + case Intrinsic::smin: + return isKnownToBeAPowerOfTwo(II->getArgOperand(1), OrZero, Depth, Q) && + isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q); + // bswap/bitreverse just move around bits, but don't change any 1s/0s + // thus dont change pow2/non-pow2 status. + case Intrinsic::bitreverse: + case Intrinsic::bswap: + return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q); + case Intrinsic::fshr: + case Intrinsic::fshl: + // If Op0 == Op1, this is a rotate. 
is_pow2(rotate(x, y)) == is_pow2(x) + if (II->getArgOperand(0) == II->getArgOperand(1)) + return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q); + break; + default: + break; + } + } + return false; + } + default: + return false; } - - return false; } /// Test whether a GEP's result is known to be non-null. @@ -2231,6 +2218,11 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, return true; } + if ((match(U, m_IDiv(m_Value(), m_Specific(V))) || + match(U, m_IRem(m_Value(), m_Specific(V)))) && + isValidAssumeForContext(cast<Instruction>(U), CtxI, DT)) + return true; + // Consider only compare instructions uniquely controlling a branch Value *RHS; CmpInst::Predicate Pred; @@ -2447,6 +2439,9 @@ static bool isKnownNonZeroFromOperator(const Operator *I, unsigned Depth, const SimplifyQuery &Q) { unsigned BitWidth = getBitWidth(I->getType()->getScalarType(), Q.DL); switch (I->getOpcode()) { + case Instruction::Alloca: + // Alloca never returns null, malloc might. + return I->getType()->getPointerAddressSpace() == 0; case Instruction::GetElementPtr: if (I->getType()->isPointerTy()) return isGEPKnownNonNull(cast<GEPOperator>(I), Depth, Q); @@ -2545,26 +2540,33 @@ static bool isKnownNonZeroFromOperator(const Operator *I, return isNonZeroShift(I, DemandedElts, Depth, Q, Known); } case Instruction::UDiv: - case Instruction::SDiv: + case Instruction::SDiv: { // X / Y // div exact can only produce a zero if the dividend is zero. if (cast<PossiblyExactOperator>(I)->isExact()) return isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q); - if (I->getOpcode() == Instruction::UDiv) { - std::optional<bool> XUgeY; - KnownBits XKnown = - computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q); - if (!XKnown.isUnknown()) { - KnownBits YKnown = - computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q); - // If X u>= Y then div is non zero (0/0 is UB). - XUgeY = KnownBits::uge(XKnown, YKnown); - } - // If X is total unknown or X u< Y we won't be able to prove non-zero - // with compute known bits so just return early. - return XUgeY && *XUgeY; + + std::optional<bool> XUgeY; + KnownBits XKnown = + computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q); + // If X is fully unknown we won't be able to figure anything out so don't + // both computing knownbits for Y. + if (XKnown.isUnknown()) + return false; + + KnownBits YKnown = + computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q); + if (I->getOpcode() == Instruction::SDiv) { + // For signed division need to compare abs value of the operands. + XKnown = XKnown.abs(/*IntMinIsPoison*/ false); + YKnown = YKnown.abs(/*IntMinIsPoison*/ false); } - break; + // If X u>= Y then div is non zero (0/0 is UB). + XUgeY = KnownBits::uge(XKnown, YKnown); + // If X is total unknown or X u< Y we won't be able to prove non-zero + // with compute known bits so just return early. + return XUgeY && *XUgeY; + } case Instruction::Add: { // X + Y. @@ -2651,6 +2653,23 @@ static bool isKnownNonZeroFromOperator(const Operator *I, if (U.get() == PN) return true; RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator(); + // Check if the branch on the phi excludes zero. + ICmpInst::Predicate Pred; + Value *X; + BasicBlock *TrueSucc, *FalseSucc; + if (match(RecQ.CxtI, + m_Br(m_c_ICmp(Pred, m_Specific(U.get()), m_Value(X)), + m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) { + // Check for cases of duplicate successors. 
+ if ((TrueSucc == PN->getParent()) != (FalseSucc == PN->getParent())) { + // If we're using the false successor, invert the predicate. + if (FalseSucc == PN->getParent()) + Pred = CmpInst::getInversePredicate(Pred); + if (cmpExcludesZero(Pred, X)) + return true; + } + } + // Finally recurse on the edge and check it directly. return isKnownNonZero(U.get(), DemandedElts, NewDepth, RecQ); }); } @@ -2672,15 +2691,33 @@ static bool isKnownNonZeroFromOperator(const Operator *I, return isKnownNonZero(I->getOperand(0), Depth, Q) && isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT, Depth); - case Instruction::Load: - // A Load tagged with nonnull metadata is never null. - if (Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_nonnull)) - return true; + case Instruction::Load: { + auto *LI = cast<LoadInst>(I); + // A Load tagged with nonnull or dereferenceable with null pointer undefined + // is never null. + if (auto *PtrT = dyn_cast<PointerType>(I->getType())) + if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull) || + (Q.IIQ.getMetadata(LI, LLVMContext::MD_dereferenceable) && + !NullPointerIsDefined(LI->getFunction(), PtrT->getAddressSpace()))) + return true; // No need to fall through to computeKnownBits as range metadata is already // handled in isKnownNonZero. return false; + } case Instruction::Call: + case Instruction::Invoke: + if (I->getType()->isPointerTy()) { + const auto *Call = cast<CallBase>(I); + if (Call->isReturnNonNull()) + return true; + if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) + return isKnownNonZero(RP, Depth, Q); + } else if (const Value *RV = cast<CallBase>(I)->getReturnedArgOperand()) { + if (RV->getType() == I->getType() && isKnownNonZero(RV, Depth, Q)) + return true; + } + if (auto *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { case Intrinsic::sshl_sat: @@ -2741,8 +2778,10 @@ static bool isKnownNonZeroFromOperator(const Operator *I, default: break; } + break; } - break; + + return false; } KnownBits Known(BitWidth); @@ -2831,10 +2870,6 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, // Check for pointer simplifications. if (PointerType *PtrTy = dyn_cast<PointerType>(V->getType())) { - // Alloca never returns null, malloc might. - if (isa<AllocaInst>(V) && PtrTy->getAddressSpace() == 0) - return true; - // A byval, inalloca may not be null in a non-default addres space. A // nonnull argument is assumed never 0. 
if (const Argument *A = dyn_cast<Argument>(V)) { @@ -2843,13 +2878,6 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth, A->hasNonNullAttr())) return true; } - - if (const auto *Call = dyn_cast<CallBase>(V)) { - if (Call->isReturnNonNull()) - return true; - if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true)) - return isKnownNonZero(RP, Depth, Q); - } } if (const auto *I = dyn_cast<Operator>(V)) @@ -3049,6 +3077,77 @@ static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2, return true; } +static bool isNonEqualSelect(const Value *V1, const Value *V2, unsigned Depth, + const SimplifyQuery &Q) { + const SelectInst *SI1 = dyn_cast<SelectInst>(V1); + if (!SI1) + return false; + + if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) { + const Value *Cond1 = SI1->getCondition(); + const Value *Cond2 = SI2->getCondition(); + if (Cond1 == Cond2) + return isKnownNonEqual(SI1->getTrueValue(), SI2->getTrueValue(), + Depth + 1, Q) && + isKnownNonEqual(SI1->getFalseValue(), SI2->getFalseValue(), + Depth + 1, Q); + } + return isKnownNonEqual(SI1->getTrueValue(), V2, Depth + 1, Q) && + isKnownNonEqual(SI1->getFalseValue(), V2, Depth + 1, Q); +} + +// Check to see if A is both a GEP and is the incoming value for a PHI in the +// loop, and B is either a ptr or another GEP. If the PHI has 2 incoming values, +// one of them being the recursive GEP A and the other a ptr at same base and at +// the same/higher offset than B we are only incrementing the pointer further in +// loop if offset of recursive GEP is greater than 0. +static bool isNonEqualPointersWithRecursiveGEP(const Value *A, const Value *B, + const SimplifyQuery &Q) { + if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy()) + return false; + + auto *GEPA = dyn_cast<GEPOperator>(A); + if (!GEPA || GEPA->getNumIndices() != 1 || !isa<Constant>(GEPA->idx_begin())) + return false; + + // Handle 2 incoming PHI values with one being a recursive GEP. + auto *PN = dyn_cast<PHINode>(GEPA->getPointerOperand()); + if (!PN || PN->getNumIncomingValues() != 2) + return false; + + // Search for the recursive GEP as an incoming operand, and record that as + // Step. + Value *Start = nullptr; + Value *Step = const_cast<Value *>(A); + if (PN->getIncomingValue(0) == Step) + Start = PN->getIncomingValue(1); + else if (PN->getIncomingValue(1) == Step) + Start = PN->getIncomingValue(0); + else + return false; + + // Other incoming node base should match the B base. + // StartOffset >= OffsetB && StepOffset > 0? + // StartOffset <= OffsetB && StepOffset < 0? + // Is non-equal if above are true. + // We use stripAndAccumulateInBoundsConstantOffsets to restrict the + // optimisation to inbounds GEPs only. + unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Start->getType()); + APInt StartOffset(IndexWidth, 0); + Start = Start->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StartOffset); + APInt StepOffset(IndexWidth, 0); + Step = Step->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StepOffset); + + // Check if Base Pointer of Step matches the PHI. + if (Step != PN) + return false; + APInt OffsetB(IndexWidth, 0); + B = B->stripAndAccumulateInBoundsConstantOffsets(Q.DL, OffsetB); + return Start == B && + ((StartOffset.sge(OffsetB) && StepOffset.isStrictlyPositive()) || + (StartOffset.sle(OffsetB) && StepOffset.isNegative())); +} + /// Return true if it is known that V1 != V2. 
static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth, const SimplifyQuery &Q) { @@ -3098,23 +3197,15 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth, Known2.Zero.intersects(Known1.One)) return true; } - return false; -} -/// Return true if 'V & Mask' is known to be zero. We use this predicate to -/// simplify operations downstream. Mask is known to be zero for bits that V -/// cannot have. -/// -/// This function is defined on values with integer type, values with pointer -/// type, and vectors of integers. In the case -/// where V is a vector, the mask, known zero, and known one values are the -/// same width as the vector element, and the bit is set only if it is true -/// for all of the elements in the vector. -bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth, - const SimplifyQuery &Q) { - KnownBits Known(Mask.getBitWidth()); - computeKnownBits(V, Known, Depth, Q); - return Mask.isSubsetOf(Known.Zero); + if (isNonEqualSelect(V1, V2, Depth, Q) || isNonEqualSelect(V2, V1, Depth, Q)) + return true; + + if (isNonEqualPointersWithRecursiveGEP(V1, V2, Q) || + isNonEqualPointersWithRecursiveGEP(V2, V1, Q)) + return true; + + return false; } // Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow). @@ -3641,6 +3732,8 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB, return Intrinsic::not_intrinsic; } +/// Deprecated, use computeKnownFPClass instead. +/// /// If \p SignBitOnly is true, test for a known 0 sign bit rather than a /// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign /// bit despite comparing equal. @@ -3832,13 +3925,9 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V, return false; } -bool llvm::CannotBeOrderedLessThanZero(const Value *V, const DataLayout &DL, - const TargetLibraryInfo *TLI) { - return cannotBeOrderedLessThanZeroImpl(V, DL, TLI, false, 0); -} - bool llvm::SignBitMustBeZero(const Value *V, const DataLayout &DL, const TargetLibraryInfo *TLI) { + // FIXME: Use computeKnownFPClass and pass all arguments return cannotBeOrderedLessThanZeroImpl(V, DL, TLI, true, 0); } @@ -3941,9 +4030,15 @@ std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred, Value *LHS, Value *RHS, bool LookThroughSrc) { const APFloat *ConstRHS; - if (!match(RHS, m_APFloat(ConstRHS))) - return {nullptr, fcNone}; + if (!match(RHS, m_APFloatAllowUndef(ConstRHS))) + return {nullptr, fcAllFlags}; + return fcmpToClassTest(Pred, F, LHS, ConstRHS, LookThroughSrc); +} + +std::pair<Value *, FPClassTest> +llvm::fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS, + const APFloat *ConstRHS, bool LookThroughSrc) { // fcmp ord x, zero|normal|subnormal|inf -> ~fcNan if (Pred == FCmpInst::FCMP_ORD && !ConstRHS->isNaN()) return {LHS, ~fcNan}; @@ -3958,7 +4053,7 @@ std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred, // TODO: Handle DAZ by expanding masks to cover subnormal cases. 
if (Pred != FCmpInst::FCMP_ORD && Pred != FCmpInst::FCMP_UNO && !inputDenormalIsIEEE(F, LHS->getType())) - return {nullptr, fcNone}; + return {nullptr, fcAllFlags}; switch (Pred) { case FCmpInst::FCMP_OEQ: // Match x == 0.0 @@ -3995,7 +4090,7 @@ std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred, break; } - return {nullptr, fcNone}; + return {nullptr, fcAllFlags}; } Value *Src = LHS; @@ -4078,8 +4173,14 @@ std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred, } case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_ULT: { - if (ConstRHS->isNegative()) // TODO - return {nullptr, fcNone}; + if (ConstRHS->isNegative()) { + // fcmp oge x, -inf -> ~fcNan + // fcmp oge fabs(x), -inf -> ~fcNan + // fcmp ult x, -inf -> fcNan + // fcmp ult fabs(x), -inf -> fcNan + Mask = ~fcNan; + break; + } // fcmp oge fabs(x), +inf -> fcInf // fcmp oge x, +inf -> fcPosInf @@ -4092,15 +4193,21 @@ std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred, } case FCmpInst::FCMP_OGT: case FCmpInst::FCMP_ULE: { - if (ConstRHS->isNegative()) - return {nullptr, fcNone}; + if (ConstRHS->isNegative()) { + // fcmp ogt x, -inf -> fcmp one x, -inf + // fcmp ogt fabs(x), -inf -> fcmp ord x, x + // fcmp ule x, -inf -> fcmp ueq x, -inf + // fcmp ule fabs(x), -inf -> fcmp uno x, x + Mask = IsFabs ? ~fcNan : ~(fcNegInf | fcNan); + break; + } // No value is ordered and greater than infinity. Mask = fcNone; break; } default: - return {nullptr, fcNone}; + return {nullptr, fcAllFlags}; } } else if (ConstRHS->isSmallestNormalized() && !ConstRHS->isNegative()) { // Match pattern that's used in __builtin_isnormal. @@ -4129,14 +4236,14 @@ std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred, break; } default: - return {nullptr, fcNone}; + return {nullptr, fcAllFlags}; } } else if (ConstRHS->isNaN()) { // fcmp o__ x, nan -> false // fcmp u__ x, nan -> true Mask = fcNone; } else - return {nullptr, fcNone}; + return {nullptr, fcAllFlags}; // Invert the comparison for the unordered cases. if (FCmpInst::isUnordered(Pred)) @@ -4369,427 +4476,421 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, break; } case Instruction::Call: { - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op)) { - const Intrinsic::ID IID = II->getIntrinsicID(); - switch (IID) { - case Intrinsic::fabs: { - if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) { - // If we only care about the sign bit we don't need to inspect the - // operand. - computeKnownFPClass(II->getArgOperand(0), DemandedElts, - InterestedClasses, Known, Depth + 1, Q); - } - - Known.fabs(); - break; - } - case Intrinsic::copysign: { - KnownFPClass KnownSign; - + const CallInst *II = cast<CallInst>(Op); + const Intrinsic::ID IID = II->getIntrinsicID(); + switch (IID) { + case Intrinsic::fabs: { + if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) { + // If we only care about the sign bit we don't need to inspect the + // operand. 
computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, Known, Depth + 1, Q); - computeKnownFPClass(II->getArgOperand(1), DemandedElts, - InterestedClasses, KnownSign, Depth + 1, Q); - Known.copysign(KnownSign); - break; } - case Intrinsic::fma: - case Intrinsic::fmuladd: { - if ((InterestedClasses & fcNegative) == fcNone) - break; - if (II->getArgOperand(0) != II->getArgOperand(1)) - break; - - // The multiply cannot be -0 and therefore the add can't be -0 - Known.knownNot(fcNegZero); + Known.fabs(); + break; + } + case Intrinsic::copysign: { + KnownFPClass KnownSign; - // x * x + y is non-negative if y is non-negative. - KnownFPClass KnownAddend; - computeKnownFPClass(II->getArgOperand(2), DemandedElts, - InterestedClasses, KnownAddend, Depth + 1, Q); + computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, + Known, Depth + 1, Q); + computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses, + KnownSign, Depth + 1, Q); + Known.copysign(KnownSign); + break; + } + case Intrinsic::fma: + case Intrinsic::fmuladd: { + if ((InterestedClasses & fcNegative) == fcNone) + break; - // TODO: Known sign bit with no nans - if (KnownAddend.cannotBeOrderedLessThanZero()) - Known.knownNot(fcNegative); + if (II->getArgOperand(0) != II->getArgOperand(1)) break; - } - case Intrinsic::sqrt: - case Intrinsic::experimental_constrained_sqrt: { - KnownFPClass KnownSrc; - FPClassTest InterestedSrcs = InterestedClasses; - if (InterestedClasses & fcNan) - InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask; - computeKnownFPClass(II->getArgOperand(0), DemandedElts, - InterestedSrcs, KnownSrc, Depth + 1, Q); + // The multiply cannot be -0 and therefore the add can't be -0 + Known.knownNot(fcNegZero); - if (KnownSrc.isKnownNeverPosInfinity()) - Known.knownNot(fcPosInf); - if (KnownSrc.isKnownNever(fcSNan)) - Known.knownNot(fcSNan); - - // Any negative value besides -0 returns a nan. - if (KnownSrc.isKnownNeverNaN() && - KnownSrc.cannotBeOrderedLessThanZero()) - Known.knownNot(fcNan); - - // The only negative value that can be returned is -0 for -0 inputs. - Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal); - - // If the input denormal mode could be PreserveSign, a negative - // subnormal input could produce a negative zero output. - const Function *F = II->getFunction(); - if (Q.IIQ.hasNoSignedZeros(II) || - (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))) { - Known.knownNot(fcNegZero); - if (KnownSrc.isKnownNeverNaN()) - Known.SignBit = false; - } + // x * x + y is non-negative if y is non-negative. + KnownFPClass KnownAddend; + computeKnownFPClass(II->getArgOperand(2), DemandedElts, InterestedClasses, + KnownAddend, Depth + 1, Q); - break; - } - case Intrinsic::sin: - case Intrinsic::cos: { - // Return NaN on infinite inputs. 
- KnownFPClass KnownSrc; - computeKnownFPClass(II->getArgOperand(0), DemandedElts, - InterestedClasses, KnownSrc, Depth + 1, Q); - Known.knownNot(fcInf); - if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity()) - Known.knownNot(fcNan); - break; + // TODO: Known sign bit with no nans + if (KnownAddend.cannotBeOrderedLessThanZero()) + Known.knownNot(fcNegative); + break; + } + case Intrinsic::sqrt: + case Intrinsic::experimental_constrained_sqrt: { + KnownFPClass KnownSrc; + FPClassTest InterestedSrcs = InterestedClasses; + if (InterestedClasses & fcNan) + InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask; + + computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, + KnownSrc, Depth + 1, Q); + + if (KnownSrc.isKnownNeverPosInfinity()) + Known.knownNot(fcPosInf); + if (KnownSrc.isKnownNever(fcSNan)) + Known.knownNot(fcSNan); + + // Any negative value besides -0 returns a nan. + if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) + Known.knownNot(fcNan); + + // The only negative value that can be returned is -0 for -0 inputs. + Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal); + + // If the input denormal mode could be PreserveSign, a negative + // subnormal input could produce a negative zero output. + const Function *F = II->getFunction(); + if (Q.IIQ.hasNoSignedZeros(II) || + (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))) { + Known.knownNot(fcNegZero); + if (KnownSrc.isKnownNeverNaN()) + Known.SignBit = false; } - case Intrinsic::maxnum: - case Intrinsic::minnum: - case Intrinsic::minimum: - case Intrinsic::maximum: { - KnownFPClass KnownLHS, KnownRHS; - computeKnownFPClass(II->getArgOperand(0), DemandedElts, - InterestedClasses, KnownLHS, Depth + 1, Q); - computeKnownFPClass(II->getArgOperand(1), DemandedElts, - InterestedClasses, KnownRHS, Depth + 1, Q); - - bool NeverNaN = - KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN(); - Known = KnownLHS | KnownRHS; - - // If either operand is not NaN, the result is not NaN. - if (NeverNaN && (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)) - Known.knownNot(fcNan); - - if (IID == Intrinsic::maxnum) { - // If at least one operand is known to be positive, the result must be - // positive. - if ((KnownLHS.cannotBeOrderedLessThanZero() && - KnownLHS.isKnownNeverNaN()) || - (KnownRHS.cannotBeOrderedLessThanZero() && - KnownRHS.isKnownNeverNaN())) - Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); - } else if (IID == Intrinsic::maximum) { - // If at least one operand is known to be positive, the result must be - // positive. - if (KnownLHS.cannotBeOrderedLessThanZero() || - KnownRHS.cannotBeOrderedLessThanZero()) - Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); - } else if (IID == Intrinsic::minnum) { - // If at least one operand is known to be negative, the result must be - // negative. - if ((KnownLHS.cannotBeOrderedGreaterThanZero() && - KnownLHS.isKnownNeverNaN()) || - (KnownRHS.cannotBeOrderedGreaterThanZero() && - KnownRHS.isKnownNeverNaN())) - Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); - } else { - // If at least one operand is known to be negative, the result must be - // negative. - if (KnownLHS.cannotBeOrderedGreaterThanZero() || - KnownRHS.cannotBeOrderedGreaterThanZero()) - Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); - } + break; + } + case Intrinsic::sin: + case Intrinsic::cos: { + // Return NaN on infinite inputs. 
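Likewise for the sin/cos case: a standalone sketch (not part of the patch) of the two facts it uses, namely that infinite inputs give NaN and that finite inputs give results in [-1, 1], hence never an infinity.

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  double Inf = std::numeric_limits<double>::infinity();
  assert(std::isnan(std::sin(Inf)) && std::isnan(std::cos(-Inf))); // inf input -> NaN
  for (double X : {0.0, 1.0, -123.456})
    assert(std::isfinite(std::sin(X)) && std::fabs(std::sin(X)) <= 1.0); // never inf
  return 0;
}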
+ KnownFPClass KnownSrc; + computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, + KnownSrc, Depth + 1, Q); + Known.knownNot(fcInf); + if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity()) + Known.knownNot(fcNan); + break; + } + case Intrinsic::maxnum: + case Intrinsic::minnum: + case Intrinsic::minimum: + case Intrinsic::maximum: { + KnownFPClass KnownLHS, KnownRHS; + computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, + KnownLHS, Depth + 1, Q); + computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses, + KnownRHS, Depth + 1, Q); - // Fixup zero handling if denormals could be returned as a zero. - // - // As there's no spec for denormal flushing, be conservative with the - // treatment of denormals that could be flushed to zero. For older - // subtargets on AMDGPU the min/max instructions would not flush the - // output and return the original value. - // - // TODO: This could be refined based on the sign - if ((Known.KnownFPClasses & fcZero) != fcNone && - !Known.isKnownNeverSubnormal()) { - const Function *Parent = II->getFunction(); - if (!Parent) - break; - - DenormalMode Mode = Parent->getDenormalMode( - II->getType()->getScalarType()->getFltSemantics()); - if (Mode != DenormalMode::getIEEE()) - Known.KnownFPClasses |= fcZero; - } + bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN(); + Known = KnownLHS | KnownRHS; - break; + // If either operand is not NaN, the result is not NaN. + if (NeverNaN && (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)) + Known.knownNot(fcNan); + + if (IID == Intrinsic::maxnum) { + // If at least one operand is known to be positive, the result must be + // positive. + if ((KnownLHS.cannotBeOrderedLessThanZero() && + KnownLHS.isKnownNeverNaN()) || + (KnownRHS.cannotBeOrderedLessThanZero() && + KnownRHS.isKnownNeverNaN())) + Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); + } else if (IID == Intrinsic::maximum) { + // If at least one operand is known to be positive, the result must be + // positive. + if (KnownLHS.cannotBeOrderedLessThanZero() || + KnownRHS.cannotBeOrderedLessThanZero()) + Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); + } else if (IID == Intrinsic::minnum) { + // If at least one operand is known to be negative, the result must be + // negative. + if ((KnownLHS.cannotBeOrderedGreaterThanZero() && + KnownLHS.isKnownNeverNaN()) || + (KnownRHS.cannotBeOrderedGreaterThanZero() && + KnownRHS.isKnownNeverNaN())) + Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); + } else { + // If at least one operand is known to be negative, the result must be + // negative. + if (KnownLHS.cannotBeOrderedGreaterThanZero() || + KnownRHS.cannotBeOrderedGreaterThanZero()) + Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); } - case Intrinsic::canonicalize: { - KnownFPClass KnownSrc; - computeKnownFPClass(II->getArgOperand(0), DemandedElts, - InterestedClasses, KnownSrc, Depth + 1, Q); - // This is essentially a stronger form of - // propagateCanonicalizingSrc. Other "canonicalizing" operations don't - // actually have an IR canonicalization guarantee. + // Fixup zero handling if denormals could be returned as a zero. + // + // As there's no spec for denormal flushing, be conservative with the + // treatment of denormals that could be flushed to zero. For older + // subtargets on AMDGPU the min/max instructions would not flush the + // output and return the original value. 
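For the maxnum reasoning above, a standalone illustration with libm fmax (llvm.maxnum has fmax-like semantics, where a NaN operand is ignored): if one operand is known non-NaN and non-negative, the result can never be ordered-less-than-zero. Illustrative values only.

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  const double NonNeg = 2.0; // known non-NaN and >= 0
  const double Others[] = {-5.0, -0.0, 3.0,
                           -std::numeric_limits<double>::infinity(),
                           std::numeric_limits<double>::quiet_NaN()};
  for (double Y : Others) {
    double R = std::fmax(NonNeg, Y); // a NaN operand is ignored by fmax
    assert(!(R < 0.0));              // result is never ordered less than zero
  }
  return 0;
}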
+ // + // TODO: This could be refined based on the sign + if ((Known.KnownFPClasses & fcZero) != fcNone && + !Known.isKnownNeverSubnormal()) { + const Function *Parent = II->getFunction(); + if (!Parent) + break; + + DenormalMode Mode = Parent->getDenormalMode( + II->getType()->getScalarType()->getFltSemantics()); + if (Mode != DenormalMode::getIEEE()) + Known.KnownFPClasses |= fcZero; + } - // Canonicalize may flush denormals to zero, so we have to consider the - // denormal mode to preserve known-not-0 knowledge. - Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan; + break; + } + case Intrinsic::canonicalize: { + KnownFPClass KnownSrc; + computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, + KnownSrc, Depth + 1, Q); - // Stronger version of propagateNaN - // Canonicalize is guaranteed to quiet signaling nans. - if (KnownSrc.isKnownNeverNaN()) - Known.knownNot(fcNan); - else - Known.knownNot(fcSNan); + // This is essentially a stronger form of + // propagateCanonicalizingSrc. Other "canonicalizing" operations don't + // actually have an IR canonicalization guarantee. - const Function *F = II->getFunction(); - if (!F) - break; + // Canonicalize may flush denormals to zero, so we have to consider the + // denormal mode to preserve known-not-0 knowledge. + Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan; - // If the parent function flushes denormals, the canonical output cannot - // be a denormal. - const fltSemantics &FPType = - II->getType()->getScalarType()->getFltSemantics(); - DenormalMode DenormMode = F->getDenormalMode(FPType); - if (DenormMode == DenormalMode::getIEEE()) { - if (KnownSrc.isKnownNever(fcPosZero)) - Known.knownNot(fcPosZero); - if (KnownSrc.isKnownNever(fcNegZero)) - Known.knownNot(fcNegZero); - break; - } + // Stronger version of propagateNaN + // Canonicalize is guaranteed to quiet signaling nans. + if (KnownSrc.isKnownNeverNaN()) + Known.knownNot(fcNan); + else + Known.knownNot(fcSNan); - if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero()) - Known.knownNot(fcSubnormal); + const Function *F = II->getFunction(); + if (!F) + break; - if (DenormMode.Input == DenormalMode::PositiveZero || - (DenormMode.Output == DenormalMode::PositiveZero && - DenormMode.Input == DenormalMode::IEEE)) + // If the parent function flushes denormals, the canonical output cannot + // be a denormal. + const fltSemantics &FPType = + II->getType()->getScalarType()->getFltSemantics(); + DenormalMode DenormMode = F->getDenormalMode(FPType); + if (DenormMode == DenormalMode::getIEEE()) { + if (KnownSrc.isKnownNever(fcPosZero)) + Known.knownNot(fcPosZero); + if (KnownSrc.isKnownNever(fcNegZero)) Known.knownNot(fcNegZero); - break; } - case Intrinsic::trunc: - case Intrinsic::floor: - case Intrinsic::ceil: - case Intrinsic::rint: - case Intrinsic::nearbyint: - case Intrinsic::round: - case Intrinsic::roundeven: { - KnownFPClass KnownSrc; - FPClassTest InterestedSrcs = InterestedClasses; - if (InterestedSrcs & fcPosFinite) - InterestedSrcs |= fcPosFinite; - if (InterestedSrcs & fcNegFinite) - InterestedSrcs |= fcNegFinite; - computeKnownFPClass(II->getArgOperand(0), DemandedElts, - InterestedSrcs, KnownSrc, Depth + 1, Q); - // Integer results cannot be subnormal. 
+ if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero()) Known.knownNot(fcSubnormal); - Known.propagateNaN(KnownSrc, true); + if (DenormMode.Input == DenormalMode::PositiveZero || + (DenormMode.Output == DenormalMode::PositiveZero && + DenormMode.Input == DenormalMode::IEEE)) + Known.knownNot(fcNegZero); - // Pass through infinities, except PPC_FP128 is a special case for - // intrinsics other than trunc. - if (IID == Intrinsic::trunc || !V->getType()->isMultiUnitFPType()) { - if (KnownSrc.isKnownNeverPosInfinity()) - Known.knownNot(fcPosInf); - if (KnownSrc.isKnownNeverNegInfinity()) - Known.knownNot(fcNegInf); - } + break; + } + case Intrinsic::trunc: + case Intrinsic::floor: + case Intrinsic::ceil: + case Intrinsic::rint: + case Intrinsic::nearbyint: + case Intrinsic::round: + case Intrinsic::roundeven: { + KnownFPClass KnownSrc; + FPClassTest InterestedSrcs = InterestedClasses; + if (InterestedSrcs & fcPosFinite) + InterestedSrcs |= fcPosFinite; + if (InterestedSrcs & fcNegFinite) + InterestedSrcs |= fcNegFinite; + computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, + KnownSrc, Depth + 1, Q); + + // Integer results cannot be subnormal. + Known.knownNot(fcSubnormal); - // Negative round ups to 0 produce -0 - if (KnownSrc.isKnownNever(fcPosFinite)) - Known.knownNot(fcPosFinite); - if (KnownSrc.isKnownNever(fcNegFinite)) - Known.knownNot(fcNegFinite); + Known.propagateNaN(KnownSrc, true); - break; + // Pass through infinities, except PPC_FP128 is a special case for + // intrinsics other than trunc. + if (IID == Intrinsic::trunc || !V->getType()->isMultiUnitFPType()) { + if (KnownSrc.isKnownNeverPosInfinity()) + Known.knownNot(fcPosInf); + if (KnownSrc.isKnownNeverNegInfinity()) + Known.knownNot(fcNegInf); } - case Intrinsic::exp: - case Intrinsic::exp2: { - Known.knownNot(fcNegative); - if ((InterestedClasses & fcNan) == fcNone) - break; - KnownFPClass KnownSrc; - computeKnownFPClass(II->getArgOperand(0), DemandedElts, - InterestedClasses, KnownSrc, Depth + 1, Q); - if (KnownSrc.isKnownNeverNaN()) { - Known.knownNot(fcNan); - Known.SignBit = false; - } + // Negative round ups to 0 produce -0 + if (KnownSrc.isKnownNever(fcPosFinite)) + Known.knownNot(fcPosFinite); + if (KnownSrc.isKnownNever(fcNegFinite)) + Known.knownNot(fcNegFinite); + break; + } + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::exp10: { + Known.knownNot(fcNegative); + if ((InterestedClasses & fcNan) == fcNone) break; + + KnownFPClass KnownSrc; + computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, + KnownSrc, Depth + 1, Q); + if (KnownSrc.isKnownNeverNaN()) { + Known.knownNot(fcNan); + Known.SignBit = false; } - case Intrinsic::fptrunc_round: { - computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, - Known, Depth, Q); + + break; + } + case Intrinsic::fptrunc_round: { + computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known, + Depth, Q); + break; + } + case Intrinsic::log: + case Intrinsic::log10: + case Intrinsic::log2: + case Intrinsic::experimental_constrained_log: + case Intrinsic::experimental_constrained_log10: + case Intrinsic::experimental_constrained_log2: { + // log(+inf) -> +inf + // log([+-]0.0) -> -inf + // log(-inf) -> nan + // log(-x) -> nan + if ((InterestedClasses & (fcNan | fcInf)) == fcNone) break; - } - case Intrinsic::log: - case Intrinsic::log10: - case Intrinsic::log2: - case Intrinsic::experimental_constrained_log: - case Intrinsic::experimental_constrained_log10: - case 
Intrinsic::experimental_constrained_log2: { - // log(+inf) -> +inf - // log([+-]0.0) -> -inf - // log(-inf) -> nan - // log(-x) -> nan - if ((InterestedClasses & (fcNan | fcInf)) == fcNone) - break; - FPClassTest InterestedSrcs = InterestedClasses; - if ((InterestedClasses & fcNegInf) != fcNone) - InterestedSrcs |= fcZero | fcSubnormal; - if ((InterestedClasses & fcNan) != fcNone) - InterestedSrcs |= fcNan | (fcNegative & ~fcNan); + FPClassTest InterestedSrcs = InterestedClasses; + if ((InterestedClasses & fcNegInf) != fcNone) + InterestedSrcs |= fcZero | fcSubnormal; + if ((InterestedClasses & fcNan) != fcNone) + InterestedSrcs |= fcNan | (fcNegative & ~fcNan); - KnownFPClass KnownSrc; - computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, - KnownSrc, Depth + 1, Q); + KnownFPClass KnownSrc; + computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs, + KnownSrc, Depth + 1, Q); - if (KnownSrc.isKnownNeverPosInfinity()) - Known.knownNot(fcPosInf); + if (KnownSrc.isKnownNeverPosInfinity()) + Known.knownNot(fcPosInf); - if (KnownSrc.isKnownNeverNaN() && - KnownSrc.cannotBeOrderedLessThanZero()) - Known.knownNot(fcNan); + if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) + Known.knownNot(fcNan); - const Function *F = II->getFunction(); - if (F && KnownSrc.isKnownNeverLogicalZero(*F, II->getType())) - Known.knownNot(fcNegInf); + const Function *F = II->getFunction(); + if (F && KnownSrc.isKnownNeverLogicalZero(*F, II->getType())) + Known.knownNot(fcNegInf); + break; + } + case Intrinsic::powi: { + if ((InterestedClasses & fcNegative) == fcNone) break; - } - case Intrinsic::powi: { - if ((InterestedClasses & fcNegative) == fcNone) - break; - const Value *Exp = II->getArgOperand(1); - Type *ExpTy = Exp->getType(); - unsigned BitWidth = ExpTy->getScalarType()->getIntegerBitWidth(); - KnownBits ExponentKnownBits(BitWidth); - computeKnownBits(Exp, - isa<VectorType>(ExpTy) ? DemandedElts : APInt(1, 1), - ExponentKnownBits, Depth + 1, Q); + const Value *Exp = II->getArgOperand(1); + Type *ExpTy = Exp->getType(); + unsigned BitWidth = ExpTy->getScalarType()->getIntegerBitWidth(); + KnownBits ExponentKnownBits(BitWidth); + computeKnownBits(Exp, isa<VectorType>(ExpTy) ? DemandedElts : APInt(1, 1), + ExponentKnownBits, Depth + 1, Q); - if (ExponentKnownBits.Zero[0]) { // Is even - Known.knownNot(fcNegative); - break; - } - - // Given that exp is an integer, here are the - // ways that pow can return a negative value: - // - // pow(-x, exp) --> negative if exp is odd and x is negative. - // pow(-0, exp) --> -inf if exp is negative odd. - // pow(-0, exp) --> -0 if exp is positive odd. - // pow(-inf, exp) --> -0 if exp is negative odd. - // pow(-inf, exp) --> -inf if exp is positive odd. - KnownFPClass KnownSrc; - computeKnownFPClass(II->getArgOperand(0), DemandedElts, fcNegative, - KnownSrc, Depth + 1, Q); - if (KnownSrc.isKnownNever(fcNegative)) - Known.knownNot(fcNegative); + if (ExponentKnownBits.Zero[0]) { // Is even + Known.knownNot(fcNegative); break; } - case Intrinsic::ldexp: { - KnownFPClass KnownSrc; - computeKnownFPClass(II->getArgOperand(0), DemandedElts, - InterestedClasses, KnownSrc, Depth + 1, Q); - Known.propagateNaN(KnownSrc, /*PropagateSign=*/true); - // Sign is preserved, but underflows may produce zeroes. 
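A standalone sketch (illustrative only) of the special values the log cases in this hunk enumerate: log(+inf) is +inf, log(+-0) is -inf, and negative inputs produce a NaN, so a source known never NaN and never ordered-less-than-zero cannot produce a NaN result.

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  double Inf = std::numeric_limits<double>::infinity();
  assert(std::isinf(std::log(Inf)) && std::log(Inf) > 0);   // log(+inf) -> +inf
  assert(std::isinf(std::log(0.0)) && std::log(0.0) < 0);   // log(+-0)  -> -inf
  assert(std::isnan(std::log(-1.0)));                       // log(-x)   -> NaN
  return 0;
}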
- if (KnownSrc.isKnownNever(fcNegative)) - Known.knownNot(fcNegative); - else if (KnownSrc.cannotBeOrderedLessThanZero()) - Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); + // Given that exp is an integer, here are the + // ways that pow can return a negative value: + // + // pow(-x, exp) --> negative if exp is odd and x is negative. + // pow(-0, exp) --> -inf if exp is negative odd. + // pow(-0, exp) --> -0 if exp is positive odd. + // pow(-inf, exp) --> -0 if exp is negative odd. + // pow(-inf, exp) --> -inf if exp is positive odd. + KnownFPClass KnownSrc; + computeKnownFPClass(II->getArgOperand(0), DemandedElts, fcNegative, + KnownSrc, Depth + 1, Q); + if (KnownSrc.isKnownNever(fcNegative)) + Known.knownNot(fcNegative); + break; + } + case Intrinsic::ldexp: { + KnownFPClass KnownSrc; + computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, + KnownSrc, Depth + 1, Q); + Known.propagateNaN(KnownSrc, /*PropagateSign=*/true); - if (KnownSrc.isKnownNever(fcPositive)) - Known.knownNot(fcPositive); - else if (KnownSrc.cannotBeOrderedGreaterThanZero()) - Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); + // Sign is preserved, but underflows may produce zeroes. + if (KnownSrc.isKnownNever(fcNegative)) + Known.knownNot(fcNegative); + else if (KnownSrc.cannotBeOrderedLessThanZero()) + Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); - // Can refine inf/zero handling based on the exponent operand. - const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf; - if ((InterestedClasses & ExpInfoMask) == fcNone) - break; - if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone) - break; + if (KnownSrc.isKnownNever(fcPositive)) + Known.knownNot(fcPositive); + else if (KnownSrc.cannotBeOrderedGreaterThanZero()) + Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); - const fltSemantics &Flt - = II->getType()->getScalarType()->getFltSemantics(); - unsigned Precision = APFloat::semanticsPrecision(Flt); - const Value *ExpArg = II->getArgOperand(1); - ConstantRange ExpRange = computeConstantRange( - ExpArg, true, Q.IIQ.UseInstrInfo, Q.AC, Q.CxtI, Q.DT, Depth + 1); + // Can refine inf/zero handling based on the exponent operand. + const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf; + if ((InterestedClasses & ExpInfoMask) == fcNone) + break; + if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone) + break; - const int MantissaBits = Precision - 1; - if (ExpRange.getSignedMin().sge(static_cast<int64_t>(MantissaBits))) - Known.knownNot(fcSubnormal); + const fltSemantics &Flt = + II->getType()->getScalarType()->getFltSemantics(); + unsigned Precision = APFloat::semanticsPrecision(Flt); + const Value *ExpArg = II->getArgOperand(1); + ConstantRange ExpRange = computeConstantRange( + ExpArg, true, Q.IIQ.UseInstrInfo, Q.AC, Q.CxtI, Q.DT, Depth + 1); - const Function *F = II->getFunction(); - const APInt *ConstVal = ExpRange.getSingleElement(); - if (ConstVal && ConstVal->isZero()) { - // ldexp(x, 0) -> x, so propagate everything. 
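The ldexp exponent-range refinement can be checked with plain std::ldexp (standalone sketch, not part of the patch): a non-positive exponent cannot introduce an infinity, a non-negative exponent cannot turn a normal input into a zero or subnormal, and an exponent of zero is the identity, matching the ExpRange.isAllNegative() / isAllNonNegative() / single-zero branches in this hunk.

#include <cassert>
#include <cfloat>
#include <cmath>

int main() {
  // Scaling a finite value down (exponent <= 0) can never overflow to inf.
  assert(std::isfinite(std::ldexp(DBL_MAX, -1)));
  // Scaling a normal value up (exponent >= 0) can never produce zero/subnormal.
  assert(std::ldexp(DBL_MIN, 3) >= DBL_MIN);
  // ldexp(x, 0) is just x, so every known class bit of x carries over.
  assert(std::ldexp(1.5, 0) == 1.5);
  return 0;
}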
- Known.propagateCanonicalizingSrc(KnownSrc, *F, - II->getType()); - } else if (ExpRange.isAllNegative()) { - // If we know the power is <= 0, can't introduce inf - if (KnownSrc.isKnownNeverPosInfinity()) - Known.knownNot(fcPosInf); - if (KnownSrc.isKnownNeverNegInfinity()) - Known.knownNot(fcNegInf); - } else if (ExpRange.isAllNonNegative()) { - // If we know the power is >= 0, can't introduce subnormal or zero - if (KnownSrc.isKnownNeverPosSubnormal()) - Known.knownNot(fcPosSubnormal); - if (KnownSrc.isKnownNeverNegSubnormal()) - Known.knownNot(fcNegSubnormal); - if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, II->getType())) - Known.knownNot(fcPosZero); - if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType())) - Known.knownNot(fcNegZero); - } + const int MantissaBits = Precision - 1; + if (ExpRange.getSignedMin().sge(static_cast<int64_t>(MantissaBits))) + Known.knownNot(fcSubnormal); - break; - } - case Intrinsic::arithmetic_fence: { - computeKnownFPClass(II->getArgOperand(0), DemandedElts, - InterestedClasses, Known, Depth + 1, Q); - break; + const Function *F = II->getFunction(); + const APInt *ConstVal = ExpRange.getSingleElement(); + if (ConstVal && ConstVal->isZero()) { + // ldexp(x, 0) -> x, so propagate everything. + Known.propagateCanonicalizingSrc(KnownSrc, *F, II->getType()); + } else if (ExpRange.isAllNegative()) { + // If we know the power is <= 0, can't introduce inf + if (KnownSrc.isKnownNeverPosInfinity()) + Known.knownNot(fcPosInf); + if (KnownSrc.isKnownNeverNegInfinity()) + Known.knownNot(fcNegInf); + } else if (ExpRange.isAllNonNegative()) { + // If we know the power is >= 0, can't introduce subnormal or zero + if (KnownSrc.isKnownNeverPosSubnormal()) + Known.knownNot(fcPosSubnormal); + if (KnownSrc.isKnownNeverNegSubnormal()) + Known.knownNot(fcNegSubnormal); + if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, II->getType())) + Known.knownNot(fcPosZero); + if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType())) + Known.knownNot(fcNegZero); } - case Intrinsic::experimental_constrained_sitofp: - case Intrinsic::experimental_constrained_uitofp: - // Cannot produce nan - Known.knownNot(fcNan); - // sitofp and uitofp turn into +0.0 for zero. - Known.knownNot(fcNegZero); + break; + } + case Intrinsic::arithmetic_fence: { + computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses, + Known, Depth + 1, Q); + break; + } + case Intrinsic::experimental_constrained_sitofp: + case Intrinsic::experimental_constrained_uitofp: + // Cannot produce nan + Known.knownNot(fcNan); - // Integers cannot be subnormal - Known.knownNot(fcSubnormal); + // sitofp and uitofp turn into +0.0 for zero. + Known.knownNot(fcNegZero); - if (IID == Intrinsic::experimental_constrained_uitofp) - Known.signBitMustBeZero(); + // Integers cannot be subnormal + Known.knownNot(fcSubnormal); - // TODO: Copy inf handling from instructions - break; - default: - break; - } + if (IID == Intrinsic::experimental_constrained_uitofp) + Known.signBitMustBeZero(); + + // TODO: Copy inf handling from instructions + break; + default: + break; } break; @@ -5249,7 +5350,7 @@ Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { return UndefInt8; // Return Undef for zero-sized type. 
- if (!DL.getTypeStoreSize(V->getType()).isNonZero()) + if (DL.getTypeStoreSize(V->getType()).isZero()) return UndefInt8; Constant *C = dyn_cast<Constant>(V); @@ -5296,10 +5397,9 @@ Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { if (CE->getOpcode() == Instruction::IntToPtr) { if (auto *PtrTy = dyn_cast<PointerType>(CE->getType())) { unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace()); - return isBytewiseValue( - ConstantExpr::getIntegerCast(CE->getOperand(0), - Type::getIntNTy(Ctx, BitWidth), false), - DL); + if (Constant *Op = ConstantFoldIntegerCast( + CE->getOperand(0), Type::getIntNTy(Ctx, BitWidth), false, DL)) + return isBytewiseValue(Op, DL); } } } @@ -6184,36 +6284,31 @@ static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) { } /// Combine constant ranges from computeConstantRange() and computeKnownBits(). -static ConstantRange computeConstantRangeIncludingKnownBits( - const Value *V, bool ForSigned, const DataLayout &DL, unsigned Depth, - AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, - bool UseInstrInfo = true) { - KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT, UseInstrInfo); - ConstantRange CR1 = ConstantRange::fromKnownBits(Known, ForSigned); - ConstantRange CR2 = computeConstantRange(V, ForSigned, UseInstrInfo); +static ConstantRange +computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V, + bool ForSigned, + const SimplifyQuery &SQ) { + ConstantRange CR1 = + ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned); + ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo); ConstantRange::PreferredRangeType RangeType = ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned; return CR1.intersectWith(CR2, RangeType); } -OverflowResult llvm::computeOverflowForUnsignedMul( - const Value *LHS, const Value *RHS, const DataLayout &DL, - AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, - bool UseInstrInfo) { - KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT, - UseInstrInfo); - KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT, - UseInstrInfo); +OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, + const Value *RHS, + const SimplifyQuery &SQ) { + KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ); + KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ); ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false); ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false); return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange)); } -OverflowResult -llvm::computeOverflowForSignedMul(const Value *LHS, const Value *RHS, - const DataLayout &DL, AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT, bool UseInstrInfo) { +OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS, + const Value *RHS, + const SimplifyQuery &SQ) { // Multiplying n * m significant bits yields a result of n + m significant // bits. If the total number of significant bits does not exceed the // result bit width (minus 1), there is no overflow. @@ -6224,8 +6319,8 @@ llvm::computeOverflowForSignedMul(const Value *LHS, const Value *RHS, // Note that underestimating the number of sign bits gives a more // conservative answer. 
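The sign-bit argument for signed multiply above reduces to simple interval arithmetic; a standalone sketch with illustrative numbers (not the patch's code): if each i32 operand fits in 16 significant bits, the product's magnitude is at most 2^30 and cannot overflow.

#include <cassert>
#include <cstdint>

int main() {
  // Each value lies in [-2^15, 2^15 - 1] (at least 17 sign bits in i32),
  // so |product| <= 2^30, which is representable in int32_t.
  int32_t A = -(1 << 15), B = -(1 << 15);   // the extreme magnitudes
  int64_t Wide = (int64_t)A * (int64_t)B;   // 2^30, computed without overflow
  assert(Wide == (int64_t)(int32_t)Wide);   // still fits in 32 bits
  return 0;
}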
- unsigned SignBits = ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) + - ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT); + unsigned SignBits = + ::ComputeNumSignBits(LHS, 0, SQ) + ::ComputeNumSignBits(RHS, 0, SQ); // First handle the easy case: if we have enough sign bits there's // definitely no overflow. @@ -6242,34 +6337,29 @@ llvm::computeOverflowForSignedMul(const Value *LHS, const Value *RHS, // product is exactly the minimum negative number. // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000 // For simplicity we just check if at least one side is not negative. - KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT, - UseInstrInfo); - KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT, - UseInstrInfo); + KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, SQ); + KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, SQ); if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) return OverflowResult::NeverOverflows; } return OverflowResult::MayOverflow; } -OverflowResult llvm::computeOverflowForUnsignedAdd( - const Value *LHS, const Value *RHS, const DataLayout &DL, - AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, - bool UseInstrInfo) { - ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( - LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, UseInstrInfo); - ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( - RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, UseInstrInfo); +OverflowResult +llvm::computeOverflowForUnsignedAdd(const WithCache<const Value *> &LHS, + const WithCache<const Value *> &RHS, + const SimplifyQuery &SQ) { + ConstantRange LHSRange = + computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ); + ConstantRange RHSRange = + computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ); return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange)); } -static OverflowResult computeOverflowForSignedAdd(const Value *LHS, - const Value *RHS, - const AddOperator *Add, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT) { +static OverflowResult +computeOverflowForSignedAdd(const WithCache<const Value *> &LHS, + const WithCache<const Value *> &RHS, + const AddOperator *Add, const SimplifyQuery &SQ) { if (Add && Add->hasNoSignedWrap()) { return OverflowResult::NeverOverflows; } @@ -6288,14 +6378,14 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS, // // Since the carry into the most significant position is always equal to // the carry out of the addition, there is no signed overflow. 
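Similarly for signed add: with at least two sign bits each operand lies in [-2^30, 2^30 - 1], so the sum stays within i32. A standalone check with the boundary values:

#include <cassert>
#include <cstdint>

int main() {
  // Two sign bits => value in [-2^30, 2^30 - 1]; the sum of two such values
  // lies in [-2^31, 2^31 - 2] and therefore never overflows int32_t.
  int32_t Lo = -(1 << 30), Hi = (1 << 30) - 1;
  int64_t Sum1 = (int64_t)Lo + (int64_t)Lo; // -2^31, still representable
  int64_t Sum2 = (int64_t)Hi + (int64_t)Hi; // 2^31 - 2
  assert(Sum1 == (int64_t)(int32_t)Sum1 && Sum2 == (int64_t)(int32_t)Sum2);
  return 0;
}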
- if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 && - ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1) + if (::ComputeNumSignBits(LHS, 0, SQ) > 1 && + ::ComputeNumSignBits(RHS, 0, SQ) > 1) return OverflowResult::NeverOverflows; - ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( - LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT); - ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( - RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT); + ConstantRange LHSRange = + computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ); + ConstantRange RHSRange = + computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ); OverflowResult OR = mapOverflowResult(LHSRange.signedAddMayOverflow(RHSRange)); if (OR != OverflowResult::MayOverflow) @@ -6309,16 +6399,14 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS, // CANNOT overflow. If this can be determined from the known bits of the // operands the above signedAddMayOverflow() check will have already done so. // The only other way to improve on the known bits is from an assumption, so - // call computeKnownBitsFromAssume() directly. + // call computeKnownBitsFromContext() directly. bool LHSOrRHSKnownNonNegative = (LHSRange.isAllNonNegative() || RHSRange.isAllNonNegative()); bool LHSOrRHSKnownNegative = (LHSRange.isAllNegative() || RHSRange.isAllNegative()); if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { KnownBits AddKnown(LHSRange.getBitWidth()); - computeKnownBitsFromAssume( - Add, AddKnown, /*Depth=*/0, - SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC, CxtI, DT)); + computeKnownBitsFromContext(Add, AddKnown, /*Depth=*/0, SQ); if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) || (AddKnown.isNegative() && LHSOrRHSKnownNegative)) return OverflowResult::NeverOverflows; @@ -6329,10 +6417,7 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS, OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS, const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT) { + const SimplifyQuery &SQ) { // X - (X % ?) // The remainder of a value can't have greater magnitude than itself, // so the subtraction can't overflow. @@ -6346,32 +6431,29 @@ OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS, // See simplifyICmpWithBinOpOnLHS() for candidates. if (match(RHS, m_URem(m_Specific(LHS), m_Value())) || match(RHS, m_NUWSub(m_Specific(LHS), m_Value()))) - if (isGuaranteedNotToBeUndefOrPoison(LHS, AC, CxtI, DT)) + if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT)) return OverflowResult::NeverOverflows; // Checking for conditions implied by dominating conditions may be expensive. // Limit it to usub_with_overflow calls for now. 
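The X - (X % ?) special case rests on X % D never exceeding X in unsigned arithmetic, so the subtraction can never wrap; a standalone check over a few illustrative values:

#include <cassert>
#include <cstdint>

int main() {
  // For any X and non-zero D, X % D <= X, hence X - (X % D) stays in [0, X].
  for (uint32_t X : {0u, 1u, 7u, 1000u, 0xFFFFFFFFu})
    for (uint32_t D : {1u, 2u, 3u, 10u, 0x80000000u})
      assert(X - (X % D) <= X); // no unsigned wrap
  return 0;
}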
- if (match(CxtI, + if (match(SQ.CxtI, m_Intrinsic<Intrinsic::usub_with_overflow>(m_Value(), m_Value()))) - if (auto C = - isImpliedByDomCondition(CmpInst::ICMP_UGE, LHS, RHS, CxtI, DL)) { + if (auto C = isImpliedByDomCondition(CmpInst::ICMP_UGE, LHS, RHS, SQ.CxtI, + SQ.DL)) { if (*C) return OverflowResult::NeverOverflows; return OverflowResult::AlwaysOverflowsLow; } - ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( - LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT); - ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( - RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT); + ConstantRange LHSRange = + computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ); + ConstantRange RHSRange = + computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ); return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange)); } OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS, const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT) { + const SimplifyQuery &SQ) { // X - (X % ?) // The remainder of a value can't have greater magnitude than itself, // so the subtraction can't overflow. @@ -6382,19 +6464,19 @@ OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS, // then determining no-overflow may allow other transforms. if (match(RHS, m_SRem(m_Specific(LHS), m_Value())) || match(RHS, m_NSWSub(m_Specific(LHS), m_Value()))) - if (isGuaranteedNotToBeUndefOrPoison(LHS, AC, CxtI, DT)) + if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT)) return OverflowResult::NeverOverflows; // If LHS and RHS each have at least two sign bits, the subtraction // cannot overflow. - if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 && - ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1) + if (::ComputeNumSignBits(LHS, 0, SQ) > 1 && + ::ComputeNumSignBits(RHS, 0, SQ) > 1) return OverflowResult::NeverOverflows; - ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( - LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT); - ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( - RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT); + ConstantRange LHSRange = + computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ); + ConstantRange RHSRange = + computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ); return mapOverflowResult(LHSRange.signedSubMayOverflow(RHSRange)); } @@ -6540,6 +6622,7 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly, case Intrinsic::log2: case Intrinsic::exp: case Intrinsic::exp2: + case Intrinsic::exp10: case Intrinsic::fabs: case Intrinsic::copysign: case Intrinsic::floor: @@ -6557,6 +6640,8 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly, case Intrinsic::minimum: case Intrinsic::maximum: case Intrinsic::is_fpclass: + case Intrinsic::ldexp: + case Intrinsic::frexp: return false; case Intrinsic::lround: case Intrinsic::llround: @@ -6748,7 +6833,9 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V, return true; if (const auto *CB = dyn_cast<CallBase>(V)) { - if (CB->hasRetAttr(Attribute::NoUndef)) + if (CB->hasRetAttr(Attribute::NoUndef) || + CB->hasRetAttr(Attribute::Dereferenceable) || + CB->hasRetAttr(Attribute::DereferenceableOrNull)) return true; } @@ -6838,6 +6925,13 @@ bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC, return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, true); } +bool 
llvm::isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC, + const Instruction *CtxI, + const DominatorTree *DT, unsigned Depth) { + // TODO: This is currently equivalent to isGuaranteedNotToBeUndefOrPoison(). + return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, false); +} + /// Return true if undefined behavior would provably be executed on the path to /// OnPathTo if Root produced a posion result. Note that this doesn't say /// anything about whether OnPathTo is actually executed or whether Root is @@ -6883,21 +6977,16 @@ bool llvm::mustExecuteUBIfPoisonOnPathTo(Instruction *Root, } OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT) { + const SimplifyQuery &SQ) { return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1), - Add, DL, AC, CxtI, DT); + Add, SQ); } -OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS, - const Value *RHS, - const DataLayout &DL, - AssumptionCache *AC, - const Instruction *CxtI, - const DominatorTree *DT) { - return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT); +OverflowResult +llvm::computeOverflowForSignedAdd(const WithCache<const Value *> &LHS, + const WithCache<const Value *> &RHS, + const SimplifyQuery &SQ) { + return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, SQ); } bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { @@ -7114,6 +7203,8 @@ static bool programUndefinedIfUndefOrPoison(const Value *V, Begin = Inst->getIterator(); Begin++; } else if (const auto *Arg = dyn_cast<Argument>(V)) { + if (Arg->getParent()->isDeclaration()) + return false; BB = &Arg->getParent()->getEntryBlock(); Begin = BB->begin(); } else { @@ -7760,6 +7851,7 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, if (!C) return nullptr; + const DataLayout &DL = CmpI->getModule()->getDataLayout(); Constant *CastedTo = nullptr; switch (*CastOp) { case Instruction::ZExt: @@ -7797,26 +7889,27 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, // CmpConst == C is checked below. CastedTo = CmpConst; } else { - CastedTo = ConstantExpr::getIntegerCast(C, SrcTy, CmpI->isSigned()); + unsigned ExtOp = CmpI->isSigned() ? Instruction::SExt : Instruction::ZExt; + CastedTo = ConstantFoldCastOperand(ExtOp, C, SrcTy, DL); } break; case Instruction::FPTrunc: - CastedTo = ConstantExpr::getFPExtend(C, SrcTy, true); + CastedTo = ConstantFoldCastOperand(Instruction::FPExt, C, SrcTy, DL); break; case Instruction::FPExt: - CastedTo = ConstantExpr::getFPTrunc(C, SrcTy, true); + CastedTo = ConstantFoldCastOperand(Instruction::FPTrunc, C, SrcTy, DL); break; case Instruction::FPToUI: - CastedTo = ConstantExpr::getUIToFP(C, SrcTy, true); + CastedTo = ConstantFoldCastOperand(Instruction::UIToFP, C, SrcTy, DL); break; case Instruction::FPToSI: - CastedTo = ConstantExpr::getSIToFP(C, SrcTy, true); + CastedTo = ConstantFoldCastOperand(Instruction::SIToFP, C, SrcTy, DL); break; case Instruction::UIToFP: - CastedTo = ConstantExpr::getFPToUI(C, SrcTy, true); + CastedTo = ConstantFoldCastOperand(Instruction::FPToUI, C, SrcTy, DL); break; case Instruction::SIToFP: - CastedTo = ConstantExpr::getFPToSI(C, SrcTy, true); + CastedTo = ConstantFoldCastOperand(Instruction::FPToSI, C, SrcTy, DL); break; default: break; @@ -7827,8 +7920,8 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, // Make sure the cast doesn't lose any information. 
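The "cast doesn't lose information" guard works by folding the constant through the inverse cast and then redoing the original cast; a standalone analogue with plain integer casts (hypothetical helper name, illustrative only, not the patch's code):

#include <cassert>
#include <cstdint>

int main() {
  // Original cast: zext i8 -> i32. Fold the i32 constant back through a
  // trunc, re-apply the zext, and reject the transform if the round trip
  // changes the value.
  auto RoundTripsThroughUint8 = [](uint32_t C) {
    uint32_t CastedTo = (uint8_t)C;                     // inverse cast (trunc)
    uint32_t CastedBack = (uint32_t)(uint8_t)CastedTo;  // redo the zext
    return CastedBack == C;
  };
  assert(RoundTripsThroughUint8(200));   // fits in 8 bits: no information lost
  assert(!RoundTripsThroughUint8(1000)); // truncation loses bits: rejected
  return 0;
}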
Constant *CastedBack = - ConstantExpr::getCast(*CastOp, CastedTo, C->getType(), true); - if (CastedBack != C) + ConstantFoldCastOperand(*CastOp, CastedTo, C->getType(), DL); + if (CastedBack && CastedBack != C) return nullptr; return CastedTo; @@ -7989,7 +8082,7 @@ bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, for (unsigned i = 0; i != 2; ++i) { Value *L = P->getIncomingValue(i); Value *R = P->getIncomingValue(!i); - Operator *LU = dyn_cast<Operator>(L); + auto *LU = dyn_cast<BinaryOperator>(L); if (!LU) continue; unsigned Opcode = LU->getOpcode(); @@ -8027,7 +8120,7 @@ bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, // OR // %iv = [R, %entry], [%iv.next, %backedge] // %iv.next = binop L, %iv - BO = cast<BinaryOperator>(LU); + BO = LU; Start = R; Step = L; return true; @@ -8065,10 +8158,9 @@ static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, } case CmpInst::ICMP_ULE: { - const APInt *C; - - // LHS u<= LHS +_{nuw} C for any C - if (match(RHS, m_NUWAdd(m_Specific(LHS), m_APInt(C)))) + // LHS u<= LHS +_{nuw} V for any V + if (match(RHS, m_c_Add(m_Specific(LHS), m_Value())) && + cast<OverflowingBinaryOperator>(RHS)->hasNoUnsignedWrap()) return true; // RHS >> V u<= RHS for any V @@ -8207,17 +8299,40 @@ static std::optional<bool> isImpliedCondICmps(const ICmpInst *LHS, CmpInst::Predicate LPred = LHSIsTrue ? LHS->getPredicate() : LHS->getInversePredicate(); - // Can we infer anything when the two compares have matching operands? - bool AreSwappedOps; - if (areMatchingOperands(L0, L1, R0, R1, AreSwappedOps)) - return isImpliedCondMatchingOperands(LPred, RPred, AreSwappedOps); - // Can we infer anything when the 0-operands match and the 1-operands are // constants (not necessarily matching)? const APInt *LC, *RC; if (L0 == R0 && match(L1, m_APInt(LC)) && match(R1, m_APInt(RC))) return isImpliedCondCommonOperandWithConstants(LPred, *LC, RPred, *RC); + // Can we infer anything when the two compares have matching operands? + bool AreSwappedOps; + if (areMatchingOperands(L0, L1, R0, R1, AreSwappedOps)) + return isImpliedCondMatchingOperands(LPred, RPred, AreSwappedOps); + + // L0 = R0 = L1 + R1, L0 >=u L1 implies R0 >=u R1, L0 <u L1 implies R0 <u R1 + if (ICmpInst::isUnsigned(LPred) && ICmpInst::isUnsigned(RPred)) { + if (L0 == R1) { + std::swap(R0, R1); + RPred = ICmpInst::getSwappedPredicate(RPred); + } + if (L1 == R0) { + std::swap(L0, L1); + LPred = ICmpInst::getSwappedPredicate(LPred); + } + if (L1 == R1) { + std::swap(L0, L1); + LPred = ICmpInst::getSwappedPredicate(LPred); + std::swap(R0, R1); + RPred = ICmpInst::getSwappedPredicate(RPred); + } + if (L0 == R0 && + (LPred == ICmpInst::ICMP_ULT || LPred == ICmpInst::ICMP_UGE) && + (RPred == ICmpInst::ICMP_ULT || RPred == ICmpInst::ICMP_UGE) && + match(L0, m_c_Add(m_Specific(L1), m_Specific(R1)))) + return LPred == RPred; + } + if (LPred == RPred) return isImpliedCondOperands(LPred, L0, L1, R0, R1, DL, Depth); @@ -8427,6 +8542,11 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, if (match(BO.getOperand(1), m_APInt(C))) // 'and x, C' produces [0, C]. Upper = *C + 1; + // X & -X is a power of two or zero. So we can cap the value at max power of + // two. 
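A standalone check (not part of the patch) of the X & -X fact used for the new 'and' upper bound: the expression isolates the lowest set bit, so the result is zero or a single power of two and never exceeds 2^31 for a 32-bit value, which is the cap installed above.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 6u, 40u, 0xDEADBEEFu, 0x80000000u}) {
    uint32_t LowBit = X & (0u - X);           // isolate the lowest set bit
    assert((LowBit & (LowBit - 1)) == 0);     // zero or a single power of two
    assert(LowBit <= 0x80000000u);            // bounded by the max power of two
  }
  return 0;
}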
+ if (match(BO.getOperand(0), m_Neg(m_Specific(BO.getOperand(1)))) || + match(BO.getOperand(1), m_Neg(m_Specific(BO.getOperand(0))))) + Upper = APInt::getSignedMinValue(Width) + 1; break; case Instruction::Or: @@ -8488,7 +8608,20 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower, Lower = *C; Upper = C->shl(ShiftAmount) + 1; } + } else { + // If lowbit is set, value can never be zero. + if ((*C)[0]) + Lower = APInt::getOneBitSet(Width, 0); + // If we are shifting a constant the largest it can be is if the longest + // sequence of consecutive ones is shifted to the highbits (breaking + // ties for which sequence is higher). At the moment we take a liberal + // upper bound on this by just popcounting the constant. + // TODO: There may be a bitwise trick for it longest/highest + // consecutative sequence of ones (naive method is O(Width) loop). + Upper = APInt::getHighBitsSet(Width, C->popcount()) + 1; } + } else if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) { + Upper = APInt::getBitsSetFrom(Width, C->getZExtValue()) + 1; } break; @@ -8659,56 +8792,50 @@ static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II) { return ConstantRange::getFull(Width); } -static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower, - APInt &Upper, const InstrInfoQuery &IIQ) { +static ConstantRange getRangeForSelectPattern(const SelectInst &SI, + const InstrInfoQuery &IIQ) { + unsigned BitWidth = SI.getType()->getScalarSizeInBits(); const Value *LHS = nullptr, *RHS = nullptr; SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS); if (R.Flavor == SPF_UNKNOWN) - return; - - unsigned BitWidth = SI.getType()->getScalarSizeInBits(); + return ConstantRange::getFull(BitWidth); if (R.Flavor == SelectPatternFlavor::SPF_ABS) { // If the negation part of the abs (in RHS) has the NSW flag, // then the result of abs(X) is [0..SIGNED_MAX], // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN. - Lower = APInt::getZero(BitWidth); if (match(RHS, m_Neg(m_Specific(LHS))) && IIQ.hasNoSignedWrap(cast<Instruction>(RHS))) - Upper = APInt::getSignedMaxValue(BitWidth) + 1; - else - Upper = APInt::getSignedMinValue(BitWidth) + 1; - return; + return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), + APInt::getSignedMaxValue(BitWidth) + 1); + + return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), + APInt::getSignedMinValue(BitWidth) + 1); } if (R.Flavor == SelectPatternFlavor::SPF_NABS) { // The result of -abs(X) is <= 0. 
- Lower = APInt::getSignedMinValue(BitWidth); - Upper = APInt(BitWidth, 1); - return; + return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth), + APInt(BitWidth, 1)); } const APInt *C; if (!match(LHS, m_APInt(C)) && !match(RHS, m_APInt(C))) - return; + return ConstantRange::getFull(BitWidth); switch (R.Flavor) { - case SPF_UMIN: - Upper = *C + 1; - break; - case SPF_UMAX: - Lower = *C; - break; - case SPF_SMIN: - Lower = APInt::getSignedMinValue(BitWidth); - Upper = *C + 1; - break; - case SPF_SMAX: - Lower = *C; - Upper = APInt::getSignedMaxValue(BitWidth) + 1; - break; - default: - break; + case SPF_UMIN: + return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), *C + 1); + case SPF_UMAX: + return ConstantRange::getNonEmpty(*C, APInt::getZero(BitWidth)); + case SPF_SMIN: + return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth), + *C + 1); + case SPF_SMAX: + return ConstantRange::getNonEmpty(*C, + APInt::getSignedMaxValue(BitWidth) + 1); + default: + return ConstantRange::getFull(BitWidth); } } @@ -8742,9 +8869,17 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned, const APInt *C; if (match(V, m_APInt(C))) return ConstantRange(*C); + unsigned BitWidth = V->getType()->getScalarSizeInBits(); + + if (auto *VC = dyn_cast<ConstantDataVector>(V)) { + ConstantRange CR = ConstantRange::getEmpty(BitWidth); + for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem; + ++ElemIdx) + CR = CR.unionWith(VC->getElementAsAPInt(ElemIdx)); + return CR; + } InstrInfoQuery IIQ(UseInstrInfo); - unsigned BitWidth = V->getType()->getScalarSizeInBits(); ConstantRange CR = ConstantRange::getFull(BitWidth); if (auto *BO = dyn_cast<BinaryOperator>(V)) { APInt Lower = APInt(BitWidth, 0); @@ -8755,11 +8890,12 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned, } else if (auto *II = dyn_cast<IntrinsicInst>(V)) CR = getRangeForIntrinsic(*II); else if (auto *SI = dyn_cast<SelectInst>(V)) { - APInt Lower = APInt(BitWidth, 0); - APInt Upper = APInt(BitWidth, 0); - // TODO: Return ConstantRange. - setLimitsForSelectPattern(*SI, Lower, Upper, IIQ); - CR = ConstantRange::getNonEmpty(Lower, Upper); + ConstantRange CRTrue = computeConstantRange( + SI->getTrueValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1); + ConstantRange CRFalse = computeConstantRange( + SI->getFalseValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1); + CR = CRTrue.unionWith(CRFalse); + CR = CR.intersectWith(getRangeForSelectPattern(*SI, IIQ)); } else if (isa<FPToUIInst>(V) || isa<FPToSIInst>(V)) { APInt Lower = APInt(BitWidth, 0); APInt Upper = APInt(BitWidth, 0); |
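The reworked select handling unions the ranges of the two arms and then intersects with whatever the matched select pattern already proves; a minimal stand-in for ConstantRange (an illustrative struct, not LLVM's API) showing the shape of that computation:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Minimal closed-interval stand-in for ConstantRange (illustrative only).
struct Interval {
  int64_t Lo, Hi;
  Interval unionWith(Interval O) const {
    return {std::min(Lo, O.Lo), std::max(Hi, O.Hi)};
  }
  Interval intersectWith(Interval O) const {
    return {std::max(Lo, O.Lo), std::min(Hi, O.Hi)};
  }
};

int main() {
  // Union of the two select arms, then intersect with the range implied by
  // the matched select pattern (e.g. a umin/umax/abs idiom).
  Interval TrueArm{0, 10}, FalseArm{20, 30}, FromPattern{0, 25};
  Interval CR = TrueArm.unionWith(FalseArm).intersectWith(FromPattern);
  assert(CR.Lo == 0 && CR.Hi == 25);
  return 0;
}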