Diffstat (limited to 'contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp')
-rw-r--r--   contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp   511
1 file changed, 400 insertions(+), 111 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
index 941a68c5e6fd..d7510c899101 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -56,8 +56,8 @@ static Value *simplifyFPUnOp(unsigned, Value *, const FastMathFlags &,
                              const SimplifyQuery &, unsigned);
 static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &,
                             unsigned);
-static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &,
-                              const SimplifyQuery &, unsigned);
+static Value *SimplifyBinOp(unsigned, Value *, Value *, const FastMathFlags &,
+                            const SimplifyQuery &, unsigned);
 static Value *SimplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &,
                               unsigned);
 static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
@@ -137,6 +137,71 @@ static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS,
          CRHS == LHS;
 }
 
+/// Simplify comparison with true or false branch of select:
+///   %sel = select i1 %cond, i32 %tv, i32 %fv
+///   %cmp = icmp sle i32 %sel, %rhs
+/// Compose a new comparison by substituting %sel with either %tv or %fv
+/// and see if it simplifies.
+static Value *simplifyCmpSelCase(CmpInst::Predicate Pred, Value *LHS,
+                                 Value *RHS, Value *Cond,
+                                 const SimplifyQuery &Q, unsigned MaxRecurse,
+                                 Constant *TrueOrFalse) {
+  Value *SimplifiedCmp = SimplifyCmpInst(Pred, LHS, RHS, Q, MaxRecurse);
+  if (SimplifiedCmp == Cond) {
+    // %cmp simplified to the select condition (%cond).
+    return TrueOrFalse;
+  } else if (!SimplifiedCmp && isSameCompare(Cond, Pred, LHS, RHS)) {
+    // It didn't simplify. However, if the composed comparison is equivalent
+    // to the select condition (%cond), then we can replace it.
+    return TrueOrFalse;
+  }
+  return SimplifiedCmp;
+}
+
+/// Simplify comparison with the true branch of a select.
+static Value *simplifyCmpSelTrueCase(CmpInst::Predicate Pred, Value *LHS,
+                                     Value *RHS, Value *Cond,
+                                     const SimplifyQuery &Q,
+                                     unsigned MaxRecurse) {
+  return simplifyCmpSelCase(Pred, LHS, RHS, Cond, Q, MaxRecurse,
+                            getTrue(Cond->getType()));
+}
+
+/// Simplify comparison with the false branch of a select.
+static Value *simplifyCmpSelFalseCase(CmpInst::Predicate Pred, Value *LHS,
+                                      Value *RHS, Value *Cond,
+                                      const SimplifyQuery &Q,
+                                      unsigned MaxRecurse) {
+  return simplifyCmpSelCase(Pred, LHS, RHS, Cond, Q, MaxRecurse,
+                            getFalse(Cond->getType()));
+}
+
+/// We know the comparison with both branches of the select can be simplified,
+/// but they are not equal. This routine handles some logical simplifications.
+static Value *handleOtherCmpSelSimplifications(Value *TCmp, Value *FCmp,
+                                               Value *Cond,
+                                               const SimplifyQuery &Q,
+                                               unsigned MaxRecurse) {
+  // If the false value simplified to false, then the result of the compare
+  // is equal to "Cond && TCmp". This also catches the case when the false
+  // value simplified to false and the true value to true, returning "Cond".
+  if (match(FCmp, m_Zero()))
+    if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse))
+      return V;
+  // If the true value simplified to true, then the result of the compare
+  // is equal to "Cond || FCmp".
+  if (match(TCmp, m_One()))
+    if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse))
+      return V;
+  // Finally, if the false value simplified to true and the true value to
+  // false, then the result of the compare is equal to "!Cond".
+  if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
+    if (Value *V = SimplifyXorInst(
+            Cond, Constant::getAllOnesValue(Cond->getType()), Q, MaxRecurse))
+      return V;
+
+  return nullptr;
+}
+
 /// Does the given value dominate the specified phi node?
 static bool valueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
   Instruction *I = dyn_cast<Instruction>(V);
@@ -398,6 +463,12 @@ static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS,
 /// In the case of a comparison with a select instruction, try to simplify the
 /// comparison by seeing whether both branches of the select result in the same
 /// value. Returns the common value if so, otherwise returns null.
+/// For example, if we have:
+///   %tmp = select i1 %cmp, i32 1, i32 2
+///   %cmp1 = icmp sle i32 %tmp, 3
+/// we can simplify %cmp1 to true, because both branches of the select are
+/// less than 3. We compose a new comparison by substituting %tmp with both
+/// branches of the select and see if it can be simplified.
 static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
                                   Value *RHS, const SimplifyQuery &Q,
                                   unsigned MaxRecurse) {
@@ -418,32 +489,14 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
   // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it.
   // Does "cmp TV, RHS" simplify?
-  Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, Q, MaxRecurse);
-  if (TCmp == Cond) {
-    // It not only simplified, it simplified to the select condition. Replace
-    // it with 'true'.
-    TCmp = getTrue(Cond->getType());
-  } else if (!TCmp) {
-    // It didn't simplify. However if "cmp TV, RHS" is equal to the select
-    // condition then we can replace it with 'true'. Otherwise give up.
-    if (!isSameCompare(Cond, Pred, TV, RHS))
-      return nullptr;
-    TCmp = getTrue(Cond->getType());
-  }
+  Value *TCmp = simplifyCmpSelTrueCase(Pred, TV, RHS, Cond, Q, MaxRecurse);
+  if (!TCmp)
+    return nullptr;
 
   // Does "cmp FV, RHS" simplify?
-  Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, Q, MaxRecurse);
-  if (FCmp == Cond) {
-    // It not only simplified, it simplified to the select condition. Replace
-    // it with 'false'.
-    FCmp = getFalse(Cond->getType());
-  } else if (!FCmp) {
-    // It didn't simplify. However if "cmp FV, RHS" is equal to the select
-    // condition then we can replace it with 'false'. Otherwise give up.
-    if (!isSameCompare(Cond, Pred, FV, RHS))
-      return nullptr;
-    FCmp = getFalse(Cond->getType());
-  }
+  Value *FCmp = simplifyCmpSelFalseCase(Pred, FV, RHS, Cond, Q, MaxRecurse);
+  if (!FCmp)
+    return nullptr;
 
   // If both sides simplified to the same value, then use it as the result of
   // the original comparison.
@@ -452,26 +505,8 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
 
   // The remaining cases only make sense if the select condition has the same
   // type as the result of the comparison, so bail out if this is not so.
-  if (Cond->getType()->isVectorTy() != RHS->getType()->isVectorTy())
-    return nullptr;
-  // If the false value simplified to false, then the result of the compare
-  // is equal to "Cond && TCmp". This also catches the case when the false
-  // value simplified to false and the true value to true, returning "Cond".
-  if (match(FCmp, m_Zero()))
-    if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse))
-      return V;
-  // If the true value simplified to true, then the result of the compare
-  // is equal to "Cond || FCmp".
-  if (match(TCmp, m_One()))
-    if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse))
-      return V;
-  // Finally, if the false value simplified to true and the true value to
-  // false, then the result of the compare is equal to "!Cond".
-  if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
-    if (Value *V =
-        SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
-                        Q, MaxRecurse))
-      return V;
+  if (Cond->getType()->isVectorTy() == RHS->getType()->isVectorTy())
+    return handleOtherCmpSelSimplifications(TCmp, FCmp, Cond, Q, MaxRecurse);
 
   return nullptr;
 }
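The substitution idea the new simplifyCmpSelCase helpers implement can be checked outside of LLVM. A minimal standalone C++ sketch of the doc-comment example (illustration only, not part of the patch; the function name is invented):

#include <cassert>

// Models "%tmp = select i1 c, i32 1, i32 2; %cmp1 = icmp sle i32 %tmp, 3".
// Substituting both select arms into the compare gives 1 <= 3 and 2 <= 3;
// both fold to true, so the compare folds to true for either condition.
static bool cmpOverSelect(bool c) {
  int tmp = c ? 1 : 2;
  return tmp <= 3;
}

int main() {
  assert(cmpOverSelect(true) && cmpOverSelect(false));
  return 0;
}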
@@ -543,10 +578,16 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
 
   // Evaluate the BinOp on the incoming phi values.
   Value *CommonValue = nullptr;
-  for (Value *Incoming : PI->incoming_values()) {
+  for (unsigned u = 0, e = PI->getNumIncomingValues(); u < e; ++u) {
+    Value *Incoming = PI->getIncomingValue(u);
+    Instruction *InTI = PI->getIncomingBlock(u)->getTerminator();
     // If the incoming value is the phi node itself, it can safely be skipped.
     if (Incoming == PI) continue;
-    Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse);
+    // Change the context instruction to the "edge" that flows into the phi.
+    // This is important because that is where the incoming value is actually
+    // "evaluated", even though it is used later somewhere else.
+    Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q.getWithInstruction(InTI),
+                               MaxRecurse);
     // If the operation failed to simplify, or simplified to a different value
     // to previously, then give up.
     if (!V || (CommonValue && V != CommonValue))
@@ -656,16 +697,16 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
                                                 bool AllowNonInbounds = false) {
   assert(V->getType()->isPtrOrPtrVectorTy());
 
-  Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
-  APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth());
+  Type *IntIdxTy = DL.getIndexType(V->getType())->getScalarType();
+  APInt Offset = APInt::getNullValue(IntIdxTy->getIntegerBitWidth());
 
   V = V->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds);
   // As that strip may trace through `addrspacecast`, need to sext or trunc
   // the offset calculated.
-  IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
-  Offset = Offset.sextOrTrunc(IntPtrTy->getIntegerBitWidth());
+  IntIdxTy = DL.getIndexType(V->getType())->getScalarType();
+  Offset = Offset.sextOrTrunc(IntIdxTy->getIntegerBitWidth());
 
-  Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset);
+  Constant *OffsetIntPtr = ConstantInt::get(IntIdxTy, Offset);
   if (V->getType()->isVectorTy())
     return ConstantVector::getSplat(V->getType()->getVectorNumElements(),
                                     OffsetIntPtr);
@@ -1371,7 +1412,8 @@ Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
 /// Commuted variants are assumed to be handled by calling this function again
 /// with the parameters swapped.
 static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
-                                         ICmpInst *UnsignedICmp, bool IsAnd) {
+                                         ICmpInst *UnsignedICmp, bool IsAnd,
+                                         const SimplifyQuery &Q) {
   Value *X, *Y;
 
   ICmpInst::Predicate EqPred;
@@ -1380,6 +1422,59 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
     return nullptr;
 
   ICmpInst::Predicate UnsignedPred;
+
+  Value *A, *B;
+  // Y = (A - B);
+  if (match(Y, m_Sub(m_Value(A), m_Value(B)))) {
+    if (match(UnsignedICmp,
+              m_c_ICmp(UnsignedPred, m_Specific(A), m_Specific(B))) &&
+        ICmpInst::isUnsigned(UnsignedPred)) {
+      if (UnsignedICmp->getOperand(0) != A)
+        UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred);
+
+      // A >=/<= B || (A - B) != 0 <--> true
+      if ((UnsignedPred == ICmpInst::ICMP_UGE ||
+           UnsignedPred == ICmpInst::ICMP_ULE) &&
+          EqPred == ICmpInst::ICMP_NE && !IsAnd)
+        return ConstantInt::getTrue(UnsignedICmp->getType());
+      // A </> B && (A - B) == 0 <--> false
+      if ((UnsignedPred == ICmpInst::ICMP_ULT ||
+           UnsignedPred == ICmpInst::ICMP_UGT) &&
+          EqPred == ICmpInst::ICMP_EQ && IsAnd)
+        return ConstantInt::getFalse(UnsignedICmp->getType());
+
+      // A </> B && (A - B) != 0 <--> A </> B
+      // A </> B || (A - B) != 0 <--> (A - B) != 0
+      if (EqPred == ICmpInst::ICMP_NE && (UnsignedPred == ICmpInst::ICMP_ULT ||
+                                          UnsignedPred == ICmpInst::ICMP_UGT))
+        return IsAnd ? UnsignedICmp : ZeroICmp;
+
+      // A <=/>= B && (A - B) == 0 <--> (A - B) == 0
+      // A <=/>= B || (A - B) == 0 <--> A <=/>= B
+      if (EqPred == ICmpInst::ICMP_EQ && (UnsignedPred == ICmpInst::ICMP_ULE ||
+                                          UnsignedPred == ICmpInst::ICMP_UGE))
+        return IsAnd ? ZeroICmp : UnsignedICmp;
+    }
+
+    // Given Y = (A - B)
+    //   Y >= A && Y != 0 --> Y >= A iff B != 0
+    //   Y <  A || Y == 0 --> Y <  A iff B != 0
+    if (match(UnsignedICmp,
+              m_c_ICmp(UnsignedPred, m_Specific(Y), m_Specific(A)))) {
+      if (UnsignedICmp->getOperand(0) != Y)
+        UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred);
+
+      if (UnsignedPred == ICmpInst::ICMP_UGE && IsAnd &&
+          EqPred == ICmpInst::ICMP_NE &&
+          isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
+        return UnsignedICmp;
+      if (UnsignedPred == ICmpInst::ICMP_ULT && !IsAnd &&
+          EqPred == ICmpInst::ICMP_EQ &&
+          isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
+        return UnsignedICmp;
+    }
+  }
+
   if (match(UnsignedICmp, m_ICmp(UnsignedPred, m_Value(X), m_Specific(Y))) &&
       ICmpInst::isUnsigned(UnsignedPred))
     ;
@@ -1395,19 +1490,33 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
   if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE)
     return IsAnd ? UnsignedICmp : ZeroICmp;
 
-  // X >= Y || Y != 0 --> true
+  // X <= Y && Y != 0 --> X <= Y iff X != 0
+  // X <= Y || Y != 0 --> Y != 0 iff X != 0
+  if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE &&
+      isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
+    return IsAnd ? UnsignedICmp : ZeroICmp;
+
+  // X >= Y && Y == 0 --> Y == 0
   // X >= Y || Y == 0 --> X >= Y
-  if (UnsignedPred == ICmpInst::ICMP_UGE && !IsAnd) {
-    if (EqPred == ICmpInst::ICMP_NE)
-      return getTrue(UnsignedICmp->getType());
-    return UnsignedICmp;
-  }
+  if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ)
+    return IsAnd ? ZeroICmp : UnsignedICmp;
+
+  // X > Y && Y == 0 --> Y == 0 iff X != 0
+  // X > Y || Y == 0 --> X > Y iff X != 0
+  if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ &&
+      isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
+    return IsAnd ? ZeroICmp : UnsignedICmp;
 
   // X < Y && Y == 0 --> false
   if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_EQ &&
       IsAnd)
     return getFalse(UnsignedICmp->getType());
 
+  // X >= Y || Y != 0 --> true
+  if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_NE &&
+      !IsAnd)
+    return getTrue(UnsignedICmp->getType());
+
   return nullptr;
 }
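The new range-check identities lend themselves to exhaustive verification at a small bit width. A standalone C++ sketch (illustration only; uint8_t wraparound models an i8 sub):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a) {
    for (unsigned b = 0; b < 256; ++b) {
      uint8_t A = a, B = b, Sub = A - B; // wraps exactly like an i8 sub
      // A >= B || (A - B) != 0 <--> true
      assert(A >= B || Sub != 0);
      // X > Y && Y == 0 <--> Y == 0, provided X is known non-zero
      // (the isKnownNonZero precondition above).
      if (A != 0)
        assert(((A > B) && B == 0) == (B == 0));
    }
  }
  return 0;
}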
@@ -1587,10 +1696,10 @@ static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1,
 }
 
 static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1,
-                                 const InstrInfoQuery &IIQ) {
-  if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true))
+                                 const SimplifyQuery &Q) {
+  if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true, Q))
     return X;
-  if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true))
+  if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true, Q))
     return X;
 
   if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1))
@@ -1604,9 +1713,9 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1,
   if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true))
     return X;
 
-  if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, IIQ))
+  if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, Q.IIQ))
     return X;
-  if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, IIQ))
+  if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, Q.IIQ))
     return X;
 
   return nullptr;
@@ -1660,10 +1769,10 @@ static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1,
 }
 
 static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1,
-                                const InstrInfoQuery &IIQ) {
-  if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false))
+                                const SimplifyQuery &Q) {
+  if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false, Q))
     return X;
-  if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false))
+  if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false, Q))
     return X;
 
   if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1))
@@ -1677,9 +1786,9 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1,
   if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false))
     return X;
 
-  if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, IIQ))
+  if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, Q.IIQ))
     return X;
-  if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, IIQ))
+  if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, Q.IIQ))
     return X;
 
   return nullptr;
@@ -1738,8 +1847,8 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q,
   auto *ICmp0 = dyn_cast<ICmpInst>(Op0);
   auto *ICmp1 = dyn_cast<ICmpInst>(Op1);
   if (ICmp0 && ICmp1)
-    V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q.IIQ)
-              : simplifyOrOfICmps(ICmp0, ICmp1, Q.IIQ);
+    V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q)
+              : simplifyOrOfICmps(ICmp0, ICmp1, Q);
 
   auto *FCmp0 = dyn_cast<FCmpInst>(Op0);
   auto *FCmp1 = dyn_cast<FCmpInst>(Op1);
@@ -1759,6 +1868,77 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q,
   return nullptr;
 }
 
+/// Check that Op1 is in the expected form, i.e.:
+///   %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???)
+///   %Op1 = extractvalue { i4, i1 } %Agg, 1
+static bool omitCheckForZeroBeforeMulWithOverflowInternal(Value *Op1,
+                                                          Value *X) {
+  auto *Extract = dyn_cast<ExtractValueInst>(Op1);
+  // We should only be extracting the overflow bit.
+  if (!Extract || !Extract->getIndices().equals(1))
+    return false;
+  Value *Agg = Extract->getAggregateOperand();
+  // This should be a multiplication-with-overflow intrinsic.
+  if (!match(Agg, m_CombineOr(m_Intrinsic<Intrinsic::umul_with_overflow>(),
+                              m_Intrinsic<Intrinsic::smul_with_overflow>())))
+    return false;
+  // One of its multipliers should be the value we checked for zero before.
+  if (!match(Agg, m_CombineOr(m_Argument<0>(m_Specific(X)),
+                              m_Argument<1>(m_Specific(X)))))
+    return false;
+  return true;
+}
+
+/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some
+/// other form of check, e.g. one that was using division; it may have been
+/// guarded against division-by-zero. We can drop that check now.
+/// Look for:
+///   %Op0 = icmp ne i4 %X, 0
+///   %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???)
+///   %Op1 = extractvalue { i4, i1 } %Agg, 1
+///   %??? = and i1 %Op0, %Op1
+/// We can just return %Op1.
+static Value *omitCheckForZeroBeforeMulWithOverflow(Value *Op0, Value *Op1) {
+  ICmpInst::Predicate Pred;
+  Value *X;
+  if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) ||
+      Pred != ICmpInst::Predicate::ICMP_NE)
+    return nullptr;
+  // Is Op1 in the expected form?
+  if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X))
+    return nullptr;
+  // Can omit the 'and', and just return the overflow bit.
+  return Op1;
+}
+
+/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some
+/// other form of check, e.g. one that was using division; it may have been
+/// guarded against division-by-zero. We can drop that check now.
+/// Look for:
+///   %Op0 = icmp eq i4 %X, 0
+///   %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???)
+///   %Op1 = extractvalue { i4, i1 } %Agg, 1
+///   %NotOp1 = xor i1 %Op1, true
+///   %or = or i1 %Op0, %NotOp1
+/// We can just return %NotOp1.
+static Value *omitCheckForZeroBeforeInvertedMulWithOverflow(Value *Op0,
+                                                            Value *NotOp1) {
+  ICmpInst::Predicate Pred;
+  Value *X;
+  if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) ||
+      Pred != ICmpInst::Predicate::ICMP_EQ)
+    return nullptr;
+  // We expect the other operand of the 'or' to be a 'not'.
+  Value *Op1;
+  if (!match(NotOp1, m_Not(m_Value(Op1))))
+    return nullptr;
+  // Is Op1 in the expected form?
+  if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X))
+    return nullptr;
+  // Can omit the 'or', and just return the inverted overflow bit.
+  return NotOp1;
+}
+
 /// Given operands for an And, see if we can fold the result.
 /// If not, this returns null.
 static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
@@ -1813,6 +1993,14 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
       return Op0;
   }
 
+  // If we have a multiplication overflow check that is being 'and'ed with a
+  // check that one of the multipliers is not zero, we can omit the 'and', and
+  // only keep the overflow check.
+  if (Value *V = omitCheckForZeroBeforeMulWithOverflow(Op0, Op1))
+    return V;
+  if (Value *V = omitCheckForZeroBeforeMulWithOverflow(Op1, Op0))
+    return V;
+
   // A & (-A) = A if A is a power of two or zero.
   if (match(Op0, m_Neg(m_Specific(Op1))) ||
       match(Op1, m_Neg(m_Specific(Op0)))) {
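The guard-elimination fold added to SimplifyAndInst rests on one fact: a multiplication with a zero operand never overflows, so "X != 0" adds no information to the overflow bit. A brute-force check (illustration only; __builtin_mul_overflow is the GCC/Clang builtin that lowers to the same overflow intrinsics):

#include <cassert>
#include <cstdint>

static bool mulOverflows(uint8_t X, uint8_t Y) {
  uint8_t R;
  return __builtin_mul_overflow(X, Y, &R); // the umul.with.overflow bit
}

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y < 256; ++Y)
      // (X != 0) && overflow(X, Y) <--> overflow(X, Y)
      assert(((X != 0) && mulOverflows(X, Y)) == mulOverflows(X, Y));
  return 0;
}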
@@ -1987,6 +2175,14 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
   if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false))
     return V;
 
+  // If we have a multiplication overflow check that is being 'or'ed with a
+  // check that one of the multipliers is zero, we can omit the 'or', and only
+  // keep the inverted overflow check.
+  if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op0, Op1))
+    return V;
+  if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op1, Op0))
+    return V;
+
   // Try some generic simplifications for associative operations.
   if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q,
                                           MaxRecurse))
@@ -3529,6 +3725,9 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
       //   %sel = select i1 %cmp, i32 -2147483648, i32 %add
       //
       // We can't replace %sel with %add unless we strip away the flags.
+      // TODO: This is an unusual limitation because better analysis results in
+      //       worse simplification. InstCombine can do this fold more generally
+      //       by dropping the flags. Remove this fold to save compile-time?
       if (isa<OverflowingBinaryOperator>(B))
         if (Q.IIQ.hasNoSignedWrap(B) || Q.IIQ.hasNoUnsignedWrap(B))
           return nullptr;
@@ -3745,18 +3944,21 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
 
 /// Try to simplify a select instruction when its condition operand is a
 /// floating-point comparison.
-static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F) {
+static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F,
+                                     const SimplifyQuery &Q) {
   FCmpInst::Predicate Pred;
   if (!match(Cond, m_FCmp(Pred, m_Specific(T), m_Specific(F))) &&
       !match(Cond, m_FCmp(Pred, m_Specific(F), m_Specific(T))))
     return nullptr;
 
-  // TODO: The transform may not be valid with -0.0. An incomplete way of
-  // testing for that possibility is to check if at least one operand is a
-  // non-zero constant.
+  // This transform is safe if we do not have (do not care about) -0.0 or if
+  // at least one operand is known to not be -0.0. Otherwise, the select can
+  // change the sign of a zero operand.
+  bool HasNoSignedZeros = Q.CxtI && isa<FPMathOperator>(Q.CxtI) &&
+                          Q.CxtI->hasNoSignedZeros();
   const APFloat *C;
-  if ((match(T, m_APFloat(C)) && C->isNonZero()) ||
-      (match(F, m_APFloat(C)) && C->isNonZero())) {
+  if (HasNoSignedZeros || (match(T, m_APFloat(C)) && C->isNonZero()) ||
+      (match(F, m_APFloat(C)) && C->isNonZero())) {
     // (T == F) ? T : F --> F
     // (F == T) ? T : F --> F
     if (Pred == FCmpInst::FCMP_OEQ)
@@ -3794,6 +3996,15 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
       return FalseVal;
   }
 
+  // select i1 Cond, i1 true, i1 false --> i1 Cond
+  assert(Cond->getType()->isIntOrIntVectorTy(1) &&
+         "Select must have bool or bool vector condition");
+  assert(TrueVal->getType() == FalseVal->getType() &&
+         "Select must have same types for true/false ops");
+  if (Cond->getType() == TrueVal->getType() &&
+      match(TrueVal, m_One()) && match(FalseVal, m_ZeroInt()))
+    return Cond;
+
   // select ?, X, X -> X
   if (TrueVal == FalseVal)
     return TrueVal;
@@ -3807,7 +4018,7 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
           simplifySelectWithICmpCond(Cond, TrueVal, FalseVal, Q, MaxRecurse))
     return V;
 
-  if (Value *V = simplifySelectWithFCmp(Cond, TrueVal, FalseVal))
+  if (Value *V = simplifySelectWithFCmp(Cond, TrueVal, FalseVal, Q))
     return V;
 
   if (Value *V = foldSelectWithBinaryOp(Cond, TrueVal, FalseVal))
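The -0.0 hazard that the new HasNoSignedZeros check guards against is easy to reproduce: with T = +0.0 and F = -0.0, the compare T == F is true, so folding "(T == F) ? T : F" to F flips the sign of the resulting zero. Illustration only:

#include <cassert>
#include <cmath>

int main() {
  double T = +0.0, F = -0.0;
  assert(T == F);                 // IEEE compares the two zeros equal,
  double Sel = (T == F) ? T : F;  // so the select yields T, which is +0.0,
  assert(!std::signbit(Sel));
  assert(std::signbit(F));        // while the folded result F is -0.0.
  return 0;
}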
@@ -3865,7 +4076,7 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
       // The following transforms are only safe if the ptrtoint cast
       // doesn't truncate the pointers.
       if (Ops[1]->getType()->getScalarSizeInBits() ==
-          Q.DL.getIndexSizeInBits(AS)) {
+          Q.DL.getPointerSizeInBits(AS)) {
         auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * {
           if (match(P, m_Zero()))
             return Constant::getNullValue(GEPTy);
@@ -4250,6 +4461,30 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
     ShuffleVectorInst::commuteShuffleMask(Indices, InVecNumElts);
   }
 
+  // A splat of an inserted scalar constant becomes a vector constant:
+  // shuf (inselt ?, C, IndexC), undef, <IndexC, IndexC...> --> <C, C...>
+  // NOTE: We may have commuted above, so analyze the updated Indices, not the
+  //       original mask constant.
+  Constant *C;
+  ConstantInt *IndexC;
+  if (match(Op0, m_InsertElement(m_Value(), m_Constant(C),
+                                 m_ConstantInt(IndexC)))) {
+    // Match a splat shuffle mask of the insert index allowing undef elements.
+    int InsertIndex = IndexC->getZExtValue();
+    if (all_of(Indices, [InsertIndex](int MaskElt) {
+          return MaskElt == InsertIndex || MaskElt == -1;
+        })) {
+      assert(isa<UndefValue>(Op1) && "Expected undef operand 1 for splat");
+
+      // Shuffle mask undefs become undefined constant result elements.
+      SmallVector<Constant *, 16> VecC(MaskNumElts, C);
+      for (unsigned i = 0; i != MaskNumElts; ++i)
+        if (Indices[i] == -1)
+          VecC[i] = UndefValue::get(C->getType());
+      return ConstantVector::get(VecC);
+    }
+  }
+
   // A shuffle of a splat is always the splat itself. Legal if the shuffle's
   // value type is same as the input vectors' type.
   if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op0))
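A small model of the new shuffle fold (illustration only; the helper is invented, and undef lanes are modeled as nullopt): splatting the lane written by an insertelement of a constant produces a constant vector, with -1 (undef) mask lanes becoming undef elements.

#include <cassert>
#include <optional>
#include <vector>

static std::vector<std::optional<int>>
splatOfInsertedConstant(int C, int InsertIndex, const std::vector<int> &Mask) {
  std::vector<std::optional<int>> Result;
  for (int MaskElt : Mask) {
    // The fold only fires when every mask lane is the insert index or undef.
    assert(MaskElt == InsertIndex || MaskElt == -1);
    if (MaskElt == -1)
      Result.push_back(std::nullopt); // undef result element
    else
      Result.push_back(C);            // splatted constant element
  }
  return Result;
}

int main() {
  // shuf (inselt ?, 42, 2), undef, <2, 2, -1, 2> --> <42, 42, undef, 42>
  auto V = splatOfInsertedConstant(42, 2, {2, 2, -1, 2});
  assert(V[0] == 42 && V[1] == 42 && !V[2].has_value() && V[3] == 42);
  return 0;
}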
@@ -4324,14 +4559,16 @@ static Constant *propagateNaN(Constant *In) {
   return In;
 }
 
-static Constant *simplifyFPBinop(Value *Op0, Value *Op1) {
-  if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
-    return ConstantFP::getNaN(Op0->getType());
+/// Perform folds that are common to any floating-point operation. This implies
+/// transforms based on undef/NaN because the operation itself makes no
+/// difference to the result.
+static Constant *simplifyFPOp(ArrayRef<Value *> Ops) {
+  if (any_of(Ops, [](Value *V) { return isa<UndefValue>(V); }))
+    return ConstantFP::getNaN(Ops[0]->getType());
 
-  if (match(Op0, m_NaN()))
-    return propagateNaN(cast<Constant>(Op0));
-  if (match(Op1, m_NaN()))
-    return propagateNaN(cast<Constant>(Op1));
+  for (Value *V : Ops)
+    if (match(V, m_NaN()))
+      return propagateNaN(cast<Constant>(V));
 
   return nullptr;
 }
@@ -4343,7 +4580,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
   if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q))
     return C;
 
-  if (Constant *C = simplifyFPBinop(Op0, Op1))
+  if (Constant *C = simplifyFPOp({Op0, Op1}))
     return C;
 
   // fadd X, -0 ==> X
@@ -4390,7 +4627,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
   if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q))
     return C;
 
-  if (Constant *C = simplifyFPBinop(Op0, Op1))
+  if (Constant *C = simplifyFPOp({Op0, Op1}))
     return C;
 
   // fsub X, +0 ==> X
@@ -4430,23 +4667,27 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
   return nullptr;
 }
 
-/// Given the operands for an FMul, see if we can fold the result
-static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
-                               const SimplifyQuery &Q, unsigned MaxRecurse) {
-  if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q))
-    return C;
-
-  if (Constant *C = simplifyFPBinop(Op0, Op1))
+static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
+                              const SimplifyQuery &Q, unsigned MaxRecurse) {
+  if (Constant *C = simplifyFPOp({Op0, Op1}))
     return C;
 
   // fmul X, 1.0 ==> X
   if (match(Op1, m_FPOne()))
     return Op0;
 
+  // fmul 1.0, X ==> X
+  if (match(Op0, m_FPOne()))
+    return Op1;
+
   // fmul nnan nsz X, 0 ==> 0
   if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZeroFP()))
     return ConstantFP::getNullValue(Op0->getType());
 
+  // fmul nnan nsz 0, X ==> 0
+  if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZeroFP()))
+    return ConstantFP::getNullValue(Op1->getType());
+
   // sqrt(X) * sqrt(X) --> X, if we can:
   // 1. Remove the intermediate rounding (reassociate).
   // 2. Ignore non-zero negative numbers because sqrt would produce NAN.
@@ -4459,6 +4700,16 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
   return nullptr;
 }
 
+/// Given the operands for an FMul, see if we can fold the result
+static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+                               const SimplifyQuery &Q, unsigned MaxRecurse) {
+  if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q))
+    return C;
+
+  // Now apply simplifications that do not require rounding.
+  return SimplifyFMAFMul(Op0, Op1, FMF, Q, MaxRecurse);
+}
+
 Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                               const SimplifyQuery &Q) {
   return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit);
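Two properties behind simplifyFPOp and SimplifyFMAFMul, checked numerically (illustration only): a NaN operand propagates through any FP operation, and x * 1.0 is exact, so the fmul-by-1.0 fold needs no fast-math flags; "fmul X, 0 ==> 0", by contrast, is valid only under the nnan nsz flags it names.

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  double x = 0.1; // not exactly representable, yet x * 1.0 == x holds
  assert(x * 1.0 == x);

  double nan = std::numeric_limits<double>::quiet_NaN();
  assert(std::isnan(x + nan) && std::isnan(nan * x)); // NaN propagates

  // Why the X * 0 fold needs nnan and nsz:
  assert(std::isnan(nan * 0.0));    // NaN * 0 is NaN, not 0
  assert(std::signbit(-1.0 * 0.0)); // -1 * 0 is -0.0, not +0.0
  return 0;
}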
@@ -4475,12 +4726,17 @@ Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
   return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit);
 }
 
+Value *llvm::SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
+                             const SimplifyQuery &Q) {
+  return ::SimplifyFMAFMul(Op0, Op1, FMF, Q, RecursionLimit);
+}
+
 static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
                                const SimplifyQuery &Q, unsigned) {
   if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q))
     return C;
 
-  if (Constant *C = simplifyFPBinop(Op0, Op1))
+  if (Constant *C = simplifyFPOp({Op0, Op1}))
     return C;
 
   // X / 1.0 -> X
@@ -4525,7 +4781,7 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
   if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q))
     return C;
 
-  if (Constant *C = simplifyFPBinop(Op0, Op1))
+  if (Constant *C = simplifyFPOp({Op0, Op1}))
     return C;
 
   // Unlike fdiv, the result of frem always matches the sign of the dividend.
@@ -4564,8 +4820,7 @@ static Value *simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q,
 
 /// Given the operand for a UnaryOperator, see if we can fold the result.
 /// If not, this returns null.
-/// In contrast to SimplifyUnOp, try to use FastMathFlag when folding the
-/// result. In case we don't need FastMathFlags, simply fall to SimplifyUnOp.
+/// Try to use FastMathFlags when folding the result.
 static Value *simplifyFPUnOp(unsigned Opcode, Value *Op,
                              const FastMathFlags &FMF, const SimplifyQuery &Q,
                              unsigned MaxRecurse) {
@@ -4581,8 +4836,8 @@ Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) {
   return ::simplifyUnOp(Opcode, Op, Q, RecursionLimit);
 }
 
-Value *llvm::SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
-                            const SimplifyQuery &Q) {
+Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
+                          const SimplifyQuery &Q) {
   return ::simplifyFPUnOp(Opcode, Op, FMF, Q, RecursionLimit);
 }
 
@@ -4634,11 +4889,10 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
 
 /// Given operands for a BinaryOperator, see if we can fold the result.
 /// If not, this returns null.
-/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the
-/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp.
-static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
-                              const FastMathFlags &FMF, const SimplifyQuery &Q,
-                              unsigned MaxRecurse) {
+/// Try to use FastMathFlags when folding the result.
+static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+                            const FastMathFlags &FMF, const SimplifyQuery &Q,
+                            unsigned MaxRecurse) {
   switch (Opcode) {
   case Instruction::FAdd:
     return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse);
@@ -4658,9 +4912,9 @@ Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
   return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit);
 }
 
-Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
-                             FastMathFlags FMF, const SimplifyQuery &Q) {
-  return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit);
+Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+                           FastMathFlags FMF, const SimplifyQuery &Q) {
+  return ::SimplifyBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit);
 }
 
 /// Given operands for a CmpInst, see if we can fold the result.
@@ -4906,6 +5160,16 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
       return Op0;
     }
     break;
+  case Intrinsic::copysign:
+    // copysign X, X --> X
+    if (Op0 == Op1)
+      return Op0;
+    // copysign -X, X --> X
+    // copysign X, -X --> -X
+    if (match(Op0, m_FNeg(m_Specific(Op1))) ||
+        match(Op1, m_FNeg(m_Specific(Op0))))
+      return Op1;
+    break;
   case Intrinsic::maxnum:
   case Intrinsic::minnum:
   case Intrinsic::maximum:
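The two copysign identities added above, restated numerically (illustration only): only the sign bit of the second operand matters, so copysign(x, x) is x and copysign(x, -x) is -x.

#include <cassert>
#include <cmath>

int main() {
  for (double x : {1.5, -2.25, 0.0}) {
    assert(std::copysign(x, x) == x);   // copysign X, X --> X
    assert(std::copysign(-x, x) == x);  // copysign -X, X --> X
    assert(std::copysign(x, -x) == -x); // copysign X, -X --> -X
  }
  return 0;
}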
@@ -5009,6 +5273,15 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
     }
     return nullptr;
   }
+  case Intrinsic::fma:
+  case Intrinsic::fmuladd: {
+    Value *Op0 = Call->getArgOperand(0);
+    Value *Op1 = Call->getArgOperand(1);
+    Value *Op2 = Call->getArgOperand(2);
+    if (Value *V = simplifyFPOp({ Op0, Op1, Op2 }))
+      return V;
+    return nullptr;
+  }
   default:
     return nullptr;
   }
@@ -5046,6 +5319,19 @@ Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) {
   return ConstantFoldCall(Call, F, ConstantArgs, Q.TLI);
 }
 
+/// Given operands for a Freeze, see if we can fold the result.
+static Value *SimplifyFreezeInst(Value *Op0) {
+  // Use a utility function defined in ValueTracking.
+  if (llvm::isGuaranteedNotToBeUndefOrPoison(Op0))
+    return Op0;
+  // We have room for improvement.
+  return nullptr;
+}
+
+Value *llvm::SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) {
+  return ::SimplifyFreezeInst(Op0);
+}
+
 /// See if we can compute a simplified version of this instruction.
 /// If not, this returns null.
@@ -5188,6 +5474,9 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
     Result = SimplifyCall(cast<CallInst>(I), Q);
     break;
   }
+  case Instruction::Freeze:
+    Result = SimplifyFreezeInst(I->getOperand(0), Q);
+    break;
 #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
 #include "llvm/IR/Instruction.def"
 #undef HANDLE_CAST_INST
@@ -5308,7 +5597,7 @@ const SimplifyQuery getBestSimplifyQuery(Pass &P, Function &F) {
   auto *DTWP = P.getAnalysisIfAvailable<DominatorTreeWrapperPass>();
   auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
   auto *TLIWP = P.getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
-  auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr;
+  auto *TLI = TLIWP ? &TLIWP->getTLI(F) : nullptr;
   auto *ACWP = P.getAnalysisIfAvailable<AssumptionCacheTracker>();
   auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr;
   return {F.getParent()->getDataLayout(), TLI, DT, AC};
