diff options
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp')
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 226 |
1 file changed, 154 insertions, 72 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index ce2b913dba61..4a1e82ae9c1d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -38,15 +38,16 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" -#include "llvm/Transforms/InstCombine/InstCombineWorklist.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" #include <cassert> #include <utility> +#define DEBUG_TYPE "instcombine" +#include "llvm/Transforms/Utils/InstructionWorklist.h" + using namespace llvm; using namespace PatternMatch; -#define DEBUG_TYPE "instcombine" static Value *createMinMax(InstCombiner::BuilderTy &Builder, SelectPatternFlavor SPF, Value *A, Value *B) { @@ -165,7 +166,7 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp, // simplify/reduce the instructions. APInt TC = *SelTC; APInt FC = *SelFC; - if (!TC.isNullValue() && !FC.isNullValue()) { + if (!TC.isZero() && !FC.isZero()) { // If the select constants differ by exactly one bit and that's the same // bit that is masked and checked by the select condition, the select can // be replaced by bitwise logic to set/clear one bit of the constant result. @@ -202,7 +203,7 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp, // Determine which shift is needed to transform result of the 'and' into the // desired result. - const APInt &ValC = !TC.isNullValue() ? TC : FC; + const APInt &ValC = !TC.isZero() ? TC : FC; unsigned ValZeros = ValC.logBase2(); unsigned AndZeros = AndMask.logBase2(); @@ -224,7 +225,7 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp, // Okay, now we know that everything is set up, we just don't know whether we // have a icmp_ne or icmp_eq and whether the true or false val is the zero. 
- bool ShouldNotVal = !TC.isNullValue(); + bool ShouldNotVal = !TC.isZero(); ShouldNotVal ^= Pred == ICmpInst::ICMP_NE; if (ShouldNotVal) V = Builder.CreateXor(V, ValC); @@ -319,8 +320,16 @@ Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI, Value *X, *Y; if (match(TI, m_FNeg(m_Value(X))) && match(FI, m_FNeg(m_Value(Y))) && (TI->hasOneUse() || FI->hasOneUse())) { + // Intersect FMF from the fneg instructions and union those with the select. + FastMathFlags FMF = TI->getFastMathFlags(); + FMF &= FI->getFastMathFlags(); + FMF |= SI.getFastMathFlags(); Value *NewSel = Builder.CreateSelect(Cond, X, Y, SI.getName() + ".v", &SI); - return UnaryOperator::CreateFNegFMF(NewSel, TI); + if (auto *NewSelI = dyn_cast<Instruction>(NewSel)) + NewSelI->setFastMathFlags(FMF); + Instruction *NewFNeg = UnaryOperator::CreateFNeg(NewSel); + NewFNeg->setFastMathFlags(FMF); + return NewFNeg; } // Min/max intrinsic with a common operand can have the common operand pulled @@ -420,10 +429,9 @@ Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI, } static bool isSelect01(const APInt &C1I, const APInt &C2I) { - if (!C1I.isNullValue() && !C2I.isNullValue()) // One side must be zero. + if (!C1I.isZero() && !C2I.isZero()) // One side must be zero. return false; - return C1I.isOneValue() || C1I.isAllOnesValue() || - C2I.isOneValue() || C2I.isAllOnesValue(); + return C1I.isOne() || C1I.isAllOnes() || C2I.isOne() || C2I.isAllOnes(); } /// Try to fold the select into one of the operands to allow further @@ -715,6 +723,58 @@ static Instruction *foldSetClearBits(SelectInst &Sel, return nullptr; } +// select (x == 0), 0, x * y --> freeze(y) * x +// select (y == 0), 0, x * y --> freeze(x) * y +// select (x == 0), undef, x * y --> freeze(y) * x +// select (x == undef), 0, x * y --> freeze(y) * x +// Usage of mul instead of 0 will make the result more poisonous, +// so the operand that was not checked in the condition should be frozen. 
+// The latter folding is applied only when a constant compared with x +// is a vector consisting of 0 and undefs. If a constant compared with x +// is a scalar undefined value or undefined vector then an expression +// should be already folded into a constant. +static Instruction *foldSelectZeroOrMul(SelectInst &SI, InstCombinerImpl &IC) { + auto *CondVal = SI.getCondition(); + auto *TrueVal = SI.getTrueValue(); + auto *FalseVal = SI.getFalseValue(); + Value *X, *Y; + ICmpInst::Predicate Predicate; + + // Assuming that constant compared with zero is not undef (but it may be + // a vector with some undef elements). Otherwise (when a constant is undef) + // the select expression should be already simplified. + if (!match(CondVal, m_ICmp(Predicate, m_Value(X), m_Zero())) || + !ICmpInst::isEquality(Predicate)) + return nullptr; + + if (Predicate == ICmpInst::ICMP_NE) + std::swap(TrueVal, FalseVal); + + // Check that TrueVal is a constant instead of matching it with m_Zero() + // to handle the case when it is a scalar undef value or a vector containing + // non-zero elements that are masked by undef elements in the compare + // constant. + auto *TrueValC = dyn_cast<Constant>(TrueVal); + if (TrueValC == nullptr || + !match(FalseVal, m_c_Mul(m_Specific(X), m_Value(Y))) || + !isa<Instruction>(FalseVal)) + return nullptr; + + auto *ZeroC = cast<Constant>(cast<Instruction>(CondVal)->getOperand(1)); + auto *MergedC = Constant::mergeUndefsWith(TrueValC, ZeroC); + // If X is compared with 0 then TrueVal could be either zero or undef. + // m_Zero matches vectors containing some undef elements, but for scalars + // m_Undef should be used explicitly. + if (!match(MergedC, m_Zero()) && !match(MergedC, m_Undef())) + return nullptr; + + auto *FalseValI = cast<Instruction>(FalseVal); + auto *FrY = IC.InsertNewInstBefore(new FreezeInst(Y, Y->getName() + ".fr"), + *FalseValI); + IC.replaceOperand(*FalseValI, FalseValI->getOperand(0) == Y ? 
0 : 1, FrY); + return IC.replaceInstUsesWith(SI, FalseValI); +} + /// Transform patterns such as (a > b) ? a - b : 0 into usub.sat(a, b). /// There are 8 commuted/swapped variants of this pattern. /// TODO: Also support a - UMIN(a,b) patterns. @@ -1229,8 +1289,8 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel, // Iff -C1 s<= C2 s<= C0-C1 // Also ULT predicate can also be UGT iff C0 != -1 (+invert result) // SLT predicate can also be SGT iff C2 != INT_MAX (+invert res.) -static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, - InstCombiner::BuilderTy &Builder) { +static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, + InstCombiner::BuilderTy &Builder) { Value *X = Sel0.getTrueValue(); Value *Sel1 = Sel0.getFalseValue(); @@ -1238,36 +1298,42 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, // Said condition must be one-use. if (!Cmp0.hasOneUse()) return nullptr; + ICmpInst::Predicate Pred0 = Cmp0.getPredicate(); Value *Cmp00 = Cmp0.getOperand(0); Constant *C0; if (!match(Cmp0.getOperand(1), m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C0)))) return nullptr; - // Canonicalize Cmp0 into the form we expect. + + if (!isa<SelectInst>(Sel1)) { + Pred0 = ICmpInst::getInversePredicate(Pred0); + std::swap(X, Sel1); + } + + // Canonicalize Cmp0 into ult or uge. // FIXME: we shouldn't care about lanes that are 'undef' in the end? - switch (Cmp0.getPredicate()) { + switch (Pred0) { case ICmpInst::Predicate::ICMP_ULT: + case ICmpInst::Predicate::ICMP_UGE: + // Although icmp ult %x, 0 is an unusual thing to try and should generally + // have been simplified, it does not verify with undef inputs so ensure we + // are not in a strange state. + if (!match(C0, m_SpecificInt_ICMP( + ICmpInst::Predicate::ICMP_NE, + APInt::getZero(C0->getType()->getScalarSizeInBits())))) + return nullptr; break; // Great! 
case ICmpInst::Predicate::ICMP_ULE: - // We'd have to increment C0 by one, and for that it must not have all-ones - // element, but then it would have been canonicalized to 'ult' before - // we get here. So we can't do anything useful with 'ule'. - return nullptr; case ICmpInst::Predicate::ICMP_UGT: - // We want to canonicalize it to 'ult', so we'll need to increment C0, - // which again means it must not have any all-ones elements. + // We want to canonicalize it to 'ult' or 'uge', so we'll need to increment + // C0, which again means it must not have any all-ones elements. if (!match(C0, - m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_NE, - APInt::getAllOnesValue( - C0->getType()->getScalarSizeInBits())))) + m_SpecificInt_ICMP( + ICmpInst::Predicate::ICMP_NE, + APInt::getAllOnes(C0->getType()->getScalarSizeInBits())))) return nullptr; // Can't do, have all-ones element[s]. C0 = InstCombiner::AddOne(C0); - std::swap(X, Sel1); break; - case ICmpInst::Predicate::ICMP_UGE: - // The only way we'd get this predicate if this `icmp` has extra uses, - // but then we won't be able to do this fold. - return nullptr; default: return nullptr; // Unknown predicate. } @@ -1277,11 +1343,16 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, if (!Sel1->hasOneUse()) return nullptr; + // If the types do not match, look through any truncs to the underlying + // instruction. + if (Cmp00->getType() != X->getType() && X->hasOneUse()) + match(X, m_TruncOrSelf(m_Value(X))); + // We now can finish matching the condition of the outermost select: // it should either be the X itself, or an addition of some constant to X. 
Constant *C1; if (Cmp00 == X) - C1 = ConstantInt::getNullValue(Sel0.getType()); + C1 = ConstantInt::getNullValue(X->getType()); else if (!match(Cmp00, m_Add(m_Specific(X), m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C1))))) @@ -1335,6 +1406,8 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, // The thresholds of this clamp-like pattern. auto *ThresholdLowIncl = ConstantExpr::getNeg(C1); auto *ThresholdHighExcl = ConstantExpr::getSub(C0, C1); + if (Pred0 == ICmpInst::Predicate::ICMP_UGE) + std::swap(ThresholdLowIncl, ThresholdHighExcl); // The fold has a precondition 1: C2 s>= ThresholdLow auto *Precond1 = ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_SGE, C2, @@ -1347,15 +1420,29 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, if (!match(Precond2, m_One())) return nullptr; + // If we are matching from a truncated input, we need to sext the + // ReplacementLow and ReplacementHigh values. Only do the transform if they + // are free to extend due to being constants. + if (X->getType() != Sel0.getType()) { + Constant *LowC, *HighC; + if (!match(ReplacementLow, m_ImmConstant(LowC)) || + !match(ReplacementHigh, m_ImmConstant(HighC))) + return nullptr; + ReplacementLow = ConstantExpr::getSExt(LowC, X->getType()); + ReplacementHigh = ConstantExpr::getSExt(HighC, X->getType()); + } + // All good, finally emit the new pattern. Value *ShouldReplaceLow = Builder.CreateICmpSLT(X, ThresholdLowIncl); Value *ShouldReplaceHigh = Builder.CreateICmpSGE(X, ThresholdHighExcl); Value *MaybeReplacedLow = Builder.CreateSelect(ShouldReplaceLow, ReplacementLow, X); - Instruction *MaybeReplacedHigh = - SelectInst::Create(ShouldReplaceHigh, ReplacementHigh, MaybeReplacedLow); - return MaybeReplacedHigh; + // Create the final select. If we looked through a truncate above, we will + // need to retruncate the result. 
+ Value *MaybeReplacedHigh = Builder.CreateSelect( + ShouldReplaceHigh, ReplacementHigh, MaybeReplacedLow); + return Builder.CreateTrunc(MaybeReplacedHigh, Sel0.getType()); } // If we have @@ -1446,8 +1533,8 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, if (Instruction *NewAbs = canonicalizeAbsNabs(SI, *ICI, *this)) return NewAbs; - if (Instruction *NewAbs = canonicalizeClampLike(SI, *ICI, Builder)) - return NewAbs; + if (Value *V = canonicalizeClampLike(SI, *ICI, Builder)) + return replaceInstUsesWith(SI, V); if (Instruction *NewSel = tryToReuseConstantFromSelectInComparison(SI, *ICI, *this)) @@ -1816,9 +1903,7 @@ foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) { m_Value(TrueVal), m_Value(FalseVal)))) return false; - auto IsZeroOrOne = [](const APInt &C) { - return C.isNullValue() || C.isOneValue(); - }; + auto IsZeroOrOne = [](const APInt &C) { return C.isZero() || C.isOne(); }; auto IsMinMax = [&](Value *Min, Value *Max) { APInt MinVal = APInt::getSignedMinValue(Ty->getScalarSizeInBits()); APInt MaxVal = APInt::getSignedMaxValue(Ty->getScalarSizeInBits()); @@ -2182,7 +2267,7 @@ static Instruction *moveAddAfterMinMax(SelectPatternFlavor SPF, Value *X, } /// Match a sadd_sat or ssub_sat which is using min/max to clamp the value. -Instruction *InstCombinerImpl::matchSAddSubSat(SelectInst &MinMax1) { +Instruction *InstCombinerImpl::matchSAddSubSat(Instruction &MinMax1) { Type *Ty = MinMax1.getType(); // We are looking for a tree of: @@ -2212,23 +2297,14 @@ Instruction *InstCombinerImpl::matchSAddSubSat(SelectInst &MinMax1) { if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth)) return nullptr; - // Also make sure that the number of uses is as expected. The "3"s are for the - // the two items of min/max (the compare and the select). - if (MinMax2->hasNUsesOrMore(3) || AddSub->hasNUsesOrMore(3)) + // Also make sure that the number of uses is as expected. 
The 3 is for + the two items of the compare and the select, or 2 from a min/max. + unsigned ExpUses = isa<IntrinsicInst>(MinMax1) ? 2 : 3; + if (MinMax2->hasNUsesOrMore(ExpUses) || AddSub->hasNUsesOrMore(ExpUses)) return nullptr; // Create the new type (which can be a vector type) Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth); - // Match the two extends from the add/sub - Value *A, *B; - if(!match(AddSub, m_BinOp(m_SExt(m_Value(A)), m_SExt(m_Value(B))))) - return nullptr; - // And check the incoming values are of a type smaller than or equal to the - // size of the saturation. Otherwise the higher bits can cause different - // results. - if (A->getType()->getScalarSizeInBits() > NewBitWidth || - B->getType()->getScalarSizeInBits() > NewBitWidth) - return nullptr; Intrinsic::ID IntrinsicID; if (AddSub->getOpcode() == Instruction::Add) + IntrinsicID = Intrinsic::sadd_sat; + else if (AddSub->getOpcode() == Instruction::Sub) + IntrinsicID = Intrinsic::ssub_sat; + else + return nullptr; + + // The two operands of the add/sub must be nsw-truncatable to the NewTy. This + // is usually achieved via a sext from a smaller type. 
+ if (ComputeMinSignedBits(AddSub->getOperand(0), 0, AddSub) > NewBitWidth || + ComputeMinSignedBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth) + return nullptr; + // Finally create and return the sat intrinsic, truncated to the new type Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy); - Value *AT = Builder.CreateSExt(A, NewTy); - Value *BT = Builder.CreateSExt(B, NewTy); + Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy); + Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy); Value *Sat = Builder.CreateCall(F, {AT, BT}); return CastInst::Create(Instruction::SExt, Sat, Ty); } @@ -2432,7 +2514,7 @@ Instruction *InstCombinerImpl::foldVectorSelect(SelectInst &Sel) { unsigned NumElts = VecTy->getNumElements(); APInt UndefElts(NumElts, 0); - APInt AllOnesEltMask(APInt::getAllOnesValue(NumElts)); + APInt AllOnesEltMask(APInt::getAllOnes(NumElts)); if (Value *V = SimplifyDemandedVectorElts(&Sel, AllOnesEltMask, UndefElts)) { if (V != &Sel) return replaceInstUsesWith(Sel, V); @@ -2754,11 +2836,16 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { /* IsAnd */ IsAnd)) return I; - if (auto *ICmp0 = dyn_cast<ICmpInst>(CondVal)) - if (auto *ICmp1 = dyn_cast<ICmpInst>(Op1)) + if (auto *ICmp0 = dyn_cast<ICmpInst>(CondVal)) { + if (auto *ICmp1 = dyn_cast<ICmpInst>(Op1)) { if (auto *V = foldAndOrOfICmpsOfAndWithPow2(ICmp0, ICmp1, &SI, IsAnd, /* IsLogical */ true)) return replaceInstUsesWith(SI, V); + + if (auto *V = foldEqOfParts(ICmp0, ICmp1, IsAnd)) + return replaceInstUsesWith(SI, V); + } + } } // select (select a, true, b), c, false -> select a, c, false @@ -2863,14 +2950,10 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { } // Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need - // fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work. We - // also require nnan because we do not want to unintentionally change the - // sign of a NaN value. 
+ // fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work. // (X <= +/-0.0) ? (0.0 - X) : X --> fabs(X) - Instruction *FSub; if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) && match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(FalseVal))) && - match(TrueVal, m_Instruction(FSub)) && FSub->hasNoNaNs() && (Pred == FCmpInst::FCMP_OLE || Pred == FCmpInst::FCMP_ULE)) { Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, &SI); return replaceInstUsesWith(SI, Fabs); @@ -2878,7 +2961,6 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { // (X > +/-0.0) ? X : (0.0 - X) --> fabs(X) if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) && match(FalseVal, m_FSub(m_PosZeroFP(), m_Specific(TrueVal))) && - match(FalseVal, m_Instruction(FSub)) && FSub->hasNoNaNs() && (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_UGT)) { Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, &SI); return replaceInstUsesWith(SI, Fabs); @@ -2886,11 +2968,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { // With nnan and nsz: // (X < +/-0.0) ? -X : X --> fabs(X) // (X <= +/-0.0) ? -X : X --> fabs(X) - Instruction *FNeg; if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) && - match(TrueVal, m_FNeg(m_Specific(FalseVal))) && - match(TrueVal, m_Instruction(FNeg)) && FNeg->hasNoNaNs() && - FNeg->hasNoSignedZeros() && SI.hasNoSignedZeros() && + match(TrueVal, m_FNeg(m_Specific(FalseVal))) && SI.hasNoSignedZeros() && (Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE || Pred == FCmpInst::FCMP_ULT || Pred == FCmpInst::FCMP_ULE)) { Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, &SI); @@ -2900,9 +2979,7 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { // (X > +/-0.0) ? X : -X --> fabs(X) // (X >= +/-0.0) ? 
X : -X --> fabs(X) if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) && - match(FalseVal, m_FNeg(m_Specific(TrueVal))) && - match(FalseVal, m_Instruction(FNeg)) && FNeg->hasNoNaNs() && - FNeg->hasNoSignedZeros() && SI.hasNoSignedZeros() && + match(FalseVal, m_FNeg(m_Specific(TrueVal))) && SI.hasNoSignedZeros() && (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE || Pred == FCmpInst::FCMP_UGT || Pred == FCmpInst::FCMP_UGE)) { Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, &SI); @@ -2920,6 +2997,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { return Add; if (Instruction *Or = foldSetClearBits(SI, Builder)) return Or; + if (Instruction *Mul = foldSelectZeroOrMul(SI, *this)) + return Mul; // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) auto *TI = dyn_cast<Instruction>(TrueVal); @@ -2939,8 +3018,10 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (Gep->getNumOperands() != 2 || Gep->getPointerOperand() != Base || !Gep->hasOneUse()) return nullptr; - Type *ElementType = Gep->getResultElementType(); Value *Idx = Gep->getOperand(1); + if (isa<VectorType>(CondVal->getType()) && !isa<VectorType>(Idx->getType())) + return nullptr; + Type *ElementType = Gep->getResultElementType(); Value *NewT = Idx; Value *NewF = Constant::getNullValue(Idx->getType()); if (Swap) @@ -3188,9 +3269,9 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (!CondVal->getType()->isVectorTy() && !AC.assumptions().empty()) { KnownBits Known(1); computeKnownBits(CondVal, Known, 0, &SI); - if (Known.One.isOneValue()) + if (Known.One.isOne()) return replaceInstUsesWith(SI, TrueVal); - if (Known.Zero.isOneValue()) + if (Known.Zero.isOne()) return replaceInstUsesWith(SI, FalseVal); } @@ -3230,7 +3311,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { Value *Mask; if (match(TrueVal, m_Zero()) && match(FalseVal, m_MaskedLoad(m_Value(), m_Value(), m_Value(Mask), 
- m_CombineOr(m_Undef(), m_Zero())))) { + m_CombineOr(m_Undef(), m_Zero()))) && + (CondVal->getType() == Mask->getType())) { // We can remove the select by ensuring the load zeros all lanes the // select would have. We determine this by proving there is no overlap // between the load and select masks. |
