diff options
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp')
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 907 |
1 files changed, 383 insertions, 524 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 65e60498ff95..ad96a5f475f1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" @@ -49,13 +50,6 @@ using namespace llvm; using namespace PatternMatch; -static Value *createMinMax(InstCombiner::BuilderTy &Builder, - SelectPatternFlavor SPF, Value *A, Value *B) { - CmpInst::Predicate Pred = getMinMaxPred(SPF); - assert(CmpInst::isIntPredicate(Pred) && "Expected integer predicate"); - return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B); -} - /// Replace a select operand based on an equality comparison with the identity /// constant of a binop. static Instruction *foldSelectBinOpIdentity(SelectInst &Sel, @@ -370,6 +364,7 @@ Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI, // one-use constraint, but that needs be examined carefully since it may not // reduce the total number of instructions. if (TI->getNumOperands() != 2 || FI->getNumOperands() != 2 || + !TI->isSameOperationAs(FI) || (!isa<BinaryOperator>(TI) && !isa<GetElementPtrInst>(TI)) || !TI->hasOneUse() || !FI->hasOneUse()) return nullptr; @@ -444,69 +439,56 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, Value *FalseVal) { // See the comment above GetSelectFoldableOperands for a description of the // transformation we are doing here. - if (auto *TVI = dyn_cast<BinaryOperator>(TrueVal)) { - if (TVI->hasOneUse() && !isa<Constant>(FalseVal)) { - if (unsigned SFO = getSelectFoldableOperands(TVI)) { - unsigned OpToFold = 0; - if ((SFO & 1) && FalseVal == TVI->getOperand(0)) { - OpToFold = 1; - } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { - OpToFold = 2; - } + auto TryFoldSelectIntoOp = [&](SelectInst &SI, Value *TrueVal, + Value *FalseVal, + bool Swapped) -> Instruction * { + if (auto *TVI = dyn_cast<BinaryOperator>(TrueVal)) { + if (TVI->hasOneUse() && !isa<Constant>(FalseVal)) { + if (unsigned SFO = getSelectFoldableOperands(TVI)) { + unsigned OpToFold = 0; + if ((SFO & 1) && FalseVal == TVI->getOperand(0)) + OpToFold = 1; + else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) + OpToFold = 2; - if (OpToFold) { - Constant *C = ConstantExpr::getBinOpIdentity(TVI->getOpcode(), - TVI->getType(), true); - Value *OOp = TVI->getOperand(2-OpToFold); - // Avoid creating select between 2 constants unless it's selecting - // between 0, 1 and -1. - const APInt *OOpC; - bool OOpIsAPInt = match(OOp, m_APInt(OOpC)); - if (!isa<Constant>(OOp) || - (OOpIsAPInt && isSelect01(C->getUniqueInteger(), *OOpC))) { - Value *NewSel = Builder.CreateSelect(SI.getCondition(), OOp, C); - NewSel->takeName(TVI); - BinaryOperator *BO = BinaryOperator::Create(TVI->getOpcode(), - FalseVal, NewSel); - BO->copyIRFlags(TVI); - return BO; + if (OpToFold) { + FastMathFlags FMF; + // TODO: We probably ought to revisit cases where the select and FP + // instructions have different flags and add tests to ensure the + // behaviour is correct. + if (isa<FPMathOperator>(&SI)) + FMF = SI.getFastMathFlags(); + Constant *C = ConstantExpr::getBinOpIdentity( + TVI->getOpcode(), TVI->getType(), true, FMF.noSignedZeros()); + Value *OOp = TVI->getOperand(2 - OpToFold); + // Avoid creating select between 2 constants unless it's selecting + // between 0, 1 and -1. + const APInt *OOpC; + bool OOpIsAPInt = match(OOp, m_APInt(OOpC)); + if (!isa<Constant>(OOp) || + (OOpIsAPInt && isSelect01(C->getUniqueInteger(), *OOpC))) { + Value *NewSel = Builder.CreateSelect( + SI.getCondition(), Swapped ? C : OOp, Swapped ? OOp : C); + if (isa<FPMathOperator>(&SI)) + cast<Instruction>(NewSel)->setFastMathFlags(FMF); + NewSel->takeName(TVI); + BinaryOperator *BO = + BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel); + BO->copyIRFlags(TVI); + return BO; + } } } } } - } + return nullptr; + }; - if (auto *FVI = dyn_cast<BinaryOperator>(FalseVal)) { - if (FVI->hasOneUse() && !isa<Constant>(TrueVal)) { - if (unsigned SFO = getSelectFoldableOperands(FVI)) { - unsigned OpToFold = 0; - if ((SFO & 1) && TrueVal == FVI->getOperand(0)) { - OpToFold = 1; - } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { - OpToFold = 2; - } + if (Instruction *R = TryFoldSelectIntoOp(SI, TrueVal, FalseVal, false)) + return R; - if (OpToFold) { - Constant *C = ConstantExpr::getBinOpIdentity(FVI->getOpcode(), - FVI->getType(), true); - Value *OOp = FVI->getOperand(2-OpToFold); - // Avoid creating select between 2 constants unless it's selecting - // between 0, 1 and -1. - const APInt *OOpC; - bool OOpIsAPInt = match(OOp, m_APInt(OOpC)); - if (!isa<Constant>(OOp) || - (OOpIsAPInt && isSelect01(C->getUniqueInteger(), *OOpC))) { - Value *NewSel = Builder.CreateSelect(SI.getCondition(), C, OOp); - NewSel->takeName(FVI); - BinaryOperator *BO = BinaryOperator::Create(FVI->getOpcode(), - TrueVal, NewSel); - BO->copyIRFlags(FVI); - return BO; - } - } - } - } - } + if (Instruction *R = TryFoldSelectIntoOp(SI, FalseVal, TrueVal, true)) + return R; return nullptr; } @@ -535,6 +517,16 @@ static Instruction *foldSelectICmpAndAnd(Type *SelType, const ICmpInst *Cmp, // Where %B may be optionally shifted: lshr %X, %Z. Value *X, *Z; const bool HasShift = match(B, m_OneUse(m_LShr(m_Value(X), m_Value(Z)))); + + // The shift must be valid. + // TODO: This restricts the fold to constant shift amounts. Is there a way to + // handle variable shifts safely? PR47012 + if (HasShift && + !match(Z, m_SpecificInt_ICMP(CmpInst::ICMP_ULT, + APInt(SelType->getScalarSizeInBits(), + SelType->getScalarSizeInBits())))) + return nullptr; + if (!HasShift) X = B; @@ -1096,74 +1088,55 @@ static bool adjustMinMax(SelectInst &Sel, ICmpInst &Cmp) { return true; } -/// If this is an integer min/max (icmp + select) with a constant operand, -/// create the canonical icmp for the min/max operation and canonicalize the -/// constant to the 'false' operand of the select: -/// select (icmp Pred X, C1), C2, X --> select (icmp Pred' X, C2), X, C2 -/// Note: if C1 != C2, this will change the icmp constant to the existing -/// constant operand of the select. -static Instruction *canonicalizeMinMaxWithConstant(SelectInst &Sel, - ICmpInst &Cmp, - InstCombinerImpl &IC) { - if (!Cmp.hasOneUse() || !isa<Constant>(Cmp.getOperand(1))) - return nullptr; - - // Canonicalize the compare predicate based on whether we have min or max. +static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp, + InstCombinerImpl &IC) { Value *LHS, *RHS; - SelectPatternResult SPR = matchSelectPattern(&Sel, LHS, RHS); - if (!SelectPatternResult::isMinOrMax(SPR.Flavor)) - return nullptr; - - // Is this already canonical? - ICmpInst::Predicate CanonicalPred = getMinMaxPred(SPR.Flavor); - if (Cmp.getOperand(0) == LHS && Cmp.getOperand(1) == RHS && - Cmp.getPredicate() == CanonicalPred) - return nullptr; - - // Bail out on unsimplified X-0 operand (due to some worklist management bug), - // as this may cause an infinite combine loop. Let the sub be folded first. - if (match(LHS, m_Sub(m_Value(), m_Zero())) || - match(RHS, m_Sub(m_Value(), m_Zero()))) + // TODO: What to do with pointer min/max patterns? + if (!Sel.getType()->isIntOrIntVectorTy()) return nullptr; - // Create the canonical compare and plug it into the select. - IC.replaceOperand(Sel, 0, IC.Builder.CreateICmp(CanonicalPred, LHS, RHS)); - - // If the select operands did not change, we're done. - if (Sel.getTrueValue() == LHS && Sel.getFalseValue() == RHS) - return &Sel; - - // If we are swapping the select operands, swap the metadata too. - assert(Sel.getTrueValue() == RHS && Sel.getFalseValue() == LHS && - "Unexpected results from matchSelectPattern"); - Sel.swapValues(); - Sel.swapProfMetadata(); - return &Sel; -} - -static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp, - InstCombinerImpl &IC) { - if (!Cmp.hasOneUse() || !isa<Constant>(Cmp.getOperand(1))) - return nullptr; - - Value *LHS, *RHS; SelectPatternFlavor SPF = matchSelectPattern(&Sel, LHS, RHS).Flavor; - if (SPF != SelectPatternFlavor::SPF_ABS && - SPF != SelectPatternFlavor::SPF_NABS) - return nullptr; + if (SPF == SelectPatternFlavor::SPF_ABS || + SPF == SelectPatternFlavor::SPF_NABS) { + if (!Cmp.hasOneUse() && !RHS->hasOneUse()) + return nullptr; // TODO: Relax this restriction. + + // Note that NSW flag can only be propagated for normal, non-negated abs! + bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS && + match(RHS, m_NSWNeg(m_Specific(LHS))); + Constant *IntMinIsPoisonC = + ConstantInt::get(Type::getInt1Ty(Sel.getContext()), IntMinIsPoison); + Instruction *Abs = + IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC); - // Note that NSW flag can only be propagated for normal, non-negated abs! - bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS && - match(RHS, m_NSWNeg(m_Specific(LHS))); - Constant *IntMinIsPoisonC = - ConstantInt::get(Type::getInt1Ty(Sel.getContext()), IntMinIsPoison); - Instruction *Abs = - IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC); + if (SPF == SelectPatternFlavor::SPF_NABS) + return BinaryOperator::CreateNeg(Abs); // Always without NSW flag! + return IC.replaceInstUsesWith(Sel, Abs); + } - if (SPF == SelectPatternFlavor::SPF_NABS) - return BinaryOperator::CreateNeg(Abs); // Always without NSW flag! + if (SelectPatternResult::isMinOrMax(SPF)) { + Intrinsic::ID IntrinsicID; + switch (SPF) { + case SelectPatternFlavor::SPF_UMIN: + IntrinsicID = Intrinsic::umin; + break; + case SelectPatternFlavor::SPF_UMAX: + IntrinsicID = Intrinsic::umax; + break; + case SelectPatternFlavor::SPF_SMIN: + IntrinsicID = Intrinsic::smin; + break; + case SelectPatternFlavor::SPF_SMAX: + IntrinsicID = Intrinsic::smax; + break; + default: + llvm_unreachable("Unexpected SPF"); + } + return IC.replaceInstUsesWith( + Sel, IC.Builder.CreateBinaryIntrinsic(IntrinsicID, LHS, RHS)); + } - return IC.replaceInstUsesWith(Sel, Abs); + return nullptr; } /// If we have a select with an equality comparison, then we know the value in @@ -1336,6 +1309,7 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, ICmpInst::Predicate::ICMP_NE, APInt::getAllOnes(C0->getType()->getScalarSizeInBits())))) return nullptr; // Can't do, have all-ones element[s]. + Pred0 = ICmpInst::getFlippedStrictnessPredicate(Pred0); C0 = InstCombiner::AddOne(C0); break; default: @@ -1401,15 +1375,22 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, case ICmpInst::Predicate::ICMP_SGE: // Also non-canonical, but here we don't need to change C2, // so we don't have any restrictions on C2, so we can just handle it. + Pred1 = ICmpInst::Predicate::ICMP_SLT; std::swap(ReplacementLow, ReplacementHigh); break; default: return nullptr; // Unknown predicate. } + assert(Pred1 == ICmpInst::Predicate::ICMP_SLT && + "Unexpected predicate type."); // The thresholds of this clamp-like pattern. auto *ThresholdLowIncl = ConstantExpr::getNeg(C1); auto *ThresholdHighExcl = ConstantExpr::getSub(C0, C1); + + assert((Pred0 == ICmpInst::Predicate::ICMP_ULT || + Pred0 == ICmpInst::Predicate::ICMP_UGE) && + "Unexpected predicate type."); if (Pred0 == ICmpInst::Predicate::ICMP_UGE) std::swap(ThresholdLowIncl, ThresholdHighExcl); @@ -1530,17 +1511,71 @@ tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp, return &Sel; } +static Instruction *foldSelectZeroOrOnes(ICmpInst *Cmp, Value *TVal, + Value *FVal, + InstCombiner::BuilderTy &Builder) { + if (!Cmp->hasOneUse()) + return nullptr; + + const APInt *CmpC; + if (!match(Cmp->getOperand(1), m_APIntAllowUndef(CmpC))) + return nullptr; + + // (X u< 2) ? -X : -1 --> sext (X != 0) + Value *X = Cmp->getOperand(0); + if (Cmp->getPredicate() == ICmpInst::ICMP_ULT && *CmpC == 2 && + match(TVal, m_Neg(m_Specific(X))) && match(FVal, m_AllOnes())) + return new SExtInst(Builder.CreateIsNotNull(X), TVal->getType()); + + // (X u> 1) ? -1 : -X --> sext (X != 0) + if (Cmp->getPredicate() == ICmpInst::ICMP_UGT && *CmpC == 1 && + match(FVal, m_Neg(m_Specific(X))) && match(TVal, m_AllOnes())) + return new SExtInst(Builder.CreateIsNotNull(X), TVal->getType()); + + return nullptr; +} + +static Value *foldSelectInstWithICmpConst(SelectInst &SI, ICmpInst *ICI) { + const APInt *CmpC; + Value *V; + CmpInst::Predicate Pred; + if (!match(ICI, m_ICmp(Pred, m_Value(V), m_APInt(CmpC)))) + return nullptr; + + BinaryOperator *BO; + const APInt *C; + CmpInst::Predicate CPred; + if (match(&SI, m_Select(m_Specific(ICI), m_APInt(C), m_BinOp(BO)))) + CPred = ICI->getPredicate(); + else if (match(&SI, m_Select(m_Specific(ICI), m_BinOp(BO), m_APInt(C)))) + CPred = ICI->getInversePredicate(); + else + return nullptr; + + const APInt *BinOpC; + if (!match(BO, m_BinOp(m_Specific(V), m_APInt(BinOpC)))) + return nullptr; + + ConstantRange R = ConstantRange::makeExactICmpRegion(CPred, *CmpC) + .binaryOp(BO->getOpcode(), *BinOpC); + if (R == *C) { + BO->dropPoisonGeneratingFlags(); + return BO; + } + return nullptr; +} + /// Visit a SelectInst that has an ICmpInst as its first operand. Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI) { if (Instruction *NewSel = foldSelectValueEquivalence(SI, *ICI)) return NewSel; - if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, *this)) - return NewSel; + if (Instruction *NewSPF = canonicalizeSPF(SI, *ICI, *this)) + return NewSPF; - if (Instruction *NewAbs = canonicalizeAbsNabs(SI, *ICI, *this)) - return NewAbs; + if (Value *V = foldSelectInstWithICmpConst(SI, ICI)) + return replaceInstUsesWith(SI, V); if (Value *V = canonicalizeClampLike(SI, *ICI, Builder)) return replaceInstUsesWith(SI, V); @@ -1572,6 +1607,22 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, } } + // Canonicalize a signbit condition to use zero constant by swapping: + // (CmpLHS > -1) ? TV : FV --> (CmpLHS < 0) ? FV : TV + // To avoid conflicts (infinite loops) with other canonicalizations, this is + // not applied with any constant select arm. + if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes()) && + !match(TrueVal, m_Constant()) && !match(FalseVal, m_Constant()) && + ICI->hasOneUse()) { + InstCombiner::BuilderTy::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(&SI); + Value *IsNeg = Builder.CreateIsNeg(CmpLHS, ICI->getName()); + replaceOperand(SI, 0, IsNeg); + SI.swapValues(); + SI.swapProfMetadata(); + return &SI; + } + // FIXME: This code is nearly duplicated in InstSimplify. Using/refactoring // decomposeBitTestICmp() might help. { @@ -1629,6 +1680,9 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, if (Instruction *V = foldSelectCtlzToCttz(ICI, TrueVal, FalseVal, Builder)) return V; + if (Instruction *V = foldSelectZeroOrOnes(ICI, TrueVal, FalseVal, Builder)) + return V; + if (Value *V = foldSelectICmpAndOr(ICI, TrueVal, FalseVal, Builder)) return replaceInstUsesWith(SI, V); @@ -1698,114 +1752,6 @@ Instruction *InstCombinerImpl::foldSPFofSPF(Instruction *Inner, // TODO: This could be done in instsimplify. if (SPF1 == SPF2 && SelectPatternResult::isMinOrMax(SPF1)) return replaceInstUsesWith(Outer, Inner); - - // MAX(MIN(a, b), a) -> a - // MIN(MAX(a, b), a) -> a - // TODO: This could be done in instsimplify. - if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) || - (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) || - (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) || - (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN)) - return replaceInstUsesWith(Outer, C); - } - - if (SPF1 == SPF2) { - const APInt *CB, *CC; - if (match(B, m_APInt(CB)) && match(C, m_APInt(CC))) { - // MIN(MIN(A, 23), 97) -> MIN(A, 23) - // MAX(MAX(A, 97), 23) -> MAX(A, 97) - // TODO: This could be done in instsimplify. - if ((SPF1 == SPF_UMIN && CB->ule(*CC)) || - (SPF1 == SPF_SMIN && CB->sle(*CC)) || - (SPF1 == SPF_UMAX && CB->uge(*CC)) || - (SPF1 == SPF_SMAX && CB->sge(*CC))) - return replaceInstUsesWith(Outer, Inner); - - // MIN(MIN(A, 97), 23) -> MIN(A, 23) - // MAX(MAX(A, 23), 97) -> MAX(A, 97) - if ((SPF1 == SPF_UMIN && CB->ugt(*CC)) || - (SPF1 == SPF_SMIN && CB->sgt(*CC)) || - (SPF1 == SPF_UMAX && CB->ult(*CC)) || - (SPF1 == SPF_SMAX && CB->slt(*CC))) { - Outer.replaceUsesOfWith(Inner, A); - return &Outer; - } - } - } - - // max(max(A, B), min(A, B)) --> max(A, B) - // min(min(A, B), max(A, B)) --> min(A, B) - // TODO: This could be done in instsimplify. - if (SPF1 == SPF2 && - ((SPF1 == SPF_UMIN && match(C, m_c_UMax(m_Specific(A), m_Specific(B)))) || - (SPF1 == SPF_SMIN && match(C, m_c_SMax(m_Specific(A), m_Specific(B)))) || - (SPF1 == SPF_UMAX && match(C, m_c_UMin(m_Specific(A), m_Specific(B)))) || - (SPF1 == SPF_SMAX && match(C, m_c_SMin(m_Specific(A), m_Specific(B)))))) - return replaceInstUsesWith(Outer, Inner); - - // ABS(ABS(X)) -> ABS(X) - // NABS(NABS(X)) -> NABS(X) - // TODO: This could be done in instsimplify. - if (SPF1 == SPF2 && (SPF1 == SPF_ABS || SPF1 == SPF_NABS)) { - return replaceInstUsesWith(Outer, Inner); - } - - // ABS(NABS(X)) -> ABS(X) - // NABS(ABS(X)) -> NABS(X) - if ((SPF1 == SPF_ABS && SPF2 == SPF_NABS) || - (SPF1 == SPF_NABS && SPF2 == SPF_ABS)) { - SelectInst *SI = cast<SelectInst>(Inner); - Value *NewSI = - Builder.CreateSelect(SI->getCondition(), SI->getFalseValue(), - SI->getTrueValue(), SI->getName(), SI); - return replaceInstUsesWith(Outer, NewSI); - } - - auto IsFreeOrProfitableToInvert = - [&](Value *V, Value *&NotV, bool &ElidesXor) { - if (match(V, m_Not(m_Value(NotV)))) { - // If V has at most 2 uses then we can get rid of the xor operation - // entirely. - ElidesXor |= !V->hasNUsesOrMore(3); - return true; - } - - if (isFreeToInvert(V, !V->hasNUsesOrMore(3))) { - NotV = nullptr; - return true; - } - - return false; - }; - - Value *NotA, *NotB, *NotC; - bool ElidesXor = false; - - // MIN(MIN(~A, ~B), ~C) == ~MAX(MAX(A, B), C) - // MIN(MAX(~A, ~B), ~C) == ~MAX(MIN(A, B), C) - // MAX(MIN(~A, ~B), ~C) == ~MIN(MAX(A, B), C) - // MAX(MAX(~A, ~B), ~C) == ~MIN(MIN(A, B), C) - // - // This transform is performance neutral if we can elide at least one xor from - // the set of three operands, since we'll be tacking on an xor at the very - // end. - if (SelectPatternResult::isMinOrMax(SPF1) && - SelectPatternResult::isMinOrMax(SPF2) && - IsFreeOrProfitableToInvert(A, NotA, ElidesXor) && - IsFreeOrProfitableToInvert(B, NotB, ElidesXor) && - IsFreeOrProfitableToInvert(C, NotC, ElidesXor) && ElidesXor) { - if (!NotA) - NotA = Builder.CreateNot(A); - if (!NotB) - NotB = Builder.CreateNot(B); - if (!NotC) - NotC = Builder.CreateNot(C); - - Value *NewInner = createMinMax(Builder, getInverseMinMaxFlavor(SPF1), NotA, - NotB); - Value *NewOuter = Builder.CreateNot( - createMinMax(Builder, getInverseMinMaxFlavor(SPF2), NewInner, NotC)); - return replaceInstUsesWith(Outer, NewOuter); } return nullptr; @@ -2238,163 +2184,6 @@ static Value *foldSelectCmpXchg(SelectInst &SI) { return nullptr; } -static Instruction *moveAddAfterMinMax(SelectPatternFlavor SPF, Value *X, - Value *Y, - InstCombiner::BuilderTy &Builder) { - assert(SelectPatternResult::isMinOrMax(SPF) && "Expected min/max pattern"); - bool IsUnsigned = SPF == SelectPatternFlavor::SPF_UMIN || - SPF == SelectPatternFlavor::SPF_UMAX; - // TODO: If InstSimplify could fold all cases where C2 <= C1, we could change - // the constant value check to an assert. - Value *A; - const APInt *C1, *C2; - if (IsUnsigned && match(X, m_NUWAdd(m_Value(A), m_APInt(C1))) && - match(Y, m_APInt(C2)) && C2->uge(*C1) && X->hasNUses(2)) { - // umin (add nuw A, C1), C2 --> add nuw (umin A, C2 - C1), C1 - // umax (add nuw A, C1), C2 --> add nuw (umax A, C2 - C1), C1 - Value *NewMinMax = createMinMax(Builder, SPF, A, - ConstantInt::get(X->getType(), *C2 - *C1)); - return BinaryOperator::CreateNUW(BinaryOperator::Add, NewMinMax, - ConstantInt::get(X->getType(), *C1)); - } - - if (!IsUnsigned && match(X, m_NSWAdd(m_Value(A), m_APInt(C1))) && - match(Y, m_APInt(C2)) && X->hasNUses(2)) { - bool Overflow; - APInt Diff = C2->ssub_ov(*C1, Overflow); - if (!Overflow) { - // smin (add nsw A, C1), C2 --> add nsw (smin A, C2 - C1), C1 - // smax (add nsw A, C1), C2 --> add nsw (smax A, C2 - C1), C1 - Value *NewMinMax = createMinMax(Builder, SPF, A, - ConstantInt::get(X->getType(), Diff)); - return BinaryOperator::CreateNSW(BinaryOperator::Add, NewMinMax, - ConstantInt::get(X->getType(), *C1)); - } - } - - return nullptr; -} - -/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value. -Instruction *InstCombinerImpl::matchSAddSubSat(Instruction &MinMax1) { - Type *Ty = MinMax1.getType(); - - // We are looking for a tree of: - // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B)))) - // Where the min and max could be reversed - Instruction *MinMax2; - BinaryOperator *AddSub; - const APInt *MinValue, *MaxValue; - if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) { - if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue)))) - return nullptr; - } else if (match(&MinMax1, - m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) { - if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue)))) - return nullptr; - } else - return nullptr; - - // Check that the constants clamp a saturate, and that the new type would be - // sensible to convert to. - if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1) - return nullptr; - // In what bitwidth can this be treated as saturating arithmetics? - unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1; - // FIXME: This isn't quite right for vectors, but using the scalar type is a - // good first approximation for what should be done there. - if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth)) - return nullptr; - - // Also make sure that the number of uses is as expected. The 3 is for the - // the two items of the compare and the select, or 2 from a min/max. - unsigned ExpUses = isa<IntrinsicInst>(MinMax1) ? 2 : 3; - if (MinMax2->hasNUsesOrMore(ExpUses) || AddSub->hasNUsesOrMore(ExpUses)) - return nullptr; - - // Create the new type (which can be a vector type) - Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth); - - Intrinsic::ID IntrinsicID; - if (AddSub->getOpcode() == Instruction::Add) - IntrinsicID = Intrinsic::sadd_sat; - else if (AddSub->getOpcode() == Instruction::Sub) - IntrinsicID = Intrinsic::ssub_sat; - else - return nullptr; - - // The two operands of the add/sub must be nsw-truncatable to the NewTy. This - // is usually achieved via a sext from a smaller type. - if (ComputeMaxSignificantBits(AddSub->getOperand(0), 0, AddSub) > - NewBitWidth || - ComputeMaxSignificantBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth) - return nullptr; - - // Finally create and return the sat intrinsic, truncated to the new type - Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy); - Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy); - Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy); - Value *Sat = Builder.CreateCall(F, {AT, BT}); - return CastInst::Create(Instruction::SExt, Sat, Ty); -} - -/// Reduce a sequence of min/max with a common operand. -static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS, - Value *RHS, - InstCombiner::BuilderTy &Builder) { - assert(SelectPatternResult::isMinOrMax(SPF) && "Expected a min/max"); - // TODO: Allow FP min/max with nnan/nsz. - if (!LHS->getType()->isIntOrIntVectorTy()) - return nullptr; - - // Match 3 of the same min/max ops. Example: umin(umin(), umin()). - Value *A, *B, *C, *D; - SelectPatternResult L = matchSelectPattern(LHS, A, B); - SelectPatternResult R = matchSelectPattern(RHS, C, D); - if (SPF != L.Flavor || L.Flavor != R.Flavor) - return nullptr; - - // Look for a common operand. The use checks are different than usual because - // a min/max pattern typically has 2 uses of each op: 1 by the cmp and 1 by - // the select. - Value *MinMaxOp = nullptr; - Value *ThirdOp = nullptr; - if (!LHS->hasNUsesOrMore(3) && RHS->hasNUsesOrMore(3)) { - // If the LHS is only used in this chain and the RHS is used outside of it, - // reuse the RHS min/max because that will eliminate the LHS. - if (D == A || C == A) { - // min(min(a, b), min(c, a)) --> min(min(c, a), b) - // min(min(a, b), min(a, d)) --> min(min(a, d), b) - MinMaxOp = RHS; - ThirdOp = B; - } else if (D == B || C == B) { - // min(min(a, b), min(c, b)) --> min(min(c, b), a) - // min(min(a, b), min(b, d)) --> min(min(b, d), a) - MinMaxOp = RHS; - ThirdOp = A; - } - } else if (!RHS->hasNUsesOrMore(3)) { - // Reuse the LHS. This will eliminate the RHS. - if (D == A || D == B) { - // min(min(a, b), min(c, a)) --> min(min(a, b), c) - // min(min(a, b), min(c, b)) --> min(min(a, b), c) - MinMaxOp = LHS; - ThirdOp = C; - } else if (C == A || C == B) { - // min(min(a, b), min(b, d)) --> min(min(a, b), d) - // min(min(a, b), min(c, b)) --> min(min(a, b), d) - MinMaxOp = LHS; - ThirdOp = D; - } - } - if (!MinMaxOp || !ThirdOp) - return nullptr; - - CmpInst::Predicate P = getMinMaxPred(SPF); - Value *CmpABC = Builder.CreateICmp(P, MinMaxOp, ThirdOp); - return SelectInst::Create(CmpABC, MinMaxOp, ThirdOp); -} - /// Try to reduce a funnel/rotate pattern that includes a compare and select /// into a funnel shift intrinsic. Example: /// rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b))) @@ -2484,7 +2273,8 @@ static Instruction *foldSelectToCopysign(SelectInst &Sel, // Match select ?, TC, FC where the constants are equal but negated. // TODO: Generalize to handle a negated variable operand? const APFloat *TC, *FC; - if (!match(TVal, m_APFloat(TC)) || !match(FVal, m_APFloat(FC)) || + if (!match(TVal, m_APFloatAllowUndef(TC)) || + !match(FVal, m_APFloatAllowUndef(FC)) || !abs(*TC).bitwiseIsEqual(abs(*FC))) return nullptr; @@ -2504,17 +2294,16 @@ static Instruction *foldSelectToCopysign(SelectInst &Sel, // (bitcast X) < 0 ? TC : -TC --> copysign(TC, -X) // (bitcast X) >= 0 ? -TC : TC --> copysign(TC, -X) // (bitcast X) >= 0 ? TC : -TC --> copysign(TC, X) + // Note: FMF from the select can not be propagated to the new instructions. if (IsTrueIfSignSet ^ TC->isNegative()) - X = Builder.CreateFNegFMF(X, &Sel); + X = Builder.CreateFNeg(X); // Canonicalize the magnitude argument as the positive constant since we do // not care about its sign. - Value *MagArg = TC->isNegative() ? FVal : TVal; + Value *MagArg = ConstantFP::get(SelType, abs(*TC)); Function *F = Intrinsic::getDeclaration(Sel.getModule(), Intrinsic::copysign, Sel.getType()); - Instruction *CopySign = CallInst::Create(F, { MagArg, X }); - CopySign->setFastMathFlags(Sel.getFastMathFlags()); - return CopySign; + return CallInst::Create(F, { MagArg, X }); } Instruction *InstCombinerImpl::foldVectorSelect(SelectInst &Sel) { @@ -2715,29 +2504,144 @@ Instruction *InstCombinerImpl::foldAndOrOfSelectUsingImpliedCond(Value *Op, } } -Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { +// Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need +// fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work. +static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI, + InstCombinerImpl &IC) { Value *CondVal = SI.getCondition(); - Value *TrueVal = SI.getTrueValue(); - Value *FalseVal = SI.getFalseValue(); - Type *SelType = SI.getType(); - // FIXME: Remove this workaround when freeze related patches are done. - // For select with undef operand which feeds into an equality comparison, - // don't simplify it so loop unswitch can know the equality comparison - // may have an undef operand. This is a workaround for PR31652 caused by - // descrepancy about branch on undef between LoopUnswitch and GVN. - if (match(TrueVal, m_Undef()) || match(FalseVal, m_Undef())) { - if (llvm::any_of(SI.users(), [&](User *U) { - ICmpInst *CI = dyn_cast<ICmpInst>(U); - if (CI && CI->isEquality()) - return true; - return false; - })) { + for (bool Swap : {false, true}) { + Value *TrueVal = SI.getTrueValue(); + Value *X = SI.getFalseValue(); + CmpInst::Predicate Pred; + + if (Swap) + std::swap(TrueVal, X); + + if (!match(CondVal, m_FCmp(Pred, m_Specific(X), m_AnyZeroFP()))) + continue; + + // fold (X <= +/-0.0) ? (0.0 - X) : X to fabs(X), when 'Swap' is false + // fold (X > +/-0.0) ? X : (0.0 - X) to fabs(X), when 'Swap' is true + if (match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(X)))) { + if (!Swap && (Pred == FCmpInst::FCMP_OLE || Pred == FCmpInst::FCMP_ULE)) { + Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI); + return IC.replaceInstUsesWith(SI, Fabs); + } + if (Swap && (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_UGT)) { + Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI); + return IC.replaceInstUsesWith(SI, Fabs); + } + } + + // With nsz, when 'Swap' is false: + // fold (X < +/-0.0) ? -X : X or (X <= +/-0.0) ? -X : X to fabs(X) + // fold (X > +/-0.0) ? -X : X or (X >= +/-0.0) ? -X : X to -fabs(x) + // when 'Swap' is true: + // fold (X > +/-0.0) ? X : -X or (X >= +/-0.0) ? X : -X to fabs(X) + // fold (X < +/-0.0) ? X : -X or (X <= +/-0.0) ? X : -X to -fabs(X) + if (!match(TrueVal, m_FNeg(m_Specific(X))) || !SI.hasNoSignedZeros()) return nullptr; + + if (Swap) + Pred = FCmpInst::getSwappedPredicate(Pred); + + bool IsLTOrLE = Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE || + Pred == FCmpInst::FCMP_ULT || Pred == FCmpInst::FCMP_ULE; + bool IsGTOrGE = Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE || + Pred == FCmpInst::FCMP_UGT || Pred == FCmpInst::FCMP_UGE; + + if (IsLTOrLE) { + Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI); + return IC.replaceInstUsesWith(SI, Fabs); + } + if (IsGTOrGE) { + Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI); + Instruction *NewFNeg = UnaryOperator::CreateFNeg(Fabs); + NewFNeg->setFastMathFlags(SI.getFastMathFlags()); + return NewFNeg; } } - if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, + return nullptr; +} + +// Match the following IR pattern: +// %x.lowbits = and i8 %x, %lowbitmask +// %x.lowbits.are.zero = icmp eq i8 %x.lowbits, 0 +// %x.biased = add i8 %x, %bias +// %x.biased.highbits = and i8 %x.biased, %highbitmask +// %x.roundedup = select i1 %x.lowbits.are.zero, i8 %x, i8 %x.biased.highbits +// Define: +// %alignment = add i8 %lowbitmask, 1 +// Iff 1. an %alignment is a power-of-two (aka, %lowbitmask is a low bit mask) +// and 2. %bias is equal to either %lowbitmask or %alignment, +// and 3. %highbitmask is equal to ~%lowbitmask (aka, to -%alignment) +// then this pattern can be transformed into: +// %x.offset = add i8 %x, %lowbitmask +// %x.roundedup = and i8 %x.offset, %highbitmask +static Value * +foldRoundUpIntegerWithPow2Alignment(SelectInst &SI, + InstCombiner::BuilderTy &Builder) { + Value *Cond = SI.getCondition(); + Value *X = SI.getTrueValue(); + Value *XBiasedHighBits = SI.getFalseValue(); + + ICmpInst::Predicate Pred; + Value *XLowBits; + if (!match(Cond, m_ICmp(Pred, m_Value(XLowBits), m_ZeroInt())) || + !ICmpInst::isEquality(Pred)) + return nullptr; + + if (Pred == ICmpInst::Predicate::ICMP_NE) + std::swap(X, XBiasedHighBits); + + // FIXME: we could support non non-splats here. + + const APInt *LowBitMaskCst; + if (!match(XLowBits, m_And(m_Specific(X), m_APIntAllowUndef(LowBitMaskCst)))) + return nullptr; + + const APInt *BiasCst, *HighBitMaskCst; + if (!match(XBiasedHighBits, + m_And(m_Add(m_Specific(X), m_APIntAllowUndef(BiasCst)), + m_APIntAllowUndef(HighBitMaskCst)))) + return nullptr; + + if (!LowBitMaskCst->isMask()) + return nullptr; + + APInt InvertedLowBitMaskCst = ~*LowBitMaskCst; + if (InvertedLowBitMaskCst != *HighBitMaskCst) + return nullptr; + + APInt AlignmentCst = *LowBitMaskCst + 1; + + if (*BiasCst != AlignmentCst && *BiasCst != *LowBitMaskCst) + return nullptr; + + if (!XBiasedHighBits->hasOneUse()) { + if (*BiasCst == *LowBitMaskCst) + return XBiasedHighBits; + return nullptr; + } + + // FIXME: could we preserve undef's here? + Type *Ty = X->getType(); + Value *XOffset = Builder.CreateAdd(X, ConstantInt::get(Ty, *LowBitMaskCst), + X->getName() + ".biased"); + Value *R = Builder.CreateAnd(XOffset, ConstantInt::get(Ty, *HighBitMaskCst)); + R->takeName(&SI); + return R; +} + +Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { + Value *CondVal = SI.getCondition(); + Value *TrueVal = SI.getTrueValue(); + Value *FalseVal = SI.getFalseValue(); + Type *SelType = SI.getType(); + + if (Value *V = simplifySelectInst(CondVal, TrueVal, FalseVal, SQ.getWithInstruction(&SI))) return replaceInstUsesWith(SI, V); @@ -2747,8 +2651,6 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (Instruction *I = canonicalizeScalarSelectOfVecs(SI, *this)) return I; - CmpInst::Predicate Pred; - // Avoid potential infinite loops by checking for non-constant condition. // TODO: Can we assert instead by improving canonicalizeSelectToShuffle()? // Scalar select must have simplified? @@ -2757,13 +2659,29 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { // Folding select to and/or i1 isn't poison safe in general. impliesPoison // checks whether folding it does not convert a well-defined value into // poison. - if (match(TrueVal, m_One()) && impliesPoison(FalseVal, CondVal)) { - // Change: A = select B, true, C --> A = or B, C - return BinaryOperator::CreateOr(CondVal, FalseVal); + if (match(TrueVal, m_One())) { + if (impliesPoison(FalseVal, CondVal)) { + // Change: A = select B, true, C --> A = or B, C + return BinaryOperator::CreateOr(CondVal, FalseVal); + } + + if (auto *LHS = dyn_cast<FCmpInst>(CondVal)) + if (auto *RHS = dyn_cast<FCmpInst>(FalseVal)) + if (Value *V = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ false, + /*IsSelectLogical*/ true)) + return replaceInstUsesWith(SI, V); } - if (match(FalseVal, m_Zero()) && impliesPoison(TrueVal, CondVal)) { - // Change: A = select B, C, false --> A = and B, C - return BinaryOperator::CreateAnd(CondVal, TrueVal); + if (match(FalseVal, m_Zero())) { + if (impliesPoison(TrueVal, CondVal)) { + // Change: A = select B, C, false --> A = and B, C + return BinaryOperator::CreateAnd(CondVal, TrueVal); + } + + if (auto *LHS = dyn_cast<FCmpInst>(CondVal)) + if (auto *RHS = dyn_cast<FCmpInst>(TrueVal)) + if (Value *V = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ true, + /*IsSelectLogical*/ true)) + return replaceInstUsesWith(SI, V); } auto *One = ConstantInt::getTrue(SelType); @@ -2821,6 +2739,20 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { match(TrueVal, m_Specific(B)) && match(FalseVal, m_Zero())) return replaceOperand(SI, 0, A); + Value *C; + // select (~a | c), a, b -> and a, (or c, freeze(b)) + if (match(CondVal, m_c_Or(m_Not(m_Specific(TrueVal)), m_Value(C))) && + CondVal->hasOneUse()) { + FalseVal = Builder.CreateFreeze(FalseVal); + return BinaryOperator::CreateAnd(TrueVal, Builder.CreateOr(C, FalseVal)); + } + // select (~c & b), a, b -> and b, (or freeze(a), c) + if (match(CondVal, m_c_And(m_Not(m_Value(C)), m_Specific(FalseVal))) && + CondVal->hasOneUse()) { + TrueVal = Builder.CreateFreeze(TrueVal); + return BinaryOperator::CreateAnd(FalseVal, Builder.CreateOr(C, TrueVal)); + } + if (!SelType->isVectorTy()) { if (Value *S = simplifyWithOpReplaced(TrueVal, CondVal, One, SQ, /* AllowRefinement */ true)) @@ -2846,16 +2778,11 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { /* IsAnd */ IsAnd)) return I; - if (auto *ICmp0 = dyn_cast<ICmpInst>(CondVal)) { - if (auto *ICmp1 = dyn_cast<ICmpInst>(Op1)) { - if (auto *V = foldAndOrOfICmpsOfAndWithPow2(ICmp0, ICmp1, &SI, IsAnd, - /* IsLogical */ true)) - return replaceInstUsesWith(SI, V); - - if (auto *V = foldEqOfParts(ICmp0, ICmp1, IsAnd)) + if (auto *ICmp0 = dyn_cast<ICmpInst>(CondVal)) + if (auto *ICmp1 = dyn_cast<ICmpInst>(Op1)) + if (auto *V = foldAndOrOfICmps(ICmp0, ICmp1, SI, IsAnd, + /* IsLogical */ true)) return replaceInstUsesWith(SI, V); - } - } } // select (select a, true, b), c, false -> select a, c, false @@ -2959,42 +2886,9 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { } } - // Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need - // fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work. - // (X <= +/-0.0) ? (0.0 - X) : X --> fabs(X) - if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) && - match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(FalseVal))) && - (Pred == FCmpInst::FCMP_OLE || Pred == FCmpInst::FCMP_ULE)) { - Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, &SI); - return replaceInstUsesWith(SI, Fabs); - } - // (X > +/-0.0) ? X : (0.0 - X) --> fabs(X) - if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) && - match(FalseVal, m_FSub(m_PosZeroFP(), m_Specific(TrueVal))) && - (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_UGT)) { - Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, &SI); - return replaceInstUsesWith(SI, Fabs); - } - // With nnan and nsz: - // (X < +/-0.0) ? -X : X --> fabs(X) - // (X <= +/-0.0) ? -X : X --> fabs(X) - if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) && - match(TrueVal, m_FNeg(m_Specific(FalseVal))) && SI.hasNoSignedZeros() && - (Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE || - Pred == FCmpInst::FCMP_ULT || Pred == FCmpInst::FCMP_ULE)) { - Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, &SI); - return replaceInstUsesWith(SI, Fabs); - } - // With nnan and nsz: - // (X > +/-0.0) ? X : -X --> fabs(X) - // (X >= +/-0.0) ? X : -X --> fabs(X) - if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) && - match(FalseVal, m_FNeg(m_Specific(TrueVal))) && SI.hasNoSignedZeros() && - (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE || - Pred == FCmpInst::FCMP_UGT || Pred == FCmpInst::FCMP_UGE)) { - Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, &SI); - return replaceInstUsesWith(SI, Fabs); - } + // Fold selecting to fabs. + if (Instruction *Fabs = foldSelectWithFCmpToFabs(SI, *this)) + return Fabs; // See if we are selecting two values based on a comparison of the two values. if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal)) @@ -3066,8 +2960,6 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (Instruction *R = foldSPFofSPF(cast<Instruction>(RHS), SPF2, LHS2, RHS2, SI, SPF, LHS)) return R; - // TODO. - // ABS(-X) -> ABS(X) } if (SelectPatternResult::isMinOrMax(SPF)) { @@ -3102,46 +2994,6 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { Value *NewCast = Builder.CreateCast(CastOp, NewSI, SelType); return replaceInstUsesWith(SI, NewCast); } - - // MAX(~a, ~b) -> ~MIN(a, b) - // MAX(~a, C) -> ~MIN(a, ~C) - // MIN(~a, ~b) -> ~MAX(a, b) - // MIN(~a, C) -> ~MAX(a, ~C) - auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * { - Value *A; - if (match(X, m_Not(m_Value(A))) && !X->hasNUsesOrMore(3) && - !isFreeToInvert(A, A->hasOneUse()) && - // Passing false to only consider m_Not and constants. - isFreeToInvert(Y, false)) { - Value *B = Builder.CreateNot(Y); - Value *NewMinMax = createMinMax(Builder, getInverseMinMaxFlavor(SPF), - A, B); - // Copy the profile metadata. - if (MDNode *MD = SI.getMetadata(LLVMContext::MD_prof)) { - cast<SelectInst>(NewMinMax)->setMetadata(LLVMContext::MD_prof, MD); - // Swap the metadata if the operands are swapped. - if (X == SI.getFalseValue() && Y == SI.getTrueValue()) - cast<SelectInst>(NewMinMax)->swapProfMetadata(); - } - - return BinaryOperator::CreateNot(NewMinMax); - } - - return nullptr; - }; - - if (Instruction *I = moveNotAfterMinMax(LHS, RHS)) - return I; - if (Instruction *I = moveNotAfterMinMax(RHS, LHS)) - return I; - - if (Instruction *I = moveAddAfterMinMax(SPF, LHS, RHS, Builder)) - return I; - - if (Instruction *I = factorizeMinMaxTree(SPF, LHS, RHS, Builder)) - return I; - if (Instruction *I = matchSAddSubSat(SI)) - return I; } } @@ -3307,35 +3159,42 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (Value *Fr = foldSelectWithFrozenICmp(SI, Builder)) return replaceInstUsesWith(SI, Fr); + if (Value *V = foldRoundUpIntegerWithPow2Alignment(SI, Builder)) + return replaceInstUsesWith(SI, V); + // select(mask, mload(,,mask,0), 0) -> mload(,,mask,0) // Load inst is intentionally not checked for hasOneUse() if (match(FalseVal, m_Zero()) && - match(TrueVal, m_MaskedLoad(m_Value(), m_Value(), m_Specific(CondVal), - m_CombineOr(m_Undef(), m_Zero())))) { - auto *MaskedLoad = cast<IntrinsicInst>(TrueVal); - if (isa<UndefValue>(MaskedLoad->getArgOperand(3))) - MaskedLoad->setArgOperand(3, FalseVal /* Zero */); - return replaceInstUsesWith(SI, MaskedLoad); + (match(TrueVal, m_MaskedLoad(m_Value(), m_Value(), m_Specific(CondVal), + m_CombineOr(m_Undef(), m_Zero()))) || + match(TrueVal, m_MaskedGather(m_Value(), m_Value(), m_Specific(CondVal), + m_CombineOr(m_Undef(), m_Zero()))))) { + auto *MaskedInst = cast<IntrinsicInst>(TrueVal); + if (isa<UndefValue>(MaskedInst->getArgOperand(3))) + MaskedInst->setArgOperand(3, FalseVal /* Zero */); + return replaceInstUsesWith(SI, MaskedInst); } Value *Mask; if (match(TrueVal, m_Zero()) && - match(FalseVal, m_MaskedLoad(m_Value(), m_Value(), m_Value(Mask), - m_CombineOr(m_Undef(), m_Zero()))) && + (match(FalseVal, m_MaskedLoad(m_Value(), m_Value(), m_Value(Mask), + m_CombineOr(m_Undef(), m_Zero()))) || + match(FalseVal, m_MaskedGather(m_Value(), m_Value(), m_Value(Mask), + m_CombineOr(m_Undef(), m_Zero())))) && (CondVal->getType() == Mask->getType())) { // We can remove the select by ensuring the load zeros all lanes the // select would have. We determine this by proving there is no overlap // between the load and select masks. // (i.e (load_mask & select_mask) == 0 == no overlap) bool CanMergeSelectIntoLoad = false; - if (Value *V = SimplifyAndInst(CondVal, Mask, SQ.getWithInstruction(&SI))) + if (Value *V = simplifyAndInst(CondVal, Mask, SQ.getWithInstruction(&SI))) CanMergeSelectIntoLoad = match(V, m_Zero()); if (CanMergeSelectIntoLoad) { - auto *MaskedLoad = cast<IntrinsicInst>(FalseVal); - if (isa<UndefValue>(MaskedLoad->getArgOperand(3))) - MaskedLoad->setArgOperand(3, TrueVal /* Zero */); - return replaceInstUsesWith(SI, MaskedLoad); + auto *MaskedInst = cast<IntrinsicInst>(FalseVal); + if (isa<UndefValue>(MaskedInst->getArgOperand(3))) + MaskedInst->setArgOperand(3, TrueVal /* Zero */); + return replaceInstUsesWith(SI, MaskedInst); } } |
