diff options
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 959 |
1 files changed, 644 insertions, 315 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b104e995019f..ce400ea43f29 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -129,12 +129,12 @@ static cl::opt<unsigned> StoreMergeDependenceLimit( static cl::opt<bool> EnableReduceLoadOpStoreWidth( "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), - cl::desc("DAG cominber enable reducing the width of load/op/store " + cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence")); static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore( "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), - cl::desc("DAG cominber enable load/<replace bytes>/store with " + cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store")); namespace { @@ -319,7 +319,7 @@ namespace { /// If so, return true. bool SimplifyDemandedBits(SDValue Op) { unsigned BitWidth = Op.getScalarValueSizeInBits(); - APInt DemandedBits = APInt::getAllOnesValue(BitWidth); + APInt DemandedBits = APInt::getAllOnes(BitWidth); return SimplifyDemandedBits(Op, DemandedBits); } @@ -345,7 +345,7 @@ namespace { return false; unsigned NumElts = Op.getValueType().getVectorNumElements(); - APInt DemandedElts = APInt::getAllOnesValue(NumElts); + APInt DemandedElts = APInt::getAllOnes(NumElts); return SimplifyDemandedVectorElts(Op, DemandedElts); } @@ -436,7 +436,7 @@ namespace { SDValue visitOR(SDNode *N); SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitXOR(SDNode *N); - SDValue SimplifyVBinOp(SDNode *N); + SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL); SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); @@ -515,6 +515,7 @@ namespace { SDValue visitFP_TO_FP16(SDNode *N); SDValue visitFP16_TO_FP(SDNode *N); SDValue visitVECREDUCE(SDNode *N); + SDValue visitVPOp(SDNode *N); SDValue visitFADDForFMACombine(SDNode *N); SDValue visitFSUBForFMACombine(SDNode *N); @@ -615,7 +616,7 @@ namespace { SmallVectorImpl<SDValue> &Aliases); /// Return true if there is any possibility that the two addresses overlap. - bool isAlias(SDNode *Op0, SDNode *Op1) const; + bool mayAlias(SDNode *Op0, SDNode *Op1) const; /// Walk up chain skipping non-aliasing memory nodes, looking for a better /// chain (aliasing node.) @@ -1062,21 +1063,22 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, if (N0.getOpcode() != Opc) return SDValue(); - if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { - if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + + if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) { + if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) { // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2)) - if (SDValue OpNode = - DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1})) - return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) + return DAG.getNode(Opc, DL, VT, N00, OpNode); return SDValue(); } if (N0.hasOneUse()) { // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1) // iff (op x, c1) has one use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); - if (!OpNode.getNode()) - return SDValue(); - return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); + if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1)) + return DAG.getNode(Opc, DL, VT, OpNode, N01); + return SDValue(); } } return SDValue(); @@ -1738,6 +1740,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::VECREDUCE_UMIN: case ISD::VECREDUCE_FMAX: case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N); +#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC: +#include "llvm/IR/VPIntrinsics.def" + return visitVPOp(N); } return SDValue(); } @@ -2257,7 +2262,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (add x, 0) -> x, vector edition @@ -2439,9 +2444,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { N0.getOperand(0)); // fold (add (add (xor a, -1), b), 1) -> (sub b, a) - if (N0.getOpcode() == ISD::ADD || - N0.getOpcode() == ISD::UADDO || - N0.getOpcode() == ISD::SADDO) { + if (N0.getOpcode() == ISD::ADD) { SDValue A, Xor; if (isBitwiseNot(N0.getOperand(0))) { @@ -2783,7 +2786,7 @@ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, IsFlip = Const->isOne(); break; case TargetLowering::ZeroOrNegativeOneBooleanContent: - IsFlip = Const->isAllOnesValue(); + IsFlip = Const->isAllOnes(); break; case TargetLowering::UndefinedBooleanContent: IsFlip = (Const->getAPIntValue() & 0x01) == 1; @@ -3259,7 +3262,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (sub x, 0) -> x, vector edition @@ -3317,11 +3320,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } // Convert 0 - abs(x). - SDValue Result; if (N1->getOpcode() == ISD::ABS && - !TLI.isOperationLegalOrCustom(ISD::ABS, VT) && - TLI.expandABS(N1.getNode(), Result, DAG, true)) - return Result; + !TLI.isOperationLegalOrCustom(ISD::ABS, VT)) + if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true)) + return Result; // Fold neg(splat(neg(x)) -> splat(x) if (VT.isVector()) { @@ -3785,7 +3787,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); @@ -3810,18 +3812,18 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 - if (N1IsConst && ConstValue1.isNullValue()) + if (N1IsConst && ConstValue1.isZero()) return N1; // fold (mul x, 1) -> x - if (N1IsConst && ConstValue1.isOneValue()) + if (N1IsConst && ConstValue1.isOne()) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // fold (mul x, -1) -> 0-x - if (N1IsConst && ConstValue1.isAllOnesValue()) { + if (N1IsConst && ConstValue1.isAllOnes()) { SDLoc DL(N); return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); @@ -3839,7 +3841,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) { + if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) { unsigned Log2Val = (-ConstValue1).logBase2(); SDLoc DL(N); // FIXME: If the input is something that is easily negated (e.g. a @@ -3968,7 +3970,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SmallBitVector ClearMask; ClearMask.reserve(NumElts); auto IsClearMask = [&ClearMask](ConstantSDNode *V) { - if (!V || V->isNullValue()) { + if (!V || V->isZero()) { ClearMask.push_back(true); return true; } @@ -4054,9 +4056,7 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) { SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); SDValue combined; - for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), - UE = Op0.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; + for (SDNode *User : Op0.getNode()->uses()) { if (User == Node || User->getOpcode() == ISD::DELETED_NODE || User->use_empty()) continue; @@ -4113,7 +4113,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { // 0 / X -> 0 // 0 % X -> 0 ConstantSDNode *N0C = isConstOrConstSplat(N0); - if (N0C && N0C->isNullValue()) + if (N0C && N0C->isZero()) return N0; // X / X -> 1 @@ -4138,21 +4138,20 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); EVT CCVT = getSetCCResultType(VT); + SDLoc DL(N); // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; - SDLoc DL(N); - // fold (sdiv c1, c2) -> c1/c2 ConstantSDNode *N1C = isConstOrConstSplat(N1); if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1})) return C; // fold (sdiv X, -1) -> 0-X - if (N1C && N1C->isAllOnesValue()) + if (N1C && N1C->isAllOnes()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0) @@ -4206,11 +4205,11 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { // Helper for determining whether a value is a power-2 constant scalar or a // vector of such elements. auto IsPowerOfTwo = [](ConstantSDNode *C) { - if (C->isNullValue() || C->isOpaque()) + if (C->isZero() || C->isOpaque()) return false; if (C->getAPIntValue().isPowerOf2()) return true; - if ((-C->getAPIntValue()).isPowerOf2()) + if (C->getAPIntValue().isNegatedPowerOf2()) return true; return false; }; @@ -4283,21 +4282,20 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); EVT CCVT = getSetCCResultType(VT); + SDLoc DL(N); // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; - SDLoc DL(N); - // fold (udiv c1, c2) -> c1/c2 ConstantSDNode *N1C = isConstOrConstSplat(N1); if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1})) return C; // fold (udiv X, -1) -> select(X == -1, 1, 0) - if (N1C && N1C->getAPIntValue().isAllOnesValue()) + if (N1C && N1C->isAllOnes()) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT)); @@ -4393,7 +4391,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) { return C; // fold (urem X, -1) -> select(X == -1, 0, x) - if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue()) + if (!isSigned && N1C && N1C->isAllOnes()) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), DAG.getConstant(0, DL, VT), N0); @@ -4477,6 +4475,11 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1})) return C; + // canonicalize constant to RHS. + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0); + // fold (mulhs x, 0) -> 0 if (isNullConstant(N1)) return N1; @@ -4529,6 +4532,11 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1})) return C; + // canonicalize constant to RHS. + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0); + // fold (mulhu x, 0) -> 0 if (isNullConstant(N1)) return N1; @@ -4569,6 +4577,12 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { } } + // Simplify the operands using demanded-bits information. + // We don't have demanded bits support for MULHU so this just enables constant + // folding based on known bits. + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -4770,20 +4784,21 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); unsigned Opcode = N->getOpcode(); + SDLoc DL(N); // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold operation with constant operands. - if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1})) + if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) return C; // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); + return DAG.getNode(N->getOpcode(), DL, VT, N1, N0); // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX. // Only do this if the current op isn't legal and the flipped is. @@ -4799,7 +4814,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { default: llvm_unreachable("Unknown MINMAX opcode"); } if (TLI.isOperationLegal(AltOpcode, VT)) - return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1); + return DAG.getNode(AltOpcode, DL, VT, N0, N1); } // Simplify the operands using demanded-bits information. @@ -5135,8 +5150,9 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) { if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL)) return V; + // TODO: Rewrite this to return a new 'AND' instead of using CombineTo. if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && - VT.getSizeInBits() <= 64) { + VT.getSizeInBits() <= 64 && N0->hasOneUse()) { if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal @@ -5608,6 +5624,39 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) { return DAG.getZExtOrTrunc(Setcc, DL, VT); } +/// For targets that support usubsat, match a bit-hack form of that operation +/// that ends in 'and' and convert it. +static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N1.getValueType(); + + // Canonicalize SRA as operand 1. + if (N0.getOpcode() == ISD::SRA) + std::swap(N0, N1); + + // xor/add with SMIN (signmask) are logically equivalent. + if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD) + return SDValue(); + + if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() || + N0.getOperand(0) != N1.getOperand(0)) + return SDValue(); + + unsigned BitWidth = VT.getScalarSizeInBits(); + ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true); + ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true); + if (!XorC || !XorC->getAPIntValue().isSignMask() || + !SraC || SraC->getAPIntValue() != BitWidth - 1) + return SDValue(); + + // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128 + // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128 + SDLoc DL(N); + SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT); + return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask); +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -5619,17 +5668,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; // fold (and x, 0) -> 0, vector edition if (ISD::isConstantSplatVectorAllZeros(N0.getNode())) // do not return N0, because undef node may exist in N0 - return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()), + return DAG.getConstant(APInt::getZero(N0.getScalarValueSizeInBits()), SDLoc(N), N0.getValueType()); if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) // do not return N1, because undef node may exist in N1 - return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()), + return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), SDLoc(N), N1.getValueType()); // fold (and x, -1) -> x, vector edition @@ -5680,8 +5729,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // if (and x, c) is known to be zero, return 0 unsigned BitWidth = VT.getScalarSizeInBits(); - if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), - APInt::getAllOnesValue(BitWidth))) + if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth))) return DAG.getConstant(0, SDLoc(N), VT); if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -5743,7 +5791,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Get the constant (if applicable) the zero'th operand is being ANDed with. // This can be a pure constant or a vector splat, in which case we treat the // vector as a scalar and use the splat value. - APInt Constant = APInt::getNullValue(1); + APInt Constant = APInt::getZero(1); if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { Constant = C->getAPIntValue(); } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { @@ -5774,7 +5822,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. if ((SplatBitSize % EltBitWidth) == 0) { - Constant = APInt::getAllOnesValue(EltBitWidth); + Constant = APInt::getAllOnes(EltBitWidth); for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i) Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth); } @@ -5801,7 +5849,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { case ISD::NON_EXTLOAD: B = true; break; } - if (B && Constant.isAllOnesValue()) { + if (B && Constant.isAllOnes()) { // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to // preserve semantics once we get rid of the AND. SDValue NewLoad(Load, 0); @@ -5971,6 +6019,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (IsAndZeroExtMask(N0, N1)) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0)); + if (hasOperation(ISD::USUBSAT, VT)) + if (SDValue V = foldAndToUsubsat(N, DAG)) + return V; + return SDValue(); } @@ -6385,7 +6437,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; // fold (or x, 0) -> x, vector edition @@ -6926,17 +6978,16 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, // a rot[lr]. This also matches funnel shift patterns, similar to rotation but // with different shifted sources. SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { - // Must be a legal type. Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); - if (!TLI.isTypeLegal(VT)) - return SDValue(); // The target must have at least one rotate/funnel flavor. + // We still try to match rotate by constant pre-legalization. + // TODO: Support pre-legalization funnel-shift by constant. bool HasROTL = hasOperation(ISD::ROTL, VT); bool HasROTR = hasOperation(ISD::ROTR, VT); bool HasFSHL = hasOperation(ISD::FSHL, VT); bool HasFSHR = hasOperation(ISD::FSHR, VT); - if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR) + if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR) return SDValue(); // Check for truncated rotate. @@ -6989,6 +7040,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { if (LHSShift.getOpcode() == RHSShift.getOpcode()) return SDValue(); // Shifts must disagree. + // TODO: Support pre-legalization funnel-shift by constant. bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0); if (!IsRotate && !(HasFSHL || HasFSHR)) return SDValue(); // Requires funnel shift support. @@ -7017,12 +7069,15 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { }; if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { SDValue Res; - if (IsRotate && (HasROTL || HasROTR)) - Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, - HasROTL ? LHSShiftAmt : RHSShiftAmt); - else - Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg, - RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt); + if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) { + bool UseROTL = !LegalOperations || HasROTL; + Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, + UseROTL ? LHSShiftAmt : RHSShiftAmt); + } else { + bool UseFSHL = !LegalOperations || HasFSHL; + Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg, + RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt); + } // If there is an AND of either shifted operand, apply it to the result. if (LHSMask.getNode() || RHSMask.getNode()) { @@ -7046,6 +7101,11 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { return Res; } + // Even pre-legalization, we can't easily rotate/funnel-shift by a variable + // shift. + if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR) + return SDValue(); + // If there is a mask here, and we have a variable shift, we can't be sure // that we're masking out the right stuff. if (LHSMask.getNode() || RHSMask.getNode()) @@ -7297,7 +7357,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { // TODO: If there is evidence that running this later would help, this // limitation could be removed. Legality checks may need to be added // for the created store and optional bswap/rotate. - if (LegalOperations) + if (LegalOperations || OptLevel == CodeGenOpt::None) return SDValue(); // We only handle merging simple stores of 1-4 bytes. @@ -7672,9 +7732,12 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // | D | // Into: // (x & m) | (y & ~m) -// If y is a constant, and the 'andn' does not work with immediates, -// we unfold into a different pattern: +// If y is a constant, m is not a 'not', and the 'andn' does not work with +// immediates, we unfold into a different pattern: // ~(~x & m) & (m | y) +// If x is a constant, m is a 'not', and the 'andn' does not work with +// immediates, we unfold into a different pattern: +// (x | ~m) & ~(~m & ~y) // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at // the very least that breaks andnpd / andnps patterns, and because those // patterns are simplified in IR and shouldn't be created in the DAG @@ -7729,8 +7792,9 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) { SDLoc DL(N); - // If Y is a constant, check that 'andn' works with immediates. - if (!TLI.hasAndNot(Y)) { + // If Y is a constant, check that 'andn' works with immediates. Unless M is + // a bitwise not that would already allow ANDN to be used. + if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) { assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable."); // If not, we need to do a bit more work to make sure andn is still used. SDValue NotX = DAG.getNOT(DL, X, VT); @@ -7740,6 +7804,19 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) { return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS); } + // If X is a constant and M is a bitwise not, check that 'andn' works with + // immediates. + if (!TLI.hasAndNot(X) && isBitwiseNot(M)) { + assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable."); + // If not, we need to do a bit more work to make sure andn is still used. + SDValue NotM = M.getOperand(0); + SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM); + SDValue NotY = DAG.getNOT(DL, Y, VT); + SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY); + SDValue NotRHS = DAG.getNOT(DL, RHS, VT); + return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS); + } + SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M); SDValue NotM = DAG.getNOT(DL, M, VT); SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM); @@ -7751,10 +7828,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + SDLoc DL(N); // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (xor x, 0) -> x, vector edition @@ -7765,7 +7843,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (xor undef, undef) -> 0. This is a common idiom (misuse). - SDLoc DL(N); if (N0.isUndef() && N1.isUndef()) return DAG.getConstant(0, DL, VT); @@ -7900,7 +7977,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // shift has been simplified to undef. uint64_t ShiftAmt = ShiftC->getLimitedValue(); if (ShiftAmt < BitWidth) { - APInt Ones = APInt::getAllOnesValue(BitWidth); + APInt Ones = APInt::getAllOnes(BitWidth); Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt); if (XorC->getAPIntValue() == Ones) { // If the xor constant is a shifted -1, do a 'not' before the shift: @@ -8223,7 +8300,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); @@ -8256,8 +8333,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return NewSel; // if (shl x, c) is known to be zero, return 0 - if (DAG.MaskedValueIsZero(SDValue(N, 0), - APInt::getAllOnesValue(OpSizeInBits))) + if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits))) return DAG.getConstant(0, SDLoc(N), VT); // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). @@ -8502,28 +8578,43 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, // Both operands must be equivalent extend nodes. SDValue LeftOp = ShiftOperand.getOperand(0); SDValue RightOp = ShiftOperand.getOperand(1); + bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND; bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND; - if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode()) + if (!IsSignExt && !IsZeroExt) return SDValue(); - EVT WideVT1 = LeftOp.getValueType(); - EVT WideVT2 = RightOp.getValueType(); - (void)WideVT2; + EVT NarrowVT = LeftOp.getOperand(0).getValueType(); + unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); + + SDValue MulhRightOp; + if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) { + unsigned ActiveBits = IsSignExt + ? Constant->getAPIntValue().getMinSignedBits() + : Constant->getAPIntValue().getActiveBits(); + if (ActiveBits > NarrowVTSize) + return SDValue(); + MulhRightOp = DAG.getConstant( + Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL, + NarrowVT); + } else { + if (LeftOp.getOpcode() != RightOp.getOpcode()) + return SDValue(); + // Check that the two extend nodes are the same type. + if (NarrowVT != RightOp.getOperand(0).getValueType()) + return SDValue(); + MulhRightOp = RightOp.getOperand(0); + } + + EVT WideVT = LeftOp.getValueType(); // Proceed with the transformation if the wide types match. - assert((WideVT1 == WideVT2) && + assert((WideVT == RightOp.getValueType()) && "Cannot have a multiply node with two different operand types."); - EVT NarrowVT = LeftOp.getOperand(0).getValueType(); - // Check that the two extend nodes are the same type. - if (NarrowVT != RightOp.getOperand(0).getValueType()) - return SDValue(); - // Proceed with the transformation if the wide type is twice as large // as the narrow type. - unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); - if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize) + if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize) return SDValue(); // Check the shift amount with the narrow type size. @@ -8541,10 +8632,10 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT)) return SDValue(); - SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), - RightOp.getOperand(0)); - return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1) - : DAG.getZExtOrTrunc(Result, DL, WideVT1)); + SDValue Result = + DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp); + return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT) + : DAG.getZExtOrTrunc(Result, DL, WideVT)); } SDValue DAGCombiner::visitSRA(SDNode *N) { @@ -8564,7 +8655,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; ConstantSDNode *N1C = isConstOrConstSplat(N1); @@ -8762,7 +8853,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; ConstantSDNode *N1C = isConstOrConstSplat(N1); @@ -8775,8 +8866,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return NewSel; // if (srl x, c) is known to be zero, return 0 - if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), - APInt::getAllOnesValue(OpSizeInBits))) + if (N1C && + DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits))) return DAG.getConstant(0, SDLoc(N), VT); // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) @@ -9358,27 +9449,27 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { // is also a target-independent combine here in DAGCombiner in the other // direction for (select Cond, -1, 0) when the condition is not i1. if (CondVT == MVT::i1 && !LegalOperations) { - if (C1->isNullValue() && C2->isOne()) { + if (C1->isZero() && C2->isOne()) { // select Cond, 0, 1 --> zext (!Cond) SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); if (VT != MVT::i1) NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond); return NotCond; } - if (C1->isNullValue() && C2->isAllOnesValue()) { + if (C1->isZero() && C2->isAllOnes()) { // select Cond, 0, -1 --> sext (!Cond) SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); if (VT != MVT::i1) NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond); return NotCond; } - if (C1->isOne() && C2->isNullValue()) { + if (C1->isOne() && C2->isZero()) { // select Cond, 1, 0 --> zext (Cond) if (VT != MVT::i1) Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); return Cond; } - if (C1->isAllOnesValue() && C2->isNullValue()) { + if (C1->isAllOnes() && C2->isZero()) { // select Cond, -1, 0 --> sext (Cond) if (VT != MVT::i1) Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); @@ -9406,7 +9497,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { } // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) - if (C1Val.isPowerOf2() && C2Val.isNullValue()) { + if (C1Val.isPowerOf2() && C2Val.isZero()) { if (VT != MVT::i1) Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT); @@ -9434,7 +9525,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { TargetLowering::ZeroOrOneBooleanContent && TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) == TargetLowering::ZeroOrOneBooleanContent && - C1->isNullValue() && C2->isOne()) { + C1->isZero() && C2->isOne()) { SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT)); if (VT.bitsEq(CondVT)) @@ -9479,6 +9570,64 @@ static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse()) + return SDValue(); + + SDValue Cond0 = N0.getOperand(0); + SDValue Cond1 = N0.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + if (VT != Cond0.getValueType()) + return SDValue(); + + // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the + // compare is inverted from that pattern ("Cond0 s> -1"). + if (CC == ISD::SETLT && isNullOrNullSplat(Cond1)) + ; // This is the pattern we are looking for. + else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1)) + std::swap(N1, N2); + else + return SDValue(); + + // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1 + if (isNullOrNullSplat(N2)) { + SDLoc DL(N); + SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); + return DAG.getNode(ISD::AND, DL, VT, Sra, N1); + } + + // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2 + if (isAllOnesOrAllOnesSplat(N1)) { + SDLoc DL(N); + SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); + return DAG.getNode(ISD::OR, DL, VT, Sra, N2); + } + + // If we have to invert the sign bit mask, only do that transform if the + // target has a bitwise 'and not' instruction (the invert is free). + // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2 + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) { + SDLoc DL(N); + SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); + SDValue Not = DAG.getNOT(DL, Sra, VT); + return DAG.getNode(ISD::AND, DL, VT, Not, N2); + } + + // TODO: There's another pattern in this family, but it may require + // implementing hasOrNot() to check for profitability: + // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2 + + return SDValue(); +} + SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -9703,8 +9852,8 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { "same value. This should have been addressed before this function."); return DAG.getNode( ISD::CONCAT_VECTORS, DL, VT, - BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), - TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); + BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0), + TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1)); } bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) { @@ -10169,6 +10318,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (SDValue V = foldVSelectOfConstants(N)) return V; + if (hasOperation(ISD::SRA, VT)) + if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG)) + return V; + return SDValue(); } @@ -10190,7 +10343,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { AddToWorklist(SCC.getNode()); if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { - if (!SCCC->isNullValue()) + if (!SCCC->isZero()) return N2; // cond always true -> true val else return N3; // cond always false -> false val @@ -10248,13 +10401,13 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { // Is 'X Cond C' always true or false? auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) { - bool False = (Cond == ISD::SETULT && C->isNullValue()) || + bool False = (Cond == ISD::SETULT && C->isZero()) || (Cond == ISD::SETLT && C->isMinSignedValue()) || - (Cond == ISD::SETUGT && C->isAllOnesValue()) || + (Cond == ISD::SETUGT && C->isAllOnes()) || (Cond == ISD::SETGT && C->isMaxSignedValue()); - bool True = (Cond == ISD::SETULE && C->isAllOnesValue()) || + bool True = (Cond == ISD::SETULE && C->isAllOnes()) || (Cond == ISD::SETLE && C->isMaxSignedValue()) || - (Cond == ISD::SETUGE && C->isNullValue()) || + (Cond == ISD::SETUGE && C->isZero()) || (Cond == ISD::SETGE && C->isMinSignedValue()); return True || False; }; @@ -10863,7 +11016,7 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD) return SDValue(); - if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0))) + if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0))) return SDValue(); if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0))) @@ -11257,7 +11410,7 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, Known = DAG.computeKnownBits(Op); - return (Known.Zero | 1).isAllOnesValue(); + return (Known.Zero | 1).isAllOnes(); } /// Given an extending node with a pop-count operand, if the target does not @@ -12016,7 +12169,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. - if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1)) + if (ExtVTBits >= DAG.ComputeMinSignedBits(N0)) return N0; // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 @@ -12032,8 +12185,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); unsigned N00Bits = N00.getScalarValueSizeInBits(); - if ((N00Bits <= ExtVTBits || - (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) && + if ((N00Bits <= ExtVTBits || DAG.ComputeMinSignedBits(N00) <= ExtVTBits) && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00); } @@ -12052,8 +12204,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts); if ((N00Bits == ExtVTBits || (!IsZext && (N00Bits < ExtVTBits || - (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) < - ExtVTBits))) && + DAG.ComputeMinSignedBits(N00) <= ExtVTBits))) && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))) return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00); @@ -12290,7 +12441,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue Amt = N0.getOperand(1); KnownBits Known = DAG.computeKnownBits(Amt); unsigned Size = VT.getScalarSizeInBits(); - if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { + if (Known.countMaxActiveBits() <= Log2_32(Size)) { SDLoc SL(N); EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); @@ -12538,8 +12689,8 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) { SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { assert(N->getOpcode() == ISD::BUILD_PAIR); - LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); - LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); + auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); + auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); // A BUILD_PAIR is always having the least significant part in elt 0 and the // most significant part in elt 1. So when combining into one large load, we @@ -12547,22 +12698,20 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { if (DAG.getDataLayout().isBigEndian()) std::swap(LD1, LD2); - if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || + if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) || + !LD1->hasOneUse() || !LD2->hasOneUse() || LD1->getAddressSpace() != LD2->getAddressSpace()) return SDValue(); + + bool LD1Fast = false; EVT LD1VT = LD1->getValueType(0); unsigned LD1Bytes = LD1VT.getStoreSize(); - if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && - DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) { - Align Alignment = LD1->getAlign(); - Align NewAlign = DAG.getDataLayout().getABITypeAlign( - VT.getTypeForEVT(*DAG.getContext())); - - if (NewAlign <= Alignment && - (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) - return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), - LD1->getPointerInfo(), Alignment); - } + if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && + DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) && + TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + *LD1->getMemOperand(), &LD1Fast) && LD1Fast) + return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), + LD1->getPointerInfo(), LD1->getAlign()); return SDValue(); } @@ -12938,69 +13087,45 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); } - SDLoc DL(BV); - // Okay, we know the src/dst types are both integers of differing types. - // Handling growing first. assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); - if (SrcBitSize < DstBitSize) { - unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; - - SmallVector<SDValue, 8> Ops; - for (unsigned i = 0, e = BV->getNumOperands(); i != e; - i += NumInputsPerOutput) { - bool isLE = DAG.getDataLayout().isLittleEndian(); - APInt NewBits = APInt(DstBitSize, 0); - bool EltIsUndef = true; - for (unsigned j = 0; j != NumInputsPerOutput; ++j) { - // Shift the previously computed bits over. - NewBits <<= SrcBitSize; - SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j)); - if (Op.isUndef()) continue; - EltIsUndef = false; - NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). - zextOrTrunc(SrcBitSize).zext(DstBitSize); - } - - if (EltIsUndef) - Ops.push_back(DAG.getUNDEF(DstEltVT)); - else - Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT)); - } + // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a + // BuildVectorSDNode? + auto *BVN = cast<BuildVectorSDNode>(BV); - EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getBuildVector(VT, DL, Ops); - } + // Extract the constant raw bit data. + BitVector UndefElements; + SmallVector<APInt> RawBits; + bool IsLE = DAG.getDataLayout().isLittleEndian(); + if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements)) + return SDValue(); - // Finally, this must be the case where we are shrinking elements: each input - // turns into multiple outputs. - unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; - EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, - NumOutputsPerInput*BV->getNumOperands()); + SDLoc DL(BV); SmallVector<SDValue, 8> Ops; + for (unsigned I = 0, E = RawBits.size(); I != E; ++I) { + if (UndefElements[I]) + Ops.push_back(DAG.getUNDEF(DstEltVT)); + else + Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT)); + } - for (const SDValue &Op : BV->op_values()) { - if (Op.isUndef()) { - Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); - continue; - } - - APInt OpVal = cast<ConstantSDNode>(Op)-> - getAPIntValue().zextOrTrunc(SrcBitSize); + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); + return DAG.getBuildVector(VT, DL, Ops); +} - for (unsigned j = 0; j != NumOutputsPerInput; ++j) { - APInt ThisVal = OpVal.trunc(DstBitSize); - Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); - OpVal.lshrInPlace(DstBitSize); - } +// Returns true if floating point contraction is allowed on the FMUL-SDValue +// `N` +static bool isContractableFMUL(const TargetOptions &Options, SDValue N) { + assert(N.getOpcode() == ISD::FMUL); - // For big endian targets, swap the order of the pieces of each element. - if (DAG.getDataLayout().isBigEndian()) - std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); - } + return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath || + N->getFlags().hasAllowContract(); +} - return DAG.getBuildVector(VT, DL, Ops); +// Returns true if `N` can assume no infinities involved in its computation. +static bool hasNoInfs(const TargetOptions &Options, SDValue N) { + return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs(); } /// Try to perform FMA combining on a given FADD node. @@ -13039,6 +13164,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + auto isFusedOp = [&](SDValue N) { + unsigned Opcode = N.getOpcode(); + return Opcode == ISD::FMA || Opcode == ISD::FMAD; + }; + // Is the node an FMUL and contractable either due to global flags or // SDNodeFlags. auto isContractableFMUL = [AllowFusionGlobally](SDValue N) { @@ -13070,12 +13200,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E) // This requires reassociation because it changes the order of operations. SDValue FMA, E; - if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode && + if (CanReassociate && isFusedOp(N0) && N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() && N0.getOperand(2).hasOneUse()) { FMA = N0; E = N1; - } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode && + } else if (CanReassociate && isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() && N1.getOperand(2).hasOneUse()) { FMA = N1; @@ -13131,7 +13261,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z)); }; - if (N0.getOpcode() == PreferredFusedOpcode) { + if (isFusedOp(N0)) { SDValue N02 = N0.getOperand(2); if (N02.getOpcode() == ISD::FP_EXTEND) { SDValue N020 = N02.getOperand(0); @@ -13161,7 +13291,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { }; if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == PreferredFusedOpcode) { + if (isFusedOp(N00)) { SDValue N002 = N00.getOperand(2); if (isContractableFMUL(N002) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, @@ -13175,7 +13305,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // fold (fadd x, (fma y, z, (fpext (fmul u, v))) // -> (fma y, z, (fma (fpext u), (fpext v), x)) - if (N1.getOpcode() == PreferredFusedOpcode) { + if (isFusedOp(N1)) { SDValue N12 = N1.getOperand(2); if (N12.getOpcode() == ISD::FP_EXTEND) { SDValue N120 = N12.getOperand(0); @@ -13196,7 +13326,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // interesting for all targets, especially GPUs. if (N1.getOpcode() == ISD::FP_EXTEND) { SDValue N10 = N1.getOperand(0); - if (N10.getOpcode() == PreferredFusedOpcode) { + if (isFusedOp(N10)) { SDValue N102 = N10.getOperand(2); if (isContractableFMUL(N102) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, @@ -13392,12 +13522,17 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { return isContractableFMUL(N) && isReassociable(N.getNode()); }; + auto isFusedOp = [&](SDValue N) { + unsigned Opcode = N.getOpcode(); + return Opcode == ISD::FMA || Opcode == ISD::FMAD; + }; + // More folding opportunities when target permits. if (Aggressive && isReassociable(N)) { bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract(); // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) - if (CanFuse && N0.getOpcode() == PreferredFusedOpcode && + if (CanFuse && isFusedOp(N0) && isContractableAndReassociableFMUL(N0.getOperand(2)) && N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), @@ -13410,7 +13545,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub x, (fma y, z, (fmul u, v))) // -> (fma (fneg y), z, (fma (fneg u), v, x)) - if (CanFuse && N1.getOpcode() == PreferredFusedOpcode && + if (CanFuse && isFusedOp(N1) && isContractableAndReassociableFMUL(N1.getOperand(2)) && N1->hasOneUse() && NoSignedZero) { SDValue N20 = N1.getOperand(2).getOperand(0); @@ -13424,8 +13559,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) - if (N0.getOpcode() == PreferredFusedOpcode && - N0->hasOneUse()) { + if (isFusedOp(N0) && N0->hasOneUse()) { SDValue N02 = N0.getOperand(2); if (N02.getOpcode() == ISD::FP_EXTEND) { SDValue N020 = N02.getOperand(0); @@ -13451,7 +13585,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // interesting for all targets, especially GPUs. if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == PreferredFusedOpcode) { + if (isFusedOp(N00)) { SDValue N002 = N00.getOperand(2); if (isContractableAndReassociableFMUL(N002) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, @@ -13471,8 +13605,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) - if (N1.getOpcode() == PreferredFusedOpcode && - N1.getOperand(2).getOpcode() == ISD::FP_EXTEND && + if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND && N1->hasOneUse()) { SDValue N120 = N1.getOperand(2).getOperand(0); if (isContractableAndReassociableFMUL(N120) && @@ -13496,8 +13629,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // FIXME: This turns two single-precision and one double-precision // operation into two double-precision operations, which might not be // interesting for all targets, especially GPUs. - if (N1.getOpcode() == ISD::FP_EXTEND && - N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { + if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) { SDValue CvtSrc = N1.getOperand(0); SDValue N100 = CvtSrc.getOperand(0); SDValue N101 = CvtSrc.getOperand(1); @@ -13538,12 +13670,13 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { // The transforms below are incorrect when x == 0 and y == inf, because the // intermediate multiplication produces a nan. - if (!Options.NoInfsFPMath) + SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1; + if (!hasNoInfs(Options, FAdd)) return SDValue(); // Floating-point multiply-add without intermediate rounding. bool HasFMA = - (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && + isContractableFMUL(Options, SDValue(N, 0)) && TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); @@ -13633,7 +13766,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (fadd c1, c2) -> c1 + c2 @@ -13841,7 +13974,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (fsub c1, c2) -> c1-c2 @@ -13926,7 +14059,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold vector ops if (VT.isVector()) { // This just handles C1 * C2 for vectors. Other vector folds are below. - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; } @@ -13971,10 +14104,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (N1CFP && N1CFP->isExactlyValue(+2.0)) return DAG.getNode(ISD::FADD, DL, VT, N0, N0); - // fold (fmul X, -1.0) -> (fneg X) - if (N1CFP && N1CFP->isExactlyValue(-1.0)) - if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, DL, VT, N0); + // fold (fmul X, -1.0) -> (fsub -0.0, X) + if (N1CFP && N1CFP->isExactlyValue(-1.0)) { + if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) { + return DAG.getNode(ISD::FSUB, DL, VT, + DAG.getConstantFP(-0.0, DL, VT), N0, Flags); + } + } // -N0 * -N1 --> N0 * N1 TargetLowering::NegatibleCost CostN0 = @@ -14260,7 +14396,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (fdiv c1, c2) -> c1/c2 @@ -16245,11 +16381,12 @@ struct LoadedSlice { return false; // Check if it will be merged with the load. - // 1. Check the alignment constraint. - Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign( - ResVT.getTypeForEVT(*DAG->getContext())); - - if (RequiredAlignment > getAlign()) + // 1. Check the alignment / fast memory access constraint. + bool IsFast = false; + if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT, + Origin->getAddressSpace(), getAlign(), + Origin->getMemOperand()->getFlags(), &IsFast) || + !IsFast) return false; // 2. Check that the load is a legal operation for that type. @@ -16270,7 +16407,7 @@ struct LoadedSlice { /// \p UsedBits looks like 0..0 1..1 0..0. static bool areUsedBitsDense(const APInt &UsedBits) { // If all the bits are one, this is dense! - if (UsedBits.isAllOnesValue()) + if (UsedBits.isAllOnes()) return true; // Get rid of the unused bits on the right. @@ -16279,7 +16416,7 @@ static bool areUsedBitsDense(const APInt &UsedBits) { if (NarrowedUsedBits.countLeadingZeros()) NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits()); // Check that the chunk of bits is completely used. - return NarrowedUsedBits.isAllOnesValue(); + return NarrowedUsedBits.isAllOnes(); } /// Check whether or not \p First and \p Second are next to each other @@ -16697,8 +16834,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { unsigned BitWidth = N1.getValueSizeInBits(); APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue(); if (Opc == ISD::AND) - Imm ^= APInt::getAllOnesValue(BitWidth); - if (Imm == 0 || Imm.isAllOnesValue()) + Imm ^= APInt::getAllOnes(BitWidth); + if (Imm == 0 || Imm.isAllOnes()) return SDValue(); unsigned ShAmt = Imm.countTrailingZeros(); unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; @@ -16725,16 +16862,19 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if ((Imm & Mask) == Imm) { APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); if (Opc == ISD::AND) - NewImm ^= APInt::getAllOnesValue(NewBW); + NewImm ^= APInt::getAllOnes(NewBW); uint64_t PtrOff = ShAmt / 8; // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. if (DAG.getDataLayout().isBigEndian()) PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; + bool IsFast = false; Align NewAlign = commonAlignment(LD->getAlign(), PtrOff); - Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); - if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy)) + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT, + LD->getAddressSpace(), NewAlign, + LD->getMemOperand()->getFlags(), &IsFast) || + !IsFast) return SDValue(); SDValue NewPtr = @@ -16788,27 +16928,26 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { if (VTSize.isScalable()) return SDValue(); + bool FastLD = false, FastST = false; EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize()); if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || !TLI.isOperationLegal(ISD::STORE, IntVT) || !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || - !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) - return SDValue(); - - Align LDAlign = LD->getAlign(); - Align STAlign = ST->getAlign(); - Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); - Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy); - if (LDAlign < ABIAlign || STAlign < ABIAlign) + !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) || + !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT, + *LD->getMemOperand(), &FastLD) || + !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT, + *ST->getMemOperand(), &FastST) || + !FastLD || !FastST) return SDValue(); SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), LDAlign); + LD->getPointerInfo(), LD->getAlign()); SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(), - ST->getPointerInfo(), STAlign); + ST->getPointerInfo(), ST->getAlign()); AddToWorklist(NewLD.getNode()); AddToWorklist(NewST.getNode()); @@ -16839,8 +16978,10 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue &ConstNode) { APInt Val; - // If the add only has one use, this would be OK to do. - if (AddNode.getNode()->hasOneUse()) + // If the add only has one use, and the target thinks the folding is + // profitable or does not lead to worse code, this would be OK to do. + if (AddNode.getNode()->hasOneUse() && + TLI.isMulAddWithConstProfitable(AddNode, ConstNode)) return true; // Walk all the users of the constant with which we're multiplying. @@ -16932,6 +17073,22 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( unsigned SizeInBits = NumStores * ElementSizeBits; unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; + Optional<MachineMemOperand::Flags> Flags; + AAMDNodes AAInfo; + for (unsigned I = 0; I != NumStores; ++I) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode); + if (!Flags) { + Flags = St->getMemOperand()->getFlags(); + AAInfo = St->getAAInfo(); + continue; + } + // Skip merging if there's an inconsistent flag. + if (Flags != St->getMemOperand()->getFlags()) + return false; + // Concatenate AA metadata. + AAInfo = AAInfo.concat(St->getAAInfo()); + } + EVT StoreTy; if (UseVector) { unsigned Elts = NumStores * NumMemElts; @@ -17049,9 +17206,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( // make sure we use trunc store if it's necessary to be legal. SDValue NewStore; if (!UseTrunc) { - NewStore = - DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), FirstInChain->getAlign()); + NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), + FirstInChain->getAlign(), Flags.getValue(), AAInfo); } else { // Must be realized as a trunc store EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); @@ -17063,7 +17220,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( NewStore = DAG.getTruncStore( NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, - FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags()); + FirstInChain->getAlign(), Flags.getValue(), AAInfo); } // Replace all merged stores with the new store. @@ -17360,7 +17517,7 @@ bool DAGCombiner::tryStoreMergeOfConstants( SDValue StoredVal = ST->getValue(); bool IsElementZero = false; if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) - IsElementZero = C->isNullValue(); + IsElementZero = C->isZero(); else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) IsElementZero = C->getConstantFPValue()->isNullValue(); if (IsElementZero) { @@ -17379,7 +17536,8 @@ bool DAGCombiner::tryStoreMergeOfConstants( break; if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, + DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFast) && IsFast) { @@ -17391,7 +17549,8 @@ bool DAGCombiner::tryStoreMergeOfConstants( EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, + DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFast) && IsFast) { @@ -17410,7 +17569,7 @@ bool DAGCombiner::tryStoreMergeOfConstants( unsigned Elts = (i + 1) * NumMemElts; EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && - TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) && IsFast) @@ -17486,7 +17645,8 @@ bool DAGCombiner::tryStoreMergeOfExtracts( if (Ty.getSizeInBits() > MaximumLegalStoreInBits) break; - if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + if (TLI.isTypeLegal(Ty) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) && IsFast) @@ -17634,8 +17794,13 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, bool IsFastSt = false; bool IsFastLd = false; - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + // Don't try vector types if we need a rotate. We may still fail the + // legality checks for the integer type, but we can't handle the rotate + // case with vectors. + // FIXME: We could use a shuffle in place of the rotate. + if (!NeedRotate && TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, + DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && @@ -17649,7 +17814,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; StoreTy = EVT::getIntegerVT(Context, SizeInBits); if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, + DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && @@ -17663,7 +17829,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, + DAG.getMachineFunction()) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) && TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) && TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && @@ -18215,7 +18382,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { case ISD::LIFETIME_END: // We can forward past any lifetime start/end that can be proven not to // alias the node. - if (!isAlias(Chain.getNode(), N)) + if (!mayAlias(Chain.getNode(), N)) Chains.push_back(Chain.getOperand(0)); break; case ISD::STORE: { @@ -18593,32 +18760,35 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, if (!VecEltVT.isByteSized()) return SDValue(); - Align Alignment = OriginalLoad->getAlign(); - Align NewAlign = DAG.getDataLayout().getABITypeAlign( - VecEltVT.getTypeForEVT(*DAG.getContext())); - - if (NewAlign > Alignment || - !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) - return SDValue(); - - ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ? - ISD::NON_EXTLOAD : ISD::EXTLOAD; - if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT)) + ISD::LoadExtType ExtTy = + ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD; + if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) || + !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT)) return SDValue(); - Alignment = NewAlign; - + Align Alignment = OriginalLoad->getAlign(); MachinePointerInfo MPI; SDLoc DL(EVE); if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) { int Elt = ConstEltNo->getZExtValue(); unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); + Alignment = commonAlignment(Alignment, PtrOff); } else { // Discard the pointer info except the address space because the memory // operand can't represent this new access since the offset is variable. MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace()); + Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8); } + + bool IsFast = false; + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT, + OriginalLoad->getAddressSpace(), Alignment, + OriginalLoad->getMemOperand()->getFlags(), + &IsFast) || + !IsFast) + return SDValue(); + SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo); @@ -18864,7 +19034,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { Use->getOperand(0) == VecOp && isa<ConstantSDNode>(Use->getOperand(1)); })) { - APInt DemandedElts = APInt::getNullValue(NumElts); + APInt DemandedElts = APInt::getZero(NumElts); for (SDNode *Use : VecOp->uses()) { auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1)); if (CstElt->getAPIntValue().ult(NumElts)) @@ -18877,7 +19047,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { AddToWorklist(N); return SDValue(N, 0); } - APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth); + APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth); if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) { // We simplified the vector operand of this extract element. If this // extract is not dead, visit it again so it is folded properly. @@ -19672,8 +19842,10 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) { // Make sure the first element matches // (zext (extract_vector_elt X, C)) + // Offset must be a constant multiple of the + // known-minimum vector length of the result type. int64_t Offset = checkElem(Op0); - if (Offset < 0) + if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0) return SDValue(); unsigned NumElems = N->getNumOperands(); @@ -19844,6 +20016,44 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops)); } +// Attempt to merge nested concat_vectors/undefs. +// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d)) +// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d) +static SDValue combineConcatVectorOfConcatVectors(SDNode *N, + SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + + // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types. + EVT SubVT; + SDValue FirstConcat; + for (const SDValue &Op : N->ops()) { + if (Op.isUndef()) + continue; + if (Op.getOpcode() != ISD::CONCAT_VECTORS) + return SDValue(); + if (!FirstConcat) { + SubVT = Op.getOperand(0).getValueType(); + if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT)) + return SDValue(); + FirstConcat = Op; + continue; + } + if (SubVT != Op.getOperand(0).getValueType()) + return SDValue(); + } + assert(FirstConcat && "Concat of all-undefs found"); + + SmallVector<SDValue> ConcatOps; + for (const SDValue &Op : N->ops()) { + if (Op.isUndef()) { + ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT)); + continue; + } + ConcatOps.append(Op->op_begin(), Op->op_end()); + } + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps); +} + // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at // most two distinct vectors the same size as the result, attempt to turn this @@ -20103,13 +20313,19 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. + // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...). if (SDValue V = combineConcatVectorOfScalars(N, DAG)) return V; - // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. - if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { + // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE. + if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG)) + return V; + + // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. if (SDValue V = combineConcatVectorOfExtracts(N, DAG)) return V; + } if (SDValue V = combineConcatVectorOfCasts(N, DAG)) return V; @@ -20351,9 +20567,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { return SDValue(); auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0)); - auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); - if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || - !ExtIdx) + if (!Ld || Ld->getExtensionType() || !Ld->isSimple()) return SDValue(); // Allow targets to opt-out. @@ -20363,7 +20577,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { if (!VT.isByteSized()) return SDValue(); - unsigned Index = ExtIdx->getZExtValue(); + unsigned Index = Extract->getConstantOperandVal(1); unsigned NumElts = VT.getVectorMinNumElements(); // The definition of EXTRACT_SUBVECTOR states that the index must be a @@ -20492,7 +20706,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // If the concatenated source types match this extract, it's a direct // simplification: // extract_subvec (concat V1, V2, ...), i --> Vi - if (ConcatSrcNumElts == ExtNumElts) + if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount()) return V.getOperand(ConcatOpIdx); // If the concatenated source vectors are a multiple length of this extract, @@ -20500,7 +20714,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // concat operand. Example: // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 --> // v2i8 extract_subvec v8i8 Y, 6 - if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) { + if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() && + ConcatSrcNumElts % ExtNumElts == 0) { SDLoc DL(N); unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts; assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts && @@ -20562,8 +20777,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // otherwise => (extract_subvec V1, ExtIdx) uint64_t InsIdx = V.getConstantOperandVal(2); if (InsIdx * SmallVT.getScalarSizeInBits() == - ExtIdx * NVT.getScalarSizeInBits()) + ExtIdx * NVT.getScalarSizeInBits()) { + if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT)) + return SDValue(); + return DAG.getBitcast(NVT, V.getOperand(1)); + } return DAG.getNode( ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), @@ -21131,15 +21350,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); // Canonicalize shuffle v, v -> v, undef - if (N0 == N1) { - SmallVector<int, 8> NewMask; - for (unsigned i = 0; i != NumElts; ++i) { - int Idx = SVN->getMaskElt(i); - if (Idx >= (int)NumElts) Idx -= NumElts; - NewMask.push_back(Idx); - } - return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask); - } + if (N0 == N1) + return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), + createUnaryMask(SVN->getMask(), NumElts)); // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. if (N0.isUndef()) @@ -21290,6 +21503,70 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } + // See if we can replace a shuffle with an insert_subvector. + // e.g. v2i32 into v8i32: + // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7). + // --> insert_subvector(lhs,rhs1,4). + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) && + TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) { + auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) { + // Ensure RHS subvectors are legal. + assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors"); + EVT SubVT = RHS.getOperand(0).getValueType(); + int NumSubVecs = RHS.getNumOperands(); + int NumSubElts = SubVT.getVectorNumElements(); + assert((NumElts % NumSubElts) == 0 && "Subvector mismatch"); + if (!TLI.isTypeLegal(SubVT)) + return SDValue(); + + // Don't bother if we have an unary shuffle (matches undef + LHS elts). + if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; })) + return SDValue(); + + // Search [NumSubElts] spans for RHS sequence. + // TODO: Can we avoid nested loops to increase performance? + SmallVector<int> InsertionMask(NumElts); + for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) { + for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) { + // Reset mask to identity. + std::iota(InsertionMask.begin(), InsertionMask.end(), 0); + + // Add subvector insertion. + std::iota(InsertionMask.begin() + SubIdx, + InsertionMask.begin() + SubIdx + NumSubElts, + NumElts + (SubVec * NumSubElts)); + + // See if the shuffle mask matches the reference insertion mask. + bool MatchingShuffle = true; + for (int i = 0; i != (int)NumElts; ++i) { + int ExpectIdx = InsertionMask[i]; + int ActualIdx = Mask[i]; + if (0 <= ActualIdx && ExpectIdx != ActualIdx) { + MatchingShuffle = false; + break; + } + } + + if (MatchingShuffle) + return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS, + RHS.getOperand(SubVec), + DAG.getVectorIdxConstant(SubIdx, SDLoc(N))); + } + } + return SDValue(); + }; + ArrayRef<int> Mask = SVN->getMask(); + if (N1.getOpcode() == ISD::CONCAT_VECTORS) + if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask)) + return InsertN1; + if (N0.getOpcode() == ISD::CONCAT_VECTORS) { + SmallVector<int> CommuteMask(Mask.begin(), Mask.end()); + ShuffleVectorSDNode::commuteMask(CommuteMask); + if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask)) + return InsertN0; + } + } + // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) @@ -21859,6 +22136,40 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitVPOp(SDNode *N) { + // VP operations in which all vector elements are disabled - either by + // determining that the mask is all false or that the EVL is 0 - can be + // eliminated. + bool AreAllEltsDisabled = false; + if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode())) + AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx)); + if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode())) + AreAllEltsDisabled |= + ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode()); + + // This is the only generic VP combine we support for now. + if (!AreAllEltsDisabled) + return SDValue(); + + // Binary operations can be replaced by UNDEF. + if (ISD::isVPBinaryOp(N->getOpcode())) + return DAG.getUNDEF(N->getValueType(0)); + + // VP Memory operations can be replaced by either the chain (stores) or the + // chain + undef (loads). + if (const auto *MemSD = dyn_cast<MemSDNode>(N)) { + if (MemSD->writeMem()) + return MemSD->getChain(); + return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain()); + } + + // Reduction operations return the start operand when no elements are active. + if (ISD::isVPReduction(N->getOpcode())) + return N->getOperand(0); + + return SDValue(); +} + /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle /// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> @@ -21915,7 +22226,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { else Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits); - if (Bits.isAllOnesValue()) + if (Bits.isAllOnes()) Indices.push_back(i); else if (Bits == 0) Indices.push_back(i + NumSubElts); @@ -21950,7 +22261,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { /// If a vector binop is performed on splat values, it may be profitable to /// extract, scalarize, and insert/splat. -static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) { +static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, + const SDLoc &DL) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); unsigned Opcode = N->getOpcode(); @@ -21971,7 +22283,6 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) { !TLI.isOperationLegalOrCustom(Opcode, EltVT)) return SDValue(); - SDLoc DL(N); SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL); SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC); SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC); @@ -21995,20 +22306,19 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) { } /// Visit a binary vector operation, like ADD. -SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { - assert(N->getValueType(0).isVector() && - "SimplifyVBinOp only works on vectors!"); +SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) { + EVT VT = N->getValueType(0); + assert(VT.isVector() && "SimplifyVBinOp only works on vectors!"); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue Ops[] = {LHS, RHS}; - EVT VT = N->getValueType(0); unsigned Opcode = N->getOpcode(); SDNodeFlags Flags = N->getFlags(); // See if we can constant fold the vector operation. - if (SDValue Fold = DAG.FoldConstantVectorArithmetic( - Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) + if (SDValue Fold = DAG.FoldConstantArithmetic(Opcode, SDLoc(LHS), + LHS.getValueType(), Ops)) return Fold; // Move unary shuffles with identical masks after a vector binop: @@ -22026,7 +22336,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) && LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() && (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) { - SDLoc DL(N); SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0), RHS.getOperand(0), Flags); SDValue UndefV = LHS.getOperand(1); @@ -22043,7 +22352,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() && Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) { // binop (splat X), (splat C) --> splat (binop X, C) - SDLoc DL(N); SDValue X = Shuf0->getOperand(0); SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags); return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), @@ -22053,7 +22361,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() && Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) { // binop (splat C), (splat X) --> splat (binop C, X) - SDLoc DL(N); SDValue X = Shuf1->getOperand(0); SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags); return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), @@ -22077,7 +22384,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT, LegalOperations)) { // (binop undef, undef) may not return undef, so compute that result. - SDLoc DL(N); SDValue VecC = DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT)); SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y); @@ -22104,7 +22410,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { EVT NarrowVT = LHS.getOperand(0).getValueType(); if (NarrowVT == RHS.getOperand(0).getValueType() && TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) { - SDLoc DL(N); unsigned NumOperands = LHS.getNumOperands(); SmallVector<SDValue, 4> ConcatOps; for (unsigned i = 0; i != NumOperands; ++i) { @@ -22117,7 +22422,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } } - if (SDValue V = scalarizeBinOpOfSplats(N, DAG)) + if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL)) return V; return SDValue(); @@ -22431,15 +22736,23 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) { if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc)) return SDValue(); - if (!N->isOnlyUserOf(N0.getNode()) || !N->isOnlyUserOf(N1.getNode())) + // The use checks are intentionally on SDNode because we may be dealing + // with opcodes that produce more than one SDValue. + // TODO: Do we really need to check N0 (the condition operand of the select)? + // But removing that clause could cause an infinite loop... + if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse()) return SDValue(); + // Binops may include opcodes that return multiple values, so all values + // must be created/propagated from the newly created binops below. + SDVTList OpVTs = N1->getVTList(); + // Fold select(cond, binop(x, y), binop(z, y)) // --> binop(select(cond, x, z), y) if (N1.getOperand(1) == N2.getOperand(1)) { SDValue NewSel = DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0)); - SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, NewSel, N1.getOperand(1)); + SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1)); NewBinOp->setFlags(N1->getFlags()); NewBinOp->intersectFlagsWith(N2->getFlags()); return NewBinOp; @@ -22453,7 +22766,7 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) { VT == N2.getOperand(1).getValueType()) { SDValue NewSel = DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1)); - SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, N1.getOperand(0), NewSel); + SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel); NewBinOp->setFlags(N1->getFlags()); NewBinOp->intersectFlagsWith(N2->getFlags()); return NewBinOp; @@ -22581,7 +22894,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) { // fold select_cc true, x, y -> x // fold select_cc false, x, y -> y - return !(SCCC->isNullValue()) ? N2 : N3; + return !(SCCC->isZero()) ? N2 : N3; } } @@ -22680,7 +22993,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X) // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X) // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X) - if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { + if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { SDValue ValueOnZero = N2; SDValue Count = N3; // If the condition is NE instead of E, swap the operands. @@ -22707,6 +23020,20 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, } } + // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C + // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C + if (!NotExtCompare && N1C && N2C && N3C && + N2C->getAPIntValue() == ~N3C->getAPIntValue() && + ((N1C->isAllOnes() && CC == ISD::SETGT) || + (N1C->isZero() && CC == ISD::SETLT)) && + !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) { + SDValue ASR = DAG.getNode( + ISD::SRA, DL, CmpOpVT, N0, + DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT)); + return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT), + DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT)); + } + return SDValue(); } @@ -22747,7 +23074,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { return SDValue(); // Avoid division by zero. - if (C->isNullValue()) + if (C->isZero()) return SDValue(); SmallVector<SDNode *, 8> Built; @@ -22792,7 +23119,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) /// For the reciprocal, we need to find the zero of the function: -/// F(X) = A X - 1 [which has a zero at X = 1/A] +/// F(X) = 1/X - A [which has a zero at X = 1/A] /// => /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form /// does not require additional intermediate precision] @@ -22803,9 +23130,10 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, if (LegalDAG) return SDValue(); - // TODO: Handle half and/or extended types? + // TODO: Handle extended types? EVT VT = Op.getValueType(); - if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) + if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 && + VT.getScalarType() != MVT::f64) return SDValue(); // If estimates are explicitly disabled for this function, we're done. @@ -22942,9 +23270,10 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, if (LegalDAG) return SDValue(); - // TODO: Handle half and/or extended types? + // TODO: Handle extended types? EVT VT = Op.getValueType(); - if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) + if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 && + VT.getScalarType() != MVT::f64) return SDValue(); // If estimates are explicitly disabled for this function, we're done. @@ -22994,7 +23323,7 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { } /// Return true if there is any possibility that the two addresses overlap. -bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { +bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { struct MemUseCharacteristics { bool IsVolatile; @@ -23154,7 +23483,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, // TODO: Relax aliasing for unordered atomics (see D66309) bool IsOpLoad = isa<LoadSDNode>(C.getNode()) && cast<LSBaseSDNode>(C.getNode())->isSimple(); - if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) { + if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) { // Look further up the chain. C = C.getOperand(0); return true; @@ -23172,7 +23501,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, case ISD::LIFETIME_END: { // We can forward past any lifetime start/end that can be proven not to // alias the memory access. - if (!isAlias(N, C.getNode())) { + if (!mayAlias(N, C.getNode())) { // Look further up the chain. C = C.getOperand(0); return true; |
