diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 946 | 
1 files changed, 636 insertions, 310 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index dc245f0d7b16..ce400ea43f29 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -129,12 +129,12 @@ static cl::opt<unsigned> StoreMergeDependenceLimit(  static cl::opt<bool> EnableReduceLoadOpStoreWidth(      "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), -    cl::desc("DAG cominber enable reducing the width of load/op/store " +    cl::desc("DAG combiner enable reducing the width of load/op/store "               "sequence"));  static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(      "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), -    cl::desc("DAG cominber enable load/<replace bytes>/store with " +    cl::desc("DAG combiner enable load/<replace bytes>/store with "               "a narrower store"));  namespace { @@ -319,7 +319,7 @@ namespace {      /// If so, return true.      bool SimplifyDemandedBits(SDValue Op) {        unsigned BitWidth = Op.getScalarValueSizeInBits(); -      APInt DemandedBits = APInt::getAllOnesValue(BitWidth); +      APInt DemandedBits = APInt::getAllOnes(BitWidth);        return SimplifyDemandedBits(Op, DemandedBits);      } @@ -345,7 +345,7 @@ namespace {          return false;        unsigned NumElts = Op.getValueType().getVectorNumElements(); -      APInt DemandedElts = APInt::getAllOnesValue(NumElts); +      APInt DemandedElts = APInt::getAllOnes(NumElts);        return SimplifyDemandedVectorElts(Op, DemandedElts);      } @@ -436,7 +436,7 @@ namespace {      SDValue visitOR(SDNode *N);      SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);      SDValue visitXOR(SDNode *N); -    SDValue SimplifyVBinOp(SDNode *N); +    SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);      SDValue visitSHL(SDNode *N);      SDValue visitSRA(SDNode *N);      SDValue visitSRL(SDNode *N); @@ -515,6 +515,7 @@ namespace {      SDValue visitFP_TO_FP16(SDNode *N);      SDValue visitFP16_TO_FP(SDNode *N);      SDValue visitVECREDUCE(SDNode *N); +    SDValue visitVPOp(SDNode *N);      SDValue visitFADDForFMACombine(SDNode *N);      SDValue visitFSUBForFMACombine(SDNode *N); @@ -615,7 +616,7 @@ namespace {                            SmallVectorImpl<SDValue> &Aliases);      /// Return true if there is any possibility that the two addresses overlap. -    bool isAlias(SDNode *Op0, SDNode *Op1) const; +    bool mayAlias(SDNode *Op0, SDNode *Op1) const;      /// Walk up chain skipping non-aliasing memory nodes, looking for a better      /// chain (aliasing node.) @@ -1062,21 +1063,22 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,    if (N0.getOpcode() != Opc)      return SDValue(); -  if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { -    if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) { +  SDValue N00 = N0.getOperand(0); +  SDValue N01 = N0.getOperand(1); + +  if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) { +    if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {        // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2)) -      if (SDValue OpNode = -              DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1})) -        return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); +      if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) +        return DAG.getNode(Opc, DL, VT, N00, OpNode);        return SDValue();      }      if (N0.hasOneUse()) {        // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)        //              iff (op x, c1) has one use -      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); -      if (!OpNode.getNode()) -        return SDValue(); -      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); +      if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1)) +        return DAG.getNode(Opc, DL, VT, OpNode, N01); +      return SDValue();      }    }    return SDValue(); @@ -1738,6 +1740,9 @@ SDValue DAGCombiner::visit(SDNode *N) {    case ISD::VECREDUCE_UMIN:    case ISD::VECREDUCE_FMAX:    case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N); +#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC: +#include "llvm/IR/VPIntrinsics.def" +    return visitVPOp(N);    }    return SDValue();  } @@ -2257,7 +2262,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {    // fold vector ops    if (VT.isVector()) { -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp;      // fold (add x, 0) -> x, vector edition @@ -2781,7 +2786,7 @@ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,        IsFlip = Const->isOne();        break;      case TargetLowering::ZeroOrNegativeOneBooleanContent: -      IsFlip = Const->isAllOnesValue(); +      IsFlip = Const->isAllOnes();        break;      case TargetLowering::UndefinedBooleanContent:        IsFlip = (Const->getAPIntValue() & 0x01) == 1; @@ -3257,7 +3262,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {    // fold vector ops    if (VT.isVector()) { -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp;      // fold (sub x, 0) -> x, vector edition @@ -3315,11 +3320,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {      }      // Convert 0 - abs(x). -    SDValue Result;      if (N1->getOpcode() == ISD::ABS && -        !TLI.isOperationLegalOrCustom(ISD::ABS, VT) && -        TLI.expandABS(N1.getNode(), Result, DAG, true)) -      return Result; +        !TLI.isOperationLegalOrCustom(ISD::ABS, VT)) +      if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true)) +        return Result;      // Fold neg(splat(neg(x)) -> splat(x)      if (VT.isVector()) { @@ -3783,7 +3787,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {    // fold vector ops    if (VT.isVector()) { -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))        return FoldedVOp;      N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); @@ -3808,18 +3812,18 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);    // fold (mul x, 0) -> 0 -  if (N1IsConst && ConstValue1.isNullValue()) +  if (N1IsConst && ConstValue1.isZero())      return N1;    // fold (mul x, 1) -> x -  if (N1IsConst && ConstValue1.isOneValue()) +  if (N1IsConst && ConstValue1.isOne())      return N0;    if (SDValue NewSel = foldBinOpIntoSelect(N))      return NewSel;    // fold (mul x, -1) -> 0-x -  if (N1IsConst && ConstValue1.isAllOnesValue()) { +  if (N1IsConst && ConstValue1.isAllOnes()) {      SDLoc DL(N);      return DAG.getNode(ISD::SUB, DL, VT,                         DAG.getConstant(0, DL, VT), N0); @@ -3837,7 +3841,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {    }    // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c -  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) { +  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {      unsigned Log2Val = (-ConstValue1).logBase2();      SDLoc DL(N);      // FIXME: If the input is something that is easily negated (e.g. a @@ -3966,7 +3970,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {      SmallBitVector ClearMask;      ClearMask.reserve(NumElts);      auto IsClearMask = [&ClearMask](ConstantSDNode *V) { -      if (!V || V->isNullValue()) { +      if (!V || V->isZero()) {          ClearMask.push_back(true);          return true;        } @@ -4052,9 +4056,7 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {    SDValue Op0 = Node->getOperand(0);    SDValue Op1 = Node->getOperand(1);    SDValue combined; -  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), -         UE = Op0.getNode()->use_end(); UI != UE; ++UI) { -    SDNode *User = *UI; +  for (SDNode *User : Op0.getNode()->uses()) {      if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||          User->use_empty())        continue; @@ -4111,7 +4113,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {    // 0 / X -> 0    // 0 % X -> 0    ConstantSDNode *N0C = isConstOrConstSplat(N0); -  if (N0C && N0C->isNullValue()) +  if (N0C && N0C->isZero())      return N0;    // X / X -> 1 @@ -4136,21 +4138,20 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {    SDValue N1 = N->getOperand(1);    EVT VT = N->getValueType(0);    EVT CCVT = getSetCCResultType(VT); +  SDLoc DL(N);    // fold vector ops    if (VT.isVector()) -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp; -  SDLoc DL(N); -    // fold (sdiv c1, c2) -> c1/c2    ConstantSDNode *N1C = isConstOrConstSplat(N1);    if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))      return C;    // fold (sdiv X, -1) -> 0-X -  if (N1C && N1C->isAllOnesValue()) +  if (N1C && N1C->isAllOnes())      return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);    // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0) @@ -4204,11 +4205,11 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {    // Helper for determining whether a value is a power-2 constant scalar or a    // vector of such elements.    auto IsPowerOfTwo = [](ConstantSDNode *C) { -    if (C->isNullValue() || C->isOpaque()) +    if (C->isZero() || C->isOpaque())        return false;      if (C->getAPIntValue().isPowerOf2())        return true; -    if ((-C->getAPIntValue()).isPowerOf2()) +    if (C->getAPIntValue().isNegatedPowerOf2())        return true;      return false;    }; @@ -4281,21 +4282,20 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {    SDValue N1 = N->getOperand(1);    EVT VT = N->getValueType(0);    EVT CCVT = getSetCCResultType(VT); +  SDLoc DL(N);    // fold vector ops    if (VT.isVector()) -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp; -  SDLoc DL(N); -    // fold (udiv c1, c2) -> c1/c2    ConstantSDNode *N1C = isConstOrConstSplat(N1);    if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))      return C;    // fold (udiv X, -1) -> select(X == -1, 1, 0) -  if (N1C && N1C->getAPIntValue().isAllOnesValue()) +  if (N1C && N1C->isAllOnes())      return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),                           DAG.getConstant(1, DL, VT),                           DAG.getConstant(0, DL, VT)); @@ -4391,7 +4391,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {      return C;    // fold (urem X, -1) -> select(X == -1, 0, x) -  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue()) +  if (!isSigned && N1C && N1C->isAllOnes())      return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),                           DAG.getConstant(0, DL, VT), N0); @@ -4475,6 +4475,11 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {    if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))      return C; +  // canonicalize constant to RHS. +  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && +      !DAG.isConstantIntBuildVectorOrConstantInt(N1)) +    return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0); +    // fold (mulhs x, 0) -> 0    if (isNullConstant(N1))      return N1; @@ -4527,6 +4532,11 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {    if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))      return C; +  // canonicalize constant to RHS. +  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && +      !DAG.isConstantIntBuildVectorOrConstantInt(N1)) +    return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0); +    // fold (mulhu x, 0) -> 0    if (isNullConstant(N1))      return N1; @@ -4567,6 +4577,12 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {      }    } +  // Simplify the operands using demanded-bits information. +  // We don't have demanded bits support for MULHU so this just enables constant +  // folding based on known bits. +  if (SimplifyDemandedBits(SDValue(N, 0))) +    return SDValue(N, 0); +    return SDValue();  } @@ -4768,20 +4784,21 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {    SDValue N1 = N->getOperand(1);    EVT VT = N0.getValueType();    unsigned Opcode = N->getOpcode(); +  SDLoc DL(N);    // fold vector ops    if (VT.isVector()) -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp;    // fold operation with constant operands. -  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1})) +  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))      return C;    // canonicalize constant to RHS    if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&        !DAG.isConstantIntBuildVectorOrConstantInt(N1)) -    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); +    return DAG.getNode(N->getOpcode(), DL, VT, N1, N0);    // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.    // Only do this if the current op isn't legal and the flipped is. @@ -4797,7 +4814,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {      default: llvm_unreachable("Unknown MINMAX opcode");      }      if (TLI.isOperationLegal(AltOpcode, VT)) -      return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1); +      return DAG.getNode(AltOpcode, DL, VT, N0, N1);    }    // Simplify the operands using demanded-bits information. @@ -5607,6 +5624,39 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {    return DAG.getZExtOrTrunc(Setcc, DL, VT);  } +/// For targets that support usubsat, match a bit-hack form of that operation +/// that ends in 'and' and convert it. +static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) { +  SDValue N0 = N->getOperand(0); +  SDValue N1 = N->getOperand(1); +  EVT VT = N1.getValueType(); + +  // Canonicalize SRA as operand 1. +  if (N0.getOpcode() == ISD::SRA) +    std::swap(N0, N1); + +  // xor/add with SMIN (signmask) are logically equivalent. +  if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD) +    return SDValue(); + +  if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() || +      N0.getOperand(0) != N1.getOperand(0)) +    return SDValue(); + +  unsigned BitWidth = VT.getScalarSizeInBits(); +  ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true); +  ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true); +  if (!XorC || !XorC->getAPIntValue().isSignMask() || +      !SraC || SraC->getAPIntValue() != BitWidth - 1) +    return SDValue(); + +  // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128 +  // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128 +  SDLoc DL(N); +  SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT); +  return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask); +} +  SDValue DAGCombiner::visitAND(SDNode *N) {    SDValue N0 = N->getOperand(0);    SDValue N1 = N->getOperand(1); @@ -5618,17 +5668,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) {    // fold vector ops    if (VT.isVector()) { -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))        return FoldedVOp;      // fold (and x, 0) -> 0, vector edition      if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))        // do not return N0, because undef node may exist in N0 -      return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()), +      return DAG.getConstant(APInt::getZero(N0.getScalarValueSizeInBits()),                               SDLoc(N), N0.getValueType());      if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))        // do not return N1, because undef node may exist in N1 -      return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()), +      return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),                               SDLoc(N), N1.getValueType());      // fold (and x, -1) -> x, vector edition @@ -5679,8 +5729,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {    // if (and x, c) is known to be zero, return 0    unsigned BitWidth = VT.getScalarSizeInBits(); -  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), -                                   APInt::getAllOnesValue(BitWidth))) +  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))      return DAG.getConstant(0, SDLoc(N), VT);    if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -5742,7 +5791,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {      // Get the constant (if applicable) the zero'th operand is being ANDed with.      // This can be a pure constant or a vector splat, in which case we treat the      // vector as a scalar and use the splat value. -    APInt Constant = APInt::getNullValue(1); +    APInt Constant = APInt::getZero(1);      if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {        Constant = C->getAPIntValue();      } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { @@ -5773,7 +5822,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {          // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a          // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.          if ((SplatBitSize % EltBitWidth) == 0) { -          Constant = APInt::getAllOnesValue(EltBitWidth); +          Constant = APInt::getAllOnes(EltBitWidth);            for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)              Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);          } @@ -5800,7 +5849,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {      case ISD::NON_EXTLOAD: B = true; break;      } -    if (B && Constant.isAllOnesValue()) { +    if (B && Constant.isAllOnes()) {        // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to        // preserve semantics once we get rid of the AND.        SDValue NewLoad(Load, 0); @@ -5970,6 +6019,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {    if (IsAndZeroExtMask(N0, N1))      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0)); +  if (hasOperation(ISD::USUBSAT, VT)) +    if (SDValue V = foldAndToUsubsat(N, DAG)) +      return V; +    return SDValue();  } @@ -6384,7 +6437,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {    // fold vector ops    if (VT.isVector()) { -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))        return FoldedVOp;      // fold (or x, 0) -> x, vector edition @@ -6925,17 +6978,16 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,  // a rot[lr]. This also matches funnel shift patterns, similar to rotation but  // with different shifted sources.  SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { -  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.    EVT VT = LHS.getValueType(); -  if (!TLI.isTypeLegal(VT)) -    return SDValue();    // The target must have at least one rotate/funnel flavor. +  // We still try to match rotate by constant pre-legalization. +  // TODO: Support pre-legalization funnel-shift by constant.    bool HasROTL = hasOperation(ISD::ROTL, VT);    bool HasROTR = hasOperation(ISD::ROTR, VT);    bool HasFSHL = hasOperation(ISD::FSHL, VT);    bool HasFSHR = hasOperation(ISD::FSHR, VT); -  if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR) +  if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)      return SDValue();    // Check for truncated rotate. @@ -6988,6 +7040,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {    if (LHSShift.getOpcode() == RHSShift.getOpcode())      return SDValue(); // Shifts must disagree. +  // TODO: Support pre-legalization funnel-shift by constant.    bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);    if (!IsRotate && !(HasFSHL || HasFSHR))      return SDValue(); // Requires funnel shift support. @@ -7016,12 +7069,15 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {    };    if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {      SDValue Res; -    if (IsRotate && (HasROTL || HasROTR)) -      Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, -                        HasROTL ? LHSShiftAmt : RHSShiftAmt); -    else -      Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg, -                        RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt); +    if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) { +      bool UseROTL = !LegalOperations || HasROTL; +      Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, +                        UseROTL ? LHSShiftAmt : RHSShiftAmt); +    } else { +      bool UseFSHL = !LegalOperations || HasFSHL; +      Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg, +                        RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt); +    }      // If there is an AND of either shifted operand, apply it to the result.      if (LHSMask.getNode() || RHSMask.getNode()) { @@ -7045,6 +7101,11 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {      return Res;    } +  // Even pre-legalization, we can't easily rotate/funnel-shift by a variable +  // shift. +  if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR) +    return SDValue(); +    // If there is a mask here, and we have a variable shift, we can't be sure    // that we're masking out the right stuff.    if (LHSMask.getNode() || RHSMask.getNode()) @@ -7296,7 +7357,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {    // TODO: If there is evidence that running this later would help, this    //       limitation could be removed. Legality checks may need to be added    //       for the created store and optional bswap/rotate. -  if (LegalOperations) +  if (LegalOperations || OptLevel == CodeGenOpt::None)      return SDValue();    // We only handle merging simple stores of 1-4 bytes. @@ -7671,9 +7732,12 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {  //    |  D  |  // Into:  //   (x & m) | (y & ~m) -// If y is a constant, and the 'andn' does not work with immediates, -// we unfold into a different pattern: +// If y is a constant, m is not a 'not', and the 'andn' does not work with +// immediates, we unfold into a different pattern:  //   ~(~x & m) & (m | y) +// If x is a constant, m is a 'not', and the 'andn' does not work with +// immediates, we unfold into a different pattern: +//   (x | ~m) & ~(~m & ~y)  // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at  //       the very least that breaks andnpd / andnps patterns, and because those  //       patterns are simplified in IR and shouldn't be created in the DAG @@ -7728,8 +7792,9 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {    SDLoc DL(N); -  // If Y is a constant, check that 'andn' works with immediates. -  if (!TLI.hasAndNot(Y)) { +  // If Y is a constant, check that 'andn' works with immediates. Unless M is +  // a bitwise not that would already allow ANDN to be used. +  if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {      assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");      // If not, we need to do a bit more work to make sure andn is still used.      SDValue NotX = DAG.getNOT(DL, X, VT); @@ -7739,6 +7804,19 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {      return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);    } +  // If X is a constant and M is a bitwise not, check that 'andn' works with +  // immediates. +  if (!TLI.hasAndNot(X) && isBitwiseNot(M)) { +    assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable."); +    // If not, we need to do a bit more work to make sure andn is still used. +    SDValue NotM = M.getOperand(0); +    SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM); +    SDValue NotY = DAG.getNOT(DL, Y, VT); +    SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY); +    SDValue NotRHS = DAG.getNOT(DL, RHS, VT); +    return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS); +  } +    SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);    SDValue NotM = DAG.getNOT(DL, M, VT);    SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM); @@ -7750,10 +7828,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {    SDValue N0 = N->getOperand(0);    SDValue N1 = N->getOperand(1);    EVT VT = N0.getValueType(); +  SDLoc DL(N);    // fold vector ops    if (VT.isVector()) { -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp;      // fold (xor x, 0) -> x, vector edition @@ -7764,7 +7843,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {    }    // fold (xor undef, undef) -> 0. This is a common idiom (misuse). -  SDLoc DL(N);    if (N0.isUndef() && N1.isUndef())      return DAG.getConstant(0, DL, VT); @@ -7899,7 +7977,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {        // shift has been simplified to undef.        uint64_t ShiftAmt = ShiftC->getLimitedValue();        if (ShiftAmt < BitWidth) { -        APInt Ones = APInt::getAllOnesValue(BitWidth); +        APInt Ones = APInt::getAllOnes(BitWidth);          Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);          if (XorC->getAPIntValue() == Ones) {            // If the xor constant is a shifted -1, do a 'not' before the shift: @@ -8222,7 +8300,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {    // fold vector ops    if (VT.isVector()) { -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))        return FoldedVOp;      BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); @@ -8255,8 +8333,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {      return NewSel;    // if (shl x, c) is known to be zero, return 0 -  if (DAG.MaskedValueIsZero(SDValue(N, 0), -                            APInt::getAllOnesValue(OpSizeInBits))) +  if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))      return DAG.getConstant(0, SDLoc(N), VT);    // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). @@ -8501,28 +8578,43 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,    // Both operands must be equivalent extend nodes.    SDValue LeftOp = ShiftOperand.getOperand(0);    SDValue RightOp = ShiftOperand.getOperand(1); +    bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;    bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND; -  if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode()) +  if (!IsSignExt && !IsZeroExt)      return SDValue(); -  EVT WideVT1 = LeftOp.getValueType(); -  EVT WideVT2 = RightOp.getValueType(); -  (void)WideVT2; +  EVT NarrowVT = LeftOp.getOperand(0).getValueType(); +  unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); + +  SDValue MulhRightOp; +  if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) { +    unsigned ActiveBits = IsSignExt +                              ? Constant->getAPIntValue().getMinSignedBits() +                              : Constant->getAPIntValue().getActiveBits(); +    if (ActiveBits > NarrowVTSize) +      return SDValue(); +    MulhRightOp = DAG.getConstant( +        Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL, +        NarrowVT); +  } else { +    if (LeftOp.getOpcode() != RightOp.getOpcode()) +      return SDValue(); +    // Check that the two extend nodes are the same type. +    if (NarrowVT != RightOp.getOperand(0).getValueType()) +      return SDValue(); +    MulhRightOp = RightOp.getOperand(0); +  } + +  EVT WideVT = LeftOp.getValueType();    // Proceed with the transformation if the wide types match. -  assert((WideVT1 == WideVT2) && +  assert((WideVT == RightOp.getValueType()) &&           "Cannot have a multiply node with two different operand types."); -  EVT NarrowVT = LeftOp.getOperand(0).getValueType(); -  // Check that the two extend nodes are the same type. -  if (NarrowVT !=  RightOp.getOperand(0).getValueType()) -    return SDValue(); -    // Proceed with the transformation if the wide type is twice as large    // as the narrow type. -  unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); -  if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize) +  if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)      return SDValue();    // Check the shift amount with the narrow type size. @@ -8540,10 +8632,10 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,    if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))      return SDValue(); -  SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), -                               RightOp.getOperand(0)); -  return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1) -                                     : DAG.getZExtOrTrunc(Result, DL, WideVT1)); +  SDValue Result = +      DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp); +  return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT) +                                     : DAG.getZExtOrTrunc(Result, DL, WideVT));  }  SDValue DAGCombiner::visitSRA(SDNode *N) { @@ -8563,7 +8655,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {    // fold vector ops    if (VT.isVector()) -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))        return FoldedVOp;    ConstantSDNode *N1C = isConstOrConstSplat(N1); @@ -8761,7 +8853,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {    // fold vector ops    if (VT.isVector()) -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))        return FoldedVOp;    ConstantSDNode *N1C = isConstOrConstSplat(N1); @@ -8774,8 +8866,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {      return NewSel;    // if (srl x, c) is known to be zero, return 0 -  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), -                                   APInt::getAllOnesValue(OpSizeInBits))) +  if (N1C && +      DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))      return DAG.getConstant(0, SDLoc(N), VT);    // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) @@ -9357,27 +9449,27 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {    // is also a target-independent combine here in DAGCombiner in the other    // direction for (select Cond, -1, 0) when the condition is not i1.    if (CondVT == MVT::i1 && !LegalOperations) { -    if (C1->isNullValue() && C2->isOne()) { +    if (C1->isZero() && C2->isOne()) {        // select Cond, 0, 1 --> zext (!Cond)        SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);        if (VT != MVT::i1)          NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);        return NotCond;      } -    if (C1->isNullValue() && C2->isAllOnesValue()) { +    if (C1->isZero() && C2->isAllOnes()) {        // select Cond, 0, -1 --> sext (!Cond)        SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);        if (VT != MVT::i1)          NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);        return NotCond;      } -    if (C1->isOne() && C2->isNullValue()) { +    if (C1->isOne() && C2->isZero()) {        // select Cond, 1, 0 --> zext (Cond)        if (VT != MVT::i1)          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);        return Cond;      } -    if (C1->isAllOnesValue() && C2->isNullValue()) { +    if (C1->isAllOnes() && C2->isZero()) {        // select Cond, -1, 0 --> sext (Cond)        if (VT != MVT::i1)          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); @@ -9405,7 +9497,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {        }        // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) -      if (C1Val.isPowerOf2() && C2Val.isNullValue()) { +      if (C1Val.isPowerOf2() && C2Val.isZero()) {          if (VT != MVT::i1)            Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);          SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT); @@ -9433,7 +9525,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {            TargetLowering::ZeroOrOneBooleanContent &&        TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==            TargetLowering::ZeroOrOneBooleanContent && -      C1->isNullValue() && C2->isOne()) { +      C1->isZero() && C2->isOne()) {      SDValue NotCond =          DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));      if (VT.bitsEq(CondVT)) @@ -9478,6 +9570,64 @@ static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {    return SDValue();  } +static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) { +  SDValue N0 = N->getOperand(0); +  SDValue N1 = N->getOperand(1); +  SDValue N2 = N->getOperand(2); +  EVT VT = N->getValueType(0); +  if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse()) +    return SDValue(); + +  SDValue Cond0 = N0.getOperand(0); +  SDValue Cond1 = N0.getOperand(1); +  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); +  if (VT != Cond0.getValueType()) +    return SDValue(); + +  // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the +  // compare is inverted from that pattern ("Cond0 s> -1"). +  if (CC == ISD::SETLT && isNullOrNullSplat(Cond1)) +    ; // This is the pattern we are looking for. +  else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1)) +    std::swap(N1, N2); +  else +    return SDValue(); + +  // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1 +  if (isNullOrNullSplat(N2)) { +    SDLoc DL(N); +    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); +    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); +    return DAG.getNode(ISD::AND, DL, VT, Sra, N1); +  } + +  // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2 +  if (isAllOnesOrAllOnesSplat(N1)) { +    SDLoc DL(N); +    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); +    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); +    return DAG.getNode(ISD::OR, DL, VT, Sra, N2); +  } + +  // If we have to invert the sign bit mask, only do that transform if the +  // target has a bitwise 'and not' instruction (the invert is free). +  // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2 +  const TargetLowering &TLI = DAG.getTargetLoweringInfo(); +  if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) { +    SDLoc DL(N); +    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); +    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); +    SDValue Not = DAG.getNOT(DL, Sra, VT); +    return DAG.getNode(ISD::AND, DL, VT, Not, N2); +  } + +  // TODO: There's another pattern in this family, but it may require +  //       implementing hasOrNot() to check for profitability: +  //       (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2 + +  return SDValue(); +} +  SDValue DAGCombiner::visitSELECT(SDNode *N) {    SDValue N0 = N->getOperand(0);    SDValue N1 = N->getOperand(1); @@ -9702,8 +9852,8 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {           "same value. This should have been addressed before this function.");    return DAG.getNode(        ISD::CONCAT_VECTORS, DL, VT, -      BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), -      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); +      BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0), +      TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));  }  bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) { @@ -10168,6 +10318,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {    if (SDValue V = foldVSelectOfConstants(N))      return V; +  if (hasOperation(ISD::SRA, VT)) +    if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG)) +      return V; +    return SDValue();  } @@ -10189,7 +10343,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {      AddToWorklist(SCC.getNode());      if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { -      if (!SCCC->isNullValue()) +      if (!SCCC->isZero())          return N2;    // cond always true -> true val        else          return N3;    // cond always false -> false val @@ -10247,13 +10401,13 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {      // Is 'X Cond C' always true or false?      auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) { -      bool False = (Cond == ISD::SETULT && C->isNullValue()) || +      bool False = (Cond == ISD::SETULT && C->isZero()) ||                     (Cond == ISD::SETLT  && C->isMinSignedValue()) || -                   (Cond == ISD::SETUGT && C->isAllOnesValue()) || +                   (Cond == ISD::SETUGT && C->isAllOnes()) ||                     (Cond == ISD::SETGT  && C->isMaxSignedValue()); -      bool True =  (Cond == ISD::SETULE && C->isAllOnesValue()) || +      bool True =  (Cond == ISD::SETULE && C->isAllOnes()) ||                     (Cond == ISD::SETLE  && C->isMaxSignedValue()) || -                   (Cond == ISD::SETUGE && C->isNullValue()) || +                   (Cond == ISD::SETUGE && C->isZero()) ||                     (Cond == ISD::SETGE  && C->isMinSignedValue());        return True || False;      }; @@ -10862,7 +11016,7 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,    if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)      return SDValue(); -  if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0))) +  if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))      return SDValue();    if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0))) @@ -11256,7 +11410,7 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,    Known = DAG.computeKnownBits(Op); -  return (Known.Zero | 1).isAllOnesValue(); +  return (Known.Zero | 1).isAllOnes();  }  /// Given an extending node with a pop-count operand, if the target does not @@ -12015,7 +12169,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);    // If the input is already sign extended, just drop the extension. -  if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1)) +  if (ExtVTBits >= DAG.ComputeMinSignedBits(N0))      return N0;    // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 @@ -12031,8 +12185,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {    if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {      SDValue N00 = N0.getOperand(0);      unsigned N00Bits = N00.getScalarValueSizeInBits(); -    if ((N00Bits <= ExtVTBits || -         (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) && +    if ((N00Bits <= ExtVTBits || DAG.ComputeMinSignedBits(N00) <= ExtVTBits) &&          (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);    } @@ -12051,8 +12204,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {      APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);      if ((N00Bits == ExtVTBits ||           (!IsZext && (N00Bits < ExtVTBits || -                      (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) < -                          ExtVTBits))) && +                      DAG.ComputeMinSignedBits(N00) <= ExtVTBits))) &&          (!LegalOperations ||           TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))        return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00); @@ -12289,7 +12441,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {      SDValue Amt = N0.getOperand(1);      KnownBits Known = DAG.computeKnownBits(Amt);      unsigned Size = VT.getScalarSizeInBits(); -    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { +    if (Known.countMaxActiveBits() <= Log2_32(Size)) {        SDLoc SL(N);        EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); @@ -12537,8 +12689,8 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) {  SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {    assert(N->getOpcode() == ISD::BUILD_PAIR); -  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); -  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); +  auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); +  auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));    // A BUILD_PAIR is always having the least significant part in elt 0 and the    // most significant part in elt 1. So when combining into one large load, we @@ -12546,22 +12698,20 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {    if (DAG.getDataLayout().isBigEndian())      std::swap(LD1, LD2); -  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || +  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) || +      !LD1->hasOneUse() || !LD2->hasOneUse() ||        LD1->getAddressSpace() != LD2->getAddressSpace())      return SDValue(); + +  bool LD1Fast = false;    EVT LD1VT = LD1->getValueType(0);    unsigned LD1Bytes = LD1VT.getStoreSize(); -  if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && -      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) { -    Align Alignment = LD1->getAlign(); -    Align NewAlign = DAG.getDataLayout().getABITypeAlign( -        VT.getTypeForEVT(*DAG.getContext())); - -    if (NewAlign <= Alignment && -        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) -      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), -                         LD1->getPointerInfo(), Alignment); -  } +  if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && +      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) && +      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, +                             *LD1->getMemOperand(), &LD1Fast) && LD1Fast) +    return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), +                       LD1->getPointerInfo(), LD1->getAlign());    return SDValue();  } @@ -12937,69 +13087,45 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {      return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);    } -  SDLoc DL(BV); -    // Okay, we know the src/dst types are both integers of differing types. -  // Handling growing first.    assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); -  if (SrcBitSize < DstBitSize) { -    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; -    SmallVector<SDValue, 8> Ops; -    for (unsigned i = 0, e = BV->getNumOperands(); i != e; -         i += NumInputsPerOutput) { -      bool isLE = DAG.getDataLayout().isLittleEndian(); -      APInt NewBits = APInt(DstBitSize, 0); -      bool EltIsUndef = true; -      for (unsigned j = 0; j != NumInputsPerOutput; ++j) { -        // Shift the previously computed bits over. -        NewBits <<= SrcBitSize; -        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j)); -        if (Op.isUndef()) continue; -        EltIsUndef = false; - -        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). -                   zextOrTrunc(SrcBitSize).zext(DstBitSize); -      } +  // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a +  // BuildVectorSDNode? +  auto *BVN = cast<BuildVectorSDNode>(BV); -      if (EltIsUndef) -        Ops.push_back(DAG.getUNDEF(DstEltVT)); -      else -        Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT)); -    } - -    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); -    return DAG.getBuildVector(VT, DL, Ops); -  } +  // Extract the constant raw bit data. +  BitVector UndefElements; +  SmallVector<APInt> RawBits; +  bool IsLE = DAG.getDataLayout().isLittleEndian(); +  if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements)) +    return SDValue(); -  // Finally, this must be the case where we are shrinking elements: each input -  // turns into multiple outputs. -  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; -  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, -                            NumOutputsPerInput*BV->getNumOperands()); +  SDLoc DL(BV);    SmallVector<SDValue, 8> Ops; +  for (unsigned I = 0, E = RawBits.size(); I != E; ++I) { +    if (UndefElements[I]) +      Ops.push_back(DAG.getUNDEF(DstEltVT)); +    else +      Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT)); +  } -  for (const SDValue &Op : BV->op_values()) { -    if (Op.isUndef()) { -      Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); -      continue; -    } - -    APInt OpVal = cast<ConstantSDNode>(Op)-> -                  getAPIntValue().zextOrTrunc(SrcBitSize); +  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); +  return DAG.getBuildVector(VT, DL, Ops); +} -    for (unsigned j = 0; j != NumOutputsPerInput; ++j) { -      APInt ThisVal = OpVal.trunc(DstBitSize); -      Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); -      OpVal.lshrInPlace(DstBitSize); -    } +// Returns true if floating point contraction is allowed on the FMUL-SDValue +// `N` +static bool isContractableFMUL(const TargetOptions &Options, SDValue N) { +  assert(N.getOpcode() == ISD::FMUL); -    // For big endian targets, swap the order of the pieces of each element. -    if (DAG.getDataLayout().isBigEndian()) -      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); -  } +  return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath || +         N->getFlags().hasAllowContract(); +} -  return DAG.getBuildVector(VT, DL, Ops); +// Returns true if `N` can assume no infinities involved in its computation. +static bool hasNoInfs(const TargetOptions &Options, SDValue N) { +  return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs();  }  /// Try to perform FMA combining on a given FADD node. @@ -13038,6 +13164,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {    unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;    bool Aggressive = TLI.enableAggressiveFMAFusion(VT); +  auto isFusedOp = [&](SDValue N) { +    unsigned Opcode = N.getOpcode(); +    return Opcode == ISD::FMA || Opcode == ISD::FMAD; +  }; +    // Is the node an FMUL and contractable either due to global flags or    // SDNodeFlags.    auto isContractableFMUL = [AllowFusionGlobally](SDValue N) { @@ -13069,12 +13200,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {    // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)    // This requires reassociation because it changes the order of operations.    SDValue FMA, E; -  if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode && +  if (CanReassociate && isFusedOp(N0) &&        N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&        N0.getOperand(2).hasOneUse()) {      FMA = N0;      E = N1; -  } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode && +  } else if (CanReassociate && isFusedOp(N1) &&               N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&               N1.getOperand(2).hasOneUse()) {      FMA = N1; @@ -13130,7 +13261,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),                                       Z));      }; -    if (N0.getOpcode() == PreferredFusedOpcode) { +    if (isFusedOp(N0)) {        SDValue N02 = N0.getOperand(2);        if (N02.getOpcode() == ISD::FP_EXTEND) {          SDValue N020 = N02.getOperand(0); @@ -13160,7 +13291,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {      };      if (N0.getOpcode() == ISD::FP_EXTEND) {        SDValue N00 = N0.getOperand(0); -      if (N00.getOpcode() == PreferredFusedOpcode) { +      if (isFusedOp(N00)) {          SDValue N002 = N00.getOperand(2);          if (isContractableFMUL(N002) &&              TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, @@ -13174,7 +13305,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {      // fold (fadd x, (fma y, z, (fpext (fmul u, v)))      //   -> (fma y, z, (fma (fpext u), (fpext v), x)) -    if (N1.getOpcode() == PreferredFusedOpcode) { +    if (isFusedOp(N1)) {        SDValue N12 = N1.getOperand(2);        if (N12.getOpcode() == ISD::FP_EXTEND) {          SDValue N120 = N12.getOperand(0); @@ -13195,7 +13326,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {      // interesting for all targets, especially GPUs.      if (N1.getOpcode() == ISD::FP_EXTEND) {        SDValue N10 = N1.getOperand(0); -      if (N10.getOpcode() == PreferredFusedOpcode) { +      if (isFusedOp(N10)) {          SDValue N102 = N10.getOperand(2);          if (isContractableFMUL(N102) &&              TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, @@ -13391,12 +13522,17 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {      return isContractableFMUL(N) && isReassociable(N.getNode());    }; +  auto isFusedOp = [&](SDValue N) { +    unsigned Opcode = N.getOpcode(); +    return Opcode == ISD::FMA || Opcode == ISD::FMAD; +  }; +    // More folding opportunities when target permits.    if (Aggressive && isReassociable(N)) {      bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();      // fold (fsub (fma x, y, (fmul u, v)), z)      //   -> (fma x, y (fma u, v, (fneg z))) -    if (CanFuse && N0.getOpcode() == PreferredFusedOpcode && +    if (CanFuse && isFusedOp(N0) &&          isContractableAndReassociableFMUL(N0.getOperand(2)) &&          N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {        return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), @@ -13409,7 +13545,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {      // fold (fsub x, (fma y, z, (fmul u, v)))      //   -> (fma (fneg y), z, (fma (fneg u), v, x)) -    if (CanFuse && N1.getOpcode() == PreferredFusedOpcode && +    if (CanFuse && isFusedOp(N1) &&          isContractableAndReassociableFMUL(N1.getOperand(2)) &&          N1->hasOneUse() && NoSignedZero) {        SDValue N20 = N1.getOperand(2).getOperand(0); @@ -13423,8 +13559,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {      // fold (fsub (fma x, y, (fpext (fmul u, v))), z)      //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) -    if (N0.getOpcode() == PreferredFusedOpcode && -        N0->hasOneUse()) { +    if (isFusedOp(N0) && N0->hasOneUse()) {        SDValue N02 = N0.getOperand(2);        if (N02.getOpcode() == ISD::FP_EXTEND) {          SDValue N020 = N02.getOperand(0); @@ -13450,7 +13585,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {      // interesting for all targets, especially GPUs.      if (N0.getOpcode() == ISD::FP_EXTEND) {        SDValue N00 = N0.getOperand(0); -      if (N00.getOpcode() == PreferredFusedOpcode) { +      if (isFusedOp(N00)) {          SDValue N002 = N00.getOperand(2);          if (isContractableAndReassociableFMUL(N002) &&              TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, @@ -13470,8 +13605,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {      // fold (fsub x, (fma y, z, (fpext (fmul u, v))))      //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) -    if (N1.getOpcode() == PreferredFusedOpcode && -        N1.getOperand(2).getOpcode() == ISD::FP_EXTEND && +    if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&          N1->hasOneUse()) {        SDValue N120 = N1.getOperand(2).getOperand(0);        if (isContractableAndReassociableFMUL(N120) && @@ -13495,8 +13629,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {      // FIXME: This turns two single-precision and one double-precision      // operation into two double-precision operations, which might not be      // interesting for all targets, especially GPUs. -    if (N1.getOpcode() == ISD::FP_EXTEND && -        N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { +    if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {        SDValue CvtSrc = N1.getOperand(0);        SDValue N100 = CvtSrc.getOperand(0);        SDValue N101 = CvtSrc.getOperand(1); @@ -13537,12 +13670,13 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {    // The transforms below are incorrect when x == 0 and y == inf, because the    // intermediate multiplication produces a nan. -  if (!Options.NoInfsFPMath) +  SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1; +  if (!hasNoInfs(Options, FAdd))      return SDValue();    // Floating-point multiply-add without intermediate rounding.    bool HasFMA = -      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && +      isContractableFMUL(Options, SDValue(N, 0)) &&        TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&        (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); @@ -13632,7 +13766,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {    // fold vector ops    if (VT.isVector()) -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp;    // fold (fadd c1, c2) -> c1 + c2 @@ -13840,7 +13974,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {    // fold vector ops    if (VT.isVector()) -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp;    // fold (fsub c1, c2) -> c1-c2 @@ -13925,7 +14059,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {    // fold vector ops    if (VT.isVector()) {      // This just handles C1 * C2 for vectors. Other vector folds are below. -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp;    } @@ -13970,10 +14104,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {    if (N1CFP && N1CFP->isExactlyValue(+2.0))      return DAG.getNode(ISD::FADD, DL, VT, N0, N0); -  // fold (fmul X, -1.0) -> (fneg X) -  if (N1CFP && N1CFP->isExactlyValue(-1.0)) -    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) -      return DAG.getNode(ISD::FNEG, DL, VT, N0); +  // fold (fmul X, -1.0) -> (fsub -0.0, X) +  if (N1CFP && N1CFP->isExactlyValue(-1.0)) { +    if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) { +      return DAG.getNode(ISD::FSUB, DL, VT, +                         DAG.getConstantFP(-0.0, DL, VT), N0, Flags); +    } +  }    // -N0 * -N1 --> N0 * N1    TargetLowering::NegatibleCost CostN0 = @@ -14259,7 +14396,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {    // fold vector ops    if (VT.isVector()) -    if (SDValue FoldedVOp = SimplifyVBinOp(N)) +    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))        return FoldedVOp;    // fold (fdiv c1, c2) -> c1/c2 @@ -16244,11 +16381,12 @@ struct LoadedSlice {        return false;      // Check if it will be merged with the load. -    // 1. Check the alignment constraint. -    Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign( -        ResVT.getTypeForEVT(*DAG->getContext())); - -    if (RequiredAlignment > getAlign()) +    // 1. Check the alignment / fast memory access constraint. +    bool IsFast = false; +    if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT, +                                Origin->getAddressSpace(), getAlign(), +                                Origin->getMemOperand()->getFlags(), &IsFast) || +        !IsFast)        return false;      // 2. Check that the load is a legal operation for that type. @@ -16269,7 +16407,7 @@ struct LoadedSlice {  /// \p UsedBits looks like 0..0 1..1 0..0.  static bool areUsedBitsDense(const APInt &UsedBits) {    // If all the bits are one, this is dense! -  if (UsedBits.isAllOnesValue()) +  if (UsedBits.isAllOnes())      return true;    // Get rid of the unused bits on the right. @@ -16278,7 +16416,7 @@ static bool areUsedBitsDense(const APInt &UsedBits) {    if (NarrowedUsedBits.countLeadingZeros())      NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());    // Check that the chunk of bits is completely used. -  return NarrowedUsedBits.isAllOnesValue(); +  return NarrowedUsedBits.isAllOnes();  }  /// Check whether or not \p First and \p Second are next to each other @@ -16696,8 +16834,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {      unsigned BitWidth = N1.getValueSizeInBits();      APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();      if (Opc == ISD::AND) -      Imm ^= APInt::getAllOnesValue(BitWidth); -    if (Imm == 0 || Imm.isAllOnesValue()) +      Imm ^= APInt::getAllOnes(BitWidth); +    if (Imm == 0 || Imm.isAllOnes())        return SDValue();      unsigned ShAmt = Imm.countTrailingZeros();      unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; @@ -16724,16 +16862,19 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {      if ((Imm & Mask) == Imm) {        APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);        if (Opc == ISD::AND) -        NewImm ^= APInt::getAllOnesValue(NewBW); +        NewImm ^= APInt::getAllOnes(NewBW);        uint64_t PtrOff = ShAmt / 8;        // For big endian targets, we need to adjust the offset to the pointer to        // load the correct bytes.        if (DAG.getDataLayout().isBigEndian())          PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; +      bool IsFast = false;        Align NewAlign = commonAlignment(LD->getAlign(), PtrOff); -      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); -      if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy)) +      if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT, +                                  LD->getAddressSpace(), NewAlign, +                                  LD->getMemOperand()->getFlags(), &IsFast) || +          !IsFast)          return SDValue();        SDValue NewPtr = @@ -16787,27 +16928,26 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {      if (VTSize.isScalable())        return SDValue(); +    bool FastLD = false, FastST = false;      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());      if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||          !TLI.isOperationLegal(ISD::STORE, IntVT) ||          !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || -        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) -      return SDValue(); - -    Align LDAlign = LD->getAlign(); -    Align STAlign = ST->getAlign(); -    Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); -    Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy); -    if (LDAlign < ABIAlign || STAlign < ABIAlign) +        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) || +        !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT, +                                *LD->getMemOperand(), &FastLD) || +        !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT, +                                *ST->getMemOperand(), &FastST) || +        !FastLD || !FastST)        return SDValue();      SDValue NewLD =          DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(), -                    LD->getPointerInfo(), LDAlign); +                    LD->getPointerInfo(), LD->getAlign());      SDValue NewST =          DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(), -                     ST->getPointerInfo(), STAlign); +                     ST->getPointerInfo(), ST->getAlign());      AddToWorklist(NewLD.getNode());      AddToWorklist(NewST.getNode()); @@ -16838,8 +16978,10 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,                                                SDValue &ConstNode) {    APInt Val; -  // If the add only has one use, this would be OK to do. -  if (AddNode.getNode()->hasOneUse()) +  // If the add only has one use, and the target thinks the folding is +  // profitable or does not lead to worse code, this would be OK to do. +  if (AddNode.getNode()->hasOneUse() && +      TLI.isMulAddWithConstProfitable(AddNode, ConstNode))      return true;    // Walk all the users of the constant with which we're multiplying. @@ -16931,6 +17073,22 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(    unsigned SizeInBits = NumStores * ElementSizeBits;    unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; +  Optional<MachineMemOperand::Flags> Flags; +  AAMDNodes AAInfo; +  for (unsigned I = 0; I != NumStores; ++I) { +    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode); +    if (!Flags) { +      Flags = St->getMemOperand()->getFlags(); +      AAInfo = St->getAAInfo(); +      continue; +    } +    // Skip merging if there's an inconsistent flag. +    if (Flags != St->getMemOperand()->getFlags()) +      return false; +    // Concatenate AA metadata. +    AAInfo = AAInfo.concat(St->getAAInfo()); +  } +    EVT StoreTy;    if (UseVector) {      unsigned Elts = NumStores * NumMemElts; @@ -17048,9 +17206,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(    // make sure we use trunc store if it's necessary to be legal.    SDValue NewStore;    if (!UseTrunc) { -    NewStore = -        DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), -                     FirstInChain->getPointerInfo(), FirstInChain->getAlign()); +    NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), +                            FirstInChain->getPointerInfo(), +                            FirstInChain->getAlign(), Flags.getValue(), AAInfo);    } else { // Must be realized as a trunc store      EVT LegalizedStoredValTy =          TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); @@ -17062,7 +17220,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(      NewStore = DAG.getTruncStore(          NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),          FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, -        FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags()); +        FirstInChain->getAlign(), Flags.getValue(), AAInfo);    }    // Replace all merged stores with the new store. @@ -17359,7 +17517,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(        SDValue StoredVal = ST->getValue();        bool IsElementZero = false;        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) -        IsElementZero = C->isNullValue(); +        IsElementZero = C->isZero();        else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))          IsElementZero = C->getConstantFPValue()->isNullValue();        if (IsElementZero) { @@ -17378,7 +17536,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(          break;        if (TLI.isTypeLegal(StoreTy) && -          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && +          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, +                               DAG.getMachineFunction()) &&            TLI.allowsMemoryAccess(Context, DL, StoreTy,                                   *FirstInChain->getMemOperand(), &IsFast) &&            IsFast) { @@ -17390,7 +17549,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(          EVT LegalizedStoredValTy =              TLI.getTypeToTransformTo(Context, StoredVal.getValueType());          if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && -            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && +            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, +                                 DAG.getMachineFunction()) &&              TLI.allowsMemoryAccess(Context, DL, StoreTy,                                     *FirstInChain->getMemOperand(), &IsFast) &&              IsFast) { @@ -17409,7 +17569,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(          unsigned Elts = (i + 1) * NumMemElts;          EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);          if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && -            TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && +            TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&              TLI.allowsMemoryAccess(Context, DL, Ty,                                     *FirstInChain->getMemOperand(), &IsFast) &&              IsFast) @@ -17485,7 +17645,8 @@ bool DAGCombiner::tryStoreMergeOfExtracts(        if (Ty.getSizeInBits() > MaximumLegalStoreInBits)          break; -      if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && +      if (TLI.isTypeLegal(Ty) && +          TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&            TLI.allowsMemoryAccess(Context, DL, Ty,                                   *FirstInChain->getMemOperand(), &IsFast) &&            IsFast) @@ -17633,8 +17794,13 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,        bool IsFastSt = false;        bool IsFastLd = false; -      if (TLI.isTypeLegal(StoreTy) && -          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && +      // Don't try vector types if we need a rotate. We may still fail the +      // legality checks for the integer type, but we can't handle the rotate +      // case with vectors. +      // FIXME: We could use a shuffle in place of the rotate. +      if (!NeedRotate && TLI.isTypeLegal(StoreTy) && +          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, +                               DAG.getMachineFunction()) &&            TLI.allowsMemoryAccess(Context, DL, StoreTy,                                   *FirstInChain->getMemOperand(), &IsFastSt) &&            IsFastSt && @@ -17648,7 +17814,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,        unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;        StoreTy = EVT::getIntegerVT(Context, SizeInBits);        if (TLI.isTypeLegal(StoreTy) && -          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && +          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, +                               DAG.getMachineFunction()) &&            TLI.allowsMemoryAccess(Context, DL, StoreTy,                                   *FirstInChain->getMemOperand(), &IsFastSt) &&            IsFastSt && @@ -17662,7 +17829,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,                   TargetLowering::TypePromoteInteger) {          EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);          if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && -            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && +            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, +                                 DAG.getMachineFunction()) &&              TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&              TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&              TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && @@ -18214,7 +18382,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {      case ISD::LIFETIME_END:        // We can forward past any lifetime start/end that can be proven not to        // alias the node. -      if (!isAlias(Chain.getNode(), N)) +      if (!mayAlias(Chain.getNode(), N))          Chains.push_back(Chain.getOperand(0));        break;      case ISD::STORE: { @@ -18592,32 +18760,35 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,    if (!VecEltVT.isByteSized())      return SDValue(); -  Align Alignment = OriginalLoad->getAlign(); -  Align NewAlign = DAG.getDataLayout().getABITypeAlign( -      VecEltVT.getTypeForEVT(*DAG.getContext())); - -  if (NewAlign > Alignment || -      !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) -    return SDValue(); - -  ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ? -    ISD::NON_EXTLOAD : ISD::EXTLOAD; -  if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT)) +  ISD::LoadExtType ExtTy = +      ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD; +  if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) || +      !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))      return SDValue(); -  Alignment = NewAlign; - +  Align Alignment = OriginalLoad->getAlign();    MachinePointerInfo MPI;    SDLoc DL(EVE);    if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {      int Elt = ConstEltNo->getZExtValue();      unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;      MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); +    Alignment = commonAlignment(Alignment, PtrOff);    } else {      // Discard the pointer info except the address space because the memory      // operand can't represent this new access since the offset is variable.      MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace()); +    Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);    } + +  bool IsFast = false; +  if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT, +                              OriginalLoad->getAddressSpace(), Alignment, +                              OriginalLoad->getMemOperand()->getFlags(), +                              &IsFast) || +      !IsFast) +    return SDValue(); +    SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),                                                 InVecVT, EltNo); @@ -18863,7 +19034,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {                 Use->getOperand(0) == VecOp &&                 isa<ConstantSDNode>(Use->getOperand(1));        })) { -    APInt DemandedElts = APInt::getNullValue(NumElts); +    APInt DemandedElts = APInt::getZero(NumElts);      for (SDNode *Use : VecOp->uses()) {        auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));        if (CstElt->getAPIntValue().ult(NumElts)) @@ -18876,7 +19047,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {          AddToWorklist(N);        return SDValue(N, 0);      } -    APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth); +    APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);      if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {        // We simplified the vector operand of this extract element. If this        // extract is not dead, visit it again so it is folded properly. @@ -19671,8 +19842,10 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {    // Make sure the first element matches    // (zext (extract_vector_elt X, C)) +  // Offset must be a constant multiple of the +  // known-minimum vector length of the result type.    int64_t Offset = checkElem(Op0); -  if (Offset < 0) +  if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)      return SDValue();    unsigned NumElems = N->getNumOperands(); @@ -19843,6 +20016,44 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {    return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));  } +// Attempt to merge nested concat_vectors/undefs. +// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d)) +//  --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d) +static SDValue combineConcatVectorOfConcatVectors(SDNode *N, +                                                  SelectionDAG &DAG) { +  EVT VT = N->getValueType(0); + +  // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types. +  EVT SubVT; +  SDValue FirstConcat; +  for (const SDValue &Op : N->ops()) { +    if (Op.isUndef()) +      continue; +    if (Op.getOpcode() != ISD::CONCAT_VECTORS) +      return SDValue(); +    if (!FirstConcat) { +      SubVT = Op.getOperand(0).getValueType(); +      if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT)) +        return SDValue(); +      FirstConcat = Op; +      continue; +    } +    if (SubVT != Op.getOperand(0).getValueType()) +      return SDValue(); +  } +  assert(FirstConcat && "Concat of all-undefs found"); + +  SmallVector<SDValue> ConcatOps; +  for (const SDValue &Op : N->ops()) { +    if (Op.isUndef()) { +      ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT)); +      continue; +    } +    ConcatOps.append(Op->op_begin(), Op->op_end()); +  } +  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps); +} +  // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR  // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at  // most two distinct vectors the same size as the result, attempt to turn this @@ -20102,13 +20313,19 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {    }    // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. +  // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).    if (SDValue V = combineConcatVectorOfScalars(N, DAG))      return V; -  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. -  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) +  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { +    // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE. +    if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG)) +      return V; + +    // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.      if (SDValue V = combineConcatVectorOfExtracts(N, DAG))        return V; +  }    if (SDValue V = combineConcatVectorOfCasts(N, DAG))      return V; @@ -20350,9 +20567,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {      return SDValue();    auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0)); -  auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); -  if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || -      !ExtIdx) +  if (!Ld || Ld->getExtensionType() || !Ld->isSimple())      return SDValue();    // Allow targets to opt-out. @@ -20362,7 +20577,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {    if (!VT.isByteSized())      return SDValue(); -  unsigned Index = ExtIdx->getZExtValue(); +  unsigned Index = Extract->getConstantOperandVal(1);    unsigned NumElts = VT.getVectorMinNumElements();    // The definition of EXTRACT_SUBVECTOR states that the index must be a @@ -20491,7 +20706,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {      // If the concatenated source types match this extract, it's a direct      // simplification:      // extract_subvec (concat V1, V2, ...), i --> Vi -    if (ConcatSrcNumElts == ExtNumElts) +    if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())        return V.getOperand(ConcatOpIdx);      // If the concatenated source vectors are a multiple length of this extract, @@ -20499,7 +20714,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {      // concat operand. Example:      //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->      //   v2i8 extract_subvec v8i8 Y, 6 -    if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) { +    if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() && +        ConcatSrcNumElts % ExtNumElts == 0) {        SDLoc DL(N);        unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;        assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts && @@ -21134,15 +21350,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);    // Canonicalize shuffle v, v -> v, undef -  if (N0 == N1) { -    SmallVector<int, 8> NewMask; -    for (unsigned i = 0; i != NumElts; ++i) { -      int Idx = SVN->getMaskElt(i); -      if (Idx >= (int)NumElts) Idx -= NumElts; -      NewMask.push_back(Idx); -    } -    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask); -  } +  if (N0 == N1) +    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), +                                createUnaryMask(SVN->getMask(), NumElts));    // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.    if (N0.isUndef()) @@ -21293,6 +21503,70 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {      }    } +  // See if we can replace a shuffle with an insert_subvector. +  // e.g. v2i32 into v8i32: +  // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7). +  // --> insert_subvector(lhs,rhs1,4). +  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) && +      TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) { +    auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) { +      // Ensure RHS subvectors are legal. +      assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors"); +      EVT SubVT = RHS.getOperand(0).getValueType(); +      int NumSubVecs = RHS.getNumOperands(); +      int NumSubElts = SubVT.getVectorNumElements(); +      assert((NumElts % NumSubElts) == 0 && "Subvector mismatch"); +      if (!TLI.isTypeLegal(SubVT)) +        return SDValue(); + +      // Don't bother if we have an unary shuffle (matches undef + LHS elts). +      if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; })) +        return SDValue(); + +      // Search [NumSubElts] spans for RHS sequence. +      // TODO: Can we avoid nested loops to increase performance? +      SmallVector<int> InsertionMask(NumElts); +      for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) { +        for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) { +          // Reset mask to identity. +          std::iota(InsertionMask.begin(), InsertionMask.end(), 0); + +          // Add subvector insertion. +          std::iota(InsertionMask.begin() + SubIdx, +                    InsertionMask.begin() + SubIdx + NumSubElts, +                    NumElts + (SubVec * NumSubElts)); + +          // See if the shuffle mask matches the reference insertion mask. +          bool MatchingShuffle = true; +          for (int i = 0; i != (int)NumElts; ++i) { +            int ExpectIdx = InsertionMask[i]; +            int ActualIdx = Mask[i]; +            if (0 <= ActualIdx && ExpectIdx != ActualIdx) { +              MatchingShuffle = false; +              break; +            } +          } + +          if (MatchingShuffle) +            return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS, +                               RHS.getOperand(SubVec), +                               DAG.getVectorIdxConstant(SubIdx, SDLoc(N))); +        } +      } +      return SDValue(); +    }; +    ArrayRef<int> Mask = SVN->getMask(); +    if (N1.getOpcode() == ISD::CONCAT_VECTORS) +      if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask)) +        return InsertN1; +    if (N0.getOpcode() == ISD::CONCAT_VECTORS) { +      SmallVector<int> CommuteMask(Mask.begin(), Mask.end()); +      ShuffleVectorSDNode::commuteMask(CommuteMask); +      if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask)) +        return InsertN0; +    } +  } +    // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -    // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.    if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) @@ -21862,6 +22136,40 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {    return SDValue();  } +SDValue DAGCombiner::visitVPOp(SDNode *N) { +  // VP operations in which all vector elements are disabled - either by +  // determining that the mask is all false or that the EVL is 0 - can be +  // eliminated. +  bool AreAllEltsDisabled = false; +  if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode())) +    AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx)); +  if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode())) +    AreAllEltsDisabled |= +        ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode()); + +  // This is the only generic VP combine we support for now. +  if (!AreAllEltsDisabled) +    return SDValue(); + +  // Binary operations can be replaced by UNDEF. +  if (ISD::isVPBinaryOp(N->getOpcode())) +    return DAG.getUNDEF(N->getValueType(0)); + +  // VP Memory operations can be replaced by either the chain (stores) or the +  // chain + undef (loads). +  if (const auto *MemSD = dyn_cast<MemSDNode>(N)) { +    if (MemSD->writeMem()) +      return MemSD->getChain(); +    return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain()); +  } + +  // Reduction operations return the start operand when no elements are active. +  if (ISD::isVPReduction(N->getOpcode())) +    return N->getOperand(0); + +  return SDValue(); +} +  /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle  /// with the destination vector and a zero vector.  /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> @@ -21918,7 +22226,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {        else          Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits); -      if (Bits.isAllOnesValue()) +      if (Bits.isAllOnes())          Indices.push_back(i);        else if (Bits == 0)          Indices.push_back(i + NumSubElts); @@ -21953,7 +22261,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {  /// If a vector binop is performed on splat values, it may be profitable to  /// extract, scalarize, and insert/splat. -static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) { +static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, +                                      const SDLoc &DL) {    SDValue N0 = N->getOperand(0);    SDValue N1 = N->getOperand(1);    unsigned Opcode = N->getOpcode(); @@ -21974,7 +22283,6 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {        !TLI.isOperationLegalOrCustom(Opcode, EltVT))      return SDValue(); -  SDLoc DL(N);    SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);    SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);    SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC); @@ -21998,20 +22306,19 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {  }  /// Visit a binary vector operation, like ADD. -SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { -  assert(N->getValueType(0).isVector() && -         "SimplifyVBinOp only works on vectors!"); +SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) { +  EVT VT = N->getValueType(0); +  assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");    SDValue LHS = N->getOperand(0);    SDValue RHS = N->getOperand(1);    SDValue Ops[] = {LHS, RHS}; -  EVT VT = N->getValueType(0);    unsigned Opcode = N->getOpcode();    SDNodeFlags Flags = N->getFlags();    // See if we can constant fold the vector operation. -  if (SDValue Fold = DAG.FoldConstantVectorArithmetic( -          Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) +  if (SDValue Fold = DAG.FoldConstantArithmetic(Opcode, SDLoc(LHS), +                                                LHS.getValueType(), Ops))      return Fold;    // Move unary shuffles with identical masks after a vector binop: @@ -22029,7 +22336,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {      if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&          LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&          (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) { -      SDLoc DL(N);        SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),                                       RHS.getOperand(0), Flags);        SDValue UndefV = LHS.getOperand(1); @@ -22046,7 +22352,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {          Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&          Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {        // binop (splat X), (splat C) --> splat (binop X, C) -      SDLoc DL(N);        SDValue X = Shuf0->getOperand(0);        SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);        return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), @@ -22056,7 +22361,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {          Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&          Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {        // binop (splat C), (splat X) --> splat (binop C, X) -      SDLoc DL(N);        SDValue X = Shuf1->getOperand(0);        SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);        return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), @@ -22080,7 +22384,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {          TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,                                                LegalOperations)) {        // (binop undef, undef) may not return undef, so compute that result. -      SDLoc DL(N);        SDValue VecC =            DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));        SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y); @@ -22107,7 +22410,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {      EVT NarrowVT = LHS.getOperand(0).getValueType();      if (NarrowVT == RHS.getOperand(0).getValueType() &&          TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) { -      SDLoc DL(N);        unsigned NumOperands = LHS.getNumOperands();        SmallVector<SDValue, 4> ConcatOps;        for (unsigned i = 0; i != NumOperands; ++i) { @@ -22120,7 +22422,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {      }    } -  if (SDValue V = scalarizeBinOpOfSplats(N, DAG)) +  if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))      return V;    return SDValue(); @@ -22434,15 +22736,23 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {    if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))      return SDValue(); -  if (!N->isOnlyUserOf(N0.getNode()) || !N->isOnlyUserOf(N1.getNode())) +  // The use checks are intentionally on SDNode because we may be dealing +  // with opcodes that produce more than one SDValue. +  // TODO: Do we really need to check N0 (the condition operand of the select)? +  //       But removing that clause could cause an infinite loop... +  if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())      return SDValue(); +  // Binops may include opcodes that return multiple values, so all values +  // must be created/propagated from the newly created binops below. +  SDVTList OpVTs = N1->getVTList(); +    // Fold select(cond, binop(x, y), binop(z, y))    //  --> binop(select(cond, x, z), y)    if (N1.getOperand(1) == N2.getOperand(1)) {      SDValue NewSel =          DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0)); -    SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, NewSel, N1.getOperand(1)); +    SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));      NewBinOp->setFlags(N1->getFlags());      NewBinOp->intersectFlagsWith(N2->getFlags());      return NewBinOp; @@ -22456,7 +22766,7 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {        VT == N2.getOperand(1).getValueType()) {      SDValue NewSel =          DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1)); -    SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, N1.getOperand(0), NewSel); +    SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);      NewBinOp->setFlags(N1->getFlags());      NewBinOp->intersectFlagsWith(N2->getFlags());      return NewBinOp; @@ -22584,7 +22894,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,      if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {        // fold select_cc true, x, y -> x        // fold select_cc false, x, y -> y -      return !(SCCC->isNullValue()) ? N2 : N3; +      return !(SCCC->isZero()) ? N2 : N3;      }    } @@ -22683,7 +22993,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,    // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)    // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)    // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X) -  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { +  if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {      SDValue ValueOnZero = N2;      SDValue Count = N3;      // If the condition is NE instead of E, swap the operands. @@ -22710,6 +23020,20 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,      }    } +  // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C +  // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C +  if (!NotExtCompare && N1C && N2C && N3C && +      N2C->getAPIntValue() == ~N3C->getAPIntValue() && +      ((N1C->isAllOnes() && CC == ISD::SETGT) || +       (N1C->isZero() && CC == ISD::SETLT)) && +      !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) { +    SDValue ASR = DAG.getNode( +        ISD::SRA, DL, CmpOpVT, N0, +        DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT)); +    return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT), +                       DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT)); +  } +    return SDValue();  } @@ -22750,7 +23074,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {      return SDValue();    // Avoid division by zero. -  if (C->isNullValue()) +  if (C->isZero())      return SDValue();    SmallVector<SDNode *, 8> Built; @@ -22795,7 +23119,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {  /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)  /// For the reciprocal, we need to find the zero of the function: -///   F(X) = A X - 1 [which has a zero at X = 1/A] +///   F(X) = 1/X - A [which has a zero at X = 1/A]  ///     =>  ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form  ///     does not require additional intermediate precision] @@ -22806,9 +23130,10 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,    if (LegalDAG)      return SDValue(); -  // TODO: Handle half and/or extended types? +  // TODO: Handle extended types?    EVT VT = Op.getValueType(); -  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) +  if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 && +      VT.getScalarType() != MVT::f64)      return SDValue();    // If estimates are explicitly disabled for this function, we're done. @@ -22945,9 +23270,10 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,    if (LegalDAG)      return SDValue(); -  // TODO: Handle half and/or extended types? +  // TODO: Handle extended types?    EVT VT = Op.getValueType(); -  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) +  if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 && +      VT.getScalarType() != MVT::f64)      return SDValue();    // If estimates are explicitly disabled for this function, we're done. @@ -22997,7 +23323,7 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {  }  /// Return true if there is any possibility that the two addresses overlap. -bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { +bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {    struct MemUseCharacteristics {      bool IsVolatile; @@ -23157,7 +23483,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,        // TODO: Relax aliasing for unordered atomics (see D66309)        bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&                        cast<LSBaseSDNode>(C.getNode())->isSimple(); -      if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) { +      if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {          // Look further up the chain.          C = C.getOperand(0);          return true; @@ -23175,7 +23501,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,      case ISD::LIFETIME_END: {        // We can forward past any lifetime start/end that can be proven not to        // alias the memory access. -      if (!isAlias(N, C.getNode())) { +      if (!mayAlias(N, C.getNode())) {          // Look further up the chain.          C = C.getOperand(0);          return true;  | 
