diff options
| field | value | detail |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000 |
| commit | e6d1592492a3a379186bfb02bd0f4eda0669c0d5 | (patch) |
| tree | 599ab169a01f1c86eda9adc774edaedde2f2db5b | /lib/CodeGen/SelectionDAG/SelectionDAG.cpp |
| parent | 1a56a5ead7a2e84bee8240f5f6b033b5f1707154 | (diff) |
Notes
Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAG.cpp')
| -rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1429 | 
1 file changed, 844 insertions(+), 585 deletions(-)
| diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 647496c1afcb..5852e693fa9f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1,9 +1,8 @@  //===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===//  // -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception  //  //===----------------------------------------------------------------------===//  // @@ -86,6 +85,7 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {  // Default null implementations of the callbacks.  void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}  void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} +void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {}  void SelectionDAG::DAGNodeDeletedListener::anchor() {} @@ -262,12 +262,7 @@ bool ISD::allOperandsUndef(const SDNode *N) {    // is probably the desired behavior.    
if (N->getNumOperands() == 0)      return false; - -  for (const SDValue &Op : N->op_values()) -    if (!Op.isUndef()) -      return false; - -  return true; +  return all_of(N->op_values(), [](SDValue Op) { return Op.isUndef(); });  }  bool ISD::matchUnaryPredicate(SDValue Op, @@ -299,8 +294,8 @@ bool ISD::matchUnaryPredicate(SDValue Op,  bool ISD::matchBinaryPredicate(      SDValue LHS, SDValue RHS,      std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match, -    bool AllowUndefs) { -  if (LHS.getValueType() != RHS.getValueType()) +    bool AllowUndefs, bool AllowTypeMismatch) { +  if (!AllowTypeMismatch && LHS.getValueType() != RHS.getValueType())      return false;    // TODO: Add support for scalar UNDEF cases? @@ -323,8 +318,8 @@ bool ISD::matchBinaryPredicate(      auto *RHSCst = dyn_cast<ConstantSDNode>(RHSOp);      if ((!LHSCst && !LHSUndef) || (!RHSCst && !RHSUndef))        return false; -    if (LHSOp.getValueType() != SVT || -        LHSOp.getValueType() != RHSOp.getValueType()) +    if (!AllowTypeMismatch && (LHSOp.getValueType() != SVT || +                               LHSOp.getValueType() != RHSOp.getValueType()))        return false;      if (!Match(LHSCst, RHSCst))        return false; @@ -518,6 +513,13 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {    case ISD::TargetFrameIndex:      ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());      break; +  case ISD::LIFETIME_START: +  case ISD::LIFETIME_END: +    if (cast<LifetimeSDNode>(N)->hasOffset()) { +      ID.AddInteger(cast<LifetimeSDNode>(N)->getSize()); +      ID.AddInteger(cast<LifetimeSDNode>(N)->getOffset()); +    } +    break;    case ISD::JumpTable:    case ISD::TargetJumpTable:      ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex()); @@ -834,6 +836,8 @@ void SelectionDAG::InsertNode(SDNode *N) {    N->PersistentId = NextPersistentId++;    VerifySDNode(N);  #endif +  for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) +    
DUL->NodeInserted(N);  }  /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that @@ -1136,6 +1140,18 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {                   getConstant(Imm, DL, Op.getValueType()));  } +SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { +  // Only unsigned pointer semantics are supported right now. In the future this +  // might delegate to TLI to check pointer signedness. +  return getZExtOrTrunc(Op, DL, VT); +} + +SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { +  // Only unsigned pointer semantics are supported right now. In the future this +  // might delegate to TLI to check pointer signedness. +  return getZeroExtendInReg(Op, DL, VT); +} +  /// getNOT - Create a bitwise NOT operation as (XOR Val, -1).  SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {    EVT EltVT = VT.getScalarType(); @@ -1274,6 +1290,12 @@ SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL,    return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget);  } +SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT, +                                             const SDLoc &DL, bool LegalTypes) { +  EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout(), LegalTypes); +  return getConstant(Val, DL, ShiftVT); +} +  SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT,                                      bool isTarget) {    return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget); @@ -1403,7 +1425,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,    assert((TargetFlags == 0 || isTarget) &&           "Cannot set target flags on target-independent globals");    if (Alignment == 0) -    Alignment = MF->getFunction().optForSize() +    Alignment = MF->getFunction().hasOptSize()                      ? 
getDataLayout().getABITypeAlignment(C->getType())                      : getDataLayout().getPrefTypeAlignment(C->getType());    unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; @@ -1770,7 +1792,8 @@ SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl,    if (SDNode *E = FindNodeOrInsertPos(ID, IP))      return SDValue(E, 0); -  auto *N = newSDNode<LabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label); +  auto *N = +      newSDNode<LabelSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), Label);    createOperands(N, Ops);    CSEMap.InsertNode(N, IP); @@ -1965,10 +1988,30 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,    case ISD::SETUO:    case ISD::SETUEQ:    case ISD::SETUNE: -    assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!"); +    assert(!OpVT.isInteger() && "Illegal setcc for integer!");      break;    } +  if (OpVT.isInteger()) { +    // For EQ and NE, we can always pick a value for the undef to make the +    // predicate pass or fail, so we can return undef. +    // Matches behavior in llvm::ConstantFoldCompareInstruction. +    // icmp eq/ne X, undef -> undef. +    if ((N1.isUndef() || N2.isUndef()) && +        (Cond == ISD::SETEQ || Cond == ISD::SETNE)) +      return getUNDEF(VT); + +    // If both operands are undef, we can return undef for int comparison. +    // icmp undef, undef -> undef. +    if (N1.isUndef() && N2.isUndef()) +      return getUNDEF(VT); + +    // icmp X, X -> true/false +    // icmp X, undef -> true/false because undef could be X. 
+    if (N1 == N2) +      return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT); +  } +    if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) {      const APInt &C2 = N2C->getAPIntValue();      if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) { @@ -1989,71 +2032,88 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,        }      }    } -  if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1)) { -    if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2)) { -      APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF()); -      switch (Cond) { -      default: break; -      case ISD::SETEQ:  if (R==APFloat::cmpUnordered) -                          return getUNDEF(VT); -                        LLVM_FALLTHROUGH; -      case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT, -                                               OpVT); -      case ISD::SETNE:  if (R==APFloat::cmpUnordered) -                          return getUNDEF(VT); -                        LLVM_FALLTHROUGH; -      case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan || -                                               R==APFloat::cmpLessThan, dl, VT, -                                               OpVT); -      case ISD::SETLT:  if (R==APFloat::cmpUnordered) -                          return getUNDEF(VT); -                        LLVM_FALLTHROUGH; -      case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT, -                                               OpVT); -      case ISD::SETGT:  if (R==APFloat::cmpUnordered) -                          return getUNDEF(VT); -                        LLVM_FALLTHROUGH; -      case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl, -                                               VT, OpVT); -      case ISD::SETLE:  if (R==APFloat::cmpUnordered) -                          return getUNDEF(VT); -                        LLVM_FALLTHROUGH; -      case 
ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan || -                                               R==APFloat::cmpEqual, dl, VT, -                                               OpVT); -      case ISD::SETGE:  if (R==APFloat::cmpUnordered) -                          return getUNDEF(VT); -                        LLVM_FALLTHROUGH; -      case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan || -                                           R==APFloat::cmpEqual, dl, VT, OpVT); -      case ISD::SETO:   return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT, -                                               OpVT); -      case ISD::SETUO:  return getBoolConstant(R==APFloat::cmpUnordered, dl, VT, -                                               OpVT); -      case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered || -                                               R==APFloat::cmpEqual, dl, VT, -                                               OpVT); -      case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT, -                                               OpVT); -      case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered || -                                               R==APFloat::cmpLessThan, dl, VT, -                                               OpVT); -      case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan || -                                               R==APFloat::cmpUnordered, dl, VT, -                                               OpVT); -      case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl, -                                               VT, OpVT); -      case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT, -                                               OpVT); -      } -    } else { -      // Ensure that the constant occurs on the RHS. 
-      ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); -      MVT CompVT = N1.getValueType().getSimpleVT(); -      if (!TLI->isCondCodeLegal(SwappedCond, CompVT)) -        return SDValue(); -      return getSetCC(dl, VT, N2, N1, SwappedCond); +  auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1); +  auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2); + +  if (N1CFP && N2CFP) { +    APFloat::cmpResult R = N1CFP->getValueAPF().compare(N2CFP->getValueAPF()); +    switch (Cond) { +    default: break; +    case ISD::SETEQ:  if (R==APFloat::cmpUnordered) +                        return getUNDEF(VT); +                      LLVM_FALLTHROUGH; +    case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT, +                                             OpVT); +    case ISD::SETNE:  if (R==APFloat::cmpUnordered) +                        return getUNDEF(VT); +                      LLVM_FALLTHROUGH; +    case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan || +                                             R==APFloat::cmpLessThan, dl, VT, +                                             OpVT); +    case ISD::SETLT:  if (R==APFloat::cmpUnordered) +                        return getUNDEF(VT); +                      LLVM_FALLTHROUGH; +    case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT, +                                             OpVT); +    case ISD::SETGT:  if (R==APFloat::cmpUnordered) +                        return getUNDEF(VT); +                      LLVM_FALLTHROUGH; +    case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl, +                                             VT, OpVT); +    case ISD::SETLE:  if (R==APFloat::cmpUnordered) +                        return getUNDEF(VT); +                      LLVM_FALLTHROUGH; +    case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan || +                                             R==APFloat::cmpEqual, dl, VT, +                                        
     OpVT); +    case ISD::SETGE:  if (R==APFloat::cmpUnordered) +                        return getUNDEF(VT); +                      LLVM_FALLTHROUGH; +    case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan || +                                         R==APFloat::cmpEqual, dl, VT, OpVT); +    case ISD::SETO:   return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT, +                                             OpVT); +    case ISD::SETUO:  return getBoolConstant(R==APFloat::cmpUnordered, dl, VT, +                                             OpVT); +    case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered || +                                             R==APFloat::cmpEqual, dl, VT, +                                             OpVT); +    case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT, +                                             OpVT); +    case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered || +                                             R==APFloat::cmpLessThan, dl, VT, +                                             OpVT); +    case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan || +                                             R==APFloat::cmpUnordered, dl, VT, +                                             OpVT); +    case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl, +                                             VT, OpVT); +    case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT, +                                             OpVT); +    } +  } else if (N1CFP && OpVT.isSimple() && !N2.isUndef()) { +    // Ensure that the constant occurs on the RHS. 
+    ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); +    if (!TLI->isCondCodeLegal(SwappedCond, OpVT.getSimpleVT())) +      return SDValue(); +    return getSetCC(dl, VT, N2, N1, SwappedCond); +  } else if ((N2CFP && N2CFP->getValueAPF().isNaN()) || +             (OpVT.isFloatingPoint() && (N1.isUndef() || N2.isUndef()))) { +    // If an operand is known to be a nan (or undef that could be a nan), we can +    // fold it. +    // Choosing NaN for the undef will always make unordered comparison succeed +    // and ordered comparison fails. +    // Matches behavior in llvm::ConstantFoldCompareInstruction. +    switch (ISD::getUnorderedFlavor(Cond)) { +    default: +      llvm_unreachable("Unknown flavor!"); +    case 0: // Known false. +      return getBoolConstant(false, dl, VT, OpVT); +    case 1: // Known true. +      return getBoolConstant(true, dl, VT, OpVT); +    case 2: // Undefined. +      return getUNDEF(VT);      }    } @@ -2062,16 +2122,32 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,  }  /// See if the specified operand can be simplified with the knowledge that only -/// the bits specified by Mask are used. -SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) { +/// the bits specified by DemandedBits are used. +/// TODO: really we should be making this into the DAG equivalent of +/// SimplifyMultipleUseDemandedBits and not generate any new nodes. +SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) { +  EVT VT = V.getValueType(); +  APInt DemandedElts = VT.isVector() +                           ? APInt::getAllOnesValue(VT.getVectorNumElements()) +                           : APInt(1, 1); +  return GetDemandedBits(V, DemandedBits, DemandedElts); +} + +/// See if the specified operand can be simplified with the knowledge that only +/// the bits specified by DemandedBits are used in the elements specified by +/// DemandedElts. 
+/// TODO: really we should be making this into the DAG equivalent of +/// SimplifyMultipleUseDemandedBits and not generate any new nodes. +SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, +                                      const APInt &DemandedElts) {    switch (V.getOpcode()) {    default:      break;    case ISD::Constant: { -    const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); +    auto *CV = cast<ConstantSDNode>(V.getNode());      assert(CV && "Const value should be ConstSDNode.");      const APInt &CVal = CV->getAPIntValue(); -    APInt NewVal = CVal & Mask; +    APInt NewVal = CVal & DemandedBits;      if (NewVal != CVal)        return getConstant(NewVal, SDLoc(V), V.getValueType());      break; @@ -2079,44 +2155,51 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {    case ISD::OR:    case ISD::XOR:      // If the LHS or RHS don't contribute bits to the or, drop them. -    if (MaskedValueIsZero(V.getOperand(0), Mask)) +    if (MaskedValueIsZero(V.getOperand(0), DemandedBits))        return V.getOperand(1); -    if (MaskedValueIsZero(V.getOperand(1), Mask)) +    if (MaskedValueIsZero(V.getOperand(1), DemandedBits))        return V.getOperand(0);      break;    case ISD::SRL:      // Only look at single-use SRLs.      if (!V.getNode()->hasOneUse())        break; -    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { +    if (auto *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {        // See if we can recursively simplify the LHS.        unsigned Amt = RHSC->getZExtValue();        // Watch out for shift count overflow though. 
-      if (Amt >= Mask.getBitWidth()) +      if (Amt >= DemandedBits.getBitWidth())          break; -      APInt NewMask = Mask << Amt; -      if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask)) +      APInt SrcDemandedBits = DemandedBits << Amt; +      if (SDValue SimplifyLHS = +              GetDemandedBits(V.getOperand(0), SrcDemandedBits))          return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS,                         V.getOperand(1));      }      break;    case ISD::AND: {      // X & -1 -> X (ignoring bits which aren't demanded). -    ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1)); -    if (AndVal && Mask.isSubsetOf(AndVal->getAPIntValue())) -      return V.getOperand(0); +    // Also handle the case where masked out bits in X are known to be zero. +    if (ConstantSDNode *RHSC = isConstOrConstSplat(V.getOperand(1))) { +      const APInt &AndVal = RHSC->getAPIntValue(); +      if (DemandedBits.isSubsetOf(AndVal) || +          DemandedBits.isSubsetOf(computeKnownBits(V.getOperand(0)).Zero | +                                  AndVal)) +        return V.getOperand(0); +    }      break;    }    case ISD::ANY_EXTEND: {      SDValue Src = V.getOperand(0);      unsigned SrcBitWidth = Src.getScalarValueSizeInBits();      // Being conservative here - only peek through if we only demand bits in the -    // non-extended source (even though the extended bits are technically undef). -    if (Mask.getActiveBits() > SrcBitWidth) +    // non-extended source (even though the extended bits are technically +    // undef). 
+    if (DemandedBits.getActiveBits() > SrcBitWidth)        break; -    APInt SrcMask = Mask.trunc(SrcBitWidth); -    if (SDValue DemandedSrc = GetDemandedBits(Src, SrcMask)) +    APInt SrcDemandedBits = DemandedBits.trunc(SrcBitWidth); +    if (SDValue DemandedSrc = GetDemandedBits(Src, SrcDemandedBits))        return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);      break;    } @@ -2125,7 +2208,7 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {      unsigned ExVTBits = ExVT.getScalarSizeInBits();      // If none of the extended bits are demanded, eliminate the sextinreg. -    if (Mask.getActiveBits() <= ExVTBits) +    if (DemandedBits.getActiveBits() <= ExVTBits)        return V.getOperand(0);      break; @@ -2143,9 +2226,28 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {  /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero.  We use  /// this predicate to simplify operations downstream.  Mask is known to be zero  /// for bits that V cannot have. -bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, +bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, +                                     unsigned Depth) const { +  EVT VT = V.getValueType(); +  APInt DemandedElts = VT.isVector() +                           ? APInt::getAllOnesValue(VT.getVectorNumElements()) +                           : APInt(1, 1); +  return MaskedValueIsZero(V, Mask, DemandedElts, Depth); +} + +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in +/// DemandedElts.  We use this predicate to simplify operations downstream. +/// Mask is known to be zero for bits that V cannot have. 
+bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, +                                     const APInt &DemandedElts,                                       unsigned Depth) const { -  return Mask.isSubsetOf(computeKnownBits(Op, Depth).Zero); +  return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero); +} + +/// MaskedValueIsAllOnes - Return true if '(Op & Mask) == Mask'. +bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, +                                        unsigned Depth) const { +  return Mask.isSubsetOf(computeKnownBits(V, Depth).One);  }  /// isSplatValue - Return true if the vector V has the same value @@ -2244,28 +2346,50 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {           (AllowUndefs || !UndefElts);  } -/// Helper function that checks to see if a node is a constant or a -/// build vector of splat constants at least within the demanded elts. -static ConstantSDNode *isConstOrDemandedConstSplat(SDValue N, -                                                   const APInt &DemandedElts) { -  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) -    return CN; -  if (N.getOpcode() != ISD::BUILD_VECTOR) -    return nullptr; -  EVT VT = N.getValueType(); -  ConstantSDNode *Cst = nullptr; -  unsigned NumElts = VT.getVectorNumElements(); -  assert(DemandedElts.getBitWidth() == NumElts && "Unexpected vector size"); -  for (unsigned i = 0; i != NumElts; ++i) { -    if (!DemandedElts[i]) -      continue; -    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(i)); -    if (!C || (Cst && Cst->getAPIntValue() != C->getAPIntValue()) || -        C->getValueType(0) != VT.getScalarType()) -      return nullptr; -    Cst = C; +SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) { +  V = peekThroughExtractSubvectors(V); + +  EVT VT = V.getValueType(); +  unsigned Opcode = V.getOpcode(); +  switch (Opcode) { +  default: { +    APInt UndefElts; +    APInt DemandedElts = 
APInt::getAllOnesValue(VT.getVectorNumElements()); +    if (isSplatValue(V, DemandedElts, UndefElts)) { +      // Handle case where all demanded elements are UNDEF. +      if (DemandedElts.isSubsetOf(UndefElts)) { +        SplatIdx = 0; +        return getUNDEF(VT); +      } +      SplatIdx = (UndefElts & DemandedElts).countTrailingOnes(); +      return V; +    } +    break; +  } +  case ISD::VECTOR_SHUFFLE: { +    // Check if this is a shuffle node doing a splat. +    // TODO - remove this and rely purely on SelectionDAG::isSplatValue, +    // getTargetVShiftNode currently struggles without the splat source. +    auto *SVN = cast<ShuffleVectorSDNode>(V); +    if (!SVN->isSplat()) +      break; +    int Idx = SVN->getSplatIndex(); +    int NumElts = V.getValueType().getVectorNumElements(); +    SplatIdx = Idx % NumElts; +    return V.getOperand(Idx / NumElts);    } -  return Cst; +  } + +  return SDValue(); +} + +SDValue SelectionDAG::getSplatValue(SDValue V) { +  int SplatIdx; +  if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx)) +    return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), +                   SrcVector.getValueType().getScalarType(), SrcVector, +                   getIntPtrConstant(SplatIdx, SDLoc(V))); +  return SDValue();  }  /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that @@ -2708,8 +2832,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,      break;    case ISD::FSHL:    case ISD::FSHR: -    if (ConstantSDNode *C = -            isConstOrDemandedConstSplat(Op.getOperand(2), DemandedElts)) { +    if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) {        unsigned Amt = C->getAPIntValue().urem(BitWidth);        // For fshl, 0-shift returns the 1st arg. 
@@ -2801,8 +2924,59 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,    }    case ISD::LOAD: {      LoadSDNode *LD = cast<LoadSDNode>(Op); -    // If this is a ZEXTLoad and we are looking at the loaded value. -    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { +    const Constant *Cst = TLI->getTargetConstantFromLoad(LD); +    if (ISD::isNON_EXTLoad(LD) && Cst) { +      // Determine any common known bits from the loaded constant pool value. +      Type *CstTy = Cst->getType(); +      if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits()) { +        // If its a vector splat, then we can (quickly) reuse the scalar path. +        // NOTE: We assume all elements match and none are UNDEF. +        if (CstTy->isVectorTy()) { +          if (const Constant *Splat = Cst->getSplatValue()) { +            Cst = Splat; +            CstTy = Cst->getType(); +          } +        } +        // TODO - do we need to handle different bitwidths? +        if (CstTy->isVectorTy() && BitWidth == CstTy->getScalarSizeInBits()) { +          // Iterate across all vector elements finding common known bits. 
+          Known.One.setAllBits(); +          Known.Zero.setAllBits(); +          for (unsigned i = 0; i != NumElts; ++i) { +            if (!DemandedElts[i]) +              continue; +            if (Constant *Elt = Cst->getAggregateElement(i)) { +              if (auto *CInt = dyn_cast<ConstantInt>(Elt)) { +                const APInt &Value = CInt->getValue(); +                Known.One &= Value; +                Known.Zero &= ~Value; +                continue; +              } +              if (auto *CFP = dyn_cast<ConstantFP>(Elt)) { +                APInt Value = CFP->getValueAPF().bitcastToAPInt(); +                Known.One &= Value; +                Known.Zero &= ~Value; +                continue; +              } +            } +            Known.One.clearAllBits(); +            Known.Zero.clearAllBits(); +            break; +          } +        } else if (BitWidth == CstTy->getPrimitiveSizeInBits()) { +          if (auto *CInt = dyn_cast<ConstantInt>(Cst)) { +            const APInt &Value = CInt->getValue(); +            Known.One = Value; +            Known.Zero = ~Value; +          } else if (auto *CFP = dyn_cast<ConstantFP>(Cst)) { +            APInt Value = CFP->getValueAPF().bitcastToAPInt(); +            Known.One = Value; +            Known.Zero = ~Value; +          } +        } +      } +    } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { +      // If this is a ZEXTLoad and we are looking at the loaded value.        
EVT VT = LD->getMemoryVT();        unsigned MemBits = VT.getScalarSizeInBits();        Known.Zero.setBitsFrom(MemBits); @@ -2816,15 +2990,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,      EVT InVT = Op.getOperand(0).getValueType();      APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());      Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); -    Known = Known.zext(BitWidth); -    Known.Zero.setBitsFrom(InVT.getScalarSizeInBits()); +    Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);      break;    }    case ISD::ZERO_EXTEND: { -    EVT InVT = Op.getOperand(0).getValueType();      Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); -    Known = Known.zext(BitWidth); -    Known.Zero.setBitsFrom(InVT.getScalarSizeInBits()); +    Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);      break;    }    case ISD::SIGN_EXTEND_VECTOR_INREG: { @@ -2845,7 +3016,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,    }    case ISD::ANY_EXTEND: {      Known = computeKnownBits(Op.getOperand(0), Depth+1); -    Known = Known.zext(BitWidth); +    Known = Known.zext(BitWidth, false /* ExtendedBitsAreKnownZero */);      break;    }    case ISD::TRUNCATE: { @@ -2878,39 +3049,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,      LLVM_FALLTHROUGH;    case ISD::SUB:    case ISD::SUBC: { -    if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) { -      // We know that the top bits of C-X are clear if X contains less bits -      // than C (i.e. no wrap-around can happen).  For example, 20-X is -      // positive if we can prove that X is >= 0 and < 16. 
-      if (CLHS->getAPIntValue().isNonNegative()) { -        unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); -        // NLZ can't be BitWidth with no sign bit -        APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); -        Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, -                         Depth + 1); - -        // If all of the MaskV bits are known to be zero, then we know the -        // output top bits are zero, because we now know that the output is -        // from [0-C]. -        if ((Known2.Zero & MaskV) == MaskV) { -          unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); -          // Top bits known zero. -          Known.Zero.setHighBits(NLZ2); -        } -      } -    } - -    // If low bits are know to be zero in both operands, then we know they are -    // going to be 0 in the result. Both addition and complement operations -    // preserve the low zero bits. -    Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); -    unsigned KnownZeroLow = Known2.countMinTrailingZeros(); -    if (KnownZeroLow == 0) -      break; - +    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);      Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); -    KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); -    Known.Zero.setLowBits(KnownZeroLow); +    Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false, +                                        Known, Known2);      break;    }    case ISD::UADDO: @@ -2928,34 +3070,26 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,    case ISD::ADD:    case ISD::ADDC:    case ISD::ADDE: { -    // Output known-0 bits are known if clear or set in both the low clear bits -    // common to both LHS & RHS.  For example, 8+(X<<3) is known to have the -    // low 3 bits clear. 
-    // Output known-0 bits are also known if the top bits of each input are -    // known to be clear. For example, if one input has the top 10 bits clear -    // and the other has the top 8 bits clear, we know the top 7 bits of the -    // output must be clear. -    Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); -    unsigned KnownZeroHigh = Known2.countMinLeadingZeros(); -    unsigned KnownZeroLow = Known2.countMinTrailingZeros(); +    assert(Op.getResNo() == 0 && "We only compute knownbits for the sum here."); + +    // With ADDE and ADDCARRY, a carry bit may be added in. +    KnownBits Carry(1); +    if (Opcode == ISD::ADDE) +      // Can't track carry from glue, set carry to unknown. +      Carry.resetAll(); +    else if (Opcode == ISD::ADDCARRY) +      // TODO: Compute known bits for the carry operand. Not sure if it is worth +      // the trouble (how often will we find a known carry bit). And I haven't +      // tested this very much yet, but something like this might work: +      //   Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); +      //   Carry = Carry.zextOrTrunc(1, false); +      Carry.resetAll(); +    else +      Carry.setAllZero(); +    Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);      Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); -    KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros()); -    KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); - -    if (Opcode == ISD::ADDE || Opcode == ISD::ADDCARRY) { -      // With ADDE and ADDCARRY, a carry bit may be added in, so we can only -      // use this information if we know (at least) that the low two bits are -      // clear. We then return to the caller that the low bit is unknown but -      // that other bits are known zero. 
-      if (KnownZeroLow >= 2) -        Known.Zero.setBits(1, KnownZeroLow); -      break; -    } - -    Known.Zero.setLowBits(KnownZeroLow); -    if (KnownZeroHigh > 1) -      Known.Zero.setHighBits(KnownZeroHigh - 1); +    Known = KnownBits::computeForAddCarry(Known, Known2, Carry);      break;    }    case ISD::SREM: @@ -3010,21 +3144,20 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,    case ISD::EXTRACT_ELEMENT: {      Known = computeKnownBits(Op.getOperand(0), Depth+1);      const unsigned Index = Op.getConstantOperandVal(1); -    const unsigned BitWidth = Op.getValueSizeInBits(); +    const unsigned EltBitWidth = Op.getValueSizeInBits();      // Remove low part of known bits mask -    Known.Zero = Known.Zero.getHiBits(Known.Zero.getBitWidth() - Index * BitWidth); -    Known.One = Known.One.getHiBits(Known.One.getBitWidth() - Index * BitWidth); +    Known.Zero = Known.Zero.getHiBits(Known.getBitWidth() - Index * EltBitWidth); +    Known.One = Known.One.getHiBits(Known.getBitWidth() - Index * EltBitWidth);      // Remove high part of known bit mask -    Known = Known.trunc(BitWidth); +    Known = Known.trunc(EltBitWidth);      break;    }    case ISD::EXTRACT_VECTOR_ELT: {      SDValue InVec = Op.getOperand(0);      SDValue EltNo = Op.getOperand(1);      EVT VecVT = InVec.getValueType(); -    const unsigned BitWidth = Op.getValueSizeInBits();      const unsigned EltBitWidth = VecVT.getScalarSizeInBits();      const unsigned NumSrcElts = VecVT.getVectorNumElements();      // If BitWidth > EltBitWidth the value is anyext:ed. 
So we do not know @@ -3042,7 +3175,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,        Known = computeKnownBits(InVec, Depth + 1);      }      if (BitWidth > EltBitWidth) -      Known = Known.zext(BitWidth); +      Known = Known.zext(BitWidth, false /* => any extend */);      break;    }    case ISD::INSERT_VECTOR_ELT: { @@ -3146,10 +3279,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,      // the minimum of the clamp min/max range.      bool IsMax = (Opcode == ISD::SMAX);      ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; -    if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts))) +    if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts)))        if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) -        CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1), -                                              DemandedElts); +        CstHigh = +            isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts);      if (CstLow && CstHigh) {        if (!IsMax)          std::swap(CstLow, CstHigh); @@ -3430,7 +3563,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,      Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);      // SRA X, C   -> adds C sign bits.      if (ConstantSDNode *C = -            isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) { +            isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {        APInt ShiftVal = C->getAPIntValue();        ShiftVal += Tmp;        Tmp = ShiftVal.uge(VTBits) ? 
VTBits : ShiftVal.getZExtValue(); @@ -3438,7 +3571,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,      return Tmp;    case ISD::SHL:      if (ConstantSDNode *C = -            isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) { +            isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {        // shl destroys sign bits.        Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);        if (C->getAPIntValue().uge(VTBits) ||      // Bad shift. @@ -3478,10 +3611,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,      // the minimum of the clamp min/max range.      bool IsMax = (Opcode == ISD::SMAX);      ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; -    if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts))) +    if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts)))        if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) -        CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1), -                                              DemandedElts); +        CstHigh = +            isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts);      if (CstLow && CstHigh) {        if (!IsMax)          std::swap(CstLow, CstHigh); @@ -3621,7 +3754,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,      SDValue InVec = Op.getOperand(0);      SDValue InVal = Op.getOperand(1);      SDValue EltNo = Op.getOperand(2); -    unsigned NumElts = InVec.getValueType().getVectorNumElements();      ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);      if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { @@ -3752,13 +3884,43 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,      if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {        unsigned ExtType = LD->getExtensionType();        switch (ExtType) { -        default: break; -  
      case ISD::SEXTLOAD:    // '17' bits known -          Tmp = LD->getMemoryVT().getScalarSizeInBits(); -          return VTBits-Tmp+1; -        case ISD::ZEXTLOAD:    // '16' bits known -          Tmp = LD->getMemoryVT().getScalarSizeInBits(); -          return VTBits-Tmp; +      default: break; +      case ISD::SEXTLOAD: // e.g. i16->i32 = '17' bits known. +        Tmp = LD->getMemoryVT().getScalarSizeInBits(); +        return VTBits - Tmp + 1; +      case ISD::ZEXTLOAD: // e.g. i16->i32 = '16' bits known. +        Tmp = LD->getMemoryVT().getScalarSizeInBits(); +        return VTBits - Tmp; +      case ISD::NON_EXTLOAD: +        if (const Constant *Cst = TLI->getTargetConstantFromLoad(LD)) { +          // We only need to handle vectors - computeKnownBits should handle +          // scalar cases. +          Type *CstTy = Cst->getType(); +          if (CstTy->isVectorTy() && +              (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits()) { +            Tmp = VTBits; +            for (unsigned i = 0; i != NumElts; ++i) { +              if (!DemandedElts[i]) +                continue; +              if (Constant *Elt = Cst->getAggregateElement(i)) { +                if (auto *CInt = dyn_cast<ConstantInt>(Elt)) { +                  const APInt &Value = CInt->getValue(); +                  Tmp = std::min(Tmp, Value.getNumSignBits()); +                  continue; +                } +                if (auto *CFP = dyn_cast<ConstantFP>(Elt)) { +                  APInt Value = CFP->getValueAPF().bitcastToAPInt(); +                  Tmp = std::min(Tmp, Value.getNumSignBits()); +                  continue; +                } +              } +              // Unknown type. Conservatively assume no bits match sign bit. 
+              return 1; +            } +            return Tmp; +          } +        } +        break;        }      }    } @@ -3803,8 +3965,7 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {      return false;    if (Op.getOpcode() == ISD::OR && -      !MaskedValueIsZero(Op.getOperand(0), -                     cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue())) +      !MaskedValueIsZero(Op.getOperand(0), Op.getConstantOperandAPInt(1)))      return false;    return true; @@ -4013,7 +4174,9 @@ static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,    return SDValue();  } -static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT, +/// Try to simplify vector concatenation to an input value, undef, or build +/// vector. +static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,                                    ArrayRef<SDValue> Ops,                                    SelectionDAG &DAG) {    assert(!Ops.empty() && "Can't concatenate an empty list of vectors!"); @@ -4033,6 +4196,31 @@ static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,    if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))      return DAG.getUNDEF(VT); +  // Scan the operands and look for extract operations from a single source +  // that correspond to insertion at the same location via this concatenation: +  // concat (extract X, 0*subvec_elts), (extract X, 1*subvec_elts), ... 
+  SDValue IdentitySrc; +  bool IsIdentity = true; +  for (unsigned i = 0, e = Ops.size(); i != e; ++i) { +    SDValue Op = Ops[i]; +    unsigned IdentityIndex = i * Op.getValueType().getVectorNumElements(); +    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR || +        Op.getOperand(0).getValueType() != VT || +        (IdentitySrc && Op.getOperand(0) != IdentitySrc) || +        !isa<ConstantSDNode>(Op.getOperand(1)) || +        Op.getConstantOperandVal(1) != IdentityIndex) { +      IsIdentity = false; +      break; +    } +    assert((!IdentitySrc || IdentitySrc == Op.getOperand(0)) && +           "Unexpected identity source vector for concat of extracts"); +    IdentitySrc = Op.getOperand(0); +  } +  if (IsIdentity) { +    assert(IdentitySrc && "Failed to set source vector of extracts"); +    return IdentitySrc; +  } +    // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be    // simplified to one big BUILD_VECTOR.    // FIXME: Add support for SCALAR_TO_VECTOR as well. @@ -4288,9 +4476,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      if (Operand.isUndef())        return getUNDEF(VT);      break; +  case ISD::FP_TO_SINT: +  case ISD::FP_TO_UINT: +    if (Operand.isUndef()) +      return getUNDEF(VT); +    break; +  case ISD::SINT_TO_FP: +  case ISD::UINT_TO_FP: +    // [us]itofp(undef) = 0, because the result value is bounded. 
+    if (Operand.isUndef()) +      return getConstantFP(0.0, DL, VT); +    break;    case ISD::SIGN_EXTEND:      assert(VT.isInteger() && Operand.getValueType().isInteger() &&             "Invalid SIGN_EXTEND!"); +    assert(VT.isVector() == Operand.getValueType().isVector() && +           "SIGN_EXTEND result type type should be vector iff the operand " +           "type is vector!");      if (Operand.getValueType() == VT) return Operand;   // noop extension      assert((!VT.isVector() ||              VT.getVectorNumElements() == @@ -4307,6 +4509,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,    case ISD::ZERO_EXTEND:      assert(VT.isInteger() && Operand.getValueType().isInteger() &&             "Invalid ZERO_EXTEND!"); +    assert(VT.isVector() == Operand.getValueType().isVector() && +           "ZERO_EXTEND result type type should be vector iff the operand " +           "type is vector!");      if (Operand.getValueType() == VT) return Operand;   // noop extension      assert((!VT.isVector() ||              VT.getVectorNumElements() == @@ -4323,6 +4528,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,    case ISD::ANY_EXTEND:      assert(VT.isInteger() && Operand.getValueType().isInteger() &&             "Invalid ANY_EXTEND!"); +    assert(VT.isVector() == Operand.getValueType().isVector() && +           "ANY_EXTEND result type type should be vector iff the operand " +           "type is vector!");      if (Operand.getValueType() == VT) return Operand;   // noop extension      assert((!VT.isVector() ||              VT.getVectorNumElements() == @@ -4350,6 +4558,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,    case ISD::TRUNCATE:      assert(VT.isInteger() && Operand.getValueType().isInteger() &&             "Invalid TRUNCATE!"); +    assert(VT.isVector() == Operand.getValueType().isVector() && +           "TRUNCATE result type type should be vector iff the operand " +           
"type is vector!");      if (Operand.getValueType() == VT) return Operand;   // noop truncate      assert((!VT.isVector() ||              VT.getVectorNumElements() == @@ -4429,6 +4640,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,        return Operand.getOperand(0);      break;    case ISD::FNEG: +    // Negation of an unknown bag of bits is still completely undefined. +    if (OpOpcode == ISD::UNDEF) +      return getUNDEF(VT); +      // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0      if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) &&          OpOpcode == ISD::FSUB) @@ -4513,13 +4728,13 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,  }  SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, -                                             EVT VT, const ConstantSDNode *Cst1, -                                             const ConstantSDNode *Cst2) { -  if (Cst1->isOpaque() || Cst2->isOpaque()) +                                             EVT VT, const ConstantSDNode *C1, +                                             const ConstantSDNode *C2) { +  if (C1->isOpaque() || C2->isOpaque())      return SDValue(); -  std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(), -                                            Cst2->getAPIntValue()); +  std::pair<APInt, bool> Folded = FoldValue(Opcode, C1->getAPIntValue(), +                                            C2->getAPIntValue());    if (!Folded.second)      return SDValue();    return getConstant(Folded.first, DL, VT); @@ -4532,16 +4747,16 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,      return SDValue();    if (!TLI->isOffsetFoldingLegal(GA))      return SDValue(); -  const ConstantSDNode *Cst2 = dyn_cast<ConstantSDNode>(N2); -  if (!Cst2) +  auto *C2 = dyn_cast<ConstantSDNode>(N2); +  if (!C2)      return SDValue(); -  int64_t Offset = Cst2->getSExtValue(); +  int64_t 
Offset = C2->getSExtValue();    switch (Opcode) {    case ISD::ADD: break;    case ISD::SUB: Offset = -uint64_t(Offset); break;    default: return SDValue();    } -  return getGlobalAddress(GA->getGlobal(), SDLoc(Cst2), VT, +  return getGlobalAddress(GA->getGlobal(), SDLoc(C2), VT,                            GA->getOffset() + uint64_t(Offset));  } @@ -4571,21 +4786,20 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {  }  SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, -                                             EVT VT, SDNode *Cst1, -                                             SDNode *Cst2) { +                                             EVT VT, SDNode *N1, SDNode *N2) {    // If the opcode is a target-specific ISD node, there's nothing we can    // do here and the operand rules may not line up with the below, so    // bail early.    if (Opcode >= ISD::BUILTIN_OP_END)      return SDValue(); -  if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)})) +  if (isUndef(Opcode, {SDValue(N1, 0), SDValue(N2, 0)}))      return getUNDEF(VT);    // Handle the case of two scalars. 
-  if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) { -    if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) { -      SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2); +  if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) { +    if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) { +      SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, C1, C2);        assert((!Folded || !VT.isVector()) &&               "Can't fold vectors ops with scalar operands");        return Folded; @@ -4593,19 +4807,19 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,    }    // fold (add Sym, c) -> Sym+c -  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst1)) -    return FoldSymbolOffset(Opcode, VT, GA, Cst2); +  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1)) +    return FoldSymbolOffset(Opcode, VT, GA, N2);    if (TLI->isCommutativeBinOp(Opcode)) -    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst2)) -      return FoldSymbolOffset(Opcode, VT, GA, Cst1); +    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2)) +      return FoldSymbolOffset(Opcode, VT, GA, N1);    // For vectors, extract each constant element and fold them individually.    // Either input may be an undef value. -  auto *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); -  if (!BV1 && !Cst1->isUndef()) +  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); +  if (!BV1 && !N1->isUndef())      return SDValue(); -  auto *BV2 = dyn_cast<BuildVectorSDNode>(Cst2); -  if (!BV2 && !Cst2->isUndef()) +  auto *BV2 = dyn_cast<BuildVectorSDNode>(N2); +  if (!BV2 && !N2->isUndef())      return SDValue();    // If both operands are undef, that's handled the same way as scalars.    
if (!BV1 && !BV2) @@ -4755,6 +4969,64 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,    return V;  } +SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, +                                         EVT VT, SDValue N1, SDValue N2) { +  // TODO: We don't do any constant folding for strict FP opcodes here, but we +  //       should. That will require dealing with a potentially non-default +  //       rounding mode, checking the "opStatus" return value from the APFloat +  //       math calculations, and possibly other variations. +  auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode()); +  auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode()); +  if (N1CFP && N2CFP) { +    APFloat C1 = N1CFP->getValueAPF(), C2 = N2CFP->getValueAPF(); +    switch (Opcode) { +    case ISD::FADD: +      C1.add(C2, APFloat::rmNearestTiesToEven); +      return getConstantFP(C1, DL, VT); +    case ISD::FSUB: +      C1.subtract(C2, APFloat::rmNearestTiesToEven); +      return getConstantFP(C1, DL, VT); +    case ISD::FMUL: +      C1.multiply(C2, APFloat::rmNearestTiesToEven); +      return getConstantFP(C1, DL, VT); +    case ISD::FDIV: +      C1.divide(C2, APFloat::rmNearestTiesToEven); +      return getConstantFP(C1, DL, VT); +    case ISD::FREM: +      C1.mod(C2); +      return getConstantFP(C1, DL, VT); +    case ISD::FCOPYSIGN: +      C1.copySign(C2); +      return getConstantFP(C1, DL, VT); +    default: break; +    } +  } +  if (N1CFP && Opcode == ISD::FP_ROUND) { +    APFloat C1 = N1CFP->getValueAPF();    // make copy +    bool Unused; +    // This can return overflow, underflow, or inexact; we don't care. +    // FIXME need to be more flexible about rounding mode. 
+    (void) C1.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, +                      &Unused); +    return getConstantFP(C1, DL, VT); +  } + +  switch (Opcode) { +  case ISD::FADD: +  case ISD::FSUB: +  case ISD::FMUL: +  case ISD::FDIV: +  case ISD::FREM: +    // If both operands are undef, the result is undef. If 1 operand is undef, +    // the result is NaN. This should match the behavior of the IR optimizer. +    if (N1.isUndef() && N2.isUndef()) +      return getUNDEF(VT); +    if (N1.isUndef() || N2.isUndef()) +      return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT); +  } +  return SDValue(); +} +  SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,                                SDValue N1, SDValue N2, const SDNodeFlags Flags) {    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); @@ -4791,9 +5063,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      break;    }    case ISD::CONCAT_VECTORS: { -    // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.      SDValue Ops[] = {N1, N2}; -    if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this)) +    if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))        return V;      break;    } @@ -4847,6 +5118,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      assert(VT.isFloatingPoint() && "This operator only applies to FP types!");      assert(N1.getValueType() == N2.getValueType() &&             N1.getValueType() == VT && "Binary operator types must match!"); +    if (SDValue V = simplifyFPBinop(Opcode, N1, N2)) +      return V;      break;    case ISD::FCOPYSIGN:   // N1 and result must match.  N1/N2 need not match.      assert(N1.getValueType() == VT && @@ -5100,73 +5373,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,            FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode()))      return SV; -  // Constant fold FP operations. 
-  bool HasFPExceptions = TLI->hasFloatingPointExceptions(); -  if (N1CFP) { -    if (N2CFP) { -      APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF(); -      APFloat::opStatus s; -      switch (Opcode) { -      case ISD::FADD: -        s = V1.add(V2, APFloat::rmNearestTiesToEven); -        if (!HasFPExceptions || s != APFloat::opInvalidOp) -          return getConstantFP(V1, DL, VT); -        break; -      case ISD::FSUB: -        s = V1.subtract(V2, APFloat::rmNearestTiesToEven); -        if (!HasFPExceptions || s!=APFloat::opInvalidOp) -          return getConstantFP(V1, DL, VT); -        break; -      case ISD::FMUL: -        s = V1.multiply(V2, APFloat::rmNearestTiesToEven); -        if (!HasFPExceptions || s!=APFloat::opInvalidOp) -          return getConstantFP(V1, DL, VT); -        break; -      case ISD::FDIV: -        s = V1.divide(V2, APFloat::rmNearestTiesToEven); -        if (!HasFPExceptions || (s!=APFloat::opInvalidOp && -                                 s!=APFloat::opDivByZero)) { -          return getConstantFP(V1, DL, VT); -        } -        break; -      case ISD::FREM : -        s = V1.mod(V2); -        if (!HasFPExceptions || (s!=APFloat::opInvalidOp && -                                 s!=APFloat::opDivByZero)) { -          return getConstantFP(V1, DL, VT); -        } -        break; -      case ISD::FCOPYSIGN: -        V1.copySign(V2); -        return getConstantFP(V1, DL, VT); -      default: break; -      } -    } - -    if (Opcode == ISD::FP_ROUND) { -      APFloat V = N1CFP->getValueAPF();    // make copy -      bool ignored; -      // This can return overflow, underflow, or inexact; we don't care. -      // FIXME need to be more flexible about rounding mode. 
-      (void)V.convert(EVTToAPFloatSemantics(VT), -                      APFloat::rmNearestTiesToEven, &ignored); -      return getConstantFP(V, DL, VT); -    } -  } - -  switch (Opcode) { -  case ISD::FADD: -  case ISD::FSUB: -  case ISD::FMUL: -  case ISD::FDIV: -  case ISD::FREM: -    // If both operands are undef, the result is undef. If 1 operand is undef, -    // the result is NaN. This should match the behavior of the IR optimizer. -    if (N1.isUndef() && N2.isUndef()) -      return getUNDEF(VT); -    if (N1.isUndef() || N2.isUndef()) -      return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT); -  } +  if (SDValue V = foldConstantFPMath(Opcode, DL, VT, N1, N2)) +    return V;    // Canonicalize an UNDEF to the RHS, even over a constant.    if (N1.isUndef()) { @@ -5261,10 +5469,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,        APFloat  V1 = N1CFP->getValueAPF();        const APFloat &V2 = N2CFP->getValueAPF();        const APFloat &V3 = N3CFP->getValueAPF(); -      APFloat::opStatus s = -        V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); -      if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp) -        return getConstantFP(V1, DL, VT); +      V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); +      return getConstantFP(V1, DL, VT);      }      break;    } @@ -5276,9 +5482,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      break;    }    case ISD::CONCAT_VECTORS: { -    // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.      SDValue Ops[] = {N1, N2, N3}; -    if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this)) +    if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))        return V;      break;    } @@ -5317,6 +5522,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      break;    }    case ISD::INSERT_SUBVECTOR: { +    // Inserting undef into undef is still undef. 
+    if (N1.isUndef() && N2.isUndef()) +      return getUNDEF(VT);      SDValue Index = N3;      if (VT.isSimple() && N1.getValueType().isSimple()          && N2.getValueType().isSimple()) { @@ -5337,6 +5545,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,        // Trivial insertion.        if (VT.getSimpleVT() == N2.getSimpleValueType())          return N2; + +      // If this is an insert of an extracted vector into an undef vector, we +      // can just use the input to the extract. +      if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR && +          N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT) +        return N2.getOperand(0);      }      break;    } @@ -5521,116 +5735,12 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {                                    SrcDelta + G->getOffset());  } -/// Determines the optimal series of memory ops to replace the memset / memcpy. -/// Return true if the number of memory ops is below the threshold (Limit). -/// It returns the types of the sequence of memory ops to perform -/// memset / memcpy by reference. 
-static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, -                                     unsigned Limit, uint64_t Size, -                                     unsigned DstAlign, unsigned SrcAlign, -                                     bool IsMemset, -                                     bool ZeroMemset, -                                     bool MemcpyStrSrc, -                                     bool AllowOverlap, -                                     unsigned DstAS, unsigned SrcAS, -                                     SelectionDAG &DAG, -                                     const TargetLowering &TLI) { -  assert((SrcAlign == 0 || SrcAlign >= DstAlign) && -         "Expecting memcpy / memset source to meet alignment requirement!"); -  // If 'SrcAlign' is zero, that means the memory operation does not need to -  // load the value, i.e. memset or memcpy from constant string. Otherwise, -  // it's the inferred alignment of the source. 'DstAlign', on the other hand, -  // is the specified alignment of the memory operation. If it is zero, that -  // means it's possible to change the alignment of the destination. -  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does -  // not need to be loaded. -  EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, -                                   IsMemset, ZeroMemset, MemcpyStrSrc, -                                   DAG.getMachineFunction()); - -  if (VT == MVT::Other) { -    // Use the largest integer type whose alignment constraints are satisfied. -    // We only need to check DstAlign here as SrcAlign is always greater or -    // equal to DstAlign (or zero). -    VT = MVT::i64; -    while (DstAlign && DstAlign < VT.getSizeInBits() / 8 && -           !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) -      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); -    assert(VT.isInteger()); - -    // Find the largest legal integer type. 
-    MVT LVT = MVT::i64; -    while (!TLI.isTypeLegal(LVT)) -      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1); -    assert(LVT.isInteger()); - -    // If the type we've chosen is larger than the largest legal integer type -    // then use that instead. -    if (VT.bitsGT(LVT)) -      VT = LVT; -  } - -  unsigned NumMemOps = 0; -  while (Size != 0) { -    unsigned VTSize = VT.getSizeInBits() / 8; -    while (VTSize > Size) { -      // For now, only use non-vector load / store's for the left-over pieces. -      EVT NewVT = VT; -      unsigned NewVTSize; - -      bool Found = false; -      if (VT.isVector() || VT.isFloatingPoint()) { -        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32; -        if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) && -            TLI.isSafeMemOpType(NewVT.getSimpleVT())) -          Found = true; -        else if (NewVT == MVT::i64 && -                 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) && -                 TLI.isSafeMemOpType(MVT::f64)) { -          // i64 is usually not legal on 32-bit targets, but f64 may be. -          NewVT = MVT::f64; -          Found = true; -        } -      } - -      if (!Found) { -        do { -          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1); -          if (NewVT == MVT::i8) -            break; -        } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT())); -      } -      NewVTSize = NewVT.getSizeInBits() / 8; - -      // If the new VT cannot cover all of the remaining bits, then consider -      // issuing a (or a pair of) unaligned and overlapping load / store. 
-      bool Fast; -      if (NumMemOps && AllowOverlap && NewVTSize < Size && -          TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && -          Fast) -        VTSize = Size; -      else { -        VT = NewVT; -        VTSize = NewVTSize; -      } -    } - -    if (++NumMemOps > Limit) -      return false; - -    MemOps.push_back(VT); -    Size -= VTSize; -  } - -  return true; -} -  static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {    // On Darwin, -Os means optimize for size without hurting performance, so    // only really optimize for size when -Oz (MinSize) is used.    if (MF.getTarget().getTargetTriple().isOSDarwin()) -    return MF.getFunction().optForMinSize(); -  return MF.getFunction().optForSize(); +    return MF.getFunction().hasMinSize(); +  return MF.getFunction().hasOptSize();  }  static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, @@ -5665,6 +5775,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,                                         MachinePointerInfo DstPtrInfo,                                         MachinePointerInfo SrcPtrInfo) {    // Turn a memcpy of undef to nop. +  // FIXME: We need to honor volatile even is Src is undef.    if (Src.isUndef())      return Chain; @@ -5691,13 +5802,12 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,    bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;    unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); -  if (!FindOptimalMemOpLowering(MemOps, Limit, Size, -                                (DstAlignCanChange ? 0 : Align), -                                (isZeroConstant ? 
0 : SrcAlign), -                                false, false, CopyFromConstant, true, -                                DstPtrInfo.getAddrSpace(), -                                SrcPtrInfo.getAddrSpace(), -                                DAG, TLI)) +  if (!TLI.findOptimalMemOpLowering( +          MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), +          (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false, +          /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant, +          /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), +          SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes()))      return SDValue();    if (DstAlignCanChange) { @@ -5851,6 +5961,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,                                          MachinePointerInfo DstPtrInfo,                                          MachinePointerInfo SrcPtrInfo) {    // Turn a memmove of undef to nop. +  // FIXME: We need to honor volatile even is Src is undef.    if (Src.isUndef())      return Chain; @@ -5871,13 +5982,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,    if (Align > SrcAlign)      SrcAlign = Align;    unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); - -  if (!FindOptimalMemOpLowering(MemOps, Limit, Size, -                                (DstAlignCanChange ? 0 : Align), SrcAlign, -                                false, false, false, false, -                                DstPtrInfo.getAddrSpace(), -                                SrcPtrInfo.getAddrSpace(), -                                DAG, TLI)) +  // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in +  // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the +  // correct code. +  bool AllowOverlap = false; +  if (!TLI.findOptimalMemOpLowering( +          MemOps, Limit, Size, (DstAlignCanChange ? 
0 : Align), SrcAlign, +          /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, +          AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), +          MF.getFunction().getAttributes()))      return SDValue();    if (DstAlignCanChange) { @@ -5956,6 +6069,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,                                 uint64_t Size, unsigned Align, bool isVol,                                 MachinePointerInfo DstPtrInfo) {    // Turn a memset of undef to nop. +  // FIXME: We need to honor volatile even is Src is undef.    if (Src.isUndef())      return Chain; @@ -5972,11 +6086,12 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,      DstAlignCanChange = true;    bool IsZeroVal =      isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); -  if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), -                                Size, (DstAlignCanChange ? 0 : Align), 0, -                                true, IsZeroVal, false, true, -                                DstPtrInfo.getAddrSpace(), ~0u, -                                DAG, TLI)) +  if (!TLI.findOptimalMemOpLowering( +          MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, +          (DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true, +          /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false, +          /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u, +          MF.getFunction().getAttributes()))      return SDValue();    if (DstAlignCanChange) { @@ -6097,9 +6212,11 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,    // Emit a library call.    
TargetLowering::ArgListTy Args;    TargetLowering::ArgListEntry Entry; -  Entry.Ty = getDataLayout().getIntPtrType(*getContext()); +  Entry.Ty = Type::getInt8PtrTy(*getContext());    Entry.Node = Dst; Args.push_back(Entry);    Entry.Node = Src; Args.push_back(Entry); + +  Entry.Ty = getDataLayout().getIntPtrType(*getContext());    Entry.Node = Size; Args.push_back(Entry);    // FIXME: pass in SDLoc    TargetLowering::CallLoweringInfo CLI(*this); @@ -6199,9 +6316,11 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,    // Emit a library call.    TargetLowering::ArgListTy Args;    TargetLowering::ArgListEntry Entry; -  Entry.Ty = getDataLayout().getIntPtrType(*getContext()); +  Entry.Ty = Type::getInt8PtrTy(*getContext());    Entry.Node = Dst; Args.push_back(Entry);    Entry.Node = Src; Args.push_back(Entry); + +  Entry.Ty = getDataLayout().getIntPtrType(*getContext());    Entry.Node = Size; Args.push_back(Entry);    // FIXME:  pass in SDLoc    TargetLowering::CallLoweringInfo CLI(*this); @@ -6294,16 +6413,15 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,    checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());    // Emit a library call. 
-  Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext());    TargetLowering::ArgListTy Args;    TargetLowering::ArgListEntry Entry; -  Entry.Node = Dst; Entry.Ty = IntPtrTy; +  Entry.Node = Dst; Entry.Ty = Type::getInt8PtrTy(*getContext());    Args.push_back(Entry);    Entry.Node = Src;    Entry.Ty = Src.getValueType().getTypeForEVT(*getContext());    Args.push_back(Entry);    Entry.Node = Size; -  Entry.Ty = IntPtrTy; +  Entry.Ty = getDataLayout().getIntPtrType(*getContext());    Args.push_back(Entry);    // FIXME: pass in SDLoc @@ -6384,32 +6502,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,    return SDValue(N, 0);  } -SDValue SelectionDAG::getAtomicCmpSwap( -    unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain, -    SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, -    unsigned Alignment, AtomicOrdering SuccessOrdering, -    AtomicOrdering FailureOrdering, SyncScope::ID SSID) { -  assert(Opcode == ISD::ATOMIC_CMP_SWAP || -         Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); -  assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); - -  if (Alignment == 0)  // Ensure that codegen never sees alignment 0 -    Alignment = getEVTAlignment(MemVT); - -  MachineFunction &MF = getMachineFunction(); - -  // FIXME: Volatile isn't really correct; we should keep track of atomic -  // orderings in the memoperand. 
-  auto Flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad | -               MachineMemOperand::MOStore; -  MachineMemOperand *MMO = -    MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, -                            AAMDNodes(), nullptr, SSID, SuccessOrdering, -                            FailureOrdering); - -  return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO); -} -  SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,                                         EVT MemVT, SDVTList VTs, SDValue Chain,                                         SDValue Ptr, SDValue Cmp, SDValue Swp, @@ -6424,35 +6516,6 @@ SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,  SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,                                  SDValue Chain, SDValue Ptr, SDValue Val, -                                const Value *PtrVal, unsigned Alignment, -                                AtomicOrdering Ordering, -                                SyncScope::ID SSID) { -  if (Alignment == 0)  // Ensure that codegen never sees alignment 0 -    Alignment = getEVTAlignment(MemVT); - -  MachineFunction &MF = getMachineFunction(); -  // An atomic store does not load. An atomic load does not store. -  // (An atomicrmw obviously both loads and stores.) -  // For now, atomics are considered to be volatile always, and they are -  // chained as such. -  // FIXME: Volatile isn't really correct; we should keep track of atomic -  // orderings in the memoperand. 
-  auto Flags = MachineMemOperand::MOVolatile; -  if (Opcode != ISD::ATOMIC_STORE) -    Flags |= MachineMemOperand::MOLoad; -  if (Opcode != ISD::ATOMIC_LOAD) -    Flags |= MachineMemOperand::MOStore; - -  MachineMemOperand *MMO = -    MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, -                            MemVT.getStoreSize(), Alignment, AAMDNodes(), -                            nullptr, SSID, Ordering); - -  return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO); -} - -SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, -                                SDValue Chain, SDValue Ptr, SDValue Val,                                  MachineMemOperand *MMO) {    assert((Opcode == ISD::ATOMIC_LOAD_ADD ||            Opcode == ISD::ATOMIC_LOAD_SUB || @@ -6465,6 +6528,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,            Opcode == ISD::ATOMIC_LOAD_MAX ||            Opcode == ISD::ATOMIC_LOAD_UMIN ||            Opcode == ISD::ATOMIC_LOAD_UMAX || +          Opcode == ISD::ATOMIC_LOAD_FADD || +          Opcode == ISD::ATOMIC_LOAD_FSUB ||            Opcode == ISD::ATOMIC_SWAP ||            Opcode == ISD::ATOMIC_STORE) &&           "Invalid Atomic Op"); @@ -6502,7 +6567,7 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {  SDValue SelectionDAG::getMemIntrinsicNode(      unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,      EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, -    MachineMemOperand::Flags Flags, unsigned Size) { +    MachineMemOperand::Flags Flags, unsigned Size, const AAMDNodes &AAInfo) {    if (Align == 0)  // Ensure that codegen never sees alignment 0      Align = getEVTAlignment(MemVT); @@ -6511,7 +6576,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(    MachineFunction &MF = getMachineFunction();    MachineMemOperand *MMO = -    MF.getMachineMemOperand(PtrInfo, Flags, Size, Align); +      MF.getMachineMemOperand(PtrInfo, 
Flags, Size, Align, AAInfo);    return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO);  } @@ -6557,6 +6622,36 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,    return SDValue(N, 0);  } +SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl, +                                      SDValue Chain, int FrameIndex, +                                      int64_t Size, int64_t Offset) { +  const unsigned Opcode = IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END; +  const auto VTs = getVTList(MVT::Other); +  SDValue Ops[2] = { +      Chain, +      getFrameIndex(FrameIndex, +                    getTargetLoweringInfo().getFrameIndexTy(getDataLayout()), +                    true)}; + +  FoldingSetNodeID ID; +  AddNodeIDNode(ID, Opcode, VTs, Ops); +  ID.AddInteger(FrameIndex); +  ID.AddInteger(Size); +  ID.AddInteger(Offset); +  void *IP = nullptr; +  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) +    return SDValue(E, 0); + +  LifetimeSDNode *N = newSDNode<LifetimeSDNode>( +      Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, Size, Offset); +  createOperands(N, Ops); +  CSEMap.InsertNode(N, IP); +  InsertNode(N); +  SDValue V(N, 0); +  NewSDValueDbgMsg(V, "Creating new node: ", this); +  return V; +} +  /// InferPointerInfo - If the specified ptr/offset is a frame index, infer a  /// MachinePointerInfo record from it.  
This is particularly useful because the  /// code generator has many cases where it doesn't bother passing in a @@ -6875,7 +6970,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,    SDValue Ops[] = { Chain, Ptr, Mask, PassThru };    FoldingSetNodeID ID;    AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); -  ID.AddInteger(VT.getRawBits()); +  ID.AddInteger(MemVT.getRawBits());    ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>(        dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO));    ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); @@ -6901,12 +6996,11 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,                                       bool IsTruncating, bool IsCompressing) {    assert(Chain.getValueType() == MVT::Other &&          "Invalid chain type"); -  EVT VT = Val.getValueType();    SDVTList VTs = getVTList(MVT::Other);    SDValue Ops[] = { Chain, Val, Ptr, Mask };    FoldingSetNodeID ID;    AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); -  ID.AddInteger(VT.getRawBits()); +  ID.AddInteger(MemVT.getRawBits());    ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(        dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO));    ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); @@ -7057,6 +7151,31 @@ SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) {    return SDValue();  } +// TODO: Use fast-math-flags to enable more simplifications. 
+SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y) { +  ConstantFPSDNode *YC = isConstOrConstSplatFP(Y, /* AllowUndefs */ true); +  if (!YC) +    return SDValue(); + +  // X + -0.0 --> X +  if (Opcode == ISD::FADD) +    if (YC->getValueAPF().isNegZero()) +      return X; + +  // X - +0.0 --> X +  if (Opcode == ISD::FSUB) +    if (YC->getValueAPF().isPosZero()) +      return X; + +  // X * 1.0 --> X +  // X / 1.0 --> X +  if (Opcode == ISD::FMUL || Opcode == ISD::FDIV) +    if (YC->getValueAPF().isExactlyValue(1.0)) +      return X; + +  return SDValue(); +} +  SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,                                 SDValue Ptr, SDValue SV, unsigned Align) {    SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) }; @@ -7098,8 +7217,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,        return V;      break;    case ISD::CONCAT_VECTORS: -    // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. 
-    if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this)) +    if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))        return V;      break;    case ISD::SELECT_CC: @@ -7629,56 +7747,50 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,  SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {    unsigned OrigOpc = Node->getOpcode();    unsigned NewOpc; -  bool IsUnary = false; -  bool IsTernary = false;    switch (OrigOpc) {    default:      llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); -  case ISD::STRICT_FADD: NewOpc = ISD::FADD; break; -  case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break; -  case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break; -  case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break; -  case ISD::STRICT_FREM: NewOpc = ISD::FREM; break; -  case ISD::STRICT_FMA: NewOpc = ISD::FMA; IsTernary = true; break; -  case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break; -  case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break; -  case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break; -  case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; IsUnary = true; break; -  case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; IsUnary = true; break; -  case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; IsUnary = true; break; -  case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; IsUnary = true; break; -  case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; IsUnary = true; break; -  case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; IsUnary = true; break; -  case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; IsUnary = true; break; -  case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; IsUnary = true; break; -  case ISD::STRICT_FNEARBYINT: -    NewOpc = ISD::FNEARBYINT; -    IsUnary = true; -    break; -  case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break; -  case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break; -  case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; IsUnary = true; break; -  case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; IsUnary = true; break; -  case ISD::STRICT_FROUND: 
NewOpc = ISD::FROUND; IsUnary = true; break; -  case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; IsUnary = true; break; -  } +  case ISD::STRICT_FADD:       NewOpc = ISD::FADD;       break; +  case ISD::STRICT_FSUB:       NewOpc = ISD::FSUB;       break; +  case ISD::STRICT_FMUL:       NewOpc = ISD::FMUL;       break; +  case ISD::STRICT_FDIV:       NewOpc = ISD::FDIV;       break; +  case ISD::STRICT_FREM:       NewOpc = ISD::FREM;       break; +  case ISD::STRICT_FMA:        NewOpc = ISD::FMA;        break; +  case ISD::STRICT_FSQRT:      NewOpc = ISD::FSQRT;      break; +  case ISD::STRICT_FPOW:       NewOpc = ISD::FPOW;       break; +  case ISD::STRICT_FPOWI:      NewOpc = ISD::FPOWI;      break; +  case ISD::STRICT_FSIN:       NewOpc = ISD::FSIN;       break; +  case ISD::STRICT_FCOS:       NewOpc = ISD::FCOS;       break; +  case ISD::STRICT_FEXP:       NewOpc = ISD::FEXP;       break; +  case ISD::STRICT_FEXP2:      NewOpc = ISD::FEXP2;      break; +  case ISD::STRICT_FLOG:       NewOpc = ISD::FLOG;       break; +  case ISD::STRICT_FLOG10:     NewOpc = ISD::FLOG10;     break; +  case ISD::STRICT_FLOG2:      NewOpc = ISD::FLOG2;      break; +  case ISD::STRICT_FRINT:      NewOpc = ISD::FRINT;      break; +  case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break; +  case ISD::STRICT_FMAXNUM:    NewOpc = ISD::FMAXNUM;    break; +  case ISD::STRICT_FMINNUM:    NewOpc = ISD::FMINNUM;    break; +  case ISD::STRICT_FCEIL:      NewOpc = ISD::FCEIL;      break; +  case ISD::STRICT_FFLOOR:     NewOpc = ISD::FFLOOR;     break; +  case ISD::STRICT_FROUND:     NewOpc = ISD::FROUND;     break; +  case ISD::STRICT_FTRUNC:     NewOpc = ISD::FTRUNC;     break; +  case ISD::STRICT_FP_ROUND:   NewOpc = ISD::FP_ROUND;   break; +  case ISD::STRICT_FP_EXTEND:  NewOpc = ISD::FP_EXTEND;  break; +  } + +  assert(Node->getNumValues() == 2 && "Unexpected number of results!");    // We're taking this node out of the chain, so we need to re-link things.    
SDValue InputChain = Node->getOperand(0);    SDValue OutputChain = SDValue(Node, 1);    ReplaceAllUsesOfValueWith(OutputChain, InputChain); -  SDVTList VTs = getVTList(Node->getOperand(1).getValueType()); -  SDNode *Res = nullptr; -  if (IsUnary) -    Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) }); -  else if (IsTernary) -    Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1), -                                           Node->getOperand(2), -                                           Node->getOperand(3)}); -  else -    Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1), -                                           Node->getOperand(2) }); +  SmallVector<SDValue, 3> Ops; +  for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) +    Ops.push_back(Node->getOperand(i)); + +  SDVTList VTs = getVTList(Node->getValueType(0)); +  SDNode *Res = MorphNodeTo(Node, NewOpc, VTs, Ops);    // MorphNodeTo can operate in two ways: if an existing node with the    // specified operands exists, it can just return it.  Otherwise, it @@ -7980,9 +8092,8 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {          // DIExpression, we need to mark the expression with a          // DW_OP_stack_value.          
auto *DIExpr = DV->getExpression(); -        DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset, -                                       DIExpression::NoDeref, -                                       DIExpression::WithStackValue); +        DIExpr = +            DIExpression::prepend(DIExpr, DIExpression::StackValue, Offset);          SDDbgValue *Clone =              getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(),                          DV->isIndirect(), DV->getDebugLoc(), DV->getOrder()); @@ -8288,19 +8399,17 @@ void SelectionDAG::updateDivergence(SDNode * N)    }  } - -void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) { +void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {    DenseMap<SDNode *, unsigned> Degree;    Order.reserve(AllNodes.size()); -  for (auto & N : allnodes()) { +  for (auto &N : allnodes()) {      unsigned NOps = N.getNumOperands();      Degree[&N] = NOps;      if (0 == NOps)        Order.push_back(&N);    } -  for (std::vector<SDNode *>::iterator I = Order.begin(); -  I!=Order.end();++I) { -    SDNode * N = *I; +  for (size_t I = 0; I != Order.size(); ++I) { +    SDNode *N = Order[I];      for (auto U : N->uses()) {        unsigned &UnsortedOps = Degree[U];        if (0 == --UnsortedOps) @@ -8310,9 +8419,8 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) {  }  #ifndef NDEBUG -void SelectionDAG::VerifyDAGDiverence() -{ -  std::vector<SDNode*> TopoOrder; +void SelectionDAG::VerifyDAGDiverence() { +  std::vector<SDNode *> TopoOrder;    CreateTopologicalOrder(TopoOrder);    const TargetLowering &TLI = getTargetLoweringInfo();    DenseMap<const SDNode *, bool> DivergenceMap; @@ -8338,7 +8446,6 @@ void SelectionDAG::VerifyDAGDiverence()  }  #endif -  /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving  /// uses of other values produced by From.getNode() alone.  The same value  /// may appear in both the From and To list.  
The Deleted vector is @@ -8584,14 +8691,24 @@ SDValue llvm::peekThroughOneUseBitcasts(SDValue V) {    return V;  } -bool llvm::isBitwiseNot(SDValue V) { +SDValue llvm::peekThroughExtractSubvectors(SDValue V) { +  while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) +    V = V.getOperand(0); +  return V; +} + +bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) {    if (V.getOpcode() != ISD::XOR)      return false; -  ConstantSDNode *C = isConstOrConstSplat(peekThroughBitcasts(V.getOperand(1))); -  return C && C->isAllOnesValue(); +  V = peekThroughBitcasts(V.getOperand(1)); +  unsigned NumBits = V.getScalarValueSizeInBits(); +  ConstantSDNode *C = +      isConstOrConstSplat(V, AllowUndefs, /*AllowTruncation*/ true); +  return C && (C->getAPIntValue().countTrailingOnes() >= NumBits);  } -ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) { +ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs, +                                          bool AllowTruncation) {    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))      return CN; @@ -8599,10 +8716,39 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {      BitVector UndefElements;      ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); -    // BuildVectors can truncate their operands. Ignore that case here. -    if (CN && (UndefElements.none() || AllowUndefs) && -        CN->getValueType(0) == N.getValueType().getScalarType()) -      return CN; +    // BuildVectors can truncate their operands. Ignore that case here unless +    // AllowTruncation is set. 
+    if (CN && (UndefElements.none() || AllowUndefs)) { +      EVT CVT = CN->getValueType(0); +      EVT NSVT = N.getValueType().getScalarType(); +      assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension"); +      if (AllowTruncation || (CVT == NSVT)) +        return CN; +    } +  } + +  return nullptr; +} + +ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts, +                                          bool AllowUndefs, +                                          bool AllowTruncation) { +  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) +    return CN; + +  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { +    BitVector UndefElements; +    ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, &UndefElements); + +    // BuildVectors can truncate their operands. Ignore that case here unless +    // AllowTruncation is set. +    if (CN && (UndefElements.none() || AllowUndefs)) { +      EVT CVT = CN->getValueType(0); +      EVT NSVT = N.getValueType().getScalarType(); +      assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension"); +      if (AllowTruncation || (CVT == NSVT)) +        return CN; +    }    }    return nullptr; @@ -8622,9 +8768,26 @@ ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {    return nullptr;  } -bool llvm::isNullOrNullSplat(SDValue N) { +ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, +                                              const APInt &DemandedElts, +                                              bool AllowUndefs) { +  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) +    return CN; + +  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { +    BitVector UndefElements; +    ConstantFPSDNode *CN = +        BV->getConstantFPSplatNode(DemandedElts, &UndefElements); +    if (CN && (UndefElements.none() || AllowUndefs)) +      return CN; +  } + +  return nullptr; +} + +bool llvm::isNullOrNullSplat(SDValue N, 
bool AllowUndefs) {    // TODO: may want to use peekThroughBitcast() here. -  ConstantSDNode *C = isConstOrConstSplat(N); +  ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);    return C && C->isNullValue();  } @@ -8773,17 +8936,12 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {  /// isOperand - Return true if this node is an operand of N.  bool SDValue::isOperandOf(const SDNode *N) const { -  for (const SDValue &Op : N->op_values()) -    if (*this == Op) -      return true; -  return false; +  return any_of(N->op_values(), [this](SDValue Op) { return *this == Op; });  }  bool SDNode::isOperandOf(const SDNode *N) const { -  for (const SDValue &Op : N->op_values()) -    if (this == Op.getNode()) -      return true; -  return false; +  return any_of(N->op_values(), +                [this](SDValue Op) { return this == Op.getNode(); });  }  /// reachesChainWithoutSideEffects - Return true if this operand (which must @@ -8973,6 +9131,56 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {    return getBuildVector(VecVT, dl, Scalars);  } +std::pair<SDValue, SDValue> SelectionDAG::UnrollVectorOverflowOp( +    SDNode *N, unsigned ResNE) { +  unsigned Opcode = N->getOpcode(); +  assert((Opcode == ISD::UADDO || Opcode == ISD::SADDO || +          Opcode == ISD::USUBO || Opcode == ISD::SSUBO || +          Opcode == ISD::UMULO || Opcode == ISD::SMULO) && +         "Expected an overflow opcode"); + +  EVT ResVT = N->getValueType(0); +  EVT OvVT = N->getValueType(1); +  EVT ResEltVT = ResVT.getVectorElementType(); +  EVT OvEltVT = OvVT.getVectorElementType(); +  SDLoc dl(N); + +  // If ResNE is 0, fully unroll the vector op. 
+  unsigned NE = ResVT.getVectorNumElements(); +  if (ResNE == 0) +    ResNE = NE; +  else if (NE > ResNE) +    NE = ResNE; + +  SmallVector<SDValue, 8> LHSScalars; +  SmallVector<SDValue, 8> RHSScalars; +  ExtractVectorElements(N->getOperand(0), LHSScalars, 0, NE); +  ExtractVectorElements(N->getOperand(1), RHSScalars, 0, NE); + +  EVT SVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), ResEltVT); +  SDVTList VTs = getVTList(ResEltVT, SVT); +  SmallVector<SDValue, 8> ResScalars; +  SmallVector<SDValue, 8> OvScalars; +  for (unsigned i = 0; i < NE; ++i) { +    SDValue Res = getNode(Opcode, dl, VTs, LHSScalars[i], RHSScalars[i]); +    SDValue Ov = +        getSelect(dl, OvEltVT, Res.getValue(1), +                  getBoolConstant(true, dl, OvEltVT, ResVT), +                  getConstant(0, dl, OvEltVT)); + +    ResScalars.push_back(Res); +    OvScalars.push_back(Ov); +  } + +  ResScalars.append(ResNE - NE, getUNDEF(ResEltVT)); +  OvScalars.append(ResNE - NE, getUNDEF(OvEltVT)); + +  EVT NewResVT = EVT::getVectorVT(*getContext(), ResEltVT, ResNE); +  EVT NewOvVT = EVT::getVectorVT(*getContext(), OvEltVT, ResNE); +  return std::make_pair(getBuildVector(NewResVT, dl, ResScalars), +                        getBuildVector(NewOvVT, dl, OvScalars)); +} +  bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,                                                    LoadSDNode *Base,                                                    unsigned Bytes, @@ -9014,7 +9222,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {    // If this is a direct reference to a stack slot, use information about the    // stack slot's alignment. 
-  int FrameIdx = 1 << 31; +  int FrameIdx = INT_MIN;    int64_t FrameOffset = 0;    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {      FrameIdx = FI->getIndex(); @@ -9025,7 +9233,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {      FrameOffset = Ptr.getConstantOperandVal(1);    } -  if (FrameIdx != (1 << 31)) { +  if (FrameIdx != INT_MIN) {      const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();      unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),                                      FrameOffset); @@ -9065,6 +9273,15 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,    return std::make_pair(Lo, Hi);  } +/// Widen the vector up to the next power of two using INSERT_SUBVECTOR. +SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) { +  EVT VT = N.getValueType(); +  EVT WideVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(), +                                NextPowerOf2(VT.getVectorNumElements())); +  return getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, getUNDEF(WideVT), N, +                 getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout()))); +} +  void SelectionDAG::ExtractVectorElements(SDValue Op,                                           SmallVectorImpl<SDValue> &Args,                                           unsigned Start, unsigned Count) { @@ -9158,13 +9375,20 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,    return true;  } -SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { +SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts, +                                         BitVector *UndefElements) const {    if (UndefElements) {      UndefElements->clear();      UndefElements->resize(getNumOperands());    } +  assert(getNumOperands() == DemandedElts.getBitWidth() && +         "Unexpected vector size"); +  if (!DemandedElts) +    return SDValue();    SDValue 
Splatted;    for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { +    if (!DemandedElts[i]) +      continue;      SDValue Op = getOperand(i);      if (Op.isUndef()) {        if (UndefElements) @@ -9177,20 +9401,40 @@ SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {    }    if (!Splatted) { -    assert(getOperand(0).isUndef() && +    unsigned FirstDemandedIdx = DemandedElts.countTrailingZeros(); +    assert(getOperand(FirstDemandedIdx).isUndef() &&             "Can only have a splat without a constant for all undefs."); -    return getOperand(0); +    return getOperand(FirstDemandedIdx);    }    return Splatted;  } +SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { +  APInt DemandedElts = APInt::getAllOnesValue(getNumOperands()); +  return getSplatValue(DemandedElts, UndefElements); +} + +ConstantSDNode * +BuildVectorSDNode::getConstantSplatNode(const APInt &DemandedElts, +                                        BitVector *UndefElements) const { +  return dyn_cast_or_null<ConstantSDNode>( +      getSplatValue(DemandedElts, UndefElements)); +} +  ConstantSDNode *  BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const {    return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements));  }  ConstantFPSDNode * +BuildVectorSDNode::getConstantFPSplatNode(const APInt &DemandedElts, +                                          BitVector *UndefElements) const { +  return dyn_cast_or_null<ConstantFPSDNode>( +      getSplatValue(DemandedElts, UndefElements)); +} + +ConstantFPSDNode *  BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {    return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements));  } @@ -9228,7 +9472,10 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {    for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)      /* search */; -  assert(i != e && "VECTOR_SHUFFLE node with all undef indices!"); +  // If all 
elements are undefined, this shuffle can be considered a splat +  // (although it should eventually get simplified away completely). +  if (i == e) +    return true;    // Make sure all remaining elements are either undef or the same as the first    // non-undef value. @@ -9266,8 +9513,7 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {  void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {    assert(!Node->OperandList && "Node already has operands"); -  assert(std::numeric_limits<decltype(SDNode::NumOperands)>::max() >= -             Vals.size() && +  assert(SDNode::getMaxNumOperands() >= Vals.size() &&           "too many operands to fit into SDNode");    SDUse *Ops = OperandRecycler.allocate(        ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator); @@ -9287,6 +9533,19 @@ void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {    checkForCycles(Node);  } +SDValue SelectionDAG::getTokenFactor(const SDLoc &DL, +                                     SmallVectorImpl<SDValue> &Vals) { +  size_t Limit = SDNode::getMaxNumOperands(); +  while (Vals.size() > Limit) { +    unsigned SliceIdx = Vals.size() - Limit; +    auto ExtractedTFs = ArrayRef<SDValue>(Vals).slice(SliceIdx, Limit); +    SDValue NewTF = getNode(ISD::TokenFactor, DL, MVT::Other, ExtractedTFs); +    Vals.erase(Vals.begin() + SliceIdx, Vals.end()); +    Vals.emplace_back(NewTF); +  } +  return getNode(ISD::TokenFactor, DL, MVT::Other, Vals); +} +  #ifndef NDEBUG  static void checkForCyclesHelper(const SDNode *N,                                   SmallPtrSetImpl<const SDNode*> &Visited, | 
