diff options
Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAG.cpp')
| -rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 844 | 
1 files changed, 658 insertions, 186 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 4c8b63d2f2397..48e03c6da68fb 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -32,7 +32,6 @@  #include "llvm/CodeGen/MachineFrameInfo.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h"  #include "llvm/CodeGen/RuntimeLibcalls.h"  #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"  #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -58,6 +57,7 @@  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MachineValueType.h"  #include "llvm/Support/ManagedStatic.h"  #include "llvm/Support/MathExtras.h"  #include "llvm/Support/Mutex.h" @@ -89,11 +89,16 @@ void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}  #define DEBUG_TYPE "selectiondag" +static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt", +       cl::Hidden, cl::init(true), +       cl::desc("Gang up loads and stores generated by inlining of memcpy")); + +static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max", +       cl::desc("Number limit for gluing ld/st of memcpy."), +       cl::Hidden, cl::init(0)); +  static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) { -  DEBUG( -    dbgs() << Msg; -    V.getNode()->dump(G); -  ); +  LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G););  }  //===----------------------------------------------------------------------===// @@ -263,6 +268,52 @@ bool ISD::allOperandsUndef(const SDNode *N) {    return true;  } +bool ISD::matchUnaryPredicate(SDValue Op, +                              std::function<bool(ConstantSDNode *)> Match) { +  if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) +    return Match(Cst); + +  if (ISD::BUILD_VECTOR != Op.getOpcode()) +    return false; + +  EVT SVT = Op.getValueType().getScalarType(); +  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { +    auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i)); +    if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst)) +      return false; +  } +  return true; +} + +bool ISD::matchBinaryPredicate( +    SDValue LHS, SDValue RHS, +    std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) { +  if (LHS.getValueType() != RHS.getValueType()) +    return false; + +  if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS)) +    if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS)) +      return Match(LHSCst, RHSCst); + +  if (ISD::BUILD_VECTOR != LHS.getOpcode() || +      ISD::BUILD_VECTOR != RHS.getOpcode()) +    return false; + +  EVT SVT = LHS.getValueType().getScalarType(); +  for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { +    auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i)); +    auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i)); +    if (!LHSCst || !RHSCst) +      return false; +    if (LHSCst->getValueType(0) != SVT || +        LHSCst->getValueType(0) != RHSCst->getValueType(0)) +      return false; +    if (!Match(LHSCst, RHSCst)) +      return false; +  } +  return true; +} +  ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {    switch (ExtType) {    case ISD::EXTLOAD: @@ -487,12 +538,41 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {      ID.AddInteger(ST->getPointerInfo().getAddrSpace());      break;    } +  case ISD::MLOAD: { +    const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N); +    ID.AddInteger(MLD->getMemoryVT().getRawBits()); +    ID.AddInteger(MLD->getRawSubclassData()); +    ID.AddInteger(MLD->getPointerInfo().getAddrSpace()); +    break; +  } +  case ISD::MSTORE: { +    const MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); +    ID.AddInteger(MST->getMemoryVT().getRawBits()); +    ID.AddInteger(MST->getRawSubclassData()); +    ID.AddInteger(MST->getPointerInfo().getAddrSpace()); +    break; +  } +  case ISD::MGATHER: { +    const MaskedGatherSDNode *MG = cast<MaskedGatherSDNode>(N); +    ID.AddInteger(MG->getMemoryVT().getRawBits()); +    ID.AddInteger(MG->getRawSubclassData()); +    ID.AddInteger(MG->getPointerInfo().getAddrSpace()); +    break; +  } +  case ISD::MSCATTER: { +    const MaskedScatterSDNode *MS = cast<MaskedScatterSDNode>(N); +    ID.AddInteger(MS->getMemoryVT().getRawBits()); +    ID.AddInteger(MS->getRawSubclassData()); +    ID.AddInteger(MS->getPointerInfo().getAddrSpace()); +    break; +  }    case ISD::ATOMIC_CMP_SWAP:    case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:    case ISD::ATOMIC_SWAP:    case ISD::ATOMIC_LOAD_ADD:    case ISD::ATOMIC_LOAD_SUB:    case ISD::ATOMIC_LOAD_AND: +  case ISD::ATOMIC_LOAD_CLR:    case ISD::ATOMIC_LOAD_OR:    case ISD::ATOMIC_LOAD_XOR:    case ISD::ATOMIC_LOAD_NAND: @@ -726,7 +806,7 @@ static void VerifySDNode(SDNode *N) {  }  #endif // NDEBUG -/// \brief Insert a newly allocated node into the DAG. +/// Insert a newly allocated node into the DAG.  ///  /// Handles insertion into the all nodes list and CSE map, as well as  /// verification and other common operations when a new node is allocated. @@ -903,13 +983,16 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)  void SelectionDAG::init(MachineFunction &NewMF,                          OptimizationRemarkEmitter &NewORE, -                        Pass *PassPtr) { +                        Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, +                        DivergenceAnalysis * Divergence) {    MF = &NewMF;    SDAGISelPass = PassPtr;    ORE = &NewORE;    TLI = getSubtarget().getTargetLowering();    TSI = getSubtarget().getSelectionDAGInfo(); +  LibInfo = LibraryInfo;    Context = &MF->getFunction().getContext(); +  DA = Divergence;  }  SelectionDAG::~SelectionDAG() { @@ -1077,21 +1160,25 @@ SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {  }  SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) { -  EVT EltVT = VT.getScalarType(); -  SDValue TrueValue; -  switch (TLI->getBooleanContents(VT)) { -    case TargetLowering::ZeroOrOneBooleanContent: -    case TargetLowering::UndefinedBooleanContent: -      TrueValue = getConstant(1, DL, VT); -      break; -    case TargetLowering::ZeroOrNegativeOneBooleanContent: -      TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, -                              VT); -      break; -  } +  SDValue TrueValue = getBoolConstant(true, DL, VT, VT);    return getNode(ISD::XOR, DL, VT, Val, TrueValue);  } +SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT, +                                      EVT OpVT) { +  if (!V) +    return getConstant(0, DL, VT); + +  switch (TLI->getBooleanContents(OpVT)) { +  case TargetLowering::ZeroOrOneBooleanContent: +  case TargetLowering::UndefinedBooleanContent: +    return getConstant(1, DL, VT); +  case TargetLowering::ZeroOrNegativeOneBooleanContent: +    return getAllOnesConstant(DL, VT); +  } +  llvm_unreachable("Unexpected boolean content enum!"); +} +  SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT,                                    bool isT, bool isO) {    EVT EltVT = VT.getScalarType(); @@ -1184,7 +1271,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,        return SDValue(N, 0);    if (!N) { -    N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT); +    N = newSDNode<ConstantSDNode>(isT, isO, Elt, EltVT);      CSEMap.InsertNode(N, IP);      InsertNode(N);      NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this); @@ -1227,7 +1314,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,        return SDValue(N, 0);    if (!N) { -    N = newSDNode<ConstantFPSDNode>(isTarget, &V, DL.getDebugLoc(), EltVT); +    N = newSDNode<ConstantFPSDNode>(isTarget, &V, EltVT);      CSEMap.InsertNode(N, IP);      InsertNode(N);    } @@ -1503,33 +1590,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,    if (N1.isUndef())      commuteShuffle(N1, N2, MaskVec); -  // If shuffling a splat, try to blend the splat instead. We do this here so -  // that even when this arises during lowering we don't have to re-handle it. -  auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { -    BitVector UndefElements; -    SDValue Splat = BV->getSplatValue(&UndefElements); -    if (!Splat) -      return; +  if (TLI->hasVectorBlend()) { +    // If shuffling a splat, try to blend the splat instead. We do this here so +    // that even when this arises during lowering we don't have to re-handle it. +    auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { +      BitVector UndefElements; +      SDValue Splat = BV->getSplatValue(&UndefElements); +      if (!Splat) +        return; -    for (int i = 0; i < NElts; ++i) { -      if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts)) -        continue; +      for (int i = 0; i < NElts; ++i) { +        if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts)) +          continue; -      // If this input comes from undef, mark it as such. -      if (UndefElements[MaskVec[i] - Offset]) { -        MaskVec[i] = -1; -        continue; -      } +        // If this input comes from undef, mark it as such. +        if (UndefElements[MaskVec[i] - Offset]) { +          MaskVec[i] = -1; +          continue; +        } -      // If we can blend a non-undef lane, use that instead. -      if (!UndefElements[i]) -        MaskVec[i] = i + Offset; -    } -  }; -  if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) -    BlendSplat(N1BV, 0); -  if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) -    BlendSplat(N2BV, NElts); +        // If we can blend a non-undef lane, use that instead. +        if (!UndefElements[i]) +          MaskVec[i] = i + Offset; +      } +    }; +    if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) +      BlendSplat(N1BV, 0); +    if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) +      BlendSplat(N2BV, NElts); +  }    // Canonicalize all index into lhs, -> shuffle lhs, undef    // Canonicalize all index into rhs, -> shuffle rhs, undef @@ -1643,7 +1732,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,  }  SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { -  MVT VT = SV.getSimpleValueType(0); +  EVT VT = SV.getValueType(0);    SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end());    ShuffleVectorSDNode::commuteMask(MaskVec); @@ -1661,6 +1750,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {      return SDValue(E, 0);    auto *N = newSDNode<RegisterSDNode>(RegNo, VT); +  N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);    CSEMap.InsertNode(N, IP);    InsertNode(N);    return SDValue(N, 0); @@ -1870,19 +1960,15 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {  SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,                                  ISD::CondCode Cond, const SDLoc &dl) { +  EVT OpVT = N1.getValueType(); +    // These setcc operations always fold.    switch (Cond) {    default: break;    case ISD::SETFALSE: -  case ISD::SETFALSE2: return getConstant(0, dl, VT); +  case ISD::SETFALSE2: return getBoolConstant(false, dl, VT, OpVT);    case ISD::SETTRUE: -  case ISD::SETTRUE2: { -    TargetLowering::BooleanContent Cnt = -        TLI->getBooleanContents(N1->getValueType(0)); -    return getConstant( -        Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl, -        VT); -  } +  case ISD::SETTRUE2: return getBoolConstant(true, dl, VT, OpVT);    case ISD::SETOEQ:    case ISD::SETOGT: @@ -1905,16 +1991,16 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,        switch (Cond) {        default: llvm_unreachable("Unknown integer setcc!"); -      case ISD::SETEQ:  return getConstant(C1 == C2, dl, VT); -      case ISD::SETNE:  return getConstant(C1 != C2, dl, VT); -      case ISD::SETULT: return getConstant(C1.ult(C2), dl, VT); -      case ISD::SETUGT: return getConstant(C1.ugt(C2), dl, VT); -      case ISD::SETULE: return getConstant(C1.ule(C2), dl, VT); -      case ISD::SETUGE: return getConstant(C1.uge(C2), dl, VT); -      case ISD::SETLT:  return getConstant(C1.slt(C2), dl, VT); -      case ISD::SETGT:  return getConstant(C1.sgt(C2), dl, VT); -      case ISD::SETLE:  return getConstant(C1.sle(C2), dl, VT); -      case ISD::SETGE:  return getConstant(C1.sge(C2), dl, VT); +      case ISD::SETEQ:  return getBoolConstant(C1 == C2, dl, VT, OpVT); +      case ISD::SETNE:  return getBoolConstant(C1 != C2, dl, VT, OpVT); +      case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT); +      case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT); +      case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT); +      case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT); +      case ISD::SETLT:  return getBoolConstant(C1.slt(C2), dl, VT, OpVT); +      case ISD::SETGT:  return getBoolConstant(C1.sgt(C2), dl, VT, OpVT); +      case ISD::SETLE:  return getBoolConstant(C1.sle(C2), dl, VT, OpVT); +      case ISD::SETGE:  return getBoolConstant(C1.sge(C2), dl, VT, OpVT);        }      }    } @@ -1926,41 +2012,54 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,        case ISD::SETEQ:  if (R==APFloat::cmpUnordered)                            return getUNDEF(VT);                          LLVM_FALLTHROUGH; -      case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT); +      case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT, +                                               OpVT);        case ISD::SETNE:  if (R==APFloat::cmpUnordered)                            return getUNDEF(VT);                          LLVM_FALLTHROUGH; -      case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan || -                                           R==APFloat::cmpLessThan, dl, VT); +      case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan || +                                               R==APFloat::cmpLessThan, dl, VT, +                                               OpVT);        case ISD::SETLT:  if (R==APFloat::cmpUnordered)                            return getUNDEF(VT);                          LLVM_FALLTHROUGH; -      case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT); +      case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT, +                                               OpVT);        case ISD::SETGT:  if (R==APFloat::cmpUnordered)                            return getUNDEF(VT);                          LLVM_FALLTHROUGH; -      case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT); +      case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl, +                                               VT, OpVT);        case ISD::SETLE:  if (R==APFloat::cmpUnordered)                            return getUNDEF(VT);                          LLVM_FALLTHROUGH; -      case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan || -                                           R==APFloat::cmpEqual, dl, VT); +      case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan || +                                               R==APFloat::cmpEqual, dl, VT, +                                               OpVT);        case ISD::SETGE:  if (R==APFloat::cmpUnordered)                            return getUNDEF(VT);                          LLVM_FALLTHROUGH; -      case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan || -                                           R==APFloat::cmpEqual, dl, VT); -      case ISD::SETO:   return getConstant(R!=APFloat::cmpUnordered, dl, VT); -      case ISD::SETUO:  return getConstant(R==APFloat::cmpUnordered, dl, VT); -      case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered || -                                           R==APFloat::cmpEqual, dl, VT); -      case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, dl, VT); -      case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered || -                                           R==APFloat::cmpLessThan, dl, VT); -      case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan || -                                           R==APFloat::cmpUnordered, dl, VT); -      case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, dl, VT); -      case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, dl, VT); +      case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan || +                                           R==APFloat::cmpEqual, dl, VT, OpVT); +      case ISD::SETO:   return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT, +                                               OpVT); +      case ISD::SETUO:  return getBoolConstant(R==APFloat::cmpUnordered, dl, VT, +                                               OpVT); +      case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered || +                                               R==APFloat::cmpEqual, dl, VT, +                                               OpVT); +      case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT, +                                               OpVT); +      case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered || +                                               R==APFloat::cmpLessThan, dl, VT, +                                               OpVT); +      case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan || +                                               R==APFloat::cmpUnordered, dl, VT, +                                               OpVT); +      case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl, +                                               VT, OpVT); +      case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT, +                                               OpVT);        }      } else {        // Ensure that the constant occurs on the RHS. @@ -2297,10 +2396,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,        break;      } -    // Support big-endian targets when it becomes useful.      bool IsLE = getDataLayout().isLittleEndian(); -    if (!IsLE) -      break;      // Bitcast 'small element' vector to 'large element' scalar/vector.      if ((BitWidth % SubBitWidth) == 0) { @@ -2319,8 +2415,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,        for (unsigned i = 0; i != SubScale; ++i) {          computeKnownBits(N0, Known2, SubDemandedElts.shl(i),                           Depth + 1); -        Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * i); -        Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * i); +        unsigned Shifts = IsLE ? i : SubScale - 1 - i; +        Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts); +        Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * Shifts);        }      } @@ -2342,7 +2439,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,        Known.Zero.setAllBits(); Known.One.setAllBits();        for (unsigned i = 0; i != NumElts; ++i)          if (DemandedElts[i]) { -          unsigned Offset = (i % SubScale) * BitWidth; +          unsigned Shifts = IsLE ? i : NumElts - 1 - i; +          unsigned Offset = (Shifts % SubScale) * BitWidth;            Known.One &= Known2.One.lshr(Offset).trunc(BitWidth);            Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth);            // If we don't know any bits, early out. @@ -2441,6 +2539,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,      break;    case ISD::SMULO:    case ISD::UMULO: +  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:      if (Op.getResNo() != 1)        break;      // The boolean result conforms to getBooleanContents. @@ -2904,11 +3003,38 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,    }    case ISD::SMIN:    case ISD::SMAX: { -    computeKnownBits(Op.getOperand(0), Known, DemandedElts, -                     Depth + 1); -    // If we don't know any bits, early out. -    if (Known.isUnknown()) -      break; +    // If we have a clamp pattern, we know that the number of sign bits will be +    // the minimum of the clamp min/max range. +    bool IsMax = (Opcode == ISD::SMAX); +    ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; +    if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts))) +      if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) +        CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1), +                                              DemandedElts); +    if (CstLow && CstHigh) { +      if (!IsMax) +        std::swap(CstLow, CstHigh); + +      const APInt &ValueLow = CstLow->getAPIntValue(); +      const APInt &ValueHigh = CstHigh->getAPIntValue(); +      if (ValueLow.sle(ValueHigh)) { +        unsigned LowSignBits = ValueLow.getNumSignBits(); +        unsigned HighSignBits = ValueHigh.getNumSignBits(); +        unsigned MinSignBits = std::min(LowSignBits, HighSignBits); +        if (ValueLow.isNegative() && ValueHigh.isNegative()) { +          Known.One.setHighBits(MinSignBits); +          break; +        } +        if (ValueLow.isNonNegative() && ValueHigh.isNonNegative()) { +          Known.Zero.setHighBits(MinSignBits); +          break; +        } +      } +    } + +    // Fallback - just get the shared known bits of the operands. +    computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); +    if (Known.isUnknown()) break; // Early-out      computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);      Known.Zero &= Known2.Zero;      Known.One &= Known2.One; @@ -3038,7 +3164,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,    if (!DemandedElts)      return 1;  // No demanded elts, better to assume we don't know anything. -  switch (Op.getOpcode()) { +  unsigned Opcode = Op.getOpcode(); +  switch (Opcode) {    default: break;    case ISD::AssertSext:      Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); @@ -3189,7 +3316,32 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,      return std::min(Tmp, Tmp2);    case ISD::SMIN: -  case ISD::SMAX: +  case ISD::SMAX: { +    // If we have a clamp pattern, we know that the number of sign bits will be +    // the minimum of the clamp min/max range. +    bool IsMax = (Opcode == ISD::SMAX); +    ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; +    if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts))) +      if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) +        CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1), +                                              DemandedElts); +    if (CstLow && CstHigh) { +      if (!IsMax) +        std::swap(CstLow, CstHigh); +      if (CstLow->getAPIntValue().sle(CstHigh->getAPIntValue())) { +        Tmp = CstLow->getAPIntValue().getNumSignBits(); +        Tmp2 = CstHigh->getAPIntValue().getNumSignBits(); +        return std::min(Tmp, Tmp2); +      } +    } + +    // Fallback - just get the minimum number of sign bits of the operands. +    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); +    if (Tmp == 1) +      return 1;  // Early out. +    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); +    return std::min(Tmp, Tmp2); +  }    case ISD::UMIN:    case ISD::UMAX:      Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); @@ -3225,7 +3377,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,        unsigned RotAmt = C->getAPIntValue().urem(VTBits);        // Handle rotate right by N like a rotate left by 32-N. -      if (Op.getOpcode() == ISD::ROTR) +      if (Opcode == ISD::ROTR)          RotAmt = (VTBits - RotAmt) % VTBits;        // If we aren't rotating out all of the known-in sign bits, return the @@ -3423,10 +3575,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,    }    // Allow the target to implement this method for its nodes. -  if (Op.getOpcode() >= ISD::BUILTIN_OP_END || -      Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || -      Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || -      Op.getOpcode() == ISD::INTRINSIC_VOID) { +  if (Opcode >= ISD::BUILTIN_OP_END || +      Opcode == ISD::INTRINSIC_WO_CHAIN || +      Opcode == ISD::INTRINSIC_W_CHAIN || +      Opcode == ISD::INTRINSIC_VOID) {      unsigned NumBits =          TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth);      if (NumBits > 1) @@ -3487,17 +3639,33 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {    return false;  } -bool SelectionDAG::isKnownNeverZero(SDValue Op) const { +bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const { +  assert(Op.getValueType().isFloatingPoint() && +         "Floating point type expected"); +    // If the value is a constant, we can obviously see if it is a zero or not. +  // TODO: Add BuildVector support.    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))      return !C->isZero(); +  return false; +} + +bool SelectionDAG::isKnownNeverZero(SDValue Op) const { +  assert(!Op.getValueType().isFloatingPoint() && +         "Floating point types unsupported - use isKnownNeverZeroFloat"); + +  // If the value is a constant, we can obviously see if it is a zero or not. +  if (ISD::matchUnaryPredicate( +          Op, [](ConstantSDNode *C) { return !C->isNullValue(); })) +    return true;    // TODO: Recognize more cases here.    switch (Op.getOpcode()) {    default: break;    case ISD::OR: -    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) -      return !C->isNullValue(); +    if (isKnownNeverZero(Op.getOperand(1)) || +        isKnownNeverZero(Op.getOperand(0))) +      return true;      break;    } @@ -3517,6 +3685,8 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {    return false;  } +// FIXME: unify with llvm::haveNoCommonBitsSet. +// FIXME: could also handle masked merge pattern (X & ~M) op (Y & M)  bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {    assert(A.getValueType() == B.getValueType() &&           "Values must have the same type"); @@ -3841,11 +4011,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      else if (OpOpcode == ISD::UNDEF)        return getUNDEF(VT); -    // (ext (trunx x)) -> x +    // (ext (trunc x)) -> x      if (OpOpcode == ISD::TRUNCATE) {        SDValue OpOp = Operand.getOperand(0); -      if (OpOp.getValueType() == VT) +      if (OpOp.getValueType() == VT) { +        transferDbgValues(Operand, OpOp);          return OpOp; +      }      }      break;    case ISD::TRUNCATE: @@ -3921,10 +4093,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      break;    case ISD::FNEG:      // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 -    if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) -      // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags? +    if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) && +        OpOpcode == ISD::FSUB)        return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), -                     Operand.getOperand(0), Operand.getNode()->getFlags()); +                     Operand.getOperand(0), Flags);      if (OpOpcode == ISD::FNEG)  // --X -> X        return Operand.getOperand(0);      break; @@ -4314,24 +4486,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,    case ISD::FMUL:    case ISD::FDIV:    case ISD::FREM: -    if (getTarget().Options.UnsafeFPMath) { -      if (Opcode == ISD::FADD) { -        // x+0 --> x -        if (N2CFP && N2CFP->getValueAPF().isZero()) -          return N1; -      } else if (Opcode == ISD::FSUB) { -        // x-0 --> x -        if (N2CFP && N2CFP->getValueAPF().isZero()) -          return N1; -      } else if (Opcode == ISD::FMUL) { -        // x*0 --> 0 -        if (N2CFP && N2CFP->isZero()) -          return N2; -        // x*1 --> x -        if (N2CFP && N2CFP->isExactlyValue(1.0)) -          return N1; -      } -    }      assert(VT.isFloatingPoint() && "This operator only applies to FP types!");      assert(N1.getValueType() == N2.getValueType() &&             N1.getValueType() == VT && "Binary operator types must match!"); @@ -4448,12 +4602,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      break;    }    case ISD::EXTRACT_VECTOR_ELT: +    assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() && +           "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \ +             element type of the vector."); +      // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.      if (N1.isUndef())        return getUNDEF(VT);      // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF -    if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements()) +    if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements()))        return getUNDEF(VT);      // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is @@ -4635,6 +4793,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      }    } +  // Any FP binop with an undef operand is folded to NaN. This matches the +  // behavior of the IR optimizer. +  switch (Opcode) { +  case ISD::FADD: +  case ISD::FSUB: +  case ISD::FMUL: +  case ISD::FDIV: +  case ISD::FREM: +    if (N1.isUndef() || N2.isUndef()) +      return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT); +  } +    // Canonicalize an UNDEF to the RHS, even over a constant.    if (N1.isUndef()) {      if (TLI->isCommutativeBinOp(Opcode)) { @@ -4644,22 +4814,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,        case ISD::FP_ROUND_INREG:        case ISD::SIGN_EXTEND_INREG:        case ISD::SUB: -      case ISD::FSUB: -      case ISD::FDIV: -      case ISD::FREM: -      case ISD::SRA: -        return N1;     // fold op(undef, arg2) -> undef +        return getUNDEF(VT);     // fold op(undef, arg2) -> undef        case ISD::UDIV:        case ISD::SDIV:        case ISD::UREM:        case ISD::SREM: +      case ISD::SRA:        case ISD::SRL:        case ISD::SHL: -        if (!VT.isVector()) -          return getConstant(0, DL, VT);    // fold op(undef, arg2) -> 0 -        // For vectors, we can't easily build an all zero vector, just return -        // the LHS. -        return N2; +        return getConstant(0, DL, VT);    // fold op(undef, arg2) -> 0        }      }    } @@ -4681,32 +4844,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      case ISD::SDIV:      case ISD::UREM:      case ISD::SREM: -      return N2;       // fold op(arg1, undef) -> undef -    case ISD::FADD: -    case ISD::FSUB: -    case ISD::FMUL: -    case ISD::FDIV: -    case ISD::FREM: -      if (getTarget().Options.UnsafeFPMath) -        return N2; -      break; -    case ISD::MUL: -    case ISD::AND: +    case ISD::SRA:      case ISD::SRL:      case ISD::SHL: -      if (!VT.isVector()) -        return getConstant(0, DL, VT);  // fold op(arg1, undef) -> 0 -      // For vectors, we can't easily build an all zero vector, just return -      // the LHS. -      return N1; +      return getUNDEF(VT);       // fold op(arg1, undef) -> undef +    case ISD::MUL: +    case ISD::AND: +      return getConstant(0, DL, VT);  // fold op(arg1, undef) -> 0      case ISD::OR: -      if (!VT.isVector()) -        return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT); -      // For vectors, we can't easily build an all one vector, just return -      // the LHS. -      return N1; -    case ISD::SRA: -      return N1; +      return getAllOnesConstant(DL, VT);      }    } @@ -4739,10 +4885,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,  }  SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, -                              SDValue N1, SDValue N2, SDValue N3) { +                              SDValue N1, SDValue N2, SDValue N3, +                              const SDNodeFlags Flags) {    // Perform various simplifications.    switch (Opcode) {    case ISD::FMA: { +    assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); +    assert(N1.getValueType() == VT && N2.getValueType() == VT && +           N3.getValueType() == VT && "FMA types must match!");      ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);      ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);      ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3); @@ -4833,10 +4983,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      FoldingSetNodeID ID;      AddNodeIDNode(ID, Opcode, VTs, Ops);      void *IP = nullptr; -    if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) +    if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { +      E->intersectFlagsWith(Flags);        return SDValue(E, 0); +    }      N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); +    N->setFlags(Flags);      createOperands(N, Ops);      CSEMap.InsertNode(N, IP);    } else { @@ -5107,6 +5260,31 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {    return MF.getFunction().optForSize();  } +static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, +                          SmallVector<SDValue, 32> &OutChains, unsigned From, +                          unsigned To, SmallVector<SDValue, 16> &OutLoadChains, +                          SmallVector<SDValue, 16> &OutStoreChains) { +  assert(OutLoadChains.size() && "Missing loads in memcpy inlining"); +  assert(OutStoreChains.size() && "Missing stores in memcpy inlining"); +  SmallVector<SDValue, 16> GluedLoadChains; +  for (unsigned i = From; i < To; ++i) { +    OutChains.push_back(OutLoadChains[i]); +    GluedLoadChains.push_back(OutLoadChains[i]); +  } + +  // Chain for all loads. +  SDValue LoadToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, +                                  GluedLoadChains); + +  for (unsigned i = From; i < To; ++i) { +    StoreSDNode *ST = dyn_cast<StoreSDNode>(OutStoreChains[i]); +    SDValue NewStore = DAG.getTruncStore(LoadToken, dl, ST->getValue(), +                                  ST->getBasePtr(), ST->getMemoryVT(), +                                  ST->getMemOperand()); +    OutChains.push_back(NewStore); +  } +} +  static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,                                         SDValue Chain, SDValue Dst, SDValue Src,                                         uint64_t Size, unsigned Align, @@ -5171,7 +5349,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,    MachineMemOperand::Flags MMOFlags =        isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; -  SmallVector<SDValue, 8> OutChains; +  SmallVector<SDValue, 16> OutLoadChains; +  SmallVector<SDValue, 16> OutStoreChains; +  SmallVector<SDValue, 32> OutChains;    unsigned NumMemOps = MemOps.size();    uint64_t SrcOff = 0, DstOff = 0;    for (unsigned i = 0; i != NumMemOps; ++i) { @@ -5205,11 +5385,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,          SubSlice.Length = VTSize;        }        Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice); -      if (Value.getNode()) +      if (Value.getNode()) {          Store = DAG.getStore(Chain, dl, Value,                               DAG.getMemBasePlusOffset(Dst, DstOff, dl),                               DstPtrInfo.getWithOffset(DstOff), Align,                               MMOFlags); +        OutChains.push_back(Store); +      }      }      if (!Store.getNode()) { @@ -5231,17 +5413,61 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,                               DAG.getMemBasePlusOffset(Src, SrcOff, dl),                               SrcPtrInfo.getWithOffset(SrcOff), VT,                               MinAlign(SrcAlign, SrcOff), SrcMMOFlags); -      OutChains.push_back(Value.getValue(1)); +      OutLoadChains.push_back(Value.getValue(1)); +        Store = DAG.getTruncStore(            Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),            DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags); +      OutStoreChains.push_back(Store);      } -    OutChains.push_back(Store);      SrcOff += VTSize;      DstOff += VTSize;      Size -= VTSize;    } +  unsigned GluedLdStLimit = MaxLdStGlue == 0 ? +                                TLI.getMaxGluedStoresPerMemcpy() : MaxLdStGlue; +  unsigned NumLdStInMemcpy = OutStoreChains.size(); + +  if (NumLdStInMemcpy) { +    // It may be that memcpy might be converted to memset if it's memcpy +    // of constants. In such a case, we won't have loads and stores, but +    // just stores. In the absence of loads, there is nothing to gang up. +    if ((GluedLdStLimit <= 1) || !EnableMemCpyDAGOpt) { +      // If target does not care, just leave as it. +      for (unsigned i = 0; i < NumLdStInMemcpy; ++i) { +        OutChains.push_back(OutLoadChains[i]); +        OutChains.push_back(OutStoreChains[i]); +      } +    } else { +      // Ld/St less than/equal limit set by target. +      if (NumLdStInMemcpy <= GluedLdStLimit) { +          chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0, +                                        NumLdStInMemcpy, OutLoadChains, +                                        OutStoreChains); +      } else { +        unsigned NumberLdChain =  NumLdStInMemcpy / GluedLdStLimit; +        unsigned RemainingLdStInMemcpy = NumLdStInMemcpy % GluedLdStLimit; +        unsigned GlueIter = 0; + +        for (unsigned cnt = 0; cnt < NumberLdChain; ++cnt) { +          unsigned IndexFrom = NumLdStInMemcpy - GlueIter - GluedLdStLimit; +          unsigned IndexTo   = NumLdStInMemcpy - GlueIter; + +          chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, IndexFrom, IndexTo, +                                       OutLoadChains, OutStoreChains); +          GlueIter += GluedLdStLimit; +        } + +        // Residual ld/st. +        if (RemainingLdStInMemcpy) { +          chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0, +                                        RemainingLdStInMemcpy, OutLoadChains, +                                        OutStoreChains); +        } +      } +    } +  }    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);  } @@ -5334,7 +5560,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);  } -/// \brief Lower the call to 'memset' intrinsic function into a series of store +/// Lower the call to 'memset' intrinsic function into a series of store  /// operations.  ///  /// \param DAG Selection DAG where lowered code is placed. @@ -5518,6 +5744,47 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,    return CallResult.second;  } +SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl, +                                      SDValue Dst, unsigned DstAlign, +                                      SDValue Src, unsigned SrcAlign, +                                      SDValue Size, Type *SizeTy, +                                      unsigned ElemSz, bool isTailCall, +                                      MachinePointerInfo DstPtrInfo, +                                      MachinePointerInfo SrcPtrInfo) { +  // Emit a library call. +  TargetLowering::ArgListTy Args; +  TargetLowering::ArgListEntry Entry; +  Entry.Ty = getDataLayout().getIntPtrType(*getContext()); +  Entry.Node = Dst; +  Args.push_back(Entry); + +  Entry.Node = Src; +  Args.push_back(Entry); + +  Entry.Ty = SizeTy; +  Entry.Node = Size; +  Args.push_back(Entry); + +  RTLIB::Libcall LibraryCall = +      RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElemSz); +  if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) +    report_fatal_error("Unsupported element size"); + +  TargetLowering::CallLoweringInfo CLI(*this); +  CLI.setDebugLoc(dl) +      .setChain(Chain) +      .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), +                    Type::getVoidTy(*getContext()), +                    getExternalSymbol(TLI->getLibcallName(LibraryCall), +                                      TLI->getPointerTy(getDataLayout())), +                    std::move(Args)) +      .setDiscardResult() +      .setTailCall(isTailCall); + +  std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); +  return CallResult.second; +} +  SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,                                   SDValue Src, SDValue Size, unsigned Align,                                   bool isVol, bool isTailCall, @@ -5579,6 +5846,47 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,    return CallResult.second;  } +SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, +                                       SDValue Dst, unsigned DstAlign, +                                       SDValue Src, unsigned SrcAlign, +                                       SDValue Size, Type *SizeTy, +                                       unsigned ElemSz, bool isTailCall, +                                       MachinePointerInfo DstPtrInfo, +                                       MachinePointerInfo SrcPtrInfo) { +  // Emit a library call. +  TargetLowering::ArgListTy Args; +  TargetLowering::ArgListEntry Entry; +  Entry.Ty = getDataLayout().getIntPtrType(*getContext()); +  Entry.Node = Dst; +  Args.push_back(Entry); + +  Entry.Node = Src; +  Args.push_back(Entry); + +  Entry.Ty = SizeTy; +  Entry.Node = Size; +  Args.push_back(Entry); + +  RTLIB::Libcall LibraryCall = +      RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElemSz); +  if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) +    report_fatal_error("Unsupported element size"); + +  TargetLowering::CallLoweringInfo CLI(*this); +  CLI.setDebugLoc(dl) +      .setChain(Chain) +      .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), +                    Type::getVoidTy(*getContext()), +                    getExternalSymbol(TLI->getLibcallName(LibraryCall), +                                      TLI->getPointerTy(getDataLayout())), +                    std::move(Args)) +      .setDiscardResult() +      .setTailCall(isTailCall); + +  std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); +  return CallResult.second; +} +  SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,                                  SDValue Src, SDValue Size, unsigned Align,                                  bool isVol, bool isTailCall, @@ -5641,6 +5949,46 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,    return CallResult.second;  } +SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl, +                                      SDValue Dst, unsigned DstAlign, +                                      SDValue Value, SDValue Size, Type *SizeTy, +                                      unsigned ElemSz, bool isTailCall, +                                      MachinePointerInfo DstPtrInfo) { +  // Emit a library call. +  TargetLowering::ArgListTy Args; +  TargetLowering::ArgListEntry Entry; +  Entry.Ty = getDataLayout().getIntPtrType(*getContext()); +  Entry.Node = Dst; +  Args.push_back(Entry); + +  Entry.Ty = Type::getInt8Ty(*getContext()); +  Entry.Node = Value; +  Args.push_back(Entry); + +  Entry.Ty = SizeTy; +  Entry.Node = Size; +  Args.push_back(Entry); + +  RTLIB::Libcall LibraryCall = +      RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElemSz); +  if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) +    report_fatal_error("Unsupported element size"); + +  TargetLowering::CallLoweringInfo CLI(*this); +  CLI.setDebugLoc(dl) +      .setChain(Chain) +      .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), +                    Type::getVoidTy(*getContext()), +                    getExternalSymbol(TLI->getLibcallName(LibraryCall), +                                      TLI->getPointerTy(getDataLayout())), +                    std::move(Args)) +      .setDiscardResult() +      .setTailCall(isTailCall); + +  std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); +  return CallResult.second; +} +  SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,                                  SDVTList VTList, ArrayRef<SDValue> Ops,                                  MachineMemOperand *MMO) { @@ -5736,6 +6084,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,    assert((Opcode == ISD::ATOMIC_LOAD_ADD ||            Opcode == ISD::ATOMIC_LOAD_SUB ||            Opcode == ISD::ATOMIC_LOAD_AND || +          Opcode == ISD::ATOMIC_LOAD_CLR ||            Opcode == ISD::ATOMIC_LOAD_OR ||            Opcode == ISD::ATOMIC_LOAD_XOR ||            Opcode == ISD::ATOMIC_LOAD_NAND || @@ -6207,7 +6556,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,  SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,                                        ArrayRef<SDValue> Ops,                                        MachineMemOperand *MMO) { -  assert(Ops.size() == 5 && "Incompatible number of operands"); +  assert(Ops.size() == 6 && "Incompatible number of operands");    FoldingSetNodeID ID;    AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); @@ -6233,6 +6582,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,    assert(N->getIndex().getValueType().getVectorNumElements() ==               N->getValueType(0).getVectorNumElements() &&           "Vector width mismatch between index and data"); +  assert(isa<ConstantSDNode>(N->getScale()) && +         cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && +         "Scale should be a constant power of 2");    CSEMap.InsertNode(N, IP);    InsertNode(N); @@ -6244,7 +6596,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,  SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,                                         ArrayRef<SDValue> Ops,                                         MachineMemOperand *MMO) { -  assert(Ops.size() == 5 && "Incompatible number of operands"); +  assert(Ops.size() == 6 && "Incompatible number of operands");    FoldingSetNodeID ID;    AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); @@ -6267,6 +6619,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,    assert(N->getIndex().getValueType().getVectorNumElements() ==               N->getValue().getValueType().getVectorNumElements() &&           "Vector width mismatch between index and data"); +  assert(isa<ConstantSDNode>(N->getScale()) && +         cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && +         "Scale should be a constant power of 2");    CSEMap.InsertNode(N, IP);    InsertNode(N); @@ -6558,6 +6913,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {    // Now we update the operands.    N->OperandList[0].set(Op); +  updateDivergence(N);    // If this gets put into a CSE map, add it.    if (InsertPos) CSEMap.InsertNode(N, InsertPos);    return N; @@ -6586,6 +6942,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {    if (N->OperandList[1] != Op2)      N->OperandList[1].set(Op2); +  updateDivergence(N);    // If this gets put into a CSE map, add it.    if (InsertPos) CSEMap.InsertNode(N, InsertPos);    return N; @@ -6636,6 +6993,7 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {      if (N->OperandList[i] != Ops[i])        N->OperandList[i].set(Ops[i]); +  updateDivergence(N);    // If this gets put into a CSE map, add it.    if (InsertPos) CSEMap.InsertNode(N, InsertPos);    return N; @@ -7061,11 +7419,24 @@ SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var,  /// FrameIndex  SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var,                                                  DIExpression *Expr, unsigned FI, +                                                bool IsIndirect,                                                  const DebugLoc &DL,                                                  unsigned O) {    assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&           "Expected inlined-at fields to agree"); -  return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, DL, O); +  return new (DbgInfo->getAlloc()) +      SDDbgValue(Var, Expr, FI, IsIndirect, DL, O, SDDbgValue::FRAMEIX); +} + +/// VReg +SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var, +                                          DIExpression *Expr, +                                          unsigned VReg, bool IsIndirect, +                                          const DebugLoc &DL, unsigned O) { +  assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && +         "Expected inlined-at fields to agree"); +  return new (DbgInfo->getAlloc()) +      SDDbgValue(Var, Expr, VReg, IsIndirect, DL, O, SDDbgValue::VREG);  }  void SelectionDAG::transferDbgValues(SDValue From, SDValue To, @@ -7155,8 +7526,9 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {                          DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());          ClonedDVs.push_back(Clone);          DV->setIsInvalidated(); -        DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this); -              dbgs() << " into " << *DIExpr << '\n'); +        LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting"; +                   N0.getNode()->dumprFull(this); +                   dbgs() << " into " << *DIExpr << '\n');        }      }    } @@ -7165,6 +7537,14 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {      AddDbgValue(Dbg, Dbg->getSDNode(), false);  } +/// Creates a SDDbgLabel node. +SDDbgLabel *SelectionDAG::getDbgLabel(DILabel *Label, +                                      const DebugLoc &DL, unsigned O) { +  assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) && +         "Expected inlined-at fields to agree"); +  return new (DbgInfo->getAlloc()) SDDbgLabel(Label, DL, O); +} +  namespace {  /// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node @@ -7227,8 +7607,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {        SDUse &Use = UI.getUse();        ++UI;        Use.set(To); +      if (To->isDivergent() != From->isDivergent()) +        updateDivergence(User);      } while (UI != UE && *UI == User); -      // Now that we have modified User, add it back to the CSE maps.  If it      // already exists there, recursively merge the results together.      AddModifiedNodeToCSEMaps(User); @@ -7282,6 +7663,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {        SDUse &Use = UI.getUse();        ++UI;        Use.setNode(To); +      if (To->isDivergent() != From->isDivergent()) +        updateDivergence(User);      } while (UI != UE && *UI == User);      // Now that we have modified User, add it back to the CSE maps.  If it @@ -7326,8 +7709,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {        const SDValue &ToOp = To[Use.getResNo()];        ++UI;        Use.set(ToOp); +      if (To->getNode()->isDivergent() != From->isDivergent()) +        updateDivergence(User);      } while (UI != UE && *UI == User); -      // Now that we have modified User, add it back to the CSE maps.  If it      // already exists there, recursively merge the results together.      AddModifiedNodeToCSEMaps(User); @@ -7385,8 +7769,9 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){        ++UI;        Use.set(To); +      if (To->isDivergent() != From->isDivergent()) +        updateDivergence(User);      } while (UI != UE && *UI == User); -      // We are iterating over all uses of the From node, so if a use      // doesn't use the specific value, no changes are made.      if (!UserRemovedFromCSEMaps) @@ -7419,6 +7804,72 @@ namespace {  } // end anonymous namespace +void SelectionDAG::updateDivergence(SDNode * N) +{ +  if (TLI->isSDNodeAlwaysUniform(N)) +    return; +  bool IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA); +  for (auto &Op : N->ops()) { +    if (Op.Val.getValueType() != MVT::Other) +      IsDivergent |= Op.getNode()->isDivergent(); +  } +  if (N->SDNodeBits.IsDivergent != IsDivergent) { +    N->SDNodeBits.IsDivergent = IsDivergent; +    for (auto U : N->uses()) { +      updateDivergence(U); +    } +  } +} + + +void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) { +  DenseMap<SDNode *, unsigned> Degree; +  Order.reserve(AllNodes.size()); +  for (auto & N : allnodes()) { +    unsigned NOps = N.getNumOperands(); +    Degree[&N] = NOps; +    if (0 == NOps) +      Order.push_back(&N); +  } +  for (std::vector<SDNode *>::iterator I = Order.begin(); +  I!=Order.end();++I) { +    SDNode * N = *I; +    for (auto U : N->uses()) { +      unsigned &UnsortedOps = Degree[U]; +      if (0 == --UnsortedOps) +        Order.push_back(U); +    } +  } +} + +void SelectionDAG::VerifyDAGDiverence() +{ +  std::vector<SDNode*> TopoOrder; +  CreateTopologicalOrder(TopoOrder); +  const TargetLowering &TLI = getTargetLoweringInfo(); +  DenseMap<const SDNode *, bool> DivergenceMap; +  for (auto &N : allnodes()) { +    DivergenceMap[&N] = false; +  } +  for (auto N : TopoOrder) { +    bool IsDivergent = DivergenceMap[N]; +    bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA); +    for (auto &Op : N->ops()) { +      if (Op.Val.getValueType() != MVT::Other) +        IsSDNodeDivergent |= DivergenceMap[Op.getNode()]; +    } +    if (!IsDivergent && IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N)) { +      DivergenceMap[N] = true; +    } +  } +  for (auto &N : allnodes()) { +    (void)N; +    assert(DivergenceMap[&N] == N.isDivergent() && +           "Divergence bit inconsistency detected\n"); +  } +} + +  /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving  /// uses of other values produced by From.getNode() alone.  The same value  /// may appear in both the From and To list.  The Deleted vector is @@ -7450,7 +7901,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,    }    // Sort the uses, so that all the uses from a given User are together. -  std::sort(Uses.begin(), Uses.end()); +  llvm::sort(Uses.begin(), Uses.end());    for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();         UseIndex != UseIndexEnd; ) { @@ -7579,6 +8030,10 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {    DbgInfo->add(DB, SD, isParameter);  } +void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) { +  DbgInfo->add(DB); +} +  SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,                                                     SDValue NewMemOp) {    assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); @@ -7947,11 +8402,8 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,    if (VT.getSizeInBits() / 8 != Bytes)      return false; -  SDValue Loc = LD->getOperand(1); -  SDValue BaseLoc = Base->getOperand(1); - -  auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this); -  auto LocDecomp = BaseIndexOffset::match(Loc, *this); +  auto BaseLocDecomp = BaseIndexOffset::match(Base, *this); +  auto LocDecomp = BaseIndexOffset::match(LD, *this);    int64_t Offset = 0;    if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset)) @@ -7966,8 +8418,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {    const GlobalValue *GV;    int64_t GVOffset = 0;    if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { -    unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); -    KnownBits Known(PtrWidth); +    unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType()); +    KnownBits Known(IdxWidth);      llvm::computeKnownBits(GV, Known, getDataLayout());      unsigned AlignBits = Known.countMinTrailingZeros();      unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; @@ -8201,7 +8653,7 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {    return true;  } -// \brief Returns the SDNode if it is a constant integer BuildVector +// Returns the SDNode if it is a constant integer BuildVector  // or constant integer.  SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) {    if (isa<ConstantSDNode>(N)) @@ -8227,6 +8679,26 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {    return nullptr;  } +void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { +  assert(!Node->OperandList && "Node already has operands"); +  SDUse *Ops = OperandRecycler.allocate( +    ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator); + +  bool IsDivergent = false; +  for (unsigned I = 0; I != Vals.size(); ++I) { +    Ops[I].setUser(Node); +    Ops[I].setInitial(Vals[I]); +    if (Ops[I].Val.getValueType() != MVT::Other) // Skip Chain. It does not carry divergence. +      IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent(); +  } +  Node->NumOperands = Vals.size(); +  Node->OperandList = Ops; +  IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA); +  if (!TLI->isSDNodeAlwaysUniform(Node)) +    Node->SDNodeBits.IsDivergent = IsDivergent; +  checkForCycles(Node); +} +  #ifndef NDEBUG  static void checkForCyclesHelper(const SDNode *N,                                   SmallPtrSetImpl<const SDNode*> &Visited,  | 
