diff options
Diffstat (limited to 'contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp')
| -rw-r--r-- | contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 914 | 
1 files changed, 583 insertions, 331 deletions
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f75d5f4b2bd2..efd4bd9a4d89 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -49,6 +49,7 @@  #include "llvm/Target/TargetSubtargetInfo.h"  #include <algorithm>  #include <cmath> +#include <utility>  using namespace llvm; @@ -196,6 +197,22 @@ bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {    return true;  } +/// \brief Return true if the specified node is a BUILD_VECTOR node of +/// all ConstantFPSDNode or undef. +bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { +  if (N->getOpcode() != ISD::BUILD_VECTOR) +    return false; + +  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { +    SDValue Op = N->getOperand(i); +    if (Op.getOpcode() == ISD::UNDEF) +      continue; +    if (!isa<ConstantFPSDNode>(Op)) +      return false; +  } +  return true; +} +  /// isScalarToVector - Return true if the specified node is a  /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low  /// element is not an undef. @@ -499,8 +516,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {    case ISD::SUB:    case ISD::SHL: {      const BinaryWithFlagsSDNode *BinNode = cast<BinaryWithFlagsSDNode>(N); -    AddBinaryNodeIDCustom(ID, N->getOpcode(), BinNode->hasNoUnsignedWrap(), -                          BinNode->hasNoSignedWrap(), BinNode->isExact()); +    AddBinaryNodeIDCustom( +        ID, N->getOpcode(), BinNode->Flags.hasNoUnsignedWrap(), +        BinNode->Flags.hasNoSignedWrap(), BinNode->Flags.hasExact());      break;    }    case ISD::ATOMIC_CMP_SWAP: @@ -860,7 +878,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,    FoldingSetNodeID ID;    AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);    AddNodeIDCustom(ID, N); -  SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); +  SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);    return Node;  } @@ -878,7 +896,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,    FoldingSetNodeID ID;    AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);    AddNodeIDCustom(ID, N); -  SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); +  SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);    return Node;  } @@ -895,7 +913,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,    FoldingSetNodeID ID;    AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);    AddNodeIDCustom(ID, N); -  SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); +  SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);    return Node;  } @@ -947,9 +965,9 @@ BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL,    if (isBinOpWithFlags(Opcode)) {      BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode(          Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); -    FN->setHasNoUnsignedWrap(nuw); -    FN->setHasNoSignedWrap(nsw); -    FN->setIsExact(exact); +    FN->Flags.setNoUnsignedWrap(nuw); +    FN->Flags.setNoSignedWrap(nsw); +    FN->Flags.setExact(exact);      return FN;    } @@ -959,6 +977,40 @@ BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL,    return N;  } +SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, +                                          void *&InsertPos) { +  SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); +  if (N) { +    switch (N->getOpcode()) { +    default: break; +    case ISD::Constant: +    case ISD::ConstantFP: +      llvm_unreachable("Querying for Constant and ConstantFP nodes requires " +                       "debug location.  Use another overload."); +    } +  } +  return N; +} + +SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, +                                          DebugLoc DL, void *&InsertPos) { +  SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); +  if (N) { +    switch (N->getOpcode()) { +    default: break; // Process only regular (non-target) constant nodes. +    case ISD::Constant: +    case ISD::ConstantFP: +      // Erase debug location from the node if the node is used at several +      // different places to do not propagate one location to all uses as it +      // leads to incorrect debug info. +      if (N->getDebugLoc() != DL) +        N->setDebugLoc(DebugLoc()); +      break; +    } +  } +  return N; +} +  void SelectionDAG::clear() {    allnodes_clear();    OperandAllocator.Reset(); @@ -1014,7 +1066,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) {    APInt Imm = APInt::getLowBitsSet(BitWidth,                                     VT.getSizeInBits());    return getNode(ISD::AND, DL, Op.getValueType(), Op, -                 getConstant(Imm, Op.getValueType())); +                 getConstant(Imm, DL, Op.getValueType()));  }  SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { @@ -1052,7 +1104,7 @@ SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {  SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) {    EVT EltVT = VT.getScalarType();    SDValue NegOne = -    getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); +    getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT);    return getNode(ISD::XOR, DL, VT, Val, NegOne);  } @@ -1062,31 +1114,33 @@ SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) {    switch (TLI->getBooleanContents(VT)) {      case TargetLowering::ZeroOrOneBooleanContent:      case TargetLowering::UndefinedBooleanContent: -      TrueValue = getConstant(1, VT); +      TrueValue = getConstant(1, DL, VT);        break;      case TargetLowering::ZeroOrNegativeOneBooleanContent: -      TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), +      TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL,                                VT);        break;    }    return getNode(ISD::XOR, DL, VT, Val, TrueValue);  } -SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT, bool isO) { +SDValue SelectionDAG::getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isT, +                                  bool isO) {    EVT EltVT = VT.getScalarType();    assert((EltVT.getSizeInBits() >= 64 ||           (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&           "getConstant with a uint64_t value that doesn't fit in the type!"); -  return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT, isO); +  return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO);  } -SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT, bool isO) +SDValue SelectionDAG::getConstant(const APInt &Val, SDLoc DL, EVT VT, bool isT, +                                  bool isO)  { -  return getConstant(*ConstantInt::get(*Context, Val), VT, isT, isO); +  return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO);  } -SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, -                                  bool isO) { +SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT, +                                  bool isT, bool isO) {    assert(VT.isInteger() && "Cannot create FP integer constant!");    EVT EltVT = VT.getScalarType(); @@ -1125,7 +1179,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,      SmallVector<SDValue, 2> EltParts;      for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) {        EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) -                                           .trunc(ViaEltSizeInBits), +                                           .trunc(ViaEltSizeInBits), DL,                                       ViaEltVT, isT, isO));      } @@ -1160,12 +1214,13 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,    ID.AddBoolean(isO);    void *IP = nullptr;    SDNode *N = nullptr; -  if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) +  if ((N = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)))      if (!VT.isVector())        return SDValue(N, 0);    if (!N) { -    N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, EltVT); +    N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, DL.getDebugLoc(), +                                           EltVT);      CSEMap.InsertNode(N, IP);      InsertNode(N);    } @@ -1179,16 +1234,17 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,    return Result;  } -SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { -  return getConstant(Val, TLI->getPointerTy(), isTarget); +SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, SDLoc DL, bool isTarget) { +  return getConstant(Val, DL, TLI->getPointerTy(), isTarget);  } - -SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) { -  return getConstantFP(*ConstantFP::get(*getContext(), V), VT, isTarget); +SDValue SelectionDAG::getConstantFP(const APFloat& V, SDLoc DL, EVT VT, +                                    bool isTarget) { +  return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget);  } -SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ +SDValue SelectionDAG::getConstantFP(const ConstantFP& V, SDLoc DL, EVT VT, +                                    bool isTarget){    assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");    EVT EltVT = VT.getScalarType(); @@ -1202,12 +1258,13 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){    ID.AddPointer(&V);    void *IP = nullptr;    SDNode *N = nullptr; -  if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) +  if ((N = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)))      if (!VT.isVector())        return SDValue(N, 0);    if (!N) { -    N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT); +    N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, DL.getDebugLoc(), +                                             EltVT);      CSEMap.InsertNode(N, IP);      InsertNode(N);    } @@ -1216,25 +1273,25 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){    if (VT.isVector()) {      SmallVector<SDValue, 8> Ops;      Ops.assign(VT.getVectorNumElements(), Result); -    // FIXME SDLoc info might be appropriate here      Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops);    }    return Result;  } -SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { +SDValue SelectionDAG::getConstantFP(double Val, SDLoc DL, EVT VT, +                                    bool isTarget) {    EVT EltVT = VT.getScalarType();    if (EltVT==MVT::f32) -    return getConstantFP(APFloat((float)Val), VT, isTarget); +    return getConstantFP(APFloat((float)Val), DL, VT, isTarget);    else if (EltVT==MVT::f64) -    return getConstantFP(APFloat(Val), VT, isTarget); +    return getConstantFP(APFloat(Val), DL, VT, isTarget);    else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::ppcf128 ||             EltVT==MVT::f16) {      bool ignored;      APFloat apf = APFloat(Val);      apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,                  &ignored); -    return getConstantFP(apf, VT, isTarget); +    return getConstantFP(apf, DL, VT, isTarget);    } else      llvm_unreachable("Unsupported type in getConstantFP");  } @@ -1264,7 +1321,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,    ID.AddInteger(TargetFlags);    ID.AddInteger(GV->getType()->getAddressSpace());    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +  if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))      return SDValue(E, 0);    SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL.getIROrder(), @@ -1281,7 +1338,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {    AddNodeIDNode(ID, Opc, getVTList(VT), None);    ID.AddInteger(FI);    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +  if (SDNode *E = FindNodeOrInsertPos(ID, IP))      return SDValue(E, 0);    SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget); @@ -1300,7 +1357,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,    ID.AddInteger(JTI);    ID.AddInteger(TargetFlags);    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +  if (SDNode *E = FindNodeOrInsertPos(ID, IP))      return SDValue(E, 0);    SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget, @@ -1326,7 +1383,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,    ID.AddPointer(C);    ID.AddInteger(TargetFlags);    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +  if (SDNode *E = FindNodeOrInsertPos(ID, IP))      return SDValue(E, 0);    SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, @@ -1353,7 +1410,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,    C->addSelectionDAGCSEId(ID);    ID.AddInteger(TargetFlags);    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +  if (SDNode *E = FindNodeOrInsertPos(ID, IP))      return SDValue(E, 0);    SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, @@ -1371,7 +1428,7 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,    ID.AddInteger(Offset);    ID.AddInteger(TargetFlags);    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +  if (SDNode *E = FindNodeOrInsertPos(ID, IP))      return SDValue(E, 0);    SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, @@ -1386,7 +1443,7 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {    AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None);    ID.AddPointer(MBB);    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +  if (SDNode *E = FindNodeOrInsertPos(ID, IP))      return SDValue(E, 0);    SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB); @@ -1446,13 +1503,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {  // N2 to point at N1.  static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {    std::swap(N1, N2); -  int NElts = M.size(); -  for (int i = 0; i != NElts; ++i) { -    if (M[i] >= NElts) -      M[i] -= NElts; -    else if (M[i] >= 0) -      M[i] += NElts; -  } +  ShuffleVectorSDNode::commuteMask(M);  }  SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, @@ -1484,6 +1535,34 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,    if (N1.getOpcode() == ISD::UNDEF)      commuteShuffle(N1, N2, MaskVec); +  // If shuffling a splat, try to blend the splat instead. We do this here so +  // that even when this arises during lowering we don't have to re-handle it. +  auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { +    BitVector UndefElements; +    SDValue Splat = BV->getSplatValue(&UndefElements); +    if (!Splat) +      return; + +    for (int i = 0; i < (int)NElts; ++i) { +      if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + (int)NElts)) +        continue; + +      // If this input comes from undef, mark it as such. +      if (UndefElements[MaskVec[i] - Offset]) { +        MaskVec[i] = -1; +        continue; +      } + +      // If we can blend a non-undef lane, use that instead. +      if (!UndefElements[i]) +        MaskVec[i] = i + Offset; +    } +  }; +  if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) +    BlendSplat(N1BV, 0); +  if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) +    BlendSplat(N2BV, NElts); +    // Canonicalize all index into lhs, -> shuffle lhs, undef    // Canonicalize all index into rhs, -> shuffle rhs, undef    bool AllLHS = true, AllRHS = true; @@ -1513,9 +1592,10 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,      return getUNDEF(VT);    // If Identity shuffle return that node. -  bool Identity = true; +  bool Identity = true, AllSame = true;    for (unsigned i = 0; i != NElts; ++i) {      if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; +    if (MaskVec[i] != MaskVec[0]) AllSame = false;    }    if (Identity && NElts)      return N1; @@ -1537,18 +1617,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,        if (Splat && Splat.getOpcode() == ISD::UNDEF)          return getUNDEF(VT); +      bool SameNumElts = +          V.getValueType().getVectorNumElements() == VT.getVectorNumElements(); +        // We only have a splat which can skip shuffles if there is a splatted        // value and no undef lanes rearranged by the shuffle.        if (Splat && UndefElements.none()) {          // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the          // number of elements match or the value splatted is a zero constant. -        if (V.getValueType().getVectorNumElements() == -            VT.getVectorNumElements()) +        if (SameNumElts)            return N1;          if (auto *C = dyn_cast<ConstantSDNode>(Splat))            if (C->isNullValue())              return N1;        } + +      // If the shuffle itself creates a splat, build the vector directly. +      if (AllSame && SameNumElts) { +        const SDValue &Splatted = BV->getOperand(MaskVec[0]); +        SmallVector<SDValue, 8> Ops(NElts, Splatted); + +        EVT BuildVT = BV->getValueType(0); +        SDValue NewBV = getNode(ISD::BUILD_VECTOR, dl, BuildVT, Ops); + +        // We may have jumped through bitcasts, so the type of the +        // BUILD_VECTOR may not match the type of the shuffle. +        if (BuildVT != VT) +          NewBV = getNode(ISD::BITCAST, dl, VT, NewBV); +        return NewBV; +      }      }    } @@ -1559,7 +1656,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,      ID.AddInteger(MaskVec[i]);    void* IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +  if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP))      return SDValue(E, 0);    // Allocate the mask array for the node out of the BumpPtrAllocator, since @@ -1579,19 +1676,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,  SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {    MVT VT = SV.getSimpleValueType(0); -  unsigned NumElems = VT.getVectorNumElements(); -  SmallVector<int, 8> MaskVec; - -  for (unsigned i = 0; i != NumElems; ++i) { -    int Idx = SV.getMaskElt(i); -    if (Idx >= 0) { -      if (Idx < (int)NumElems) -        Idx += NumElems; -      else -        Idx -= NumElems; -    } -    MaskVec.push_back(Idx); -  } +  SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end()); +  ShuffleVectorSDNode::commuteMask(MaskVec);    SDValue Op0 = SV.getOperand(0);    SDValue Op1 = SV.getOperand(1); @@ -1612,7 +1698,7 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl,    SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };    AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops);    void* IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +  if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP))      return SDValue(E, 0);    CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(), @@ -1628,7 +1714,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {    AddNodeIDNode(ID, ISD::Register, getVTList(VT), None);    ID.AddInteger(RegNo);    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +  if (SDNode *E = FindNodeOrInsertPos(ID, IP))      return SDValue(E, 0);    SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT); @@ -1642,7 +1728,7 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {    AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None);    ID.AddPointer(RegMask);    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +  if (SDNode *E = FindNodeOrInsertPos(ID, IP))      return SDValue(E, 0);    SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask); @@ -1657,7 +1743,7 @@ SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) {    AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops);    ID.AddPointer(Label);    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +  if (SDNode *E = FindNodeOrInsertPos(ID, IP))      return SDValue(E, 0);    SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), @@ -1680,7 +1766,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,    ID.AddInteger(Offset);    ID.AddInteger(TargetFlags);    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +  if (SDNode *E = FindNodeOrInsertPos(ID, IP))      return SDValue(E, 0);    SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset, @@ -1699,7 +1785,7 @@ SDValue SelectionDAG::getSrcValue(const Value *V) {    ID.AddPointer(V);    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +  if (SDNode *E = FindNodeOrInsertPos(ID, IP))      return SDValue(E, 0);    SDNode *N = new (NodeAllocator) SrcValueSDNode(V); @@ -1715,7 +1801,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {    ID.AddPointer(MD);    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +  if (SDNode *E = FindNodeOrInsertPos(ID, IP))      return SDValue(E, 0);    SDNode *N = new (NodeAllocator) MDNodeSDNode(MD); @@ -1734,7 +1820,7 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr,    ID.AddInteger(DestAS);    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +  if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP))      return SDValue(E, 0);    SDNode *N = new (NodeAllocator) AddrSpaceCastSDNode(dl.getIROrder(), @@ -1791,13 +1877,14 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,    switch (Cond) {    default: break;    case ISD::SETFALSE: -  case ISD::SETFALSE2: return getConstant(0, VT); +  case ISD::SETFALSE2: return getConstant(0, dl, VT);    case ISD::SETTRUE:    case ISD::SETTRUE2: {      TargetLowering::BooleanContent Cnt =          TLI->getBooleanContents(N1->getValueType(0));      return getConstant( -        Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); +        Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl, +        VT);    }    case ISD::SETOEQ: @@ -1821,16 +1908,16 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,        switch (Cond) {        default: llvm_unreachable("Unknown integer setcc!"); -      case ISD::SETEQ:  return getConstant(C1 == C2, VT); -      case ISD::SETNE:  return getConstant(C1 != C2, VT); -      case ISD::SETULT: return getConstant(C1.ult(C2), VT); -      case ISD::SETUGT: return getConstant(C1.ugt(C2), VT); -      case ISD::SETULE: return getConstant(C1.ule(C2), VT); -      case ISD::SETUGE: return getConstant(C1.uge(C2), VT); -      case ISD::SETLT:  return getConstant(C1.slt(C2), VT); -      case ISD::SETGT:  return getConstant(C1.sgt(C2), VT); -      case ISD::SETLE:  return getConstant(C1.sle(C2), VT); -      case ISD::SETGE:  return getConstant(C1.sge(C2), VT); +      case ISD::SETEQ:  return getConstant(C1 == C2, dl, VT); +      case ISD::SETNE:  return getConstant(C1 != C2, dl, VT); +      case ISD::SETULT: return getConstant(C1.ult(C2), dl, VT); +      case ISD::SETUGT: return getConstant(C1.ugt(C2), dl, VT); +      case ISD::SETULE: return getConstant(C1.ule(C2), dl, VT); +      case ISD::SETUGE: return getConstant(C1.uge(C2), dl, VT); +      case ISD::SETLT:  return getConstant(C1.slt(C2), dl, VT); +      case ISD::SETGT:  return getConstant(C1.sgt(C2), dl, VT); +      case ISD::SETLE:  return getConstant(C1.sle(C2), dl, VT); +      case ISD::SETGE:  return getConstant(C1.sge(C2), dl, VT);        }      }    } @@ -1842,41 +1929,41 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,        case ISD::SETEQ:  if (R==APFloat::cmpUnordered)                            return getUNDEF(VT);                          // fall through -      case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT); +      case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT);        case ISD::SETNE:  if (R==APFloat::cmpUnordered)                            return getUNDEF(VT);                          // fall through        case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan || -                                           R==APFloat::cmpLessThan, VT); +                                           R==APFloat::cmpLessThan, dl, VT);        case ISD::SETLT:  if (R==APFloat::cmpUnordered)                            return getUNDEF(VT);                          // fall through -      case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT); +      case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT);        case ISD::SETGT:  if (R==APFloat::cmpUnordered)                            return getUNDEF(VT);                          // fall through -      case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT); +      case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT);        case ISD::SETLE:  if (R==APFloat::cmpUnordered)                            return getUNDEF(VT);                          // fall through        case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan || -                                           R==APFloat::cmpEqual, VT); +                                           R==APFloat::cmpEqual, dl, VT);        case ISD::SETGE:  if (R==APFloat::cmpUnordered)                            return getUNDEF(VT);                          // fall through        case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan || -                                           R==APFloat::cmpEqual, VT); -      case ISD::SETO:   return getConstant(R!=APFloat::cmpUnordered, VT); -      case ISD::SETUO:  return getConstant(R==APFloat::cmpUnordered, VT); +                                           R==APFloat::cmpEqual, dl, VT); +      case ISD::SETO:   return getConstant(R!=APFloat::cmpUnordered, dl, VT); +      case ISD::SETUO:  return getConstant(R==APFloat::cmpUnordered, dl, VT);        case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered || -                                           R==APFloat::cmpEqual, VT); -      case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT); +                                           R==APFloat::cmpEqual, dl, VT); +      case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, dl, VT);        case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered || -                                           R==APFloat::cmpLessThan, VT); +                                           R==APFloat::cmpLessThan, dl, VT);        case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan || -                                           R==APFloat::cmpUnordered, VT); -      case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, VT); -      case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT); +                                           R==APFloat::cmpUnordered, dl, VT); +      case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, dl, VT); +      case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, dl, VT);        }      } else {        // Ensure that the constant occurs on the RHS. @@ -2323,6 +2410,21 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,      KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);      break;    } +  case ISD::EXTRACT_ELEMENT: { +    computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); +    const unsigned Index = +      cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); +    const unsigned BitWidth = Op.getValueType().getSizeInBits(); + +    // Remove low part of known bits mask +    KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth); +    KnownOne = KnownOne.getHiBits(KnownOne.getBitWidth() - Index * BitWidth); + +    // Remove high part of known bit mask +    KnownZero = KnownZero.trunc(BitWidth); +    KnownOne = KnownOne.trunc(BitWidth); +    break; +  }    case ISD::FrameIndex:    case ISD::TargetFrameIndex:      if (unsigned Align = InferPtrAlignment(Op)) { @@ -2522,6 +2624,21 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{      // FIXME: it's tricky to do anything useful for this, but it is an important      // case for targets like X86.      break; +  case ISD::EXTRACT_ELEMENT: { +    const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1); +    const int BitWidth = Op.getValueType().getSizeInBits(); +    const int Items = +      Op.getOperand(0).getValueType().getSizeInBits() / BitWidth; + +    // Get reverse index (starting from 1), Op1 value indexes elements from +    // little end. Sign starts at big end. +    const int rIndex = Items - 1 - +      cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + +    // If the sign portion ends in our element the substraction gives correct +    // result. Otherwise it gives either negative or > bitwidth result +    return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); +  }    }    // If we are looking at the loaded value of the SDNode. @@ -2643,7 +2760,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {    FoldingSetNodeID ID;    AddNodeIDNode(ID, Opcode, getVTList(VT), None);    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +  if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))      return SDValue(E, 0);    SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), @@ -2666,12 +2783,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,      switch (Opcode) {      default: break;      case ISD::SIGN_EXTEND: -      return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT, +      return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,                           C->isTargetOpcode(), C->isOpaque());      case ISD::ANY_EXTEND:      case ISD::ZERO_EXTEND:      case ISD::TRUNCATE: -      return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT, +      return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,                           C->isTargetOpcode(), C->isOpaque());      case ISD::UINT_TO_FP:      case ISD::SINT_TO_FP: { @@ -2680,29 +2797,29 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,        (void)apf.convertFromAPInt(Val,                                   Opcode==ISD::SINT_TO_FP,                                   APFloat::rmNearestTiesToEven); -      return getConstantFP(apf, VT); +      return getConstantFP(apf, DL, VT);      }      case ISD::BITCAST:        if (VT == MVT::f16 && C->getValueType(0) == MVT::i16) -        return getConstantFP(APFloat(APFloat::IEEEhalf, Val), VT); +        return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT);        if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) -        return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT); +        return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT);        else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) -        return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT); +        return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT);        break;      case ISD::BSWAP: -      return getConstant(Val.byteSwap(), VT, C->isTargetOpcode(), +      return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),                           C->isOpaque());      case ISD::CTPOP: -      return getConstant(Val.countPopulation(), VT, C->isTargetOpcode(), +      return getConstant(Val.countPopulation(), DL, VT, C->isTargetOpcode(),                           C->isOpaque());      case ISD::CTLZ:      case ISD::CTLZ_ZERO_UNDEF: -      return getConstant(Val.countLeadingZeros(), VT, C->isTargetOpcode(), +      return getConstant(Val.countLeadingZeros(), DL, VT, C->isTargetOpcode(),                           C->isOpaque());      case ISD::CTTZ:      case ISD::CTTZ_ZERO_UNDEF: -      return getConstant(Val.countTrailingZeros(), VT, C->isTargetOpcode(), +      return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(),                           C->isOpaque());      }    } @@ -2713,26 +2830,26 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,      switch (Opcode) {      case ISD::FNEG:        V.changeSign(); -      return getConstantFP(V, VT); +      return getConstantFP(V, DL, VT);      case ISD::FABS:        V.clearSign(); -      return getConstantFP(V, VT); +      return getConstantFP(V, DL, VT);      case ISD::FCEIL: {        APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);        if (fs == APFloat::opOK || fs == APFloat::opInexact) -        return getConstantFP(V, VT); +        return getConstantFP(V, DL, VT);        break;      }      case ISD::FTRUNC: {        APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);        if (fs == APFloat::opOK || fs == APFloat::opInexact) -        return getConstantFP(V, VT); +        return getConstantFP(V, DL, VT);        break;      }      case ISD::FFLOOR: {        APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);        if (fs == APFloat::opOK || fs == APFloat::opInexact) -        return getConstantFP(V, VT); +        return getConstantFP(V, DL, VT);        break;      }      case ISD::FP_EXTEND: { @@ -2741,7 +2858,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,        // FIXME need to be more flexible about rounding mode.        (void)V.convert(EVTToAPFloatSemantics(VT),                        APFloat::rmNearestTiesToEven, &ignored); -      return getConstantFP(V, VT); +      return getConstantFP(V, DL, VT);      }      case ISD::FP_TO_SINT:      case ISD::FP_TO_UINT: { @@ -2755,20 +2872,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,        if (s==APFloat::opInvalidOp)     // inexact is OK, in fact usual          break;        APInt api(VT.getSizeInBits(), x); -      return getConstant(api, VT); +      return getConstant(api, DL, VT);      }      case ISD::BITCAST:        if (VT == MVT::i16 && C->getValueType(0) == MVT::f16) -        return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), VT); +        return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT);        else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) -        return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); +        return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT);        else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) -        return getConstant(V.bitcastToAPInt().getZExtValue(), VT); +        return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);        break;      }    } -  // Constant fold unary operations with a vector integer operand. +  // Constant fold unary operations with a vector integer or float operand.    if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) {      if (BV->isConstant()) {        switch (Opcode) { @@ -2776,18 +2893,55 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,          // FIXME: Entirely reasonable to perform folding of other unary          // operations here as the need arises.          break; +      case ISD::FNEG: +      case ISD::FABS: +      case ISD::FCEIL: +      case ISD::FTRUNC: +      case ISD::FFLOOR: +      case ISD::FP_EXTEND: +      case ISD::FP_TO_SINT: +      case ISD::FP_TO_UINT: +      case ISD::TRUNCATE:        case ISD::UINT_TO_FP:        case ISD::SINT_TO_FP: { +        EVT SVT = VT.getScalarType(); +        EVT InVT = BV->getValueType(0); +        EVT InSVT = InVT.getScalarType(); + +        // Find legal integer scalar type for constant promotion and +        // ensure that its scalar size is at least as large as source. +        EVT LegalSVT = SVT; +        if (SVT.isInteger()) { +          LegalSVT = TLI->getTypeToTransformTo(*getContext(), SVT); +          if (LegalSVT.bitsLT(SVT)) break; +        } + +        // Let the above scalar folding handle the folding of each element.          SmallVector<SDValue, 8> Ops;          for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {            SDValue OpN = BV->getOperand(i); -          // Let the above scalar folding handle the conversion of each -          // element. -          OpN = getNode(ISD::SINT_TO_FP, DL, VT.getVectorElementType(), -                        OpN); +          EVT OpVT = OpN.getValueType(); + +          // Build vector (integer) scalar operands may need implicit +          // truncation - do this before constant folding. +          if (OpVT.isInteger() && OpVT.bitsGT(InSVT)) +            OpN = getNode(ISD::TRUNCATE, DL, InSVT, OpN); + +          OpN = getNode(Opcode, DL, SVT, OpN); + +          // Legalize the (integer) scalar constant if necessary. +          if (LegalSVT != SVT) +            OpN = getNode(ISD::ANY_EXTEND, DL, LegalSVT, OpN); + +          if (OpN.getOpcode() != ISD::UNDEF && +              OpN.getOpcode() != ISD::Constant && +              OpN.getOpcode() != ISD::ConstantFP) +            break;            Ops.push_back(OpN);          } -        return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); +        if (Ops.size() == VT.getVectorNumElements()) +          return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); +        break;        }        }      } @@ -2825,7 +2979,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,        return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));      else if (OpOpcode == ISD::UNDEF)        // sext(undef) = 0, because the top bits will all be the same. -      return getConstant(0, VT); +      return getConstant(0, DL, VT);      break;    case ISD::ZERO_EXTEND:      assert(VT.isInteger() && Operand.getValueType().isInteger() && @@ -2842,7 +2996,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,                       Operand.getNode()->getOperand(0));      else if (OpOpcode == ISD::UNDEF)        // zext(undef) = 0, because the top bits will be zero. -      return getConstant(0, VT); +      return getConstant(0, DL, VT);      break;    case ISD::ANY_EXTEND:      assert(VT.isInteger() && Operand.getValueType().isInteger() && @@ -2941,7 +3095,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,      SDValue Ops[1] = { Operand };      AddNodeIDNode(ID, Opcode, VTs, Ops);      void *IP = nullptr; -    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +    if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))        return SDValue(E, 0);      N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), @@ -2956,7 +3110,54 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,    return SDValue(N, 0);  } -SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, +static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, +                                        const APInt &C2) { +  switch (Opcode) { +  case ISD::ADD:  return std::make_pair(C1 + C2, true); +  case ISD::SUB:  return std::make_pair(C1 - C2, true); +  case ISD::MUL:  return std::make_pair(C1 * C2, true); +  case ISD::AND:  return std::make_pair(C1 & C2, true); +  case ISD::OR:   return std::make_pair(C1 | C2, true); +  case ISD::XOR:  return std::make_pair(C1 ^ C2, true); +  case ISD::SHL:  return std::make_pair(C1 << C2, true); +  case ISD::SRL:  return std::make_pair(C1.lshr(C2), true); +  case ISD::SRA:  return std::make_pair(C1.ashr(C2), true); +  case ISD::ROTL: return std::make_pair(C1.rotl(C2), true); +  case ISD::ROTR: return std::make_pair(C1.rotr(C2), true); +  case ISD::UDIV: +    if (!C2.getBoolValue()) +      break; +    return std::make_pair(C1.udiv(C2), true); +  case ISD::UREM: +    if (!C2.getBoolValue()) +      break; +    return std::make_pair(C1.urem(C2), true); +  case ISD::SDIV: +    if (!C2.getBoolValue()) +      break; +    return std::make_pair(C1.sdiv(C2), true); +  case ISD::SREM: +    if (!C2.getBoolValue()) +      break; +    return std::make_pair(C1.srem(C2), true); +  } +  return std::make_pair(APInt(1, 0), false); +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, +                                             const ConstantSDNode *Cst1, +                                             const ConstantSDNode *Cst2) { +  if (Cst1->isOpaque() || Cst2->isOpaque()) +    return SDValue(); + +  std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(), +                                            Cst2->getAPIntValue()); +  if (!Folded.second) +    return SDValue(); +  return getConstant(Folded.first, DL, VT); +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT,                                               SDNode *Cst1, SDNode *Cst2) {    // If the opcode is a target-specific ISD node, there's nothing we can    // do here and the operand rules may not line up with the below, so @@ -2964,116 +3165,59 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,    if (Opcode >= ISD::BUILTIN_OP_END)      return SDValue(); -  SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs; -  SmallVector<SDValue, 4> Outputs; -  EVT SVT = VT.getScalarType(); +  // Handle the case of two scalars. +  if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) { +    if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) { +      if (SDValue Folded = +          FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2)) { +        if (!VT.isVector()) +          return Folded; +        SmallVector<SDValue, 4> Outputs; +        // We may have a vector type but a scalar result. Create a splat. +        Outputs.resize(VT.getVectorNumElements(), Outputs.back()); +        // Build a big vector out of the scalar elements we generated. +        return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs); +      } else { +        return SDValue(); +      } +    } +  } -  ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1); -  ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2); -  if (Scalar1 && Scalar2 && (Scalar1->isOpaque() || Scalar2->isOpaque())) +  // For vectors extract each constant element into Inputs so we can constant +  // fold them individually. +  BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); +  BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2); +  if (!BV1 || !BV2)      return SDValue(); -  if (Scalar1 && Scalar2) -    // Scalar instruction. -    Inputs.push_back(std::make_pair(Scalar1, Scalar2)); -  else { -    // For vectors extract each constant element into Inputs so we can constant -    // fold them individually. -    BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); -    BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2); -    if (!BV1 || !BV2) -      return SDValue(); - -    assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!"); - -    for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { -      ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I)); -      ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I)); -      if (!V1 || !V2) // Not a constant, bail. -        return SDValue(); +  assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!"); -      if (V1->isOpaque() || V2->isOpaque()) -        return SDValue(); - -      // Avoid BUILD_VECTOR nodes that perform implicit truncation. -      // FIXME: This is valid and could be handled by truncating the APInts. -      if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) -        return SDValue(); +  EVT SVT = VT.getScalarType(); +  SmallVector<SDValue, 4> Outputs; +  for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { +    ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I)); +    ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I)); +    if (!V1 || !V2) // Not a constant, bail. +      return SDValue(); -      Inputs.push_back(std::make_pair(V1, V2)); -    } -  } +    if (V1->isOpaque() || V2->isOpaque()) +      return SDValue(); -  // We have a number of constant values, constant fold them element by element. -  for (unsigned I = 0, E = Inputs.size(); I != E; ++I) { -    const APInt &C1 = Inputs[I].first->getAPIntValue(); -    const APInt &C2 = Inputs[I].second->getAPIntValue(); +    // Avoid BUILD_VECTOR nodes that perform implicit truncation. +    // FIXME: This is valid and could be handled by truncating the APInts. +    if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) +      return SDValue(); -    switch (Opcode) { -    case ISD::ADD: -      Outputs.push_back(getConstant(C1 + C2, SVT)); -      break; -    case ISD::SUB: -      Outputs.push_back(getConstant(C1 - C2, SVT)); -      break; -    case ISD::MUL: -      Outputs.push_back(getConstant(C1 * C2, SVT)); -      break; -    case ISD::UDIV: -      if (!C2.getBoolValue()) -        return SDValue(); -      Outputs.push_back(getConstant(C1.udiv(C2), SVT)); -      break; -    case ISD::UREM: -      if (!C2.getBoolValue()) -        return SDValue(); -      Outputs.push_back(getConstant(C1.urem(C2), SVT)); -      break; -    case ISD::SDIV: -      if (!C2.getBoolValue()) -        return SDValue(); -      Outputs.push_back(getConstant(C1.sdiv(C2), SVT)); -      break; -    case ISD::SREM: -      if (!C2.getBoolValue()) -        return SDValue(); -      Outputs.push_back(getConstant(C1.srem(C2), SVT)); -      break; -    case ISD::AND: -      Outputs.push_back(getConstant(C1 & C2, SVT)); -      break; -    case ISD::OR: -      Outputs.push_back(getConstant(C1 | C2, SVT)); -      break; -    case ISD::XOR: -      Outputs.push_back(getConstant(C1 ^ C2, SVT)); -      break; -    case ISD::SHL: -      Outputs.push_back(getConstant(C1 << C2, SVT)); -      break; -    case ISD::SRL: -      Outputs.push_back(getConstant(C1.lshr(C2), SVT)); -      break; -    case ISD::SRA: -      Outputs.push_back(getConstant(C1.ashr(C2), SVT)); -      break; -    case ISD::ROTL: -      Outputs.push_back(getConstant(C1.rotl(C2), SVT)); -      break; -    case ISD::ROTR: -      Outputs.push_back(getConstant(C1.rotr(C2), SVT)); -      break; -    default: +    // Fold one vector element. +    std::pair<APInt, bool> Folded = FoldValue(Opcode, V1->getAPIntValue(), +                                              V2->getAPIntValue()); +    if (!Folded.second)        return SDValue(); -    } +    Outputs.push_back(getConstant(Folded.first, DL, SVT));    } -  assert((Scalar1 && Scalar2) || (VT.getVectorNumElements() == Outputs.size() && -                                  "Expected a scalar or vector!")); - -  // Handle the scalar case first. -  if (!VT.isVector()) -    return Outputs.back(); +  assert(VT.getVectorNumElements() == Outputs.size() && +         "Vector size mismatch!");    // We may have a vector type but a scalar result. Create a splat.    Outputs.resize(VT.getVectorNumElements(), Outputs.back()); @@ -3109,6 +3253,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,        SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),                                      N1.getNode()->op_end());        Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); + +      // BUILD_VECTOR requires all inputs to be of the same type, find the +      // maximum type and extend them all. +      EVT SVT = VT.getScalarType(); +      for (SDValue Op : Elts) +        SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); +      if (SVT.bitsGT(VT.getScalarType())) +        for (SDValue &Op : Elts) +          Op = TLI->isZExtFree(Op.getValueType(), SVT) +             ? getZExtOrTrunc(Op, DL, SVT) +             : getSExtOrTrunc(Op, DL, SVT); +        return getNode(ISD::BUILD_VECTOR, DL, VT, Elts);      }      break; @@ -3273,12 +3429,35 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,      assert(EVT.bitsLE(VT) && "Not extending!");      if (EVT == VT) return N1;  // Not actually extending +    auto SignExtendInReg = [&](APInt Val) { +      unsigned FromBits = EVT.getScalarType().getSizeInBits(); +      Val <<= Val.getBitWidth() - FromBits; +      Val = Val.ashr(Val.getBitWidth() - FromBits); +      return getConstant(Val, DL, VT.getScalarType()); +    }; +      if (N1C) {        APInt Val = N1C->getAPIntValue(); -      unsigned FromBits = EVT.getScalarType().getSizeInBits(); -      Val <<= Val.getBitWidth()-FromBits; -      Val = Val.ashr(Val.getBitWidth()-FromBits); -      return getConstant(Val, VT); +      return SignExtendInReg(Val); +    } +    if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) { +      SmallVector<SDValue, 8> Ops; +      for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { +        SDValue Op = N1.getOperand(i); +        if (Op.getValueType() != VT.getScalarType()) break; +        if (Op.getOpcode() == ISD::UNDEF) { +          Ops.push_back(Op); +          continue; +        } +        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getNode())) { +          APInt Val = C->getAPIntValue(); +          Ops.push_back(SignExtendInReg(Val)); +          continue; +        } +        break; +      } +      if (Ops.size() == VT.getVectorNumElements()) +        return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);      }      break;    } @@ -3287,6 +3466,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,      if (N1.getOpcode() == ISD::UNDEF)        return getUNDEF(VT); +    // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF +    if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements()) +      return getUNDEF(VT); +      // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is      // expanding copies of large vectors from registers.      if (N2C && @@ -3296,7 +3479,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,          N1.getOperand(0).getValueType().getVectorNumElements();        return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,                       N1.getOperand(N2C->getZExtValue() / Factor), -                     getConstant(N2C->getZExtValue() % Factor, +                     getConstant(N2C->getZExtValue() % Factor, DL,                                   N2.getValueType()));      } @@ -3353,7 +3536,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,        unsigned ElementSize = VT.getSizeInBits();        unsigned Shift = ElementSize * N2C->getZExtValue();        APInt ShiftedVal = C->getAPIntValue().lshr(Shift); -      return getConstant(ShiftedVal.trunc(ElementSize), VT); +      return getConstant(ShiftedVal.trunc(ElementSize), DL, VT);      }      break;    case ISD::EXTRACT_SUBVECTOR: { @@ -3384,7 +3567,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,    // Perform trivial constant folding.    if (SDValue SV = -          FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode())) +          FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode()))      return SV;    // Canonicalize constant to RHS if commutative. @@ -3409,35 +3592,35 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,        case ISD::FADD:          s = V1.add(V2, APFloat::rmNearestTiesToEven);          if (!HasFPExceptions || s != APFloat::opInvalidOp) -          return getConstantFP(V1, VT); +          return getConstantFP(V1, DL, VT);          break;        case ISD::FSUB:          s = V1.subtract(V2, APFloat::rmNearestTiesToEven);          if (!HasFPExceptions || s!=APFloat::opInvalidOp) -          return getConstantFP(V1, VT); +          return getConstantFP(V1, DL, VT);          break;        case ISD::FMUL:          s = V1.multiply(V2, APFloat::rmNearestTiesToEven);          if (!HasFPExceptions || s!=APFloat::opInvalidOp) -          return getConstantFP(V1, VT); +          return getConstantFP(V1, DL, VT);          break;        case ISD::FDIV:          s = V1.divide(V2, APFloat::rmNearestTiesToEven);          if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&                                   s!=APFloat::opDivByZero)) { -          return getConstantFP(V1, VT); +          return getConstantFP(V1, DL, VT);          }          break;        case ISD::FREM :          s = V1.mod(V2, APFloat::rmNearestTiesToEven);          if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&                                   s!=APFloat::opDivByZero)) { -          return getConstantFP(V1, VT); +          return getConstantFP(V1, DL, VT);          }          break;        case ISD::FCOPYSIGN:          V1.copySign(V2); -        return getConstantFP(V1, VT); +        return getConstantFP(V1, DL, VT);        default: break;        }      } @@ -3449,7 +3632,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,        // FIXME need to be more flexible about rounding mode.        (void)V.convert(EVTToAPFloatSemantics(VT),                        APFloat::rmNearestTiesToEven, &ignored); -      return getConstantFP(V, VT); +      return getConstantFP(V, DL, VT);      }    } @@ -3474,7 +3657,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,        case ISD::SRL:        case ISD::SHL:          if (!VT.isVector()) -          return getConstant(0, VT);    // fold op(undef, arg2) -> 0 +          return getConstant(0, DL, VT);    // fold op(undef, arg2) -> 0          // For vectors, we can't easily build an all zero vector, just return          // the LHS.          return N2; @@ -3489,7 +3672,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,        if (N1.getOpcode() == ISD::UNDEF)          // Handle undef ^ undef -> 0 special case. This is a common          // idiom (misuse). -        return getConstant(0, VT); +        return getConstant(0, DL, VT);        // fallthrough      case ISD::ADD:      case ISD::ADDC: @@ -3513,13 +3696,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,      case ISD::SRL:      case ISD::SHL:        if (!VT.isVector()) -        return getConstant(0, VT);  // fold op(arg1, undef) -> 0 +        return getConstant(0, DL, VT);  // fold op(arg1, undef) -> 0        // For vectors, we can't easily build an all zero vector, just return        // the LHS.        return N1;      case ISD::OR:        if (!VT.isVector()) -        return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); +        return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);        // For vectors, we can't easily build an all one vector, just return        // the LHS.        return N1; @@ -3539,14 +3722,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,      if (BinOpHasFlags)        AddBinaryNodeIDCustom(ID, Opcode, nuw, nsw, exact);      void *IP = nullptr; -    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +    if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))        return SDValue(E, 0);      N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);      CSEMap.InsertNode(N, IP);    } else { -      N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);    } @@ -3569,8 +3751,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,        const APFloat &V3 = N3CFP->getValueAPF();        APFloat::opStatus s =          V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); -      if (s != APFloat::opInvalidOp) -        return getConstantFP(V1, VT); +      if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp) +        return getConstantFP(V1, DL, VT);      }      break;    } @@ -3643,7 +3825,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,      FoldingSetNodeID ID;      AddNodeIDNode(ID, Opcode, VTs, Ops);      void *IP = nullptr; -    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +    if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))        return SDValue(E, 0);      N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), @@ -3705,16 +3887,32 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,      assert(C->getAPIntValue().getBitWidth() == 8);      APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());      if (VT.isInteger()) -      return DAG.getConstant(Val, VT); -    return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT); +      return DAG.getConstant(Val, dl, VT); +    return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl, +                             VT);    } -  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value); +  assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?"); +  EVT IntVT = VT.getScalarType(); +  if (!IntVT.isInteger()) +    IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits()); + +  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value);    if (NumBits > 8) {      // Use a multiplication with 0x010101... to extend the input to the      // required length.      APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); -    Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT)); +    Value = DAG.getNode(ISD::MUL, dl, IntVT, Value, +                        DAG.getConstant(Magic, dl, IntVT)); +  } + +  if (VT != Value.getValueType() && !VT.isInteger()) +    Value = DAG.getNode(ISD::BITCAST, dl, VT.getScalarType(), Value); +  if (VT != Value.getValueType()) { +    assert(VT.getVectorElementType() == Value.getValueType() && +           "value type should be one vector element here"); +    SmallVector<SDValue, 8> BVOps(VT.getVectorNumElements(), Value); +    Value = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BVOps);    }    return Value; @@ -3728,15 +3926,16 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG,    // Handle vector with all elements zero.    if (Str.empty()) {      if (VT.isInteger()) -      return DAG.getConstant(0, VT); +      return DAG.getConstant(0, dl, VT);      else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) -      return DAG.getConstantFP(0.0, VT); +      return DAG.getConstantFP(0.0, dl, VT);      else if (VT.isVector()) {        unsigned NumElts = VT.getVectorNumElements();        MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;        return DAG.getNode(ISD::BITCAST, dl, VT, -                         DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(), -                                                             EltVT, NumElts))); +                         DAG.getConstant(0, dl, +                                         EVT::getVectorVT(*DAG.getContext(), +                                                          EltVT, NumElts)));      } else        llvm_unreachable("Expected type!");    } @@ -3759,7 +3958,7 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG,    // of a load, then it is cost effective to turn the load into the immediate.    Type *Ty = VT.getTypeForEVT(*DAG.getContext());    if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty)) -    return DAG.getConstant(Val, VT); +    return DAG.getConstant(Val, dl, VT);    return SDValue(nullptr, 0);  } @@ -3769,7 +3968,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SDLoc dl,                                        SelectionDAG &DAG) {    EVT VT = Base.getValueType();    return DAG.getNode(ISD::ADD, dl, -                     VT, Base, DAG.getConstant(Offset, VT)); +                     VT, Base, DAG.getConstant(Offset, dl, VT));  }  /// isMemSrcFromString - Returns true if memcpy source is a string constant. @@ -3918,9 +4117,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,    bool DstAlignCanChange = false;    MachineFunction &MF = DAG.getMachineFunction();    MachineFrameInfo *MFI = MF.getFrameInfo(); -  bool OptSize = -    MF.getFunction()->getAttributes(). -      hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); +  bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);    if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))      DstAlignCanChange = true; @@ -4033,8 +4230,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,    bool DstAlignCanChange = false;    MachineFunction &MF = DAG.getMachineFunction();    MachineFrameInfo *MFI = MF.getFrameInfo(); -  bool OptSize = MF.getFunction()->getAttributes(). -    hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); +  bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);    if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))      DstAlignCanChange = true; @@ -4128,8 +4324,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,    bool DstAlignCanChange = false;    MachineFunction &MF = DAG.getMachineFunction();    MachineFrameInfo *MFI = MF.getFrameInfo(); -  bool OptSize = MF.getFunction()->getAttributes(). -    hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); +  bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);    if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))      DstAlignCanChange = true; @@ -4198,7 +4393,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,  SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,                                  SDValue Src, SDValue Size,                                  unsigned Align, bool isVol, bool AlwaysInline, -                                MachinePointerInfo DstPtrInfo, +                                bool isTailCall, MachinePointerInfo DstPtrInfo,                                  MachinePointerInfo SrcPtrInfo) {    assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4219,11 +4414,13 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,    // Then check to see if we should lower the memcpy with target-specific    // code. If the target chooses to do this, this is the next best. -  SDValue Result = -      TSI->EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, -                                   isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo); -  if (Result.getNode()) -    return Result; +  if (TSI) { +    SDValue Result = TSI->EmitTargetCodeForMemcpy( +        *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline, +        DstPtrInfo, SrcPtrInfo); +    if (Result.getNode()) +      return Result; +  }    // If we really need inline code and the target declined to provide it,    // use a (potentially long) sequence of loads and stores. @@ -4254,15 +4451,16 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,                 Type::getVoidTy(*getContext()),                 getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),                                   TLI->getPointerTy()), std::move(Args), 0) -    .setDiscardResult(); -  std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); +    .setDiscardResult() +    .setTailCall(isTailCall); +  std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);    return CallResult.second;  }  SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,                                   SDValue Src, SDValue Size, -                                 unsigned Align, bool isVol, +                                 unsigned Align, bool isVol, bool isTailCall,                                   MachinePointerInfo DstPtrInfo,                                   MachinePointerInfo SrcPtrInfo) {    assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4285,10 +4483,12 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,    // Then check to see if we should lower the memmove with target-specific    // code. If the target chooses to do this, this is the next best. -  SDValue Result = TSI->EmitTargetCodeForMemmove( -      *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); -  if (Result.getNode()) -    return Result; +  if (TSI) { +    SDValue Result = TSI->EmitTargetCodeForMemmove( +        *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); +    if (Result.getNode()) +      return Result; +  }    // FIXME: If the memmove is volatile, lowering it to plain libc memmove may    // not be safe.  See memcpy above for more details. @@ -4307,15 +4507,16 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,                 Type::getVoidTy(*getContext()),                 getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),                                   TLI->getPointerTy()), std::move(Args), 0) -    .setDiscardResult(); -  std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); +    .setDiscardResult() +    .setTailCall(isTailCall); +  std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);    return CallResult.second;  }  SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,                                  SDValue Src, SDValue Size, -                                unsigned Align, bool isVol, +                                unsigned Align, bool isVol, bool isTailCall,                                  MachinePointerInfo DstPtrInfo) {    assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4337,10 +4538,12 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,    // Then check to see if we should lower the memset with target-specific    // code. If the target chooses to do this, this is the next best. -  SDValue Result = TSI->EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, -                                                Size, Align, isVol, DstPtrInfo); -  if (Result.getNode()) -    return Result; +  if (TSI) { +    SDValue Result = TSI->EmitTargetCodeForMemset( +        *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo); +    if (Result.getNode()) +      return Result; +  }    // Emit a library call.    Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext()); @@ -4362,7 +4565,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,                 Type::getVoidTy(*getContext()),                 getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),                                   TLI->getPointerTy()), std::move(Args), 0) -    .setDiscardResult(); +    .setDiscardResult() +    .setTailCall(isTailCall);    std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);    return CallResult.second; @@ -4379,7 +4583,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,    AddNodeIDNode(ID, Opcode, VTList, Ops);    ID.AddInteger(MMO->getPointerInfo().getAddrSpace());    void* IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { +  if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {      cast<AtomicSDNode>(E)->refineAlignment(MMO);      return SDValue(E, 0);    } @@ -4584,7 +4788,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,      AddNodeIDNode(ID, Opcode, VTList, Ops);      ID.AddInteger(MMO->getPointerInfo().getAddrSpace());      void *IP = nullptr; -    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { +    if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {        cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);        return SDValue(E, 0);      } @@ -4685,10 +4889,10 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,      assert(VT.isInteger() == MemVT.isInteger() &&             "Cannot convert from FP to Int or Int -> FP!");      assert(VT.isVector() == MemVT.isVector() && -           "Cannot use trunc store to convert to or from a vector!"); +           "Cannot use an ext load to convert to or from a vector!");      assert((!VT.isVector() ||              VT.getVectorNumElements() == MemVT.getVectorNumElements()) && -           "Cannot use trunc store to change the number of vector elements!"); +           "Cannot use an ext load to change the number of vector elements!");    }    bool Indexed = AM != ISD::UNINDEXED; @@ -4706,7 +4910,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,                                       MMO->isInvariant()));    ID.AddInteger(MMO->getPointerInfo().getAddrSpace());    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { +  if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {      cast<LoadSDNode>(E)->refineAlignment(MMO);      return SDValue(E, 0);    } @@ -4814,7 +5018,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,                                       MMO->isNonTemporal(), MMO->isInvariant()));    ID.AddInteger(MMO->getPointerInfo().getAddrSpace());    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { +  if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {      cast<StoreSDNode>(E)->refineAlignment(MMO);      return SDValue(E, 0);    } @@ -4883,7 +5087,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,                                       MMO->isNonTemporal(), MMO->isInvariant()));    ID.AddInteger(MMO->getPointerInfo().getAddrSpace());    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { +  if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {      cast<StoreSDNode>(E)->refineAlignment(MMO);      return SDValue(E, 0);    } @@ -4909,7 +5113,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,    ID.AddInteger(ST->getRawSubclassData());    ID.AddInteger(ST->getPointerInfo().getAddrSpace());    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +  if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP))      return SDValue(E, 0);    SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), @@ -4938,7 +5142,7 @@ SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,                                       MMO->isInvariant()));    ID.AddInteger(MMO->getPointerInfo().getAddrSpace());    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { +  if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {      cast<MaskedLoadSDNode>(E)->refineAlignment(MMO);      return SDValue(E, 0);    } @@ -4965,7 +5169,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,                                       MMO->isNonTemporal(), MMO->isInvariant()));    ID.AddInteger(MMO->getPointerInfo().getAddrSpace());    void *IP = nullptr; -  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { +  if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {      cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);      return SDValue(E, 0);    } @@ -4977,11 +5181,60 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,    return SDValue(N, 0);  } +SDValue +SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl, +                              ArrayRef<SDValue> Ops, +                              MachineMemOperand *MMO) { + +  FoldingSetNodeID ID; +  AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); +  ID.AddInteger(VT.getRawBits()); +  ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED, +                                     MMO->isVolatile(), +                                     MMO->isNonTemporal(), +                                     MMO->isInvariant())); +  ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); +  void *IP = nullptr; +  if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { +    cast<MaskedGatherSDNode>(E)->refineAlignment(MMO); +    return SDValue(E, 0); +  } +  MaskedGatherSDNode *N =  +    new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(), +                                           Ops, VTs, VT, MMO); +  CSEMap.InsertNode(N, IP); +  InsertNode(N); +  return SDValue(N, 0); +} + +SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, SDLoc dl, +                                       ArrayRef<SDValue> Ops, +                                       MachineMemOperand *MMO) { +  FoldingSetNodeID ID; +  AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); +  ID.AddInteger(VT.getRawBits()); +  ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), +                                     MMO->isNonTemporal(), +                                     MMO->isInvariant())); +  ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); +  void *IP = nullptr; +  if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { +    cast<MaskedScatterSDNode>(E)->refineAlignment(MMO); +    return SDValue(E, 0); +  } +  SDNode *N = +    new (NodeAllocator) MaskedScatterSDNode(dl.getIROrder(), dl.getDebugLoc(), +                                            Ops, VTs, VT, MMO); +  CSEMap.InsertNode(N, IP); +  InsertNode(N); +  return SDValue(N, 0); +} +  SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl,                                 SDValue Chain, SDValue Ptr,                                 SDValue SV,                                 unsigned Align) { -  SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) }; +  SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) };    return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops);  } @@ -5041,7 +5294,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,      AddNodeIDNode(ID, Opcode, VTs, Ops);      void *IP = nullptr; -    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +    if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))        return SDValue(E, 0);      N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), @@ -5096,7 +5349,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,      FoldingSetNodeID ID;      AddNodeIDNode(ID, Opcode, VTList, Ops);      void *IP = nullptr; -    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +    if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))        return SDValue(E, 0);      if (NumOps == 1) { @@ -5340,17 +5593,9 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {    assert(N->getNumOperands() == NumOps &&           "Update with wrong number of operands"); -  // Check to see if there is no change. -  bool AnyChange = false; -  for (unsigned i = 0; i != NumOps; ++i) { -    if (Ops[i] != N->getOperand(i)) { -      AnyChange = true; -      break; -    } -  } - -  // No operands changed, just return the input node. -  if (!AnyChange) return N; +  // If no operands changed just return the input node. +  if (Ops.empty() || std::equal(Ops.begin(), Ops.end(), N->op_begin())) +    return N;    // See if the modified node already exists.    void *InsertPos = nullptr; @@ -5498,8 +5743,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,  /// For IROrder, we keep the smaller of the two  SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) {    DebugLoc NLoc = N->getDebugLoc(); -  if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && -    (OLoc.getDebugLoc() != NLoc)) { +  if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) {      N->setDebugLoc(DebugLoc());    }    unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder()); @@ -5531,7 +5775,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,    if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {      FoldingSetNodeID ID;      AddNodeIDNode(ID, Opc, VTs, Ops); -    if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) +    if (SDNode *ON = FindNodeOrInsertPos(ID, N->getDebugLoc(), IP))        return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N));    } @@ -5737,7 +5981,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs,      FoldingSetNodeID ID;      AddNodeIDNode(ID, ~Opcode, VTs, OpsArray);      IP = nullptr; -    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { +    if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) {        return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL));      }    } @@ -5769,7 +6013,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs,  SDValue  SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT,                                       SDValue Operand) { -  SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); +  SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32);    SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,                                    VT, Operand, SRIdxVal);    return SDValue(Subreg, 0); @@ -5780,7 +6024,7 @@ SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT,  SDValue  SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT,                                      SDValue Operand, SDValue Subreg) { -  SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); +  SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32);    SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL,                                    VT, Operand, Subreg, SRIdxVal);    return SDValue(Result, 0); @@ -5797,7 +6041,7 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,      if (isBinOpWithFlags(Opcode))        AddBinaryNodeIDCustom(ID, nuw, nsw, exact);      void *IP = nullptr; -    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) +    if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP))        return E;    }    return nullptr; @@ -5809,21 +6053,28 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,  SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N,                                        unsigned R, bool IsIndirect, uint64_t Off,                                        DebugLoc DL, unsigned O) { -  return new (Allocator) SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O); +  assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && +         "Expected inlined-at fields to agree"); +  return new (DbgInfo->getAlloc()) +      SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O);  }  /// Constant  SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr,                                                const Value *C, uint64_t Off,                                                DebugLoc DL, unsigned O) { -  return new (Allocator) SDDbgValue(Var, Expr, C, Off, DL, O); +  assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && +         "Expected inlined-at fields to agree"); +  return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, Off, DL, O);  }  /// FrameIndex  SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr,                                                  unsigned FI, uint64_t Off,                                                  DebugLoc DL, unsigned O) { -  return new (Allocator) SDDbgValue(Var, Expr, FI, Off, DL, O); +  assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && +         "Expected inlined-at fields to agree"); +  return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, Off, DL, O);  }  namespace { @@ -6489,7 +6740,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {          Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl,                                OperandEltVT,                                Operand, -                              getConstant(i, TLI->getVectorIdxTy())); +                              getConstant(i, dl, TLI->getVectorIdxTy()));        } else {          // A scalar operand; just use it as is.          Operands[j] = Operand; @@ -6595,8 +6846,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {    if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {      unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType());      APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); -    llvm::computeKnownBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, -                           TLI->getDataLayout()); +    llvm::computeKnownBits(const_cast<GlobalValue *>(GV), KnownZero, KnownOne, +                           *TLI->getDataLayout());      unsigned AlignBits = KnownZero.countTrailingOnes();      unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;      if (Align) @@ -6652,9 +6903,10 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,           "More vector elements requested than available!");    SDValue Lo, Hi;    Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, -               getConstant(0, TLI->getVectorIdxTy())); +               getConstant(0, DL, TLI->getVectorIdxTy()));    Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, -               getConstant(LoVT.getVectorNumElements(), TLI->getVectorIdxTy())); +               getConstant(LoVT.getVectorNumElements(), DL, +                           TLI->getVectorIdxTy()));    return std::make_pair(Lo, Hi);  } @@ -6670,7 +6922,7 @@ void SelectionDAG::ExtractVectorElements(SDValue Op,    SDLoc SL(Op);    for (unsigned i = Start, e = Start + Count; i != e; ++i) {      Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, -                           Op, getConstant(i, IdxTy))); +                           Op, getConstant(i, SL, IdxTy)));    }  }  | 
