diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2011-02-20 12:57:14 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2011-02-20 12:57:14 +0000 | 
| commit | cf099d11218cb6f6c5cce947d6738e347f07fb12 (patch) | |
| tree | d2b61ce94e654cb01a254d2195259db5f9cc3f3c /lib/CodeGen/SelectionDAG | |
| parent | 49011b52fcba02a6051957b84705159f52fae4e4 (diff) | |
Notes
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
25 files changed, 5559 insertions, 3299 deletions
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 799988a4c862..15932c03a190 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -21,5 +21,3 @@ add_llvm_library(LLVMSelectionDAG    TargetLowering.cpp    TargetSelectionDAGInfo.cpp    ) - -target_link_libraries (LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c9c4d91e9736..90356021f602 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -25,7 +25,6 @@  #include "llvm/CodeGen/PseudoSourceValue.h"  #include "llvm/Analysis/AliasAnalysis.h"  #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h"  #include "llvm/Target/TargetLowering.h"  #include "llvm/Target/TargetMachine.h"  #include "llvm/Target/TargetOptions.h" @@ -43,6 +42,7 @@ STATISTIC(NodesCombined   , "Number of dag nodes combined");  STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");  STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");  STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed"); +STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");  namespace {    static cl::opt<bool> @@ -185,7 +185,7 @@ namespace {      SDValue visitANY_EXTEND(SDNode *N);      SDValue visitSIGN_EXTEND_INREG(SDNode *N);      SDValue visitTRUNCATE(SDNode *N); -    SDValue visitBIT_CONVERT(SDNode *N); +    SDValue visitBITCAST(SDNode *N);      SDValue visitBUILD_PAIR(SDNode *N);      SDValue visitFADD(SDNode *N);      SDValue visitFSUB(SDNode *N); @@ -229,12 +229,13 @@ namespace {      SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,                                           unsigned HiOp);      SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); -    SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT); +    
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);      SDValue BuildSDIV(SDNode *N);      SDValue BuildUDIV(SDNode *N);      SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);      SDValue ReduceLoadWidth(SDNode *N);      SDValue ReduceLoadOpStoreWidth(SDNode *N); +    SDValue TransformFPLoadStorePair(SDNode *N);      SDValue GetDemandedBits(SDValue V, const APInt &Mask); @@ -248,16 +249,19 @@ namespace {      bool isAlias(SDValue Ptr1, int64_t Size1,                   const Value *SrcValue1, int SrcValueOffset1,                   unsigned SrcValueAlign1, +                 const MDNode *TBAAInfo1,                   SDValue Ptr2, int64_t Size2,                   const Value *SrcValue2, int SrcValueOffset2, -                 unsigned SrcValueAlign2) const; +                 unsigned SrcValueAlign2, +                 const MDNode *TBAAInfo2) const;      /// FindAliasInfo - Extracts the relevant alias information from the memory      /// node.  Returns true if the operand was a load.      bool FindAliasInfo(SDNode *N,                         SDValue &Ptr, int64_t &Size,                         const Value *&SrcValue, int &SrcValueOffset, -                       unsigned &SrcValueAlignment) const; +                       unsigned &SrcValueAlignment, +                       const MDNode *&TBAAInfo) const;      /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,      /// looking for a better chain (aliasing node.) @@ -270,15 +274,15 @@ namespace {      /// Run - runs the dag combiner on all nodes in the work list      void Run(CombineLevel AtLevel); -     +      SelectionDAG &getDAG() const { return DAG; } -     +      /// getShiftAmountTy - Returns a type large enough to hold any valid      /// shift amount - before type legalization these can be huge.      EVT getShiftAmountTy() {        return LegalTypes ? 
TLI.getShiftAmountTy() : TLI.getPointerTy();      } -     +      /// isTypeLegal - This method returns true if we are running before type      /// legalization or if the specified VT is legal.      bool isTypeLegal(const EVT &VT) { @@ -631,7 +635,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {    // Replace the old value with the new one.    ++NodesCombined; -  DEBUG(dbgs() << "\nReplacing.2 ";  +  DEBUG(dbgs() << "\nReplacing.2 ";          TLO.Old.getNode()->dump(&DAG);          dbgs() << "\nWith: ";          TLO.New.getNode()->dump(&DAG); @@ -666,12 +670,13 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {      EVT MemVT = LD->getMemoryVT();      ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) -      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) +      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD  +                                                  : ISD::EXTLOAD)        : LD->getExtensionType();      Replace = true; -    return DAG.getExtLoad(ExtType, PVT, dl, +    return DAG.getExtLoad(ExtType, dl, PVT,                            LD->getChain(), LD->getBasePtr(), -                          LD->getSrcValue(), LD->getSrcValueOffset(), +                          LD->getPointerInfo(),                            MemVT, LD->isVolatile(),                            LD->isNonTemporal(), LD->getAlignment());    } @@ -691,7 +696,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {      unsigned ExtOpc =        Op.getValueType().isByteSized() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;      return DAG.getNode(ExtOpc, dl, PVT, Op); -  }     +  }    }    if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) @@ -889,11 +894,12 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {      LoadSDNode *LD = cast<LoadSDNode>(N);      EVT MemVT = LD->getMemoryVT();      ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) -      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) +      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD  +                                                  : ISD::EXTLOAD)        : LD->getExtensionType(); -    SDValue NewLD = DAG.getExtLoad(ExtType, PVT, dl, +    SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,                                     LD->getChain(), LD->getBasePtr(), -                                   LD->getSrcValue(), LD->getSrcValueOffset(), +                                   LD->getPointerInfo(),                                     MemVT, LD->isVolatile(),                                     LD->isNonTemporal(), LD->getAlignment());      SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); @@ -975,7 +981,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {             RV.getNode()->getOpcode() != ISD::DELETED_NODE &&             "Node was deleted but visit returned new node!"); -    DEBUG(dbgs() << "\nReplacing.3 ";  +    DEBUG(dbgs() << "\nReplacing.3 ";            N->dump(&DAG);            dbgs() << "\nWith: ";            RV.getNode()->dump(&DAG); @@ -1054,7 +1060,7 @@ SDValue DAGCombiner::visit(SDNode *N) {    case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);    case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);    case ISD::TRUNCATE:           return visitTRUNCATE(N); -  case ISD::BIT_CONVERT:        return visitBIT_CONVERT(N); +  case ISD::BITCAST:            return visitBITCAST(N);    case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);    case ISD::FADD:               return visitFADD(N);    case ISD::FSUB:             
  return visitFSUB(N); @@ -1225,7 +1231,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {        }      }    } -   +    SDValue Result;    // If we've change things around then replace token factor. @@ -1424,6 +1430,29 @@ SDValue DAGCombiner::visitADD(SDNode *N) {                                         N0.getOperand(0).getOperand(1),                                         N0.getOperand(1))); +  if (N1.getOpcode() == ISD::AND) { +    SDValue AndOp0 = N1.getOperand(0); +    ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1)); +    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); +    unsigned DestBits = VT.getScalarType().getSizeInBits(); + +    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) +    // and similar xforms where the inner op is either ~0 or 0. +    if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { +      DebugLoc DL = N->getDebugLoc(); +      return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); +    } +  } + +  // add (sext i1), X -> sub X, (zext i1) +  if (N0.getOpcode() == ISD::SIGN_EXTEND && +      N0.getOperand(0).getValueType() == MVT::i1 && +      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { +    DebugLoc DL = N->getDebugLoc(); +    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); +    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); +  } +    return SDValue();  } @@ -1438,7 +1467,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {    if (N->hasNUsesOfValue(0, 1))      return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),                       DAG.getNode(ISD::CARRY_FALSE, -                                 N->getDebugLoc(), MVT::Flag)); +                                 N->getDebugLoc(), MVT::Glue));    // canonicalize constant to RHS.    
if (N0C && !N1C) @@ -1447,7 +1476,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {    // fold (addc x, 0) -> x + no carry out    if (N1C && N1C->isNullValue())      return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, -                                        N->getDebugLoc(), MVT::Flag)); +                                        N->getDebugLoc(), MVT::Glue));    // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.    APInt LHSZero, LHSOne; @@ -1464,7 +1493,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {          (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))        return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),                         DAG.getNode(ISD::CARRY_FALSE, -                                   N->getDebugLoc(), MVT::Flag)); +                                   N->getDebugLoc(), MVT::Glue));    }    return SDValue(); @@ -1489,6 +1518,22 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {    return SDValue();  } +// Since it may not be valid to emit a fold to zero for vector initializers +// check if we can before folding. +static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT, +                             SelectionDAG &DAG, bool LegalOperations) {                             +  if (!VT.isVector()) { +    return DAG.getConstant(0, VT); +  } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { +    // Produce a vector of zeros. +    SDValue El = DAG.getConstant(0, VT.getVectorElementType()); +    std::vector<SDValue> Ops(VT.getVectorNumElements(), El); +    return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, +      &Ops[0], Ops.size()); +  } +  return SDValue(); +} +  SDValue DAGCombiner::visitSUB(SDNode *N) {    SDValue N0 = N->getOperand(0);    SDValue N1 = N->getOperand(1); @@ -1503,8 +1548,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {    }    // fold (sub x, x) -> 0 +  // FIXME: Refactor this and xor and other similar operations together.    
if (N0 == N1) -    return DAG.getConstant(0, N->getValueType(0)); +    return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);    // fold (sub c1, c2) -> c1-c2    if (N0C && N1C)      return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); @@ -1515,6 +1561,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {    // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)    if (N0C && N0C->isAllOnesValue())      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); +  // fold A-(A-B) -> B +  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) +    return N1.getOperand(1);    // fold (A+B)-A -> B    if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)      return N0.getOperand(1); @@ -1897,6 +1946,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {    SDValue N1 = N->getOperand(1);    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);    EVT VT = N->getValueType(0); +  DebugLoc DL = N->getDebugLoc();    // fold (mulhs x, 0) -> 0    if (N1C && N1C->isNullValue()) @@ -1910,6 +1960,22 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {    if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)      return DAG.getConstant(0, VT); +  // If the type twice as wide is legal, transform the mulhs to a wider multiply +  // plus a shift. 
+  if (VT.isSimple() && !VT.isVector()) { +    MVT Simple = VT.getSimpleVT(); +    unsigned SimpleSize = Simple.getSizeInBits(); +    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); +    if (TLI.isOperationLegal(ISD::MUL, NewVT)) { +      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); +      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); +      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); +      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, +                       DAG.getConstant(SimpleSize, getShiftAmountTy())); +      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); +    } +  } +      return SDValue();  } @@ -1918,6 +1984,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {    SDValue N1 = N->getOperand(1);    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);    EVT VT = N->getValueType(0); +  DebugLoc DL = N->getDebugLoc();    // fold (mulhu x, 0) -> 0    if (N1C && N1C->isNullValue()) @@ -1929,6 +1996,22 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {    if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)      return DAG.getConstant(0, VT); +  // If the type twice as wide is legal, transform the mulhu to a wider multiply +  // plus a shift. 
+  if (VT.isSimple() && !VT.isVector()) { +    MVT Simple = VT.getSimpleVT(); +    unsigned SimpleSize = Simple.getSizeInBits(); +    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); +    if (TLI.isOperationLegal(ISD::MUL, NewVT)) { +      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); +      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); +      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); +      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, +                       DAG.getConstant(SimpleSize, getShiftAmountTy())); +      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); +    } +  } +      return SDValue();  } @@ -1992,6 +2075,29 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {    SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);    if (Res.getNode()) return Res; +  EVT VT = N->getValueType(0); +  DebugLoc DL = N->getDebugLoc(); + +  // If the type twice as wide is legal, transform the mulhu to a wider multiply +  // plus a shift. +  if (VT.isSimple() && !VT.isVector()) { +    MVT Simple = VT.getSimpleVT(); +    unsigned SimpleSize = Simple.getSizeInBits(); +    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); +    if (TLI.isOperationLegal(ISD::MUL, NewVT)) { +      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); +      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); +      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); +      // Compute the high part as N1. +      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, +                       DAG.getConstant(SimpleSize, getShiftAmountTy())); +      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); +      // Compute the low part as N0. 
+      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); +      return CombineTo(N, Lo, Hi); +    } +  } +      return SDValue();  } @@ -1999,6 +2105,29 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {    SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);    if (Res.getNode()) return Res; +  EVT VT = N->getValueType(0); +  DebugLoc DL = N->getDebugLoc(); +   +  // If the type twice as wide is legal, transform the mulhu to a wider multiply +  // plus a shift. +  if (VT.isSimple() && !VT.isVector()) { +    MVT Simple = VT.getSimpleVT(); +    unsigned SimpleSize = Simple.getSizeInBits(); +    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); +    if (TLI.isOperationLegal(ISD::MUL, NewVT)) { +      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); +      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); +      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); +      // Compute the high part as N1. +      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, +                       DAG.getConstant(SimpleSize, getShiftAmountTy())); +      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); +      // Compute the low part as N0. 
+      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); +      return CombineTo(N, Lo, Hi); +    } +  } +      return SDValue();  } @@ -2116,7 +2245,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {    if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {      SDValue N0Op0 = N0.getOperand(0);      APInt Mask = ~N1C->getAPIntValue(); -    Mask.trunc(N0Op0.getValueSizeInBits()); +    Mask = Mask.trunc(N0Op0.getValueSizeInBits());      if (DAG.MaskedValueIsZero(N0Op0, Mask)) {        SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),                                   N0.getValueType(), N0Op0); @@ -2198,10 +2327,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {                             BitWidth - MemVT.getScalarType().getSizeInBits())) &&          ((!LegalOperations && !LN0->isVolatile()) ||           TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { -      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), +      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,                                         LN0->getChain(), LN0->getBasePtr(), -                                       LN0->getSrcValue(), -                                       LN0->getSrcValueOffset(), MemVT, +                                       LN0->getPointerInfo(), MemVT,                                         LN0->isVolatile(), LN0->isNonTemporal(),                                         LN0->getAlignment());        AddToWorkList(N); @@ -2221,10 +2349,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {                             BitWidth - MemVT.getScalarType().getSizeInBits())) &&          ((!LegalOperations && !LN0->isVolatile()) ||           TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { -      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), +      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,                                         LN0->getChain(), -                                       LN0->getBasePtr(), 
LN0->getSrcValue(), -                                       LN0->getSrcValueOffset(), MemVT, +                                       LN0->getBasePtr(), LN0->getPointerInfo(), +                                       MemVT,                                         LN0->isVolatile(), LN0->isNonTemporal(),                                         LN0->getAlignment());        AddToWorkList(N); @@ -2253,18 +2381,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {          if (ExtVT == LoadedVT &&              (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {            EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; -           -          SDValue NewLoad =  -            DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), + +          SDValue NewLoad = +            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,                             LN0->getChain(), LN0->getBasePtr(), -                           LN0->getSrcValue(), LN0->getSrcValueOffset(), +                           LN0->getPointerInfo(),                             ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),                             LN0->getAlignment());            AddToWorkList(N);            CombineTo(LN0, NewLoad, NewLoad.getValue(1));            return SDValue(N, 0);   // Return N so it doesn't get rechecked!          } -         +          // Do not change the width of a volatile load.          // Do not generate loads of non-round integer types since these can          // be expensive (and would be wrong if the type is not byte sized). @@ -2288,12 +2416,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {            }            AddToWorkList(NewPtr.getNode()); -           +            EVT LoadResultTy = HasAnyExt ? 
LN0->getValueType(0) : VT;            SDValue Load = -            DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), +            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,                             LN0->getChain(), NewPtr, -                           LN0->getSrcValue(), LN0->getSrcValueOffset(), +                           LN0->getPointerInfo(),                             ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),                             Alignment);            AddToWorkList(N); @@ -2722,17 +2850,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {                                           N01C->getAPIntValue(), VT));    }    // fold (xor x, x) -> 0 -  if (N0 == N1) { -    if (!VT.isVector()) { -      return DAG.getConstant(0, VT); -    } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){ -      // Produce a vector of zeros. -      SDValue El = DAG.getConstant(0, VT.getVectorElementType()); -      std::vector<SDValue> Ops(VT.getVectorNumElements(), El); -      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, -                         &Ops[0], Ops.size()); -    } -  } +  if (N0 == N1) +    return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);    // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))    if (N0.getOpcode() == N1.getOpcode()) { @@ -2810,7 +2929,8 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {                                 LHS->getOperand(1), N->getOperand(1));    // Create the new shift. -  SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(), +  SDValue NewShift = DAG.getNode(N->getOpcode(), +                                 LHS->getOperand(0).getDebugLoc(),                                   VT, LHS->getOperand(0), N->getOperand(1));    // Create the new binop. 
@@ -2850,7 +2970,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {        EVT TruncVT = N1.getValueType();        SDValue N100 = N1.getOperand(0).getOperand(0);        APInt TruncC = N101C->getAPIntValue(); -      TruncC.trunc(TruncVT.getSizeInBits()); +      TruncC = TruncC.trunc(TruncVT.getSizeInBits());        return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,                           DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,                                       DAG.getNode(ISD::TRUNCATE, @@ -2868,11 +2988,37 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {        N0.getOperand(1).getOpcode() == ISD::Constant) {      uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();      uint64_t c2 = N1C->getZExtValue(); -    if (c1 + c2 > OpSizeInBits) +    if (c1 + c2 >= OpSizeInBits)        return DAG.getConstant(0, VT);      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),                         DAG.getConstant(c1 + c2, N1.getValueType()));    } + +  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2))) +  // For this to be valid, the second form must not preserve any of the bits +  // that are shifted out by the inner shift in the first form.  This means +  // the outer shift size must be >= the number of bits added by the ext. +  // As a corollary, we don't care what kind of ext it is. 
+  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND || +              N0.getOpcode() == ISD::ANY_EXTEND || +              N0.getOpcode() == ISD::SIGN_EXTEND) && +      N0.getOperand(0).getOpcode() == ISD::SHL && +      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { +    uint64_t c1 =  +      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); +    uint64_t c2 = N1C->getZExtValue(); +    EVT InnerShiftVT = N0.getOperand(0).getValueType(); +    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); +    if (c2 >= OpSizeInBits - InnerShiftSize) { +      if (c1 + c2 >= OpSizeInBits) +        return DAG.getConstant(0, VT); +      return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT, +                         DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT, +                                     N0.getOperand(0)->getOperand(0)), +                         DAG.getConstant(c1 + c2, N1.getValueType())); +    } +  } +    // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or    //                               (srl (and x, (shl -1, c1)), (sub c1, c2))    if (N1C && N0.getOpcode() == ISD::SRL && @@ -2973,7 +3119,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {      if (N01C && N1C) {        // Determine what the truncate's result bitsize and type would be.        EVT TruncVT = -        EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue()); +        EVT::getIntegerVT(*DAG.getContext(), +                          OpSizeInBits - N1C->getZExtValue());        // Determine the residual right-shift amount.        
signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); @@ -3006,7 +3153,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {        EVT TruncVT = N1.getValueType();        SDValue N100 = N1.getOperand(0).getOperand(0);        APInt TruncC = N101C->getAPIntValue(); -      TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); +      TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());        return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,                           DAG.getNode(ISD::AND, N->getDebugLoc(),                                       TruncVT, @@ -3017,6 +3164,29 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {      }    } +  // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2)) +  //      if c1 is equal to the number of bits the trunc removes +  if (N0.getOpcode() == ISD::TRUNCATE && +      (N0.getOperand(0).getOpcode() == ISD::SRL || +       N0.getOperand(0).getOpcode() == ISD::SRA) && +      N0.getOperand(0).hasOneUse() && +      N0.getOperand(0).getOperand(1).hasOneUse() && +      N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { +    EVT LargeVT = N0.getOperand(0).getValueType(); +    ConstantSDNode *LargeShiftAmt = +      cast<ConstantSDNode>(N0.getOperand(0).getOperand(1)); + +    if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits == +        LargeShiftAmt->getZExtValue()) { +      SDValue Amt = +        DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(), +                        getShiftAmountTy()); +      SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT, +                                N0.getOperand(0).getOperand(0), Amt); +      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA); +    } +  } +    // Simplify, based on bits shifted out of the LHS.    
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))      return SDValue(N, 0); @@ -3065,12 +3235,33 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {        N0.getOperand(1).getOpcode() == ISD::Constant) {      uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();      uint64_t c2 = N1C->getZExtValue(); -    if (c1 + c2 > OpSizeInBits) +    if (c1 + c2 >= OpSizeInBits)        return DAG.getConstant(0, VT);      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),                         DAG.getConstant(c1 + c2, N1.getValueType()));    } -   + +  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) +  if (N1C && N0.getOpcode() == ISD::TRUNCATE && +      N0.getOperand(0).getOpcode() == ISD::SRL && +      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { +    uint64_t c1 =  +      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); +    uint64_t c2 = N1C->getZExtValue(); +    EVT InnerShiftVT = N0.getOperand(0).getValueType(); +    EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); +    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); +    // This is only valid if the OpSizeInBits + c1 = size of inner shift. 
+    if (c1 + OpSizeInBits == InnerShiftSize) { +      if (c1 + c2 >= InnerShiftSize) +        return DAG.getConstant(0, VT); +      return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT, +                         DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT,  +                                     N0.getOperand(0)->getOperand(0), +                                     DAG.getConstant(c1 + c2, ShiftCountVT))); +    } +  } +    // fold (srl (shl x, c), c) -> (and x, cst2)    if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&        N0.getValueSizeInBits() <= 64) { @@ -3078,7 +3269,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),                         DAG.getConstant(~0ULL >> ShAmt, VT));    } -   +    // fold (srl (anyextend x), c) -> (anyextend (srl x, c))    if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { @@ -3147,7 +3338,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {        EVT TruncVT = N1.getValueType();        SDValue N100 = N1.getOperand(0).getOperand(0);        APInt TruncC = N101C->getAPIntValue(); -      TruncC.trunc(TruncVT.getSizeInBits()); +      TruncC = TruncC.trunc(TruncVT.getSizeInBits());        return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,                           DAG.getNode(ISD::AND, N->getDebugLoc(),                                       TruncVT, @@ -3182,7 +3373,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {    //   brcond i32 %c ...    //    // into -  //  +  //    //   %a = ...    //   %b = and %a, 2    //   %c = setcc eq %b, 0 @@ -3422,7 +3613,7 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,      }      if (BothLiveOut)        // Both unextended and extended values are live out. There had better be -      // good a reason for the transformation. +      // a good reason for the transformation.        
return ExtendNodes.size();    }    return true; @@ -3503,10 +3694,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {        DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);      if (DoXform) {        LoadSDNode *LN0 = cast<LoadSDNode>(N0); -      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), +      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,                                         LN0->getChain(), -                                       LN0->getBasePtr(), LN0->getSrcValue(), -                                       LN0->getSrcValueOffset(), +                                       LN0->getBasePtr(), LN0->getPointerInfo(),                                         N0.getValueType(),                                         LN0->isVolatile(), LN0->isNonTemporal(),                                         LN0->getAlignment()); @@ -3547,10 +3737,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {      EVT MemVT = LN0->getMemoryVT();      if ((!LegalOperations && !LN0->isVolatile()) ||          TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { -      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), +      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,                                         LN0->getChain(), -                                       LN0->getBasePtr(), LN0->getSrcValue(), -                                       LN0->getSrcValueOffset(), MemVT, +                                       LN0->getBasePtr(), LN0->getPointerInfo(), +                                       MemVT,                                         LN0->isVolatile(), LN0->isNonTemporal(),                                         LN0->getAlignment());        CombineTo(N, ExtLoad); @@ -3611,7 +3801,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {                                        N0.getOperand(0), N0.getOperand(1),                                   
cast<CondCodeSDNode>(N0.getOperand(2))->get()),                           NegOne, DAG.getConstant(0, VT)); -  }   +  }    // fold (sext x) -> (zext x) if the sign bit is known zero.    if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && @@ -3652,6 +3842,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {    // fold (zext (truncate x)) -> (and x, mask)    if (N0.getOpcode() == ISD::TRUNCATE &&        (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { + +    // fold (zext (truncate (load x))) -> (zext (smaller load x)) +    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) +    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); +    if (NarrowLoad.getNode()) { +      SDNode* oye = N0.getNode()->getOperand(0).getNode(); +      if (NarrowLoad.getNode() != N0.getNode()) { +        CombineTo(N0.getNode(), NarrowLoad); +        // CombineTo deleted the truncate, if needed, but not what's under it. +        AddToWorkList(oye); +      } +      return SDValue(N, 0);   // Return N so it doesn't get rechecked! 
+    } +      SDValue Op = N0.getOperand(0);      if (Op.getValueType().bitsLT(VT)) {        Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); @@ -3677,7 +3881,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {        X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);      }      APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); -    Mask.zext(VT.getSizeInBits()); +    Mask = Mask.zext(VT.getSizeInBits());      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,                         X, DAG.getConstant(Mask, VT));    } @@ -3692,10 +3896,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {        DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);      if (DoXform) {        LoadSDNode *LN0 = cast<LoadSDNode>(N0); -      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), +      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,                                         LN0->getChain(), -                                       LN0->getBasePtr(), LN0->getSrcValue(), -                                       LN0->getSrcValueOffset(), +                                       LN0->getBasePtr(), LN0->getPointerInfo(),                                         N0.getValueType(),                                         LN0->isVolatile(), LN0->isNonTemporal(),                                         LN0->getAlignment()); @@ -3736,10 +3939,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {      EVT MemVT = LN0->getMemoryVT();      if ((!LegalOperations && !LN0->isVolatile()) ||          TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { -      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), +      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,                                         LN0->getChain(), -                                       LN0->getBasePtr(), LN0->getSrcValue(), -                                       
LN0->getSrcValueOffset(), MemVT, +                                       LN0->getBasePtr(), LN0->getPointerInfo(), +                                       MemVT,                                         LN0->isVolatile(), LN0->isNonTemporal(),                                         LN0->getAlignment());        CombineTo(N, ExtLoad); @@ -3805,21 +4008,27 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {        isa<ConstantSDNode>(N0.getOperand(1)) &&        N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&        N0.hasOneUse()) { +    SDValue ShAmt = N0.getOperand(1); +    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();      if (N0.getOpcode() == ISD::SHL) { +      SDValue InnerZExt = N0.getOperand(0);        // If the original shl may be shifting out bits, do not perform this        // transformation. -      unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); -      unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() - -        N0.getOperand(0).getOperand(0).getValueType().getSizeInBits(); -      if (ShAmt > KnownZeroBits) +      unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() - +        InnerZExt.getOperand(0).getValueType().getSizeInBits(); +      if (ShAmtVal > KnownZeroBits)          return SDValue();      } -    DebugLoc dl = N->getDebugLoc(); -    return DAG.getNode(N0.getOpcode(), dl, VT, -                       DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), -                       DAG.getNode(ISD::ZERO_EXTEND, dl, -                                   N0.getOperand(1).getValueType(), -                                   N0.getOperand(1))); + +    DebugLoc DL = N->getDebugLoc(); +     +    // Ensure that the shift amount is wide enough for the shifted value.  
+    if (VT.getSizeInBits() >= 256) +      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); +     +    return DAG.getNode(N0.getOpcode(), DL, VT, +                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)), +                       ShAmt);    }    return SDValue(); @@ -3879,7 +4088,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {        X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);      }      APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); -    Mask.zext(VT.getSizeInBits()); +    Mask = Mask.zext(VT.getSizeInBits());      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,                         X, DAG.getConstant(Mask, VT));    } @@ -3894,10 +4103,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {        DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);      if (DoXform) {        LoadSDNode *LN0 = cast<LoadSDNode>(N0); -      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), +      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,                                         LN0->getChain(), -                                       LN0->getBasePtr(), LN0->getSrcValue(), -                                       LN0->getSrcValueOffset(), +                                       LN0->getBasePtr(), LN0->getPointerInfo(),                                         N0.getValueType(),                                         LN0->isVolatile(), LN0->isNonTemporal(),                                         LN0->getAlignment()); @@ -3938,11 +4146,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {        N0.hasOneUse()) {      LoadSDNode *LN0 = cast<LoadSDNode>(N0);      EVT MemVT = LN0->getMemoryVT(); -    SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT, -                                     N->getDebugLoc(), -                                     LN0->getChain(), LN0->getBasePtr(), -                                     
LN0->getSrcValue(), -                                     LN0->getSrcValueOffset(), MemVT, +    SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), +                                     VT, LN0->getChain(), LN0->getBasePtr(), +                                     LN0->getPointerInfo(), MemVT,                                       LN0->isVolatile(), LN0->isNonTemporal(),                                       LN0->getAlignment());      CombineTo(N, ExtLoad); @@ -4053,11 +4259,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {    if (Opc == ISD::SIGN_EXTEND_INREG) {      ExtType = ISD::SEXTLOAD;      ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT(); -    if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT)) -      return SDValue();    } else if (Opc == ISD::SRL) { -    // Annother special-case: SRL is basically zero-extending a narrower -    // value. +    // Another special-case: SRL is basically zero-extending a narrower value.      ExtType = ISD::ZEXTLOAD;      N0 = SDValue(N, 0);      ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); @@ -4065,10 +4268,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {      ExtVT = EVT::getIntegerVT(*DAG.getContext(),                                VT.getSizeInBits() - N01->getZExtValue());    } +  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT)) +    return SDValue();    unsigned EVTBits = ExtVT.getSizeInBits(); +   +  // Do not generate loads of non-round integer types since these can +  // be expensive (and would be wrong if the type is not byte sized). +  if (!ExtVT.isRound()) +    return SDValue(); +      unsigned ShAmt = 0; -  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) { +  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {      if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {        ShAmt = N01->getZExtValue();        // Is the shift amount a multiple of size of VT? 
@@ -4078,52 +4289,88 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {          if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)            return SDValue();        } + +      // At this point, we must have a load or else we can't do the transform. +      if (!isa<LoadSDNode>(N0)) return SDValue(); +       +      // If the shift amount is larger than the input type then we're not +      // accessing any of the loaded bytes.  If the load was a zextload/extload +      // then the result of the shift+trunc is zero/undef (handled elsewhere). +      // If the load was a sextload then the result is a splat of the sign bit +      // of the extended byte.  This is not worth optimizing for. +      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits()) +        return SDValue();      }    } -  // Do not generate loads of non-round integer types since these can -  // be expensive (and would be wrong if the type is not byte sized). -  if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() && -      cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits && -      // Do not change the width of a volatile load. -      !cast<LoadSDNode>(N0)->isVolatile()) { -    LoadSDNode *LN0 = cast<LoadSDNode>(N0); -    EVT PtrType = N0.getOperand(1).getValueType(); - -    // For big endian targets, we need to adjust the offset to the pointer to -    // load the correct bytes. 
-    if (TLI.isBigEndian()) { -      unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); -      unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); -      ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; -    } - -    uint64_t PtrOff =  ShAmt / 8; -    unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); -    SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), -                                 PtrType, LN0->getBasePtr(), -                                 DAG.getConstant(PtrOff, PtrType)); -    AddToWorkList(NewPtr.getNode()); - -    SDValue Load = (ExtType == ISD::NON_EXTLOAD) -      ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, -                    LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, -                    LN0->isVolatile(), LN0->isNonTemporal(), NewAlign) -      : DAG.getExtLoad(ExtType, VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, -                       LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, -                       ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), -                       NewAlign); - -    // Replace the old load's chain with the new load's chain. -    WorkListRemover DeadNodes(*this); -    DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), -                                  &DeadNodes); +  // If the load is shifted left (and the result isn't shifted back right), +  // we can fold the truncate through the shift. +  unsigned ShLeftAmt = 0; +  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() && +      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) { +    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { +      ShLeftAmt = N01->getZExtValue(); +      N0 = N0.getOperand(0); +    } +  } +   +  // If we haven't found a load, we can't narrow it.  Don't transform one with +  // multiple uses, this would require adding a new load. 
+  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() || +      // Don't change the width of a volatile load. +      cast<LoadSDNode>(N0)->isVolatile()) +    return SDValue(); +   +  // Verify that we are actually reducing a load width here. +  if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits) +    return SDValue(); +   +  LoadSDNode *LN0 = cast<LoadSDNode>(N0); +  EVT PtrType = N0.getOperand(1).getValueType(); + +  // For big endian targets, we need to adjust the offset to the pointer to +  // load the correct bytes. +  if (TLI.isBigEndian()) { +    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); +    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); +    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; +  } + +  uint64_t PtrOff = ShAmt / 8; +  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); +  SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), +                               PtrType, LN0->getBasePtr(), +                               DAG.getConstant(PtrOff, PtrType)); +  AddToWorkList(NewPtr.getNode()); + +  SDValue Load; +  if (ExtType == ISD::NON_EXTLOAD) +    Load =  DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, +                        LN0->getPointerInfo().getWithOffset(PtrOff), +                        LN0->isVolatile(), LN0->isNonTemporal(), NewAlign); +  else +    Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr, +                          LN0->getPointerInfo().getWithOffset(PtrOff), +                          ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), +                          NewAlign); + +  // Replace the old load's chain with the new load's chain. +  WorkListRemover DeadNodes(*this); +  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), +                                &DeadNodes); -    // Return the new loaded value. -    return Load; +  // Shift the result left, if we've swallowed a left shift. 
+  SDValue Result = Load; +  if (ShLeftAmt != 0) { +    EVT ShImmTy = getShiftAmountTy(); +    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) +      ShImmTy = VT; +    Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, +                         Result, DAG.getConstant(ShLeftAmt, ShImmTy));    } -  return SDValue(); +  // Return the new loaded value. +  return Result;  }  SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { @@ -4196,10 +4443,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {        ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||         TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {      LoadSDNode *LN0 = cast<LoadSDNode>(N0); -    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), +    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,                                       LN0->getChain(), -                                     LN0->getBasePtr(), LN0->getSrcValue(), -                                     LN0->getSrcValueOffset(), EVT, +                                     LN0->getBasePtr(), LN0->getPointerInfo(), +                                     EVT,                                       LN0->isVolatile(), LN0->isNonTemporal(),                                       LN0->getAlignment());      CombineTo(N, ExtLoad); @@ -4213,10 +4460,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {        ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||         TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {      LoadSDNode *LN0 = cast<LoadSDNode>(N0); -    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), +    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,                                       LN0->getChain(), -                                     LN0->getBasePtr(), LN0->getSrcValue(), -                                     LN0->getSrcValueOffset(), EVT, +                                     LN0->getBasePtr(), LN0->getPointerInfo(), 
+                                     EVT,                                       LN0->isVolatile(), LN0->isNonTemporal(),                                       LN0->getAlignment());      CombineTo(N, ExtLoad); @@ -4295,7 +4542,9 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {    LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));    LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); -  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) +  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || +      LD1->getPointerInfo().getAddrSpace() != +         LD2->getPointerInfo().getAddrSpace())      return SDValue();    EVT LD1VT = LD1->getValueType(0); @@ -4313,14 +4562,14 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {      if (NewAlign <= Align &&          (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))        return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), -                         LD1->getBasePtr(), LD1->getSrcValue(), -                         LD1->getSrcValueOffset(), false, false, Align); +                         LD1->getBasePtr(), LD1->getPointerInfo(), +                         false, false, Align);    }    return SDValue();  } -SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { +SDValue DAGCombiner::visitBITCAST(SDNode *N) {    SDValue N0 = N->getOperand(0);    EVT VT = N->getValueType(0); @@ -4344,12 +4593,12 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {      assert(!DestEltVT.isVector() &&             "Element type of vector ValueType must not be vector!");      if (isSimple) -      return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT); +      return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);    }    // If the input is a constant, let getNode fold it.    
if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { -    SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0); +    SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0);      if (Res.getNode() != N) {        if (!LegalOperations ||            TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) @@ -4365,8 +4614,8 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {    }    // (conv (conv x, t1), t2) -> (conv x, t2) -  if (N0.getOpcode() == ISD::BIT_CONVERT) -    return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, +  if (N0.getOpcode() == ISD::BITCAST) +    return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT,                         N0.getOperand(0));    // fold (conv (load x)) -> (load (conv*)x) @@ -4382,13 +4631,12 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {      if (Align <= OrigAlign) {        SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), -                                 LN0->getBasePtr(), -                                 LN0->getSrcValue(), LN0->getSrcValueOffset(), +                                 LN0->getBasePtr(), LN0->getPointerInfo(),                                   LN0->isVolatile(), LN0->isNonTemporal(),                                   OrigAlign);        AddToWorkList(N);        CombineTo(N0.getNode(), -                DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), +                DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),                              N0.getValueType(), Load),                  Load.getValue(1));        return Load; @@ -4400,7 +4648,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {    // This often reduces constant pool loads.    
if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&        N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { -    SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT, +    SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,                                    N0.getOperand(0));      AddToWorkList(NewConv.getNode()); @@ -4423,7 +4671,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {      unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();      EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);      if (isTypeLegal(IntXVT)) { -      SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), +      SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),                                IntXVT, N0.getOperand(1));        AddToWorkList(X.getNode()); @@ -4448,7 +4696,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {                        X, DAG.getConstant(SignBit, VT));        AddToWorkList(X.getNode()); -      SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), +      SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),                                  VT, N0.getOperand(0));        Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,                          Cst, DAG.getConstant(~SignBit, VT)); @@ -4473,11 +4721,11 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {    return CombineConsecutiveLoads(N, VT);  } -/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector +/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector  /// node with Constant, ConstantFP or Undef operands.  DstEltVT indicates the  /// destination element value type.  SDValue DAGCombiner:: -ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { +ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {    EVT SrcEltVT = BV->getValueType(0).getVectorElementType();    // If this is already the right type, we're done. 
@@ -4495,10 +4743,10 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {      // Due to the FP element handling below calling this routine recursively,      // we can end up with a scalar-to-vector node here.      if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) -      return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,  -                         DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(), +      return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, +                         DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),                                       DstEltVT, BV->getOperand(0))); -       +      SmallVector<SDValue, 8> Ops;      for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {        SDValue Op = BV->getOperand(i); @@ -4506,7 +4754,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {        // are promoted and implicitly truncated.  Make that explicit here.        if (Op.getValueType() != SrcEltVT)          Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op); -      Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(), +      Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),                                  DstEltVT, Op));        AddToWorkList(Ops.back().getNode());      } @@ -4522,7 +4770,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {      // same sizes.      
assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); -    BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode(); +    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();      SrcEltVT = IntVT;    } @@ -4531,10 +4779,10 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {    if (DstEltVT.isFloatingPoint()) {      assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");      EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); -    SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode(); +    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();      // Next, convert to FP elements of the same size. -    return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT); +    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);    }    // Okay, we know the src/dst types are both integers of differing types. @@ -4556,7 +4804,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {          if (Op.getOpcode() == ISD::UNDEF) continue;          EltIsUndef = false; -        NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()). +        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().                     
zextOrTrunc(SrcBitSize).zext(DstBitSize);        } @@ -4586,13 +4834,13 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {        continue;      } -    APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))-> -                        getAPIntValue()).zextOrTrunc(SrcBitSize); +    APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))-> +                  getAPIntValue().zextOrTrunc(SrcBitSize);      for (unsigned j = 0; j != NumOutputsPerInput; ++j) { -      APInt ThisVal = APInt(OpVal).trunc(DstBitSize); +      APInt ThisVal = OpVal.trunc(DstBitSize);        Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); -      if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal) +      if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)          // Simply turn this into a SCALAR_TO_VECTOR of the new type.          return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,                             Ops[0]); @@ -4984,10 +5232,9 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {        ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||         TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {      LoadSDNode *LN0 = cast<LoadSDNode>(N0); -    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), +    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,                                       LN0->getChain(), -                                     LN0->getBasePtr(), LN0->getSrcValue(), -                                     LN0->getSrcValueOffset(), +                                     LN0->getBasePtr(), LN0->getPointerInfo(),                                       N0.getValueType(),                                       LN0->isVolatile(), LN0->isNonTemporal(),                                       LN0->getAlignment()); @@ -5011,7 +5258,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {    // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading    // 
constant pool values. -  if (N0.getOpcode() == ISD::BIT_CONVERT &&  +  if (N0.getOpcode() == ISD::BITCAST &&        !VT.isVector() &&        N0.getNode()->hasOneUse() &&        N0.getOperand(0).getValueType().isInteger()) { @@ -5021,7 +5268,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {        Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,                DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));        AddToWorkList(Int.getNode()); -      return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), +      return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),                           VT, Int);      }    } @@ -5047,7 +5294,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {    // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading    // constant pool values. -  if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() && +  if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&        N0.getOperand(0).getValueType().isInteger() &&        !N0.getOperand(0).getValueType().isVector()) {      SDValue Int = N0.getOperand(0); @@ -5056,7 +5303,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {        Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,               DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));        AddToWorkList(Int.getNode()); -      return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), +      return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),                           N->getValueType(0), Int);      }    } @@ -5084,14 +5331,17 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {                         N1.getOperand(0), N1.getOperand(1), N2);    } -  SDNode *Trunc = 0; -  if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) { -    // Look past truncate. 
-    Trunc = N1.getNode(); -    N1 = N1.getOperand(0); -  } +  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) || +      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) && +       (N1.getOperand(0).hasOneUse() && +        N1.getOperand(0).getOpcode() == ISD::SRL))) { +    SDNode *Trunc = 0; +    if (N1.getOpcode() == ISD::TRUNCATE) { +      // Look pass the truncate. +      Trunc = N1.getNode(); +      N1 = N1.getOperand(0); +    } -  if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {      // Match this pattern so that we can generate simpler code:      //      //   %a = ... @@ -5100,7 +5350,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {      //   brcond i32 %c ...      //      // into -    //  +    //      //   %a = ...      //   %b = and i32 %a, 2      //   %c = setcc eq %b, 0 @@ -5146,8 +5396,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {          }        }      } + +    if (Trunc) +      // Restore N1 if the above transformation doesn't match. +      N1 = N->getOperand(1);    } -   +    // Transform br(xor(x, y)) -> br(x != y)    // Transform br(xor(xor(x,y), 1)) -> br (x == y)    if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { @@ -5181,9 +5435,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {            Equal = true;          } -      SDValue NodeToReplace = Trunc ? SDValue(Trunc, 0) : N1; -       -      EVT SetCCVT = NodeToReplace.getValueType(); +      EVT SetCCVT = N1.getValueType();        if (LegalTypes)          SetCCVT = TLI.getSetCCResultType(SetCCVT);        SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(), @@ -5192,9 +5444,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {                                     Equal ? 
ISD::SETEQ : ISD::SETNE);        // Replace the uses of XOR with SETCC        WorkListRemover DeadNodes(*this); -      DAG.ReplaceAllUsesOfValueWith(NodeToReplace, SetCC, &DeadNodes); -      removeFromWorkList(NodeToReplace.getNode()); -      DAG.DeleteNode(NodeToReplace.getNode()); +      DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); +      removeFromWorkList(N1.getNode()); +      DAG.DeleteNode(N1.getNode());        return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),                           MVT::Other, Chain, SetCC, N2);      } @@ -5568,10 +5820,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {    if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {      if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {        if (Align > LD->getAlignment()) -        return DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), -                              N->getDebugLoc(), -                              Chain, Ptr, LD->getSrcValue(), -                              LD->getSrcValueOffset(), LD->getMemoryVT(), +        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), +                              LD->getValueType(0), +                              Chain, Ptr, LD->getPointerInfo(), +                              LD->getMemoryVT(),                                LD->isVolatile(), LD->isNonTemporal(), Align);      }    } @@ -5587,15 +5839,13 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {        // Replace the chain to void dependency.        
if (LD->getExtensionType() == ISD::NON_EXTLOAD) {          ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), -                               BetterChain, Ptr, -                               LD->getSrcValue(), LD->getSrcValueOffset(), +                               BetterChain, Ptr, LD->getPointerInfo(),                                 LD->isVolatile(), LD->isNonTemporal(),                                 LD->getAlignment());        } else { -        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), -                                  LD->getDebugLoc(), -                                  BetterChain, Ptr, LD->getSrcValue(), -                                  LD->getSrcValueOffset(), +        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), +                                  LD->getValueType(0), +                                  BetterChain, Ptr, LD->getPointerInfo(),                                    LD->getMemoryVT(),                                    LD->isVolatile(),                                    LD->isNonTemporal(), @@ -5605,10 +5855,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {        // Create token factor to keep old chain connected.        SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),                                    MVT::Other, Chain, ReplLoad.getValue(1)); -       +        // Make sure the new and old chains are cleaned up.        AddToWorkList(Token.getNode()); -       +        // Replace uses with load result and token factor. Don't add users        // to work list.        return CombineTo(N, ReplLoad.getValue(0), Token, false); @@ -5628,17 +5878,17 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {  static std::pair<unsigned, unsigned>  CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {    std::pair<unsigned, unsigned> Result(0, 0); -   +    // Check for the structure we're looking for.    
if (V->getOpcode() != ISD::AND ||        !isa<ConstantSDNode>(V->getOperand(1)) ||        !ISD::isNormalLoad(V->getOperand(0).getNode()))      return Result; -   +    // Check the chain and pointer.    LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));    if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer. -   +    // The store should be chained directly to the load or be an operand of a    // tokenfactor.    if (LD == Chain.getNode()) @@ -5654,7 +5904,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {        }      if (!isOk) return Result;    } -   +    // This only handles simple types.    if (V.getValueType() != MVT::i16 &&        V.getValueType() != MVT::i32 && @@ -5670,7 +5920,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {    unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);    if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.    if (NotMaskLZ == 64) return Result;  // All zero mask. -   +    // See if we have a continuous run of bits.  If so, we have 0*1+0*    if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)      return Result; @@ -5678,19 +5928,19 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {    // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.    if (V.getValueType() != MVT::i64 && NotMaskLZ)      NotMaskLZ -= 64-V.getValueSizeInBits(); -   +    unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;    switch (MaskedBytes) { -  case 1:  -  case 2:  +  case 1: +  case 2:    case 4: break;    default: return Result; // All one mask, or 5-byte mask.    } -   +    // Verify that the first bit starts at a multiple of mask so that the access    // is aligned the same as the access width.    
if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; -   +    Result.first = MaskedBytes;    Result.second = NotMaskTZ/8;    return Result; @@ -5707,20 +5957,20 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,    unsigned NumBytes = MaskInfo.first;    unsigned ByteShift = MaskInfo.second;    SelectionDAG &DAG = DC->getDAG(); -   +    // Check to see if IVal is all zeros in the part being masked in by the 'or'    // that uses this.  If not, this is not a replacement.    APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),                                    ByteShift*8, (ByteShift+NumBytes)*8);    if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0; -   +    // Check that it is legal on the target to do this.  It is legal if the new    // VT we're shrinking to (i8/i16/i32) is legal or we're still before type    // legalization.    MVT VT = MVT::getIntegerVT(NumBytes*8);    if (!DC->isTypeLegal(VT))      return 0; -   +    // Okay, we can do this!  Replace the 'St' store with a store of IVal that is    // shifted by ByteShift and truncated down to NumBytes.    if (ByteShift) @@ -5735,20 +5985,20 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,      StOffset = ByteShift;    else      StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; -   +    SDValue Ptr = St->getBasePtr();    if (StOffset) {      Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),                        Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));      NewAlign = MinAlign(NewAlign, StOffset);    } -   +    // Truncate down to the new size.    
IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal); -   +    ++OpsNarrowed; -  return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,  -                      St->getSrcValue(), St->getSrcValueOffset()+StOffset, +  return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, +                      St->getPointerInfo().getWithOffset(StOffset),                        false, false, NewAlign).getNode();  } @@ -5771,7 +6021,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {      return SDValue();    unsigned Opc = Value.getOpcode(); -   +    // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst    // is a byte mask indicating a consecutive number of bytes, check to see if    // Y is known to provide just those bytes.  If so, we try to replace the @@ -5784,7 +6034,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {        if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,                                                    Value.getOperand(1), ST,this))          return SDValue(NewST, 0); -                                            +      // Or is commutative, so try swapping X and Y.      
MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);      if (MaskedLoad.first) @@ -5792,7 +6042,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {                                                    Value.getOperand(0), ST,this))          return SDValue(NewST, 0);    } -   +    if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||        Value.getOperand(1).getOpcode() != ISD::Constant)      return SDValue(); @@ -5801,7 +6051,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {    if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&        Chain == SDValue(N0.getNode(), 1)) {      LoadSDNode *LD = cast<LoadSDNode>(N0); -    if (LD->getBasePtr() != Ptr) +    if (LD->getBasePtr() != Ptr || +        LD->getPointerInfo().getAddrSpace() != +        ST->getPointerInfo().getAddrSpace())        return SDValue();      // Find the type to narrow it the load / op / store to. @@ -5850,14 +6102,14 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {                                     DAG.getConstant(PtrOff, Ptr.getValueType()));        SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),                                    LD->getChain(), NewPtr, -                                  LD->getSrcValue(), LD->getSrcValueOffset(), +                                  LD->getPointerInfo().getWithOffset(PtrOff),                                    LD->isVolatile(), LD->isNonTemporal(),                                    NewAlign);        SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,                                     DAG.getConstant(NewImm, NewVT));        SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),                                     NewVal, NewPtr, -                                   ST->getSrcValue(), ST->getSrcValueOffset(), +                                   ST->getPointerInfo().getWithOffset(PtrOff),                                     false, false, NewAlign);        
AddToWorkList(NewPtr.getNode()); @@ -5874,6 +6126,63 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {    return SDValue();  } +/// TransformFPLoadStorePair - For a given floating point load / store pair, +/// if the load value isn't used by any other operations, then consider +/// transforming the pair to integer load / store operations if the target +/// deems the transformation profitable. +SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { +  StoreSDNode *ST  = cast<StoreSDNode>(N); +  SDValue Chain = ST->getChain(); +  SDValue Value = ST->getValue(); +  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && +      Value.hasOneUse() && +      Chain == SDValue(Value.getNode(), 1)) { +    LoadSDNode *LD = cast<LoadSDNode>(Value); +    EVT VT = LD->getMemoryVT(); +    if (!VT.isFloatingPoint() || +        VT != ST->getMemoryVT() || +        LD->isNonTemporal() || +        ST->isNonTemporal() || +        LD->getPointerInfo().getAddrSpace() != 0 || +        ST->getPointerInfo().getAddrSpace() != 0) +      return SDValue(); + +    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); +    if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || +        !TLI.isOperationLegal(ISD::STORE, IntVT) || +        !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || +        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) +      return SDValue(); + +    unsigned LDAlign = LD->getAlignment(); +    unsigned STAlign = ST->getAlignment(); +    const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); +    unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy); +    if (LDAlign < ABIAlign || STAlign < ABIAlign) +      return SDValue(); + +    SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(), +                                LD->getChain(), LD->getBasePtr(), +                                LD->getPointerInfo(), +                                false, false, LDAlign); + +    SDValue NewST = 
DAG.getStore(NewLD.getValue(1), N->getDebugLoc(), +                                 NewLD, ST->getBasePtr(), +                                 ST->getPointerInfo(), +                                 false, false, STAlign); + +    AddToWorkList(NewLD.getNode()); +    AddToWorkList(NewST.getNode()); +    WorkListRemover DeadNodes(*this); +    DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1), +                                  &DeadNodes); +    ++LdStFP2Int; +    return NewST; +  } + +  return SDValue(); +} +  SDValue DAGCombiner::visitSTORE(SDNode *N) {    StoreSDNode *ST  = cast<StoreSDNode>(N);    SDValue Chain = ST->getChain(); @@ -5882,7 +6191,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {    // If this is a store of a bit convert, store the input value if the    // resultant store does not need a higher alignment than the original. -  if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() && +  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&        ST->isUnindexed()) {      unsigned OrigAlign = ST->getAlignment();      EVT SVT = Value.getOperand(0).getValueType(); @@ -5892,8 +6201,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {          ((!LegalOperations && !ST->isVolatile()) ||           TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))        return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), -                          Ptr, ST->getSrcValue(), -                          ST->getSrcValueOffset(), ST->isVolatile(), +                          Ptr, ST->getPointerInfo(), ST->isVolatile(),                            ST->isNonTemporal(), OrigAlign);    } @@ -5917,8 +6225,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {            Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().                                
bitcastToAPInt().getZExtValue(), MVT::i32);            return DAG.getStore(Chain, N->getDebugLoc(), Tmp, -                              Ptr, ST->getSrcValue(), -                              ST->getSrcValueOffset(), ST->isVolatile(), +                              Ptr, ST->getPointerInfo(), ST->isVolatile(),                                ST->isNonTemporal(), ST->getAlignment());          }          break; @@ -5929,8 +6236,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {            Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().                                  getZExtValue(), MVT::i64);            return DAG.getStore(Chain, N->getDebugLoc(), Tmp, -                              Ptr, ST->getSrcValue(), -                              ST->getSrcValueOffset(), ST->isVolatile(), +                              Ptr, ST->getPointerInfo(), ST->isVolatile(),                                ST->isNonTemporal(), ST->getAlignment());          } else if (!ST->isVolatile() &&                     TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { @@ -5942,23 +6248,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {            SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);            if (TLI.isBigEndian()) std::swap(Lo, Hi); -          int SVOffset = ST->getSrcValueOffset();            unsigned Alignment = ST->getAlignment();            bool isVolatile = ST->isVolatile();            bool isNonTemporal = ST->isNonTemporal();            SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, -                                     Ptr, ST->getSrcValue(), -                                     ST->getSrcValueOffset(), +                                     Ptr, ST->getPointerInfo(),                                       isVolatile, isNonTemporal,                                       ST->getAlignment());            Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,                              DAG.getConstant(4, Ptr.getValueType())); -          
SVOffset += 4;            Alignment = MinAlign(Alignment, 4U);            SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, -                                     Ptr, ST->getSrcValue(), -                                     SVOffset, isVolatile, isNonTemporal, +                                     Ptr, ST->getPointerInfo().getWithOffset(4), +                                     isVolatile, isNonTemporal,                                       Alignment);            return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,                               St0, St1); @@ -5974,12 +6277,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {      if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {        if (Align > ST->getAlignment())          return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, -                                 Ptr, ST->getSrcValue(), -                                 ST->getSrcValueOffset(), ST->getMemoryVT(), +                                 Ptr, ST->getPointerInfo(), ST->getMemoryVT(),                                   ST->isVolatile(), ST->isNonTemporal(), Align);      }    } +  // Try transforming a pair floating point load / store ops to integer +  // load / store ops. +  SDValue NewST = TransformFPLoadStorePair(N); +  if (NewST.getNode()) +    return NewST; +    if (CombinerAA) {      // Walk up chain skipping non-aliasing memory nodes.      SDValue BetterChain = FindBetterChain(N, Chain); @@ -5991,12 +6299,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {        // Replace the chain to avoid dependency.        
if (ST->isTruncatingStore()) {          ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, -                                      ST->getSrcValue(),ST->getSrcValueOffset(), +                                      ST->getPointerInfo(),                                        ST->getMemoryVT(), ST->isVolatile(),                                        ST->isNonTemporal(), ST->getAlignment());        } else {          ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, -                                 ST->getSrcValue(), ST->getSrcValueOffset(), +                                 ST->getPointerInfo(),                                   ST->isVolatile(), ST->isNonTemporal(),                                   ST->getAlignment());        } @@ -6030,17 +6338,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {      AddToWorkList(Value.getNode());      if (Shorter.getNode())        return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, -                               Ptr, ST->getSrcValue(), -                               ST->getSrcValueOffset(), ST->getMemoryVT(), +                               Ptr, ST->getPointerInfo(), ST->getMemoryVT(),                                 ST->isVolatile(), ST->isNonTemporal(),                                 ST->getAlignment());      // Otherwise, see if we can simplify the operation with      // SimplifyDemandedBits, which only works if the value has a single use.      
if (SimplifyDemandedBits(Value, -                             APInt::getLowBitsSet( -                               Value.getValueType().getScalarType().getSizeInBits(), -                               ST->getMemoryVT().getScalarType().getSizeInBits()))) +                        APInt::getLowBitsSet( +                          Value.getValueType().getScalarType().getSizeInBits(), +                          ST->getMemoryVT().getScalarType().getSizeInBits())))        return SDValue(N, 0);    } @@ -6064,8 +6371,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {        TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),                              ST->getMemoryVT())) {      return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), -                             Ptr, ST->getSrcValue(), -                             ST->getSrcValueOffset(), ST->getMemoryVT(), +                             Ptr, ST->getPointerInfo(), ST->getMemoryVT(),                               ST->isVolatile(), ST->isNonTemporal(),                               ST->getAlignment());    } @@ -6082,6 +6388,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {    if (InVal.getOpcode() == ISD::UNDEF)      return InVec; +  EVT VT = InVec.getValueType(); + +  // If we can't generate a legal BUILD_VECTOR, exit  +  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) +    return SDValue(); +    // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new    // vector with the inserted element.    
if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) { @@ -6091,13 +6403,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {      if (Elt < Ops.size())        Ops[Elt] = InVal;      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), -                       InVec.getValueType(), &Ops[0], Ops.size()); +                       VT, &Ops[0], Ops.size());    } -  // If the invec is an UNDEF and if EltNo is a constant, create a new  +  // If the invec is an UNDEF and if EltNo is a constant, create a new    // BUILD_VECTOR with undef elements and the inserted element. -  if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF &&  +  if (InVec.getOpcode() == ISD::UNDEF &&        isa<ConstantSDNode>(EltNo)) { -    EVT VT = InVec.getValueType();      EVT EltVT = VT.getVectorElementType();      unsigned NElts = VT.getVectorNumElements();      SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT)); @@ -6106,7 +6417,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {      if (Elt < Ops.size())        Ops[Elt] = InVal;      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), -                       InVec.getValueType(), &Ops[0], Ops.size()); +                       VT, &Ops[0], Ops.size());    }    return SDValue();  } @@ -6138,14 +6449,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {    SDValue EltNo = N->getOperand(1);    if (isa<ConstantSDNode>(EltNo)) { -    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); +    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();      bool NewLoad = false;      bool BCNumEltsChanged = false;      EVT VT = InVec.getValueType();      EVT ExtVT = VT.getVectorElementType();      EVT LVT = ExtVT; -    if (InVec.getOpcode() == ISD::BIT_CONVERT) { +    if (InVec.getOpcode() == ISD::BITCAST) {        EVT BCVT = InVec.getOperand(0).getValueType();        if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))          return SDValue(); @@ -6176,10 +6487,10 @@ 
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {        // Select the input vector, guarding against out of range extract vector.        unsigned NumElems = VT.getVectorNumElements(); -      int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt); +      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);        InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); -      if (InVec.getOpcode() == ISD::BIT_CONVERT) +      if (InVec.getOpcode() == ISD::BITCAST)          InVec = InVec.getOperand(0);        if (ISD::isNormalLoad(InVec.getNode())) {          LN0 = cast<LoadSDNode>(InVec); @@ -6190,12 +6501,17 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {      if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())        return SDValue(); +    // If Idx was -1 above, Elt is going to be -1, so just return undef. +    if (Elt == -1) +      return DAG.getUNDEF(LN0->getBasePtr().getValueType()); +      unsigned Align = LN0->getAlignment();      if (NewLoad) {        // Check the resultant load doesn't need a higher alignment than the        // original load.        
unsigned NewAlign = -        TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); +        TLI.getTargetData() +            ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));        if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))          return SDValue(); @@ -6204,8 +6520,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {      }      SDValue NewPtr = LN0->getBasePtr(); +    unsigned PtrOff = 0; +      if (Elt) { -      unsigned PtrOff = LVT.getSizeInBits() * Elt / 8; +      PtrOff = LVT.getSizeInBits() * Elt / 8;        EVT PtrType = NewPtr.getValueType();        if (TLI.isBigEndian())          PtrOff = VT.getSizeInBits() / 8 - PtrOff; @@ -6214,7 +6532,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {      }      return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, -                       LN0->getSrcValue(), LN0->getSrcValueOffset(), +                       LN0->getPointerInfo().getWithOffset(PtrOff),                         LN0->isVolatile(), LN0->isNonTemporal(), Align);    } @@ -6280,7 +6598,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {          unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();          if (ExtIndex > VT.getVectorNumElements())            return SDValue(); -         +          Mask.push_back(ExtIndex);          continue;        } @@ -6328,15 +6646,16 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {    // FIXME: implement canonicalizations from DAG.getVectorShuffle() -  // If it is a splat, check if the argument vector is a build_vector with -  // all scalar elements the same. -  if (cast<ShuffleVectorSDNode>(N)->isSplat()) { +  // If it is a splat, check if the argument vector is another splat or a +  // build_vector with all scalar elements the same. 
+  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); +  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {      SDNode *V = N0.getNode();      // If this is a bit convert that changes the element type of the vector but      // not the number of vector elements, look through it.  Be careful not to      // look though conversions that change things like v4f32 to v2f64. -    if (V->getOpcode() == ISD::BIT_CONVERT) { +    if (V->getOpcode() == ISD::BITCAST) {        SDValue ConvInput = V->getOperand(0);        if (ConvInput.getValueType().isVector() &&            ConvInput.getValueType().getVectorNumElements() == NumElts) @@ -6344,30 +6663,28 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {      }      if (V->getOpcode() == ISD::BUILD_VECTOR) { -      unsigned NumElems = V->getNumOperands(); -      unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex(); -      if (NumElems > BaseIdx) { -        SDValue Base; -        bool AllSame = true; -        for (unsigned i = 0; i != NumElems; ++i) { -          if (V->getOperand(i).getOpcode() != ISD::UNDEF) { -            Base = V->getOperand(i); -            break; -          } +      assert(V->getNumOperands() == NumElts && +             "BUILD_VECTOR has wrong number of operands"); +      SDValue Base; +      bool AllSame = true; +      for (unsigned i = 0; i != NumElts; ++i) { +        if (V->getOperand(i).getOpcode() != ISD::UNDEF) { +          Base = V->getOperand(i); +          break;          } -        // Splat of <u, u, u, u>, return <u, u, u, u> -        if (!Base.getNode()) -          return N0; -        for (unsigned i = 0; i != NumElems; ++i) { -          if (V->getOperand(i) != Base) { -            AllSame = false; -            break; -          } +      } +      // Splat of <u, u, u, u>, return <u, u, u, u> +      if (!Base.getNode()) +        return N0; +      for (unsigned i = 0; i != NumElts; ++i) { +        if (V->getOperand(i) != Base) { +          AllSame = false; +         
 break;          } -        // Splat of <x, x, x, x>, return <x, x, x, x> -        if (AllSame) -          return N0;        } +      // Splat of <x, x, x, x>, return <x, x, x, x> +      if (AllSame) +        return N0;      }    }    return SDValue(); @@ -6436,7 +6753,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {    SDValue LHS = N->getOperand(0);    SDValue RHS = N->getOperand(1);    if (N->getOpcode() == ISD::AND) { -    if (RHS.getOpcode() == ISD::BIT_CONVERT) +    if (RHS.getOpcode() == ISD::BITCAST)        RHS = RHS.getOperand(0);      if (RHS.getOpcode() == ISD::BUILD_VECTOR) {        SmallVector<int, 8> Indices; @@ -6464,9 +6781,9 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {                                       DAG.getConstant(0, EltVT));        SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),                                   RVT, &ZeroOps[0], ZeroOps.size()); -      LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS); +      LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);        SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); -      return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf); +      return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);      }    } @@ -6480,10 +6797,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {    // things. Simplifying them may result in a loss of legality.    
if (LegalOperations) return SDValue(); -  EVT VT = N->getValueType(0); -  assert(VT.isVector() && "SimplifyVBinOp only works on vectors!"); +  assert(N->getValueType(0).isVector() && +         "SimplifyVBinOp only works on vectors!"); -  EVT EltType = VT.getVectorElementType();    SDValue LHS = N->getOperand(0);    SDValue RHS = N->getOperand(1);    SDValue Shuffle = XformToShuffleWithZero(N); @@ -6516,14 +6832,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {            break;        } -      // If the vector element type is not legal, the BUILD_VECTOR operands -      // are promoted and implicitly truncated.  Make that explicit here. -      if (LHSOp.getValueType() != EltType) -        LHSOp = DAG.getNode(ISD::TRUNCATE, LHS.getDebugLoc(), EltType, LHSOp); -      if (RHSOp.getValueType() != EltType) -        RHSOp = DAG.getNode(ISD::TRUNCATE, RHS.getDebugLoc(), EltType, RHSOp); - -      SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), EltType, +      EVT VT = LHSOp.getValueType(); +      assert(RHSOp.getValueType() == VT && +             "SimplifyVBinOp with different BUILD_VECTOR element types"); +      SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,                                     LHSOp, RHSOp);        if (FoldOp.getOpcode() != ISD::UNDEF &&            FoldOp.getOpcode() != ISD::Constant && @@ -6533,11 +6845,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {        AddToWorkList(FoldOp.getNode());      } -    if (Ops.size() == LHS.getNumOperands()) { -      EVT VT = LHS.getValueType(); -      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, -                         &Ops[0], Ops.size()); -    } +    if (Ops.size() == LHS.getNumOperands()) +      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), +                         LHS.getValueType(), &Ops[0], Ops.size());    }    return SDValue(); @@ -6580,103 +6890,101 @@ SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,  bool 
DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,                                      SDValue RHS) { +  // Cannot simplify select with vector condition +  if (TheSelect->getOperand(0).getValueType().isVector()) return false; +    // If this is a select from two identical things, try to pull the operation    // through the select. -  if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){ -    // If this is a load and the token chain is identical, replace the select -    // of two loads with a load through a select of the address to load from. -    // This triggers in things like "select bool X, 10.0, 123.0" after the FP -    // constants have been dropped into the constant pool. -    if (LHS.getOpcode() == ISD::LOAD && +  if (LHS.getOpcode() != RHS.getOpcode() || +      !LHS.hasOneUse() || !RHS.hasOneUse()) +    return false; + +  // If this is a load and the token chain is identical, replace the select +  // of two loads with a load through a select of the address to load from. +  // This triggers in things like "select bool X, 10.0, 123.0" after the FP +  // constants have been dropped into the constant pool. +  if (LHS.getOpcode() == ISD::LOAD) { +    LoadSDNode *LLD = cast<LoadSDNode>(LHS); +    LoadSDNode *RLD = cast<LoadSDNode>(RHS); + +    // Token chains must be identical. +    if (LHS.getOperand(0) != RHS.getOperand(0) ||          // Do not let this transformation reduce the number of volatile loads. -        !cast<LoadSDNode>(LHS)->isVolatile() && -        !cast<LoadSDNode>(RHS)->isVolatile() && -        // Token chains must be identical. -        LHS.getOperand(0) == RHS.getOperand(0)) { -      LoadSDNode *LLD = cast<LoadSDNode>(LHS); -      LoadSDNode *RLD = cast<LoadSDNode>(RHS); - -      // If this is an EXTLOAD, the VT's must match. -      if (LLD->getMemoryVT() == RLD->getMemoryVT()) { +        LLD->isVolatile() || RLD->isVolatile() || +        // If this is an EXTLOAD, the VT's must match. 
+        LLD->getMemoryVT() != RLD->getMemoryVT() || +        // If this is an EXTLOAD, the kind of extension must match. +        (LLD->getExtensionType() != RLD->getExtensionType() && +         // The only exception is if one of the extensions is anyext. +         LLD->getExtensionType() != ISD::EXTLOAD && +         RLD->getExtensionType() != ISD::EXTLOAD) ||          // FIXME: this discards src value information.  This is          // over-conservative. It would be beneficial to be able to remember          // both potential memory locations.  Since we are discarding          // src value info, don't do the transformation if the memory          // locations are not in the default address space. -        unsigned LLDAddrSpace = 0, RLDAddrSpace = 0; -        if (const Value *LLDVal = LLD->getMemOperand()->getValue()) { -          if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType())) -            LLDAddrSpace = PT->getAddressSpace(); -        } -        if (const Value *RLDVal = RLD->getMemOperand()->getValue()) { -          if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType())) -            RLDAddrSpace = PT->getAddressSpace(); -        } -        SDValue Addr; -        if (LLDAddrSpace == 0 && RLDAddrSpace == 0) { -          if (TheSelect->getOpcode() == ISD::SELECT) { -            // Check that the condition doesn't reach either load.  If so, folding -            // this will induce a cycle into the DAG. 
-            if ((!LLD->hasAnyUseOfValue(1) || -                 !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) && -                (!RLD->hasAnyUseOfValue(1) || -                 !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) { -              Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), -                                 LLD->getBasePtr().getValueType(), -                                 TheSelect->getOperand(0), LLD->getBasePtr(), -                                 RLD->getBasePtr()); -            } -          } else { -            // Check that the condition doesn't reach either load.  If so, folding -            // this will induce a cycle into the DAG. -            if ((!LLD->hasAnyUseOfValue(1) || -                 (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && -                  !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) && -                (!RLD->hasAnyUseOfValue(1) || -                 (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && -                  !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) { -              Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), -                                 LLD->getBasePtr().getValueType(), -                                 TheSelect->getOperand(0), -                                 TheSelect->getOperand(1), -                                 LLD->getBasePtr(), RLD->getBasePtr(), -                                 TheSelect->getOperand(4)); -            } -          } -        } - -        if (Addr.getNode()) { -          SDValue Load; -          if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { -            Load = DAG.getLoad(TheSelect->getValueType(0), -                               TheSelect->getDebugLoc(), -                               LLD->getChain(), -                               Addr, 0, 0, -                               LLD->isVolatile(), -                               LLD->isNonTemporal(), -       
                        LLD->getAlignment()); -          } else { -            Load = DAG.getExtLoad(LLD->getExtensionType(), -                                  TheSelect->getValueType(0), -                                  TheSelect->getDebugLoc(), -                                  LLD->getChain(), Addr, 0, 0, -                                  LLD->getMemoryVT(), -                                  LLD->isVolatile(), -                                  LLD->isNonTemporal(), -                                  LLD->getAlignment()); -          } +        LLD->getPointerInfo().getAddrSpace() != 0 || +        RLD->getPointerInfo().getAddrSpace() != 0) +      return false; -          // Users of the select now use the result of the load. -          CombineTo(TheSelect, Load); +    // Check that the select condition doesn't reach either load.  If so, +    // folding this will induce a cycle into the DAG.  If not, this is safe to +    // xform, so create a select of the addresses. +    SDValue Addr; +    if (TheSelect->getOpcode() == ISD::SELECT) { +      SDNode *CondNode = TheSelect->getOperand(0).getNode(); +      if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) || +          (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode))) +        return false; +      Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), +                         LLD->getBasePtr().getValueType(), +                         TheSelect->getOperand(0), LLD->getBasePtr(), +                         RLD->getBasePtr()); +    } else {  // Otherwise SELECT_CC +      SDNode *CondLHS = TheSelect->getOperand(0).getNode(); +      SDNode *CondRHS = TheSelect->getOperand(1).getNode(); + +      if ((LLD->hasAnyUseOfValue(1) && +           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) || +          (LLD->hasAnyUseOfValue(1) && +           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS)))) +        return false; -          // Users of the old loads now 
use the new load's chain.  We know the -          // old-load value is dead now. -          CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); -          CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); -          return true; -        } -      } -    } +      Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), +                         LLD->getBasePtr().getValueType(), +                         TheSelect->getOperand(0), +                         TheSelect->getOperand(1), +                         LLD->getBasePtr(), RLD->getBasePtr(), +                         TheSelect->getOperand(4)); +    } + +    SDValue Load; +    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { +      Load = DAG.getLoad(TheSelect->getValueType(0), +                         TheSelect->getDebugLoc(), +                         // FIXME: Discards pointer info. +                         LLD->getChain(), Addr, MachinePointerInfo(), +                         LLD->isVolatile(), LLD->isNonTemporal(), +                         LLD->getAlignment()); +    } else { +      Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? +                            RLD->getExtensionType() : LLD->getExtensionType(), +                            TheSelect->getDebugLoc(), +                            TheSelect->getValueType(0), +                            // FIXME: Discards pointer info. +                            LLD->getChain(), Addr, MachinePointerInfo(), +                            LLD->getMemoryVT(), LLD->isVolatile(), +                            LLD->isNonTemporal(), LLD->getAlignment()); +    } + +    // Users of the select now use the result of the load. +    CombineTo(TheSelect, Load); + +    // Users of the old loads now use the new load's chain.  We know the +    // old-load value is dead now. 
+    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); +    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); +    return true;    }    return false; @@ -6689,7 +6997,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,                                        ISD::CondCode CC, bool NotExtCompare) {    // (x ? y : y) -> y.    if (N2 == N3) return N2; -   +    EVT VT = N2.getValueType();    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());    ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); @@ -6725,7 +7033,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,          return DAG.getNode(ISD::FABS, DL, VT, N3);      }    } -   +    // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"    // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0    // in it.  This is a win when the constant is not otherwise available because @@ -6748,7 +7056,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,          };          const Type *FPTy = Elts[0]->getType();          const TargetData &TD = *TLI.getTargetData(); -         +          // Create a ConstantArray of the two constants.          
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);          SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(), @@ -6760,7 +7068,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,          SDValue Zero = DAG.getIntPtrConstant(0);          unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());          SDValue One = DAG.getIntPtrConstant(EltSize); -         +          SDValue Cond = DAG.getSetCC(DL,                                      TLI.getSetCCResultType(N0.getValueType()),                                      N0, N1, CC); @@ -6769,11 +7077,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,          CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,                              CstOffset);          return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, -                           PseudoSourceValue::getConstantPool(), 0, false, +                           MachinePointerInfo::getConstantPool(), false,                             false, Alignment);        } -    }   +    }    // Check to see if we can perform the "gzip trick", transforming    // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) @@ -6818,6 +7126,35 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,      }    } +  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) +  // where y is has a single bit set. +  // A plaintext description would be, we can turn the SELECT_CC into an AND +  // when the condition can be materialized as an all-ones register.  Any +  // single bit-test can be materialized as an all-ones register with +  // shift-left and shift-right-arith. 
+  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && +      N0->getValueType(0) == VT && +      N1C && N1C->isNullValue() && +      N2C && N2C->isNullValue()) { +    SDValue AndLHS = N0->getOperand(0); +    ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); +    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { +      // Shift the tested bit over the sign bit. +      APInt AndMask = ConstAndRHS->getAPIntValue(); +      SDValue ShlAmt = +        DAG.getConstant(AndMask.countLeadingZeros(), getShiftAmountTy()); +      SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt); + +      // Now arithmetic right shift it all the way over, so the result is either +      // all-ones, or zero. +      SDValue ShrAmt = +        DAG.getConstant(AndMask.getBitWidth()-1, getShiftAmountTy()); +      SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt); + +      return DAG.getNode(ISD::AND, DL, VT, Shr, N3); +    } +  } +    // fold select C, 16, 0 -> shl C, 4    if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&        TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) { @@ -6971,7 +7308,8 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {  }  /// FindBaseOffset - Return true if base is a frame index, which is known not -// to alias with anything but itself.  Provides base object and offset as results. +// to alias with anything but itself.  Provides base object and offset as +// results.  static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,                             const GlobalValue *&GV, void *&CV) {    // Assume it is a primitive operation. @@ -6984,7 +7322,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,        Offset += C->getZExtValue();      }    } -   +    // Return the underlying GlobalValue, and update the Offset.  
Return false    // for GlobalAddressSDNode since the same GlobalAddress may be represented    // by multiple nodes with different offsets. @@ -7012,9 +7350,11 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,  bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,                            const Value *SrcValue1, int SrcValueOffset1,                            unsigned SrcValueAlign1, +                          const MDNode *TBAAInfo1,                            SDValue Ptr2, int64_t Size2,                            const Value *SrcValue2, int SrcValueOffset2, -                          unsigned SrcValueAlign2) const { +                          unsigned SrcValueAlign2, +                          const MDNode *TBAAInfo2) const {    // If they are the same then they must be aliases.    if (Ptr1 == Ptr2) return true; @@ -7030,8 +7370,19 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,    if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))      return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); -  // If we know what the bases are, and they aren't identical, then we know they -  // cannot alias. +  // It is possible for different frame indices to alias each other, mostly +  // when tail call optimization reuses return address slots for arguments. +  // To catch this case, look up the actual index of frame indices to compute +  // the real alias relationship. +  if (isFrameIndex1 && isFrameIndex2) { +    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); +    Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); +    Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex()); +    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); +  } + +  // Otherwise, if we know what the bases are, and they aren't identical, then +  // we know they cannot alias.    
if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))      return false; @@ -7044,20 +7395,21 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,        (Size1 == Size2) && (SrcValueAlign1 > Size1)) {      int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;      int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1; -     +      // There is no overlap between these relatively aligned accesses of similar      // size, return no alias.      if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)        return false;    } -   +    if (CombinerGlobalAA) {      // Use alias analysis information.      int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);      int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;      int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;      AliasAnalysis::AliasResult AAResult = -                             AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2); +      AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1), +               AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));      if (AAResult == AliasAnalysis::NoAlias)        return false;    } @@ -7070,15 +7422,17 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,  /// node.  Returns true if the operand was a load.  
bool DAGCombiner::FindAliasInfo(SDNode *N,                          SDValue &Ptr, int64_t &Size, -                        const Value *&SrcValue,  +                        const Value *&SrcValue,                          int &SrcValueOffset, -                        unsigned &SrcValueAlign) const { +                        unsigned &SrcValueAlign, +                        const MDNode *&TBAAInfo) const {    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {      Ptr = LD->getBasePtr();      Size = LD->getMemoryVT().getSizeInBits() >> 3;      SrcValue = LD->getSrcValue();      SrcValueOffset = LD->getSrcValueOffset();      SrcValueAlign = LD->getOriginalAlignment(); +    TBAAInfo = LD->getTBAAInfo();      return true;    } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {      Ptr = ST->getBasePtr(); @@ -7086,6 +7440,7 @@ bool DAGCombiner::FindAliasInfo(SDNode *N,      SrcValue = ST->getSrcValue();      SrcValueOffset = ST->getSrcValueOffset();      SrcValueAlign = ST->getOriginalAlignment(); +    TBAAInfo = ST->getTBAAInfo();    } else {      llvm_unreachable("FindAliasInfo expected a memory operand");    } @@ -7106,26 +7461,27 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,    const Value *SrcValue;    int SrcValueOffset;    unsigned SrcValueAlign; -  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,  -                              SrcValueAlign); +  const MDNode *SrcTBAAInfo; +  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, +                              SrcValueAlign, SrcTBAAInfo);    // Starting off.    Chains.push_back(OriginalChain);    unsigned Depth = 0; -   +    // Look at each chain and determine if it is an alias.  If so, add it to the    // aliases list.  If not, then continue up the chain looking for the next    // candidate.    
while (!Chains.empty()) {      SDValue Chain = Chains.back();      Chains.pop_back(); -     -    // For TokenFactor nodes, look at each operand and only continue up the  -    // chain until we find two aliases.  If we've seen two aliases, assume we'll  + +    // For TokenFactor nodes, look at each operand and only continue up the +    // chain until we find two aliases.  If we've seen two aliases, assume we'll      // find more and revert to original chain since the xform is unlikely to be      // profitable. -    //  -    // FIXME: The depth check could be made to return the last non-aliasing  +    // +    // FIXME: The depth check could be made to return the last non-aliasing      // chain we found before we hit a tokenfactor rather than the original      // chain.      if (Depth > 6 || Aliases.size() == 2) { @@ -7151,15 +7507,18 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,        const Value *OpSrcValue;        int OpSrcValueOffset;        unsigned OpSrcValueAlign; +      const MDNode *OpSrcTBAAInfo;        bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,                                      OpSrcValue, OpSrcValueOffset, -                                    OpSrcValueAlign); +                                    OpSrcValueAlign, +                                    OpSrcTBAAInfo);        // If chain is alias then stop here.        if (!(IsLoad && IsOpLoad) &&            isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign, +                  SrcTBAAInfo,                    OpPtr, OpSize, OpSrcValue, OpSrcValueOffset, -                  OpSrcValueAlign)) { +                  OpSrcValueAlign, OpSrcTBAAInfo)) {          Aliases.push_back(Chain);        } else {          // Look further up the chain. @@ -7206,9 +7565,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {      // If a single operand then chain to it.  We don't need to revisit it.      
return Aliases[0];    } -   +    // Construct a custom tailored token factor. -  return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,  +  return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,                       &Aliases[0], Aliases.size());  } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index a4eed71e65c0..490b857b0e9c 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -55,6 +55,7 @@  #include "llvm/Target/TargetLowering.h"  #include "llvm/Target/TargetMachine.h"  #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Debug.h"  using namespace llvm;  /// startNewBlock - Set the current block to which generated machine @@ -197,12 +198,12 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,              TII.get(TargetOpcode::IMPLICIT_DEF), Reg);    } -   +    // If target-independent code couldn't handle the value, give target-specific    // code a try.    if (!Reg && isa<Constant>(V))      Reg = TargetMaterializeConstant(cast<Constant>(V)); -   +    // Don't cache constant materializations in the general ValueMap.    // To do so would require tracking what uses they dominate.    if (Reg != 0) { @@ -234,7 +235,7 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {      LocalValueMap[I] = Reg;      return Reg;    } -   +    unsigned &AssignedReg = FuncInfo.ValueMap[I];    if (AssignedReg == 0)      // Use the new register. @@ -414,7 +415,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {        // If this is a constant subscript, handle it quickly.        
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {          if (CI->isZero()) continue; -        uint64_t Offs =  +        uint64_t Offs =            TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();          N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);          if (N == 0) @@ -423,7 +424,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {          NIsKill = true;          continue;        } -       +        // N = N + Idx * ElementSize;        uint64_t ElementSize = TD.getTypeAllocSize(Ty);        std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); @@ -467,16 +468,28 @@ bool FastISel::SelectCall(const User *I) {        return true;      const Value *Address = DI->getAddress(); -    if (!Address) +    if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address))        return true; -    if (isa<UndefValue>(Address)) -      return true; -    const AllocaInst *AI = dyn_cast<AllocaInst>(Address); -    // Don't handle byval struct arguments or VLAs, for example. -    if (!AI) -      // Building the map above is target independent.  Generating DBG_VALUE -      // inline is target dependent; do this now. -      (void)TargetSelectInstruction(cast<Instruction>(I)); + +    unsigned Reg = 0; +    unsigned Offset = 0; +    if (const Argument *Arg = dyn_cast<Argument>(Address)) { +      if (Arg->hasByValAttr()) { +        // Byval arguments' frame index is recorded during argument lowering. +        // Use this info directly. 
+        Offset = FuncInfo.getByValArgumentFrameIndex(Arg); +        if (Offset) +          Reg = TRI.getFrameRegister(*FuncInfo.MF); +      } +    } +    if (!Reg) +      Reg = getRegForValue(Address); + +    if (Reg) +      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, +              TII.get(TargetOpcode::DBG_VALUE)) +        .addReg(Reg, RegState::Debug).addImm(Offset) +        .addMetadata(DI->getVariable());      return true;    }    case Intrinsic::dbg_value: { @@ -505,11 +518,8 @@ bool FastISel::SelectCall(const User *I) {      } else {        // We can't yet handle anything else here because it would require        // generating code, thus altering codegen because of debug info. -      // Insert an undef so we can see what we dropped. -      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) -        .addReg(0U).addImm(DI->getOffset()) -        .addMetadata(DI->getVariable()); -    }      +      DEBUG(dbgs() << "Dropping debug info for " << DI); +    }      return true;    }    case Intrinsic::eh_exception: { @@ -582,12 +592,12 @@ bool FastISel::SelectCall(const User *I) {  bool FastISel::SelectCast(const User *I, unsigned Opcode) {    EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());    EVT DstVT = TLI.getValueType(I->getType()); -     +    if (SrcVT == MVT::Other || !SrcVT.isSimple() ||        DstVT == MVT::Other || !DstVT.isSimple())      // Unhandled type. Halt "fast" selection and bail.      return false; -     +    // Check if the destination type is legal. Or as a special case,    // it may be i1 if we're doing a truncate because that's    // easy and somewhat common. 
@@ -629,7 +639,7 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {                                    InputReg, InputRegIsKill);    if (!ResultReg)      return false; -     +    UpdateValueMap(I, ResultReg);    return true;  } @@ -644,23 +654,23 @@ bool FastISel::SelectBitCast(const User *I) {      return true;    } -  // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators. +  // Bitcasts of other values become reg-reg copies or BITCAST operators.    EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());    EVT DstVT = TLI.getValueType(I->getType()); -   +    if (SrcVT == MVT::Other || !SrcVT.isSimple() ||        DstVT == MVT::Other || !DstVT.isSimple() ||        !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT))      // Unhandled type. Halt "fast" selection and bail.      return false; -   +    unsigned Op0 = getRegForValue(I->getOperand(0));    if (Op0 == 0)      // Unhandled operand. Halt "fast" selection and bail.      return false;    bool Op0IsKill = hasTrivialKill(I->getOperand(0)); -   +    // First, try to perform the bitcast by inserting a reg-reg copy.    unsigned ResultReg = 0;    if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { @@ -673,15 +683,15 @@ bool FastISel::SelectBitCast(const User *I) {                ResultReg).addReg(Op0);      }    } -   -  // If the reg-reg copy failed, select a BIT_CONVERT opcode. + +  // If the reg-reg copy failed, select a BITCAST opcode.    
if (!ResultReg)      ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), -                           ISD::BIT_CONVERT, Op0, Op0IsKill); -   +                           ISD::BITCAST, Op0, Op0IsKill); +    if (!ResultReg)      return false; -   +    UpdateValueMap(I, ResultReg);    return true;  } @@ -753,7 +763,7 @@ FastISel::SelectFNeg(const User *I) {      return false;    unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), -                               ISD::BIT_CONVERT, OpReg, OpRegIsKill); +                               ISD::BITCAST, OpReg, OpRegIsKill);    if (IntReg == 0)      return false; @@ -765,7 +775,7 @@ FastISel::SelectFNeg(const User *I) {      return false;    ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), -                         ISD::BIT_CONVERT, IntResultReg, /*Kill=*/true); +                         ISD::BITCAST, IntResultReg, /*Kill=*/true);    if (ResultReg == 0)      return false; @@ -845,10 +855,10 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {      // Dynamic-sized alloca is not handled yet.      
return false; -     +    case Instruction::Call:      return SelectCall(I); -   +    case Instruction::BitCast:      return SelectBitCast(I); @@ -911,7 +921,7 @@ unsigned FastISel::FastEmit_r(MVT, MVT,    return 0;  } -unsigned FastISel::FastEmit_rr(MVT, MVT,  +unsigned FastISel::FastEmit_rr(MVT, MVT,                                 unsigned,                                 unsigned /*Op0*/, bool /*Op0IsKill*/,                                 unsigned /*Op1*/, bool /*Op1IsKill*/) { @@ -1139,7 +1149,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,                                    uint64_t Imm) {    unsigned ResultReg = createResultReg(RC);    const TargetInstrDesc &II = TII.get(MachineInstOpcode); -   +    if (II.getNumDefs() >= 1)      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm);    else { diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 5ef6404ee5d6..98582ba99f14 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -29,7 +29,6 @@  #include "llvm/CodeGen/MachineRegisterInfo.h"  #include "llvm/Target/TargetRegisterInfo.h"  #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h"  #include "llvm/Target/TargetInstrInfo.h"  #include "llvm/Target/TargetLowering.h"  #include "llvm/Target/TargetOptions.h" diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 61c2a90e7edc..e309defba20f 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -31,11 +31,11 @@  using namespace llvm;  /// CountResults - The results of target nodes have register or immediate -/// operands first, then an optional chain, and optional flag operands (which do +/// operands first, then an optional chain, and optional glue operands (which do  /// not go into the resulting MachineInstr).  
unsigned InstrEmitter::CountResults(SDNode *Node) {    unsigned N = Node->getNumValues(); -  while (N && Node->getValueType(N - 1) == MVT::Flag) +  while (N && Node->getValueType(N - 1) == MVT::Glue)      --N;    if (N && Node->getValueType(N - 1) == MVT::Other)      --N;    // Skip over chain result. @@ -43,12 +43,12 @@ unsigned InstrEmitter::CountResults(SDNode *Node) {  }  /// CountOperands - The inputs to target nodes have any actual inputs first, -/// followed by an optional chain operand, then an optional flag operand. +/// followed by an optional chain operand, then an optional glue operand.  /// Compute the number of actual operands that will go into the resulting  /// MachineInstr.  unsigned InstrEmitter::CountOperands(SDNode *Node) {    unsigned N = Node->getNumOperands(); -  while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag) +  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)      --N;    if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)      --N; // Ignore chain if it exists. @@ -67,7 +67,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,      if (IsClone)        VRBaseMap.erase(Op);      bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second; -    isNew = isNew; // Silence compiler warning. +    (void)isNew; // Silence compiler warning.      
assert(isNew && "Node emitted out of order - early");      return;    } @@ -96,7 +96,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,            if (Op.getNode() != Node || Op.getResNo() != ResNo)              continue;            EVT VT = Node->getValueType(Op.getResNo()); -          if (VT == MVT::Other || VT == MVT::Flag) +          if (VT == MVT::Other || VT == MVT::Glue)              continue;            Match = false;            if (User->isMachineOpcode()) { @@ -150,7 +150,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,    if (IsClone)      VRBaseMap.erase(Op);    bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; -  isNew = isNew; // Silence compiler warning. +  (void)isNew; // Silence compiler warning.    assert(isNew && "Node emitted out of order - early");  } @@ -224,7 +224,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,      if (IsClone)        VRBaseMap.erase(Op);      bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; -    isNew = isNew; // Silence compiler warning. +    (void)isNew; // Silence compiler warning.      assert(isNew && "Node emitted out of order - early");    }  } @@ -264,8 +264,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,                                   DenseMap<SDValue, unsigned> &VRBaseMap,                                   bool IsDebug, bool IsClone, bool IsCloned) {    assert(Op.getValueType() != MVT::Other && -         Op.getValueType() != MVT::Flag && -         "Chain and flag operands should occur at end of operand list!"); +         Op.getValueType() != MVT::Glue && +         "Chain and glue operands should occur at end of operand list!");    // Get/emit the operand.    
unsigned VReg = getVR(Op, VRBaseMap);    assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); @@ -377,8 +377,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,                                              BA->getTargetFlags()));    } else {      assert(Op.getValueType() != MVT::Other && -           Op.getValueType() != MVT::Flag && -           "Chain and flag operands should occur at end of operand list!"); +           Op.getValueType() != MVT::Glue && +           "Chain and glue operands should occur at end of operand list!");      AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,                         IsDebug, IsClone, IsCloned);    } @@ -428,31 +428,47 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,      // Figure out the register class to create for the destreg.      unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); -    const TargetRegisterClass *TRC = MRI->getRegClass(VReg); -    const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx); -    assert(SRC && "Invalid subregister index in EXTRACT_SUBREG"); - -    // Figure out the register class to create for the destreg. -    // Note that if we're going to directly use an existing register, -    // it must be precisely the required class, and not a subclass -    // thereof. 
-    if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) { -      // Create the reg -      assert(SRC && "Couldn't find source register class"); -      VRBase = MRI->createVirtualRegister(SRC); -    } +    MachineInstr *DefMI = MRI->getVRegDef(VReg); +    unsigned SrcReg, DstReg, DefSubIdx; +    if (DefMI && +        TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && +        SubIdx == DefSubIdx) { +      // Optimize these: +      // r1025 = s/zext r1024, 4 +      // r1026 = extract_subreg r1025, 4 +      // to a copy +      // r1026 = copy r1024 +      const TargetRegisterClass *TRC = MRI->getRegClass(SrcReg); +      VRBase = MRI->createVirtualRegister(TRC); +      BuildMI(*MBB, InsertPos, Node->getDebugLoc(), +              TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); +    } else { +      const TargetRegisterClass *TRC = MRI->getRegClass(VReg); +      const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx); +      assert(SRC && "Invalid subregister index in EXTRACT_SUBREG"); + +      // Figure out the register class to create for the destreg. +      // Note that if we're going to directly use an existing register, +      // it must be precisely the required class, and not a subclass +      // thereof. +      if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) { +        // Create the reg +        assert(SRC && "Couldn't find source register class"); +        VRBase = MRI->createVirtualRegister(SRC); +      } -    // Create the extract_subreg machine instruction. -    MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), -                               TII->get(TargetOpcode::COPY), VRBase); +      // Create the extract_subreg machine instruction. 
+      MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), +                                 TII->get(TargetOpcode::COPY), VRBase); -    // Add source, and subreg index -    AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false, -               IsClone, IsCloned); -    assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()) && -           "Cannot yet extract from physregs"); -    MI->getOperand(1).setSubReg(SubIdx); -    MBB->insert(InsertPos, MI); +      // Add source, and subreg index +      AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false, +                 IsClone, IsCloned); +      assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg())&& +             "Cannot yet extract from physregs"); +      MI->getOperand(1).setSubReg(SubIdx); +      MBB->insert(InsertPos, MI); +    }    } else if (Opc == TargetOpcode::INSERT_SUBREG ||               Opc == TargetOpcode::SUBREG_TO_REG) {      SDValue N0 = Node->getOperand(0); @@ -496,7 +512,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,    SDValue Op(Node, 0);    bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; -  isNew = isNew; // Silence compiler warning. +  (void)isNew; // Silence compiler warning.    assert(isNew && "Node emitted out of order - early");  } @@ -518,7 +534,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,    SDValue Op(Node, 0);    bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; -  isNew = isNew; // Silence compiler warning. +  (void)isNew; // Silence compiler warning.    
assert(isNew && "Node emitted out of order - early");  } @@ -543,9 +559,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,        const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);        const TargetRegisterClass *SRC =          TRI->getMatchingSuperRegClass(RC, TRC, SubIdx); -      if (!SRC) -        llvm_unreachable("Invalid subregister index in REG_SEQUENCE"); -      if (SRC != RC) { +      if (SRC && SRC != RC) {          MRI->setRegClass(NewVReg, SRC);          RC = SRC;        } @@ -557,7 +571,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,    MBB->insert(InsertPos, MI);    SDValue Op(Node, 0);    bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; -  isNew = isNew; // Silence compiler warning. +  (void)isNew; // Silence compiler warning.    assert(isNew && "Node emitted out of order - early");  } @@ -673,10 +687,10 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,    // The MachineInstr constructor adds implicit-def operands. Scan through    // these to determine which are dead.    if (MI->getNumOperands() != 0 && -      Node->getValueType(Node->getNumValues()-1) == MVT::Flag) { +      Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {      // First, collect all used registers.      
SmallVector<unsigned, 8> UsedRegs; -    for (SDNode *F = Node->getFlaggedUser(); F; F = F->getFlaggedUser()) +    for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser())        if (F->getOpcode() == ISD::CopyFromReg)          UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());        else { @@ -689,7 +703,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,          for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)            if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {              unsigned Reg = R->getReg(); -            if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) +            if (TargetRegisterInfo::isPhysicalRegister(Reg))                UsedRegs.push_back(Reg);            }        } @@ -721,20 +735,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,    // hook knows where in the block to insert the replacement code.    MBB->insert(InsertPos, MI); -  if (II.usesCustomInsertionHook()) { -    // Insert this instruction into the basic block using a target -    // specific inserter which may returns a new basic block. -    bool AtEnd = InsertPos == MBB->end(); -    MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB); -    if (NewMBB != MBB) { -      if (AtEnd) -        InsertPos = NewMBB->end(); -      MBB = NewMBB; -    } -    return; -  } -   -  // Additional results must be an physical register def. +  // Additional results must be physical register defs.    if (HasPhysRegOuts) {      for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {        unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; @@ -742,17 +743,17 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,          EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);        // If there are no uses, mark the register as dead now, so that        // MachineLICM/Sink can see that it's dead. 
Don't do this if the -      // node has a Flag value, for the benefit of targets still using -      // Flag for values in physregs. -      else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag) +      // node has a Glue value, for the benefit of targets still using +      // Glue for values in physregs. +      else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)          MI->addRegisterDead(Reg, TRI);      }    }    // If the instruction has implicit defs and the node doesn't, mark the -  // implicit def as dead.  If the node has any flag outputs, we don't do this -  // because we don't know what implicit defs are being used by flagged nodes. -  if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag) +  // implicit def as dead.  If the node has any glue outputs, we don't do this +  // because we don't know what implicit defs are being used by glued nodes. +  if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)      if (const unsigned *IDList = II.getImplicitDefs()) {        for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs();             i != e; ++i) @@ -808,8 +809,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,    case ISD::INLINEASM: {      unsigned NumOps = Node->getNumOperands(); -    if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) -      --NumOps;  // Ignore the flag operand. +    if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) +      --NumOps;  // Ignore the glue operand.      // Create the inline asm machine instruction.      MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), @@ -820,11 +821,11 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,      const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();      MI->addOperand(MachineOperand::CreateES(AsmStr)); -    // Add the isAlignStack bit. 
-    int64_t isAlignStack = -      cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_IsAlignStack))-> +    // Add the HasSideEffect and isAlignStack bits. +    int64_t ExtraInfo = +      cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->                            getZExtValue(); -    MI->addOperand(MachineOperand::CreateImm(isAlignStack)); +    MI->addOperand(MachineOperand::CreateImm(ExtraInfo));      // Add all of the operand registers to the instruction.      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 2981cd3f1cab..49c862ce3e0b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -11,14 +11,15 @@  //  //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/CodeGen/Analysis.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineFrameInfo.h"  #include "llvm/CodeGen/MachineJumpTableInfo.h"  #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Analysis/DebugInfo.h"  #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetFrameLowering.h"  #include "llvm/Target/TargetLowering.h"  #include "llvm/Target/TargetData.h"  #include "llvm/Target/TargetMachine.h" @@ -65,11 +66,6 @@ class SelectionDAGLegalize {    /// against each other, including inserted libcalls.    SDValue LastCALLSEQ_END; -  /// IsLegalizingCall - This member is used *only* for purposes of providing -  /// helpful assertions that a libcall isn't created while another call is -  /// being legalized (which could lead to non-serialized call sequences). -  bool IsLegalizingCall; -    enum LegalizeAction {      Legal,      // The target natively supports this operation.      
Promote,    // This operation should be executed in a larger type. @@ -91,6 +87,9 @@ class SelectionDAGLegalize {      // If someone requests legalization of the new node, return itself.      if (From != To)        LegalizedNodes.insert(std::make_pair(To, To)); +     +    // Transfer SDDbgValues. +    DAG.TransferDbgValues(From, To);    }  public: @@ -172,6 +171,7 @@ private:    SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);    SDValue ExpandExtractFromVectorThroughStack(SDValue Op); +  SDValue ExpandInsertToVectorThroughStack(SDValue Op);    SDValue ExpandVectorBuildThroughStack(SDNode* Node);    std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); @@ -224,7 +224,6 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,  void SelectionDAGLegalize::LegalizeDAG() {    LastCALLSEQ_END = DAG.getEntryNode(); -  IsLegalizingCall = false;    // The legalize process is inherently a bottom-up recursive process (users    // legalize their uses before themselves).  Given infinite stack space, we @@ -251,9 +250,16 @@ void SelectionDAGLegalize::LegalizeDAG() {  /// FindCallEndFromCallStart - Given a chained node that is part of a call  /// sequence, find the CALLSEQ_END node that terminates the call sequence. -static SDNode *FindCallEndFromCallStart(SDNode *Node) { -  if (Node->getOpcode() == ISD::CALLSEQ_END) -    return Node; +static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { +  // Nested CALLSEQ_START/END constructs aren't yet legal, +  // but we can DTRT and handle them correctly here. 
+  if (Node->getOpcode() == ISD::CALLSEQ_START) +    depth++; +  else if (Node->getOpcode() == ISD::CALLSEQ_END) { +    depth--; +    if (depth == 0) +      return Node; +  }    if (Node->use_empty())      return 0;   // No CallSeqEnd @@ -283,7 +289,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node) {      SDNode *User = *UI;      for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)        if (User->getOperand(i) == TheChain) -        if (SDNode *Result = FindCallEndFromCallStart(User)) +        if (SDNode *Result = FindCallEndFromCallStart(User, depth))            return Result;    }    return 0; @@ -292,12 +298,26 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node) {  /// FindCallStartFromCallEnd - Given a chained node that is part of a call  /// sequence, find the CALLSEQ_START node that initiates the call sequence.  static SDNode *FindCallStartFromCallEnd(SDNode *Node) { +  int nested = 0;    assert(Node && "Didn't find callseq_start for a call??"); -  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node; - -  assert(Node->getOperand(0).getValueType() == MVT::Other && -         "Node doesn't have a token chain argument!"); -  return FindCallStartFromCallEnd(Node->getOperand(0).getNode()); +  while (Node->getOpcode() != ISD::CALLSEQ_START || nested) { +    Node = Node->getOperand(0).getNode(); +    assert(Node->getOperand(0).getValueType() == MVT::Other && +           "Node doesn't have a token chain argument!"); +    switch (Node->getOpcode()) { +    default: +      break; +    case ISD::CALLSEQ_START: +      if (!nested) +        return Node; +      nested--; +      break; +    case ISD::CALLSEQ_END: +      nested++; +      break; +    } +  } +  return 0;  }  /// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to @@ -377,12 +397,12 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,    SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());    unsigned Alignment = 
cast<ConstantPoolSDNode>(CPIdx)->getAlignment();    if (Extend) -    return DAG.getExtLoad(ISD::EXTLOAD, OrigVT, dl, +    return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,                            DAG.getEntryNode(), -                          CPIdx, PseudoSourceValue::getConstantPool(), -                          0, VT, false, false, Alignment); +                          CPIdx, MachinePointerInfo::getConstantPool(), +                          VT, false, false, Alignment);    return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, -                     PseudoSourceValue::getConstantPool(), 0, false, false, +                     MachinePointerInfo::getConstantPool(), false, false,                       Alignment);  } @@ -395,7 +415,6 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,    SDValue Val = ST->getValue();    EVT VT = Val.getValueType();    int Alignment = ST->getAlignment(); -  int SVOffset = ST->getSrcValueOffset();    DebugLoc dl = ST->getDebugLoc();    if (ST->getMemoryVT().isFloatingPoint() ||        ST->getMemoryVT().isVector()) { @@ -404,10 +423,9 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,        // Expand to a bitconvert of the value to the integer type of the        // same size, then a (misaligned) int store.        // FIXME: Does not handle truncating floating point stores! 
-      SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val); -      return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(), -                          SVOffset, ST->isVolatile(), ST->isNonTemporal(), -                          Alignment); +      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); +      return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), +                          ST->isVolatile(), ST->isNonTemporal(), Alignment);      } else {        // Do a (aligned) store to a stack slot, then copy from the stack slot        // to the final destination using (unaligned) integer loads and stores. @@ -425,8 +443,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,        // Perform the original store, only redirected to the stack slot.        SDValue Store = DAG.getTruncStore(Chain, dl, -                                        Val, StackPtr, NULL, 0, StoredVT, -                                        false, false, 0); +                                        Val, StackPtr, MachinePointerInfo(), +                                        StoredVT, false, false, 0);        SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());        SmallVector<SDValue, 8> Stores;        unsigned Offset = 0; @@ -434,11 +452,12 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,        // Do all but one copies using the full register width.        for (unsigned i = 1; i < NumRegs; i++) {          // Load one integer register's worth from the stack slot. -        SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0, +        SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, +                                   MachinePointerInfo(),                                     false, false, 0);          // Store it to the final location.  Remember the store.          
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, -                                      ST->getSrcValue(), SVOffset + Offset, +                                    ST->getPointerInfo().getWithOffset(Offset),                                        ST->isVolatile(), ST->isNonTemporal(),                                        MinAlign(ST->getAlignment(), Offset)));          // Increment the pointers. @@ -455,11 +474,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,                                      8 * (StoredBytes - Offset));        // Load from the stack slot. -      SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Store, StackPtr, -                                    NULL, 0, MemVT, false, false, 0); +      SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, +                                    MachinePointerInfo(), +                                    MemVT, false, false, 0);        Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, -                                         ST->getSrcValue(), SVOffset + Offset, +                                         ST->getPointerInfo() +                                           .getWithOffset(Offset),                                           MemVT, ST->isVolatile(),                                           ST->isNonTemporal(),                                           MinAlign(ST->getAlignment(), Offset))); @@ -484,13 +505,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,    // Store the two parts    SDValue Store1, Store2;    Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, -                             ST->getSrcValue(), SVOffset, NewStoredVT, +                             ST->getPointerInfo(), NewStoredVT,                               ST->isVolatile(), ST->isNonTemporal(), Alignment);    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,                      DAG.getConstant(IncrementSize, 
TLI.getPointerTy()));    Alignment = MinAlign(Alignment, IncrementSize);    Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, -                             ST->getSrcValue(), SVOffset + IncrementSize, +                             ST->getPointerInfo().getWithOffset(IncrementSize),                               NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),                               Alignment); @@ -501,7 +522,6 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,  static  SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,                              const TargetLowering &TLI) { -  int SVOffset = LD->getSrcValueOffset();    SDValue Chain = LD->getChain();    SDValue Ptr = LD->getBasePtr();    EVT VT = LD->getValueType(0); @@ -512,74 +532,75 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,      if (TLI.isTypeLegal(intVT)) {        // Expand to a (misaligned) integer load of the same size,        // then bitconvert to floating point or vector. -      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(), -                                    SVOffset, LD->isVolatile(), +      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), +                                    LD->isVolatile(),                                      LD->isNonTemporal(), LD->getAlignment()); -      SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad); +      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);        if (VT.isFloatingPoint() && LoadedVT != VT)          Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);        SDValue Ops[] = { Result, Chain };        return DAG.getMergeValues(Ops, 2, dl); -    } else { -      // Copy the value to a (aligned) stack slot using (unaligned) integer -      // loads and stores, then do a (aligned) load from the stack slot. 
-      EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); -      unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; -      unsigned RegBytes = RegVT.getSizeInBits() / 8; -      unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; - -      // Make sure the stack slot is also aligned for the register type. -      SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); - -      SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); -      SmallVector<SDValue, 8> Stores; -      SDValue StackPtr = StackBase; -      unsigned Offset = 0; - -      // Do all but one copies using the full register width. -      for (unsigned i = 1; i < NumRegs; i++) { -        // Load one integer register's worth from the original location. -        SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(), -                                   SVOffset + Offset, LD->isVolatile(), -                                   LD->isNonTemporal(), -                                   MinAlign(LD->getAlignment(), Offset)); -        // Follow the load with a store to the stack slot.  Remember the store. -        Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, -                                      NULL, 0, false, false, 0)); -        // Increment the pointers. -        Offset += RegBytes; -        Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); -        StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, -                               Increment); -      } +    } -      // The last copy may be partial.  Do an extending load. 
-      EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), -                                    8 * (LoadedBytes - Offset)); -      SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Chain, Ptr, -                                    LD->getSrcValue(), SVOffset + Offset, -                                    MemVT, LD->isVolatile(), -                                    LD->isNonTemporal(), -                                    MinAlign(LD->getAlignment(), Offset)); +    // Copy the value to a (aligned) stack slot using (unaligned) integer +    // loads and stores, then do a (aligned) load from the stack slot. +    EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); +    unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; +    unsigned RegBytes = RegVT.getSizeInBits() / 8; +    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; + +    // Make sure the stack slot is also aligned for the register type. +    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); + +    SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); +    SmallVector<SDValue, 8> Stores; +    SDValue StackPtr = StackBase; +    unsigned Offset = 0; + +    // Do all but one copies using the full register width. +    for (unsigned i = 1; i < NumRegs; i++) { +      // Load one integer register's worth from the original location. +      SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, +                                 LD->getPointerInfo().getWithOffset(Offset), +                                 LD->isVolatile(), LD->isNonTemporal(), +                                 MinAlign(LD->getAlignment(), Offset));        // Follow the load with a store to the stack slot.  Remember the store. -      // On big-endian machines this requires a truncating store to ensure -      // that the bits end up in the right place. 
-      Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, -                                         NULL, 0, MemVT, false, false, 0)); - -      // The order of the stores doesn't matter - say it with a TokenFactor. -      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], -                               Stores.size()); - -      // Finally, perform the original load only redirected to the stack slot. -      Load = DAG.getExtLoad(LD->getExtensionType(), VT, dl, TF, StackBase, -                            NULL, 0, LoadedVT, false, false, 0); - -      // Callers expect a MERGE_VALUES node. -      SDValue Ops[] = { Load, TF }; -      return DAG.getMergeValues(Ops, 2, dl); +      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, +                                    MachinePointerInfo(), false, false, 0)); +      // Increment the pointers. +      Offset += RegBytes; +      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); +      StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, +                             Increment);      } + +    // The last copy may be partial.  Do an extending load. +    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), +                                  8 * (LoadedBytes - Offset)); +    SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, +                                  LD->getPointerInfo().getWithOffset(Offset), +                                  MemVT, LD->isVolatile(), +                                  LD->isNonTemporal(), +                                  MinAlign(LD->getAlignment(), Offset)); +    // Follow the load with a store to the stack slot.  Remember the store. +    // On big-endian machines this requires a truncating store to ensure +    // that the bits end up in the right place. 
+    Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, +                                       MachinePointerInfo(), MemVT, +                                       false, false, 0)); + +    // The order of the stores doesn't matter - say it with a TokenFactor. +    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], +                             Stores.size()); + +    // Finally, perform the original load only redirected to the stack slot. +    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, +                          MachinePointerInfo(), LoadedVT, false, false, 0); + +    // Callers expect a MERGE_VALUES node. +    SDValue Ops[] = { Load, TF }; +    return DAG.getMergeValues(Ops, 2, dl);    }    assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&           "Unaligned load of unsupported type."); @@ -602,22 +623,24 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,    // Load the value in two parts    SDValue Lo, Hi;    if (TLI.isLittleEndian()) { -    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(), -                        SVOffset, NewLoadedVT, LD->isVolatile(), +    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), +                        NewLoadedVT, LD->isVolatile(),                          LD->isNonTemporal(), Alignment);      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,                        DAG.getConstant(IncrementSize, TLI.getPointerTy())); -    Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(), -                        SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), +    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, +                        LD->getPointerInfo().getWithOffset(IncrementSize), +                        NewLoadedVT, LD->isVolatile(),                          LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));    } else { -    Hi = DAG.getExtLoad(HiExtType, 
VT, dl, Chain, Ptr, LD->getSrcValue(), -                        SVOffset, NewLoadedVT, LD->isVolatile(), +    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), +                        NewLoadedVT, LD->isVolatile(),                          LD->isNonTemporal(), Alignment);      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,                        DAG.getConstant(IncrementSize, TLI.getPointerTy())); -    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(), -                        SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), +    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, +                        LD->getPointerInfo().getWithOffset(IncrementSize), +                        NewLoadedVT, LD->isVolatile(),                          LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));    } @@ -660,7 +683,7 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,    // Store the vector.    SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr, -                            PseudoSourceValue::getFixedStack(SPFI), 0, +                            MachinePointerInfo::getFixedStack(SPFI),                              false, false, 0);    // Truncate or zero extend offset to target pointer type. @@ -671,13 +694,11 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,    Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT));    SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);    // Store the scalar value. -  Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, -                         PseudoSourceValue::getFixedStack(SPFI), 0, EltVT, +  Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT,                           false, false, 0);    // Load the updated vector.    
return DAG.getLoad(VT, dl, Ch, StackPtr, -                     PseudoSourceValue::getFixedStack(SPFI), 0, -                     false, false, 0); +                     MachinePointerInfo::getFixedStack(SPFI), false, false, 0);  } @@ -719,7 +740,6 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {    SDValue Tmp1 = ST->getChain();    SDValue Tmp2 = ST->getBasePtr();    SDValue Tmp3; -  int SVOffset = ST->getSrcValueOffset();    unsigned Alignment = ST->getAlignment();    bool isVolatile = ST->isVolatile();    bool isNonTemporal = ST->isNonTemporal(); @@ -730,29 +750,34 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {        Tmp3 = DAG.getConstant(CFP->getValueAPF().                                        bitcastToAPInt().zextOrTrunc(32),                                MVT::i32); -      return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), -                          SVOffset, isVolatile, isNonTemporal, Alignment); -    } else if (CFP->getValueType(0) == MVT::f64) { +      return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), +                          isVolatile, isNonTemporal, Alignment); +    } + +    if (CFP->getValueType(0) == MVT::f64) {        // If this target supports 64-bit registers, do a single 64-bit store.        if (getTypeAction(MVT::i64) == Legal) {          Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().                                    
zextOrTrunc(64), MVT::i64); -        return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), -                            SVOffset, isVolatile, isNonTemporal, Alignment); -      } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { +        return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), +                            isVolatile, isNonTemporal, Alignment); +      } + +      if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {          // Otherwise, if the target supports 32-bit registers, use 2 32-bit          // stores.  If the target supports neither 32- nor 64-bits, this          // xform is certainly not worth it.          const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt(); -        SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32); +        SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32);          SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);          if (TLI.isBigEndian()) std::swap(Lo, Hi); -        Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(), -                          SVOffset, isVolatile, isNonTemporal, Alignment); +        Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getPointerInfo(), isVolatile, +                          isNonTemporal, Alignment);          Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,                              DAG.getIntPtrConstant(4)); -        Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4, +        Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, +                          ST->getPointerInfo().getWithOffset(4),                            isVolatile, isNonTemporal, MinAlign(Alignment, 4U));          return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); @@ -792,7 +817,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {    bool isCustom = false;    // Figure out the correct action; the way to query this varies by opcode -  TargetLowering::LegalizeAction Action; +  TargetLowering::LegalizeAction 
Action = TargetLowering::Legal;    bool SimpleFinishLegalizing = true;    switch (Node->getOpcode()) {    case ISD::INTRINSIC_W_CHAIN: @@ -860,6 +885,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {    case ISD::FRAME_TO_ARGS_OFFSET:    case ISD::EH_SJLJ_SETJMP:    case ISD::EH_SJLJ_LONGJMP: +  case ISD::EH_SJLJ_DISPATCHSETUP:      // These operations lie about being legal: when they claim to be legal,      // they should actually be expanded.      Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -996,6 +1022,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {      }      break;    case ISD::CALLSEQ_START: { +    static int depth = 0;      SDNode *CallEnd = FindCallEndFromCallStart(Node);      // Recursively Legalize all of the inputs of the call end that do not lead @@ -1013,7 +1040,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {      // Merge in the last call to ensure that this call starts after the last      // call ended. -    if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { +    if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken && depth == 0) {        Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,                           Tmp1, LastCALLSEQ_END);        Tmp1 = LegalizeOp(Tmp1); @@ -1036,14 +1063,18 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {      // sequence have been legalized, legalize the call itself.  During this      // process, no libcalls can/will be inserted, guaranteeing that no calls      // can overlap. -    assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); + +    SDValue Saved_LastCALLSEQ_END = LastCALLSEQ_END ;      // Note that we are selecting this call!      LastCALLSEQ_END = SDValue(CallEnd, 0); -    IsLegalizingCall = true; +    depth++;      // Legalize the call, starting from the CALLSEQ_END.      
LegalizeOp(LastCALLSEQ_END); -    assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); +    depth--; +    assert(depth >= 0 && "Un-matched CALLSEQ_START?"); +    if (depth > 0) +      LastCALLSEQ_END = Saved_LastCALLSEQ_END;      return Result;    }    case ISD::CALLSEQ_END: @@ -1062,7 +1093,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {      Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.      // Do not try to legalize the target-specific arguments (#1+), except for      // an optional flag input. -    if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){ +    if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){        if (Tmp1 != Node->getOperand(0)) {          SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());          Ops[0] = Tmp1; @@ -1082,10 +1113,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {                           Result.getResNo());        }      } -    assert(IsLegalizingCall && "Call sequence imbalance between start/end?");      // This finishes up call legalization. -    IsLegalizingCall = false; -      // If the CALLSEQ_END node has a flag, remember that we legalized it.      AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));      if (Node->getNumValues() == 2) @@ -1136,11 +1164,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {          // Change base type to a different vector type.          
EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); -        Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(), -                           LD->getSrcValueOffset(), +        Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(),                             LD->isVolatile(), LD->isNonTemporal(),                             LD->getAlignment()); -        Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1)); +        Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1));          Tmp4 = LegalizeOp(Tmp1.getValue(1));          break;        } @@ -1150,227 +1177,224 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {        AddLegalizedOperand(SDValue(Node, 0), Tmp3);        AddLegalizedOperand(SDValue(Node, 1), Tmp4);        return Op.getResNo() ? Tmp4 : Tmp3; -    } else { -      EVT SrcVT = LD->getMemoryVT(); -      unsigned SrcWidth = SrcVT.getSizeInBits(); -      int SVOffset = LD->getSrcValueOffset(); -      unsigned Alignment = LD->getAlignment(); -      bool isVolatile = LD->isVolatile(); -      bool isNonTemporal = LD->isNonTemporal(); - -      if (SrcWidth != SrcVT.getStoreSizeInBits() && -          // Some targets pretend to have an i1 loading operation, and actually -          // load an i8.  This trick is correct for ZEXTLOAD because the top 7 -          // bits are guaranteed to be zero; it helps the optimizers understand -          // that these bits are zero.  It is also useful for EXTLOAD, since it -          // tells the optimizers that those bits are undefined.  It would be -          // nice to have an effective generic way of getting these benefits... -          // Until such a way is found, don't insist on promoting i1 here. -          (SrcVT != MVT::i1 || -           TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { -        // Promote to a byte-sized load if not loading an integral number of -        // bytes.  For example, promote EXTLOAD:i20 -> EXTLOAD:i24. 
-        unsigned NewWidth = SrcVT.getStoreSizeInBits(); -        EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); -        SDValue Ch; - -        // The extra bits are guaranteed to be zero, since we stored them that -        // way.  A zext load from NVT thus automatically gives zext from SrcVT. - -        ISD::LoadExtType NewExtType = -          ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - -        Result = DAG.getExtLoad(NewExtType, Node->getValueType(0), dl, -                                Tmp1, Tmp2, LD->getSrcValue(), SVOffset, -                                NVT, isVolatile, isNonTemporal, Alignment); - -        Ch = Result.getValue(1); // The chain. - -        if (ExtType == ISD::SEXTLOAD) -          // Having the top bits zero doesn't help when sign extending. -          Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, -                               Result.getValueType(), -                               Result, DAG.getValueType(SrcVT)); -        else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) -          // All the top bits are guaranteed to be zero - inform the optimizers. -          Result = DAG.getNode(ISD::AssertZext, dl, -                               Result.getValueType(), Result, -                               DAG.getValueType(SrcVT)); - -        Tmp1 = LegalizeOp(Result); -        Tmp2 = LegalizeOp(Ch); -      } else if (SrcWidth & (SrcWidth - 1)) { -        // If not loading a power-of-2 number of bits, expand as two loads. 
-        assert(!SrcVT.isVector() && "Unsupported extload!"); -        unsigned RoundWidth = 1 << Log2_32(SrcWidth); -        assert(RoundWidth < SrcWidth); -        unsigned ExtraWidth = SrcWidth - RoundWidth; -        assert(ExtraWidth < RoundWidth); -        assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && -               "Load size not an integral number of bytes!"); -        EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); -        EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); -        SDValue Lo, Hi, Ch; -        unsigned IncrementSize; +    } -        if (TLI.isLittleEndian()) { -          // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) -          // Load the bottom RoundWidth bits. -          Lo = DAG.getExtLoad(ISD::ZEXTLOAD, Node->getValueType(0), dl, -                              Tmp1, Tmp2, -                              LD->getSrcValue(), SVOffset, RoundVT, isVolatile, -                              isNonTemporal, Alignment); - -          // Load the remaining ExtraWidth bits. -          IncrementSize = RoundWidth / 8; -          Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, -                             DAG.getIntPtrConstant(IncrementSize)); -          Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2, -                              LD->getSrcValue(), SVOffset + IncrementSize, -                              ExtraVT, isVolatile, isNonTemporal, -                              MinAlign(Alignment, IncrementSize)); - -          // Build a factor node to remember that this load is independent of -          // the other one. -          Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), -                           Hi.getValue(1)); - -          // Move the top bits to the right place. 
-          Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, -                           DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); +    EVT SrcVT = LD->getMemoryVT(); +    unsigned SrcWidth = SrcVT.getSizeInBits(); +    unsigned Alignment = LD->getAlignment(); +    bool isVolatile = LD->isVolatile(); +    bool isNonTemporal = LD->isNonTemporal(); + +    if (SrcWidth != SrcVT.getStoreSizeInBits() && +        // Some targets pretend to have an i1 loading operation, and actually +        // load an i8.  This trick is correct for ZEXTLOAD because the top 7 +        // bits are guaranteed to be zero; it helps the optimizers understand +        // that these bits are zero.  It is also useful for EXTLOAD, since it +        // tells the optimizers that those bits are undefined.  It would be +        // nice to have an effective generic way of getting these benefits... +        // Until such a way is found, don't insist on promoting i1 here. +        (SrcVT != MVT::i1 || +         TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { +      // Promote to a byte-sized load if not loading an integral number of +      // bytes.  For example, promote EXTLOAD:i20 -> EXTLOAD:i24. +      unsigned NewWidth = SrcVT.getStoreSizeInBits(); +      EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); +      SDValue Ch; + +      // The extra bits are guaranteed to be zero, since we stored them that +      // way.  A zext load from NVT thus automatically gives zext from SrcVT. + +      ISD::LoadExtType NewExtType = +        ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; + +      Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), +                              Tmp1, Tmp2, LD->getPointerInfo(), +                              NVT, isVolatile, isNonTemporal, Alignment); + +      Ch = Result.getValue(1); // The chain. + +      if (ExtType == ISD::SEXTLOAD) +        // Having the top bits zero doesn't help when sign extending. 
+        Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, +                             Result.getValueType(), +                             Result, DAG.getValueType(SrcVT)); +      else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) +        // All the top bits are guaranteed to be zero - inform the optimizers. +        Result = DAG.getNode(ISD::AssertZext, dl, +                             Result.getValueType(), Result, +                             DAG.getValueType(SrcVT)); + +      Tmp1 = LegalizeOp(Result); +      Tmp2 = LegalizeOp(Ch); +    } else if (SrcWidth & (SrcWidth - 1)) { +      // If not loading a power-of-2 number of bits, expand as two loads. +      assert(!SrcVT.isVector() && "Unsupported extload!"); +      unsigned RoundWidth = 1 << Log2_32(SrcWidth); +      assert(RoundWidth < SrcWidth); +      unsigned ExtraWidth = SrcWidth - RoundWidth; +      assert(ExtraWidth < RoundWidth); +      assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && +             "Load size not an integral number of bytes!"); +      EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); +      EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); +      SDValue Lo, Hi, Ch; +      unsigned IncrementSize; + +      if (TLI.isLittleEndian()) { +        // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) +        // Load the bottom RoundWidth bits. +        Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), +                            Tmp1, Tmp2, +                            LD->getPointerInfo(), RoundVT, isVolatile, +                            isNonTemporal, Alignment); + +        // Load the remaining ExtraWidth bits. 
+        IncrementSize = RoundWidth / 8; +        Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, +                           DAG.getIntPtrConstant(IncrementSize)); +        Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, +                            LD->getPointerInfo().getWithOffset(IncrementSize), +                            ExtraVT, isVolatile, isNonTemporal, +                            MinAlign(Alignment, IncrementSize)); + +        // Build a factor node to remember that this load is independent of +        // the other one. +        Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), +                         Hi.getValue(1)); + +        // Move the top bits to the right place. +        Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, +                         DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + +        // Join the hi and lo parts. +        Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); +      } else { +        // Big endian - avoid unaligned loads. +        // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 +        // Load the top RoundWidth bits. +        Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, +                            LD->getPointerInfo(), RoundVT, isVolatile, +                            isNonTemporal, Alignment); + +        // Load the remaining ExtraWidth bits. 
+        IncrementSize = RoundWidth / 8; +        Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, +                           DAG.getIntPtrConstant(IncrementSize)); +        Lo = DAG.getExtLoad(ISD::ZEXTLOAD, +                            dl, Node->getValueType(0), Tmp1, Tmp2, +                            LD->getPointerInfo().getWithOffset(IncrementSize), +                            ExtraVT, isVolatile, isNonTemporal, +                            MinAlign(Alignment, IncrementSize)); + +        // Build a factor node to remember that this load is independent of +        // the other one. +        Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), +                         Hi.getValue(1)); + +        // Move the top bits to the right place. +        Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, +                         DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); + +        // Join the hi and lo parts. +        Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); +      } -          // Join the hi and lo parts. 
-          Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); +      Tmp1 = LegalizeOp(Result); +      Tmp2 = LegalizeOp(Ch); +    } else { +      switch (TLI.getLoadExtAction(ExtType, SrcVT)) { +      default: assert(0 && "This action is not supported yet!"); +      case TargetLowering::Custom: +        isCustom = true; +        // FALLTHROUGH +      case TargetLowering::Legal: +        Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), +                                                Tmp1, Tmp2, LD->getOffset()), +                         Result.getResNo()); +        Tmp1 = Result.getValue(0); +        Tmp2 = Result.getValue(1); + +        if (isCustom) { +          Tmp3 = TLI.LowerOperation(Result, DAG); +          if (Tmp3.getNode()) { +            Tmp1 = LegalizeOp(Tmp3); +            Tmp2 = LegalizeOp(Tmp3.getValue(1)); +          }          } else { -          // Big endian - avoid unaligned loads. -          // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 -          // Load the top RoundWidth bits. -          Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2, -                              LD->getSrcValue(), SVOffset, RoundVT, isVolatile, -                              isNonTemporal, Alignment); - -          // Load the remaining ExtraWidth bits. -          IncrementSize = RoundWidth / 8; -          Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, -                             DAG.getIntPtrConstant(IncrementSize)); -          Lo = DAG.getExtLoad(ISD::ZEXTLOAD, -                              Node->getValueType(0), dl, Tmp1, Tmp2, -                              LD->getSrcValue(), SVOffset + IncrementSize, -                              ExtraVT, isVolatile, isNonTemporal, -                              MinAlign(Alignment, IncrementSize)); - -          // Build a factor node to remember that this load is independent of -          // the other one. 
-          Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), -                           Hi.getValue(1)); - -          // Move the top bits to the right place. -          Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, -                           DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); - -          // Join the hi and lo parts. -          Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); -        } - -        Tmp1 = LegalizeOp(Result); -        Tmp2 = LegalizeOp(Ch); -      } else { -        switch (TLI.getLoadExtAction(ExtType, SrcVT)) { -        default: assert(0 && "This action is not supported yet!"); -        case TargetLowering::Custom: -          isCustom = true; -          // FALLTHROUGH -        case TargetLowering::Legal: -          Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), -                                                  Tmp1, Tmp2, LD->getOffset()), -                           Result.getResNo()); -          Tmp1 = Result.getValue(0); -          Tmp2 = Result.getValue(1); - -          if (isCustom) { -            Tmp3 = TLI.LowerOperation(Result, DAG); -            if (Tmp3.getNode()) { -              Tmp1 = LegalizeOp(Tmp3); -              Tmp2 = LegalizeOp(Tmp3.getValue(1)); -            } -          } else { -            // If this is an unaligned load and the target doesn't support it, -            // expand it. 
-            if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { -              const Type *Ty = -                LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); -              unsigned ABIAlignment = -                TLI.getTargetData()->getABITypeAlignment(Ty); -              if (LD->getAlignment() < ABIAlignment){ -                Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), -                                             DAG, TLI); -                Tmp1 = Result.getOperand(0); -                Tmp2 = Result.getOperand(1); -                Tmp1 = LegalizeOp(Tmp1); -                Tmp2 = LegalizeOp(Tmp2); -              } +          // If this is an unaligned load and the target doesn't support it, +          // expand it. +          if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { +            const Type *Ty = +              LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); +            unsigned ABIAlignment = +              TLI.getTargetData()->getABITypeAlignment(Ty); +            if (LD->getAlignment() < ABIAlignment){ +              Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), +                                           DAG, TLI); +              Tmp1 = Result.getOperand(0); +              Tmp2 = Result.getOperand(1); +              Tmp1 = LegalizeOp(Tmp1); +              Tmp2 = LegalizeOp(Tmp2);              }            } -          break; -        case TargetLowering::Expand: -          if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) { -            SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(), -                                       LD->getSrcValueOffset(), -                                       LD->isVolatile(), LD->isNonTemporal(), -                                       LD->getAlignment()); -            unsigned ExtendOp; -            switch (ExtType) { -            case ISD::EXTLOAD: -              ExtendOp = (SrcVT.isFloatingPoint() ? 
-                          ISD::FP_EXTEND : ISD::ANY_EXTEND); -              break; -            case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; -            case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; -            default: llvm_unreachable("Unexpected extend load type!"); -            } -            Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); -            Tmp1 = LegalizeOp(Result);  // Relegalize new nodes. -            Tmp2 = LegalizeOp(Load.getValue(1)); +        } +        break; +      case TargetLowering::Expand: +        if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) { +          SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, +                                     LD->getPointerInfo(), +                                     LD->isVolatile(), LD->isNonTemporal(), +                                     LD->getAlignment()); +          unsigned ExtendOp; +          switch (ExtType) { +          case ISD::EXTLOAD: +            ExtendOp = (SrcVT.isFloatingPoint() ? +                        ISD::FP_EXTEND : ISD::ANY_EXTEND);              break; +          case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; +          case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; +          default: llvm_unreachable("Unexpected extend load type!");            } -          // FIXME: This does not work for vectors on most targets.  Sign- and -          // zero-extend operations are currently folded into extending loads, -          // whether they are legal or not, and then we end up here without any -          // support for legalizing them. -          assert(ExtType != ISD::EXTLOAD && -                 "EXTLOAD should always be supported!"); -          // Turn the unsupported load into an EXTLOAD followed by an explicit -          // zero/sign extend inreg. 
-          Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), dl, -                                  Tmp1, Tmp2, LD->getSrcValue(), -                                  LD->getSrcValueOffset(), SrcVT, -                                  LD->isVolatile(), LD->isNonTemporal(), -                                  LD->getAlignment()); -          SDValue ValRes; -          if (ExtType == ISD::SEXTLOAD) -            ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, -                                 Result.getValueType(), -                                 Result, DAG.getValueType(SrcVT)); -          else -            ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT); -          Tmp1 = LegalizeOp(ValRes);  // Relegalize new nodes. -          Tmp2 = LegalizeOp(Result.getValue(1));  // Relegalize new nodes. +          Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); +          Tmp1 = LegalizeOp(Result);  // Relegalize new nodes. +          Tmp2 = LegalizeOp(Load.getValue(1));            break;          } +        // FIXME: This does not work for vectors on most targets.  Sign- and +        // zero-extend operations are currently folded into extending loads, +        // whether they are legal or not, and then we end up here without any +        // support for legalizing them. +        assert(ExtType != ISD::EXTLOAD && +               "EXTLOAD should always be supported!"); +        // Turn the unsupported load into an EXTLOAD followed by an explicit +        // zero/sign extend inreg. 
+        Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), +                                Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, +                                LD->isVolatile(), LD->isNonTemporal(), +                                LD->getAlignment()); +        SDValue ValRes; +        if (ExtType == ISD::SEXTLOAD) +          ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, +                               Result.getValueType(), +                               Result, DAG.getValueType(SrcVT)); +        else +          ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); +        Tmp1 = LegalizeOp(ValRes);  // Relegalize new nodes. +        Tmp2 = LegalizeOp(Result.getValue(1));  // Relegalize new nodes. +        break;        } - -      // Since loads produce two values, make sure to remember that we legalized -      // both of them. -      AddLegalizedOperand(SDValue(Node, 0), Tmp1); -      AddLegalizedOperand(SDValue(Node, 1), Tmp2); -      return Op.getResNo() ? Tmp2 : Tmp1;      } + +    // Since loads produce two values, make sure to remember that we legalized +    // both of them. +    AddLegalizedOperand(SDValue(Node, 0), Tmp1); +    AddLegalizedOperand(SDValue(Node, 1), Tmp2); +    return Op.getResNo() ? Tmp2 : Tmp1;    }    case ISD::STORE: {      StoreSDNode *ST = cast<StoreSDNode>(Node);      Tmp1 = LegalizeOp(ST->getChain());    // Legalize the chain.      Tmp2 = LegalizeOp(ST->getBasePtr());  // Legalize the pointer. 
-    int SVOffset = ST->getSrcValueOffset();      unsigned Alignment = ST->getAlignment();      bool isVolatile = ST->isVolatile();      bool isNonTemporal = ST->isNonTemporal(); @@ -1408,10 +1432,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {            break;          case TargetLowering::Promote:            assert(VT.isVector() && "Unknown legal promote case!"); -          Tmp3 = DAG.getNode(ISD::BIT_CONVERT, dl, +          Tmp3 = DAG.getNode(ISD::BITCAST, dl,                               TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);            Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, -                                ST->getSrcValue(), SVOffset, isVolatile, +                                ST->getPointerInfo(), isVolatile,                                  isNonTemporal, Alignment);            break;          } @@ -1430,9 +1454,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {          EVT NVT = EVT::getIntegerVT(*DAG.getContext(),                                      StVT.getStoreSizeInBits());          Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); -        Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), -                                   SVOffset, NVT, isVolatile, isNonTemporal, -                                   Alignment); +        Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), +                                   NVT, isVolatile, isNonTemporal, Alignment);        } else if (StWidth & (StWidth - 1)) {          // If not storing a power-of-2 number of bits, expand as two stores.          assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1450,8 +1473,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {          if (TLI.isLittleEndian()) {            // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)            // Store the bottom RoundWidth bits. 
-          Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), -                                 SVOffset, RoundVT, +          Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), +                                 RoundVT,                                   isVolatile, isNonTemporal, Alignment);            // Store the remaining ExtraWidth bits. @@ -1460,9 +1483,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {                               DAG.getIntPtrConstant(IncrementSize));            Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,                             DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); -          Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), -                                 SVOffset + IncrementSize, ExtraVT, isVolatile, -                                 isNonTemporal, +          Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, +                             ST->getPointerInfo().getWithOffset(IncrementSize), +                                 ExtraVT, isVolatile, isNonTemporal,                                   MinAlign(Alignment, IncrementSize));          } else {            // Big endian - avoid unaligned stores. @@ -1470,17 +1493,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {            // Store the top RoundWidth bits.            Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,                             DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); -          Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), -                                 SVOffset, RoundVT, isVolatile, isNonTemporal, -                                 Alignment); +          Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(), +                                 RoundVT, isVolatile, isNonTemporal, Alignment);            // Store the remaining ExtraWidth bits.            
IncrementSize = RoundWidth / 8;            Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,                               DAG.getIntPtrConstant(IncrementSize)); -          Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), -                                 SVOffset + IncrementSize, ExtraVT, isVolatile, -                                 isNonTemporal, +          Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, +                              ST->getPointerInfo().getWithOffset(IncrementSize), +                                 ExtraVT, isVolatile, isNonTemporal,                                   MinAlign(Alignment, IncrementSize));          } @@ -1514,9 +1536,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {            // TRUNCSTORE:i16 i32 -> STORE i16            assert(isTypeLegal(StVT) && "Do not know how to expand this store!");            Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); -          Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), -                                SVOffset, isVolatile, isNonTemporal, -                                Alignment); +          Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), +                                isVolatile, isNonTemporal, Alignment);            break;          }        } @@ -1543,8 +1564,8 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {    DebugLoc dl = Op.getDebugLoc();    // Store the value to a temporary stack slot, then LOAD the returned part.    SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); -  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0, -                            false, false, 0); +  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, +                            MachinePointerInfo(), false, false, 0);    // Add the offset to the index.    
unsigned EltSize = @@ -1560,12 +1581,56 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {    StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);    if (Op.getValueType().isVector()) -    return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0, +    return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),                         false, false, 0); +  return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, +                        MachinePointerInfo(), +                        Vec.getValueType().getVectorElementType(), +                        false, false, 0); +} + +SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { +  assert(Op.getValueType().isVector() && "Non-vector insert subvector!"); + +  SDValue Vec  = Op.getOperand(0); +  SDValue Part = Op.getOperand(1); +  SDValue Idx  = Op.getOperand(2); +  DebugLoc dl  = Op.getDebugLoc(); + +  // Store the value to a temporary stack slot, then LOAD the returned part. + +  SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); +  int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); +  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); + +  // First store the whole vector. +  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, +                            false, false, 0); + +  // Then store the inserted part. + +  // Add the offset to the index. 
+  unsigned EltSize = +      Vec.getValueType().getVectorElementType().getSizeInBits()/8; + +  Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, +                    DAG.getConstant(EltSize, Idx.getValueType())); + +  if (Idx.getValueType().bitsGT(TLI.getPointerTy())) +    Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);    else -    return DAG.getExtLoad(ISD::EXTLOAD, Op.getValueType(), dl, Ch, StackPtr, -                          NULL, 0, Vec.getValueType().getVectorElementType(), -                          false, false, 0); +    Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); + +  SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, +                                    StackPtr); + +  // Store the subvector. +  Ch = DAG.getStore(DAG.getEntryNode(), dl, Part, SubStackPtr, +                    MachinePointerInfo(), false, false, 0); + +  // Finally, load the updated vector. +  return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo, +                     false, false, 0);  }  SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { @@ -1578,7 +1643,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {    DebugLoc dl = Node->getDebugLoc();    SDValue FIPtr = DAG.CreateStackTemporary(VT);    int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); -  const Value *SV = PseudoSourceValue::getFixedStack(FI); +  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);    // Emit a store of each element to the stack slot.    SmallVector<SDValue, 8> Stores; @@ -1597,11 +1662,13 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {      // element type, only store the bits necessary.      
if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {        Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, -                                         Node->getOperand(i), Idx, SV, Offset, +                                         Node->getOperand(i), Idx, +                                         PtrInfo.getWithOffset(Offset),                                           EltVT, false, false, 0));      } else        Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, -                                    Node->getOperand(i), Idx, SV, Offset, +                                    Node->getOperand(i), Idx, +                                    PtrInfo.getWithOffset(Offset),                                      false, false, 0));    } @@ -1613,7 +1680,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {      StoreChain = DAG.getEntryNode();    // Result is a load from the stack slot. -  return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0, false, false, 0); +  return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0);  }  SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { @@ -1628,7 +1695,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {    EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());    if (isTypeLegal(IVT)) {      // Convert to an integer with the same sign bit. -    SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2); +    SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);    } else {      // Store the float to memory, then load the sign part out as an integer.      MVT LoadTy = TLI.getPointerTy(); @@ -1636,12 +1703,13 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {      SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);      // Then store the float to it.      
SDValue Ch = -      DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, NULL, 0, +      DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(),                     false, false, 0);      if (TLI.isBigEndian()) {        assert(FloatVT.isByteSized() && "Unsupported floating point type!");        // Load out a legal integer with the same sign bit as the float. -      SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, NULL, 0, false, false, 0); +      SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(), +                            false, false, 0);      } else { // Little endian        SDValue LoadPtr = StackPtr;        // The float may be wider than the integer we are going to load.  Advance @@ -1651,7 +1719,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {        LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(),                              LoadPtr, DAG.getIntPtrConstant(ByteOffset));        // Load a legal integer containing the sign bit. -      SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, NULL, 0, false, false, 0); +      SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), +                            false, false, 0);        // Move the sign bit to the top bit of the loaded integer.        
unsigned BitShift = LoadTy.getSizeInBits() -          (FloatVT.getSizeInBits() - 8 * ByteOffset); @@ -1694,7 +1763,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,    SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);    Chain = SP.getValue(1);    unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); -  unsigned StackAlign = TM.getFrameInfo()->getStackAlignment(); +  unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();    if (Align > StackAlign)      SP = DAG.getNode(ISD::AND, dl, VT, SP,                        DAG.getConstant(-(uint64_t)Align, VT)); @@ -1768,7 +1837,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,    FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);    int SPFI = StackPtrFI->getIndex(); -  const Value *SV = PseudoSourceValue::getFixedStack(SPFI); +  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);    unsigned SrcSize = SrcOp.getValueType().getSizeInBits();    unsigned SlotSize = SlotVT.getSizeInBits(); @@ -1782,21 +1851,21 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,    if (SrcSize > SlotSize)      Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, -                              SV, 0, SlotVT, false, false, SrcAlign); +                              PtrInfo, SlotVT, false, false, SrcAlign);    else {      assert(SrcSize == SlotSize && "Invalid store");      Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, -                         SV, 0, false, false, SrcAlign); +                         PtrInfo, false, false, SrcAlign);    }    // Result is a load from the stack slot.    
if (SlotSize == DestSize) -    return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, false, -                       DestAlign); +    return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, +                       false, false, DestAlign);    assert(SlotSize < DestSize && "Unknown extension!"); -  return DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, Store, FIPtr, SV, 0, SlotVT, -                        false, false, DestAlign); +  return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, +                        PtrInfo, SlotVT, false, false, DestAlign);  }  SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { @@ -1810,11 +1879,11 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {    SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),                                   StackPtr, -                                 PseudoSourceValue::getFixedStack(SPFI), 0, +                                 MachinePointerInfo::getFixedStack(SPFI),                                   Node->getValueType(0).getVectorElementType(),                                   false, false, 0);    return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr, -                     PseudoSourceValue::getFixedStack(SPFI), 0, +                     MachinePointerInfo::getFixedStack(SPFI),                       false, false, 0);  } @@ -1888,7 +1957,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {      SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());      unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();      return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, -                       PseudoSourceValue::getConstantPool(), 0, +                       MachinePointerInfo::getConstantPool(),                         false, false, Alignment);    } @@ -1924,7 +1993,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {  // and leave the Hi part unset.  
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,                                              bool isSigned) { -  assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");    // The input chain to this libcall is the entry node of the function.    // Legalizing the call will automatically add the previous call to the    // dependence. @@ -1945,12 +2013,20 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,    // Splice the libcall in wherever FindInputOutputChains tells us to.    const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + +  // isTailCall may be true since the callee does not reference caller stack +  // frame. Check if it's in the right position. +  bool isTailCall = isInTailCallPosition(DAG, Node, TLI);    std::pair<SDValue, SDValue> CallInfo =      TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, -                    0, TLI.getLibcallCallingConv(LC), false, +                    0, TLI.getLibcallCallingConv(LC), isTailCall,                      /*isReturnValueUsed=*/true,                      Callee, Args, DAG, Node->getDebugLoc()); +  if (!CallInfo.second.getNode()) +    // It's a tailcall, return the chain (which is the DAG root). +    return DAG.getRoot(); +    // Legalize the call sequence, starting with the chain.  This will advance    // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that    // was added by LowerCallTo (guaranteeing proper serialization of calls). 
@@ -1964,7 +2040,6 @@ std::pair<SDValue, SDValue>  SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,                                           SDNode *Node,                                           bool isSigned) { -  assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");    SDValue InChain = Node->getOperand(0);    TargetLowering::ArgListTy Args; @@ -1985,7 +2060,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,    const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());    std::pair<SDValue, SDValue> CallInfo =      TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, -                    0, TLI.getLibcallCallingConv(LC), false, +                    0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,                      /*isReturnValueUsed=*/true,                      Callee, Args, DAG, Node->getDebugLoc()); @@ -2064,16 +2139,17 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,      }      // store the lo of the constructed double - based on integer input      SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, -                                  Op0Mapped, Lo, NULL, 0, +                                  Op0Mapped, Lo, MachinePointerInfo(),                                    false, false, 0);      // initial hi portion of constructed double      SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);      // store the hi of the constructed double - biased exponent -    SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0, -                                false, false, 0); +    SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi, +                                  MachinePointerInfo(), +                                  false, false, 0);      // load the constructed double -    SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0, -                               false, false, 0); +    SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, 
StackSlot, +                               MachinePointerInfo(), false, false, 0);      // FP constant to bias correct the final result      SDValue Bias = DAG.getConstantFP(isSigned ?                                       BitsToDouble(0x4330000080000000ULL) : @@ -2116,17 +2192,40 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,                               DAG.getConstant(32, MVT::i64));      SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);      SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); -    SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr); -    SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr); +    SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr); +    SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr);      SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,                                  TwoP84PlusTwoP52);      return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);    } -  // Implementation of unsigned i64 to f32.  This implementation has the -  // advantage of performing rounding correctly. +  // Implementation of unsigned i64 to f32.    // TODO: Generalize this for use with other types.    if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { +    // For unsigned conversions, convert them to signed conversions using the +    // algorithm from the x86_64 __floatundidf in compiler_rt. 
+    if (!isSigned) { +      SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0); + +      SDValue ShiftConst = DAG.getConstant(1, TLI.getShiftAmountTy()); +      SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst); +      SDValue AndConst = DAG.getConstant(1, MVT::i64); +      SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst); +      SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr); + +      SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or); +      SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt); + +      // TODO: This really should be implemented using a branch rather than a +      // select.  We happen to get lucky and machinesink does the right +      // thing most of the time.  This would be a good candidate for a +      //pseudo-op, or, even better, for whole-function isel. +      SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), +        Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT); +      return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast); +    } + +    // Otherwise, implement the fully general conversion.      
EVT SHVT = TLI.getShiftAmountTy();      SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, @@ -2140,7 +2239,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,      SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0);      SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),                     Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), -                    ISD::SETUGE); +                   ISD::SETUGE);      SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);      SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, @@ -2155,7 +2254,6 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,      SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);      return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,                         DAG.getIntPtrConstant(0)); -    }    SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); @@ -2189,13 +2287,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,    SDValue FudgeInReg;    if (DestVT == MVT::f32)      FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, -                             PseudoSourceValue::getConstantPool(), 0, +                             MachinePointerInfo::getConstantPool(),                               false, false, Alignment);    else {      FudgeInReg = -      LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, +      LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,                                  DAG.getEntryNode(), CPIdx, -                                PseudoSourceValue::getConstantPool(), 0, +                                MachinePointerInfo::getConstantPool(),                                  MVT::f32, false, false, Alignment));    } @@ -2332,6 +2430,18 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {    }  } +/// SplatByte - Distribute ByteVal over NumBits bits. +// FIXME: Move this helper to a common place. 
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { +  APInt Val = APInt(NumBits, ByteVal); +  unsigned Shift = 8; +  for (unsigned i = NumBits; i > 8; i >>= 1) { +    Val = (Val << Shift) | Val; +    Shift <<= 1; +  } +  return Val; +} +  /// ExpandBitCount - Expand the specified bitcount instruction into operations.  ///  SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, @@ -2339,26 +2449,45 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,    switch (Opc) {    default: assert(0 && "Cannot expand this yet!");    case ISD::CTPOP: { -    static const uint64_t mask[6] = { -      0x5555555555555555ULL, 0x3333333333333333ULL, -      0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL, -      0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL -    };      EVT VT = Op.getValueType();      EVT ShVT = TLI.getShiftAmountTy(); -    unsigned len = VT.getSizeInBits(); -    for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { -      //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8]) -      unsigned EltSize = VT.isVector() ? 
-        VT.getVectorElementType().getSizeInBits() : len; -      SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT); -      SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); -      Op = DAG.getNode(ISD::ADD, dl, VT, -                       DAG.getNode(ISD::AND, dl, VT, Op, Tmp2), -                       DAG.getNode(ISD::AND, dl, VT, -                                   DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3), -                                   Tmp2)); -    } +    unsigned Len = VT.getSizeInBits(); + +    assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 && +           "CTPOP not implemented for this type."); + +    // This is the "best" algorithm from +    // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + +    SDValue Mask55 = DAG.getConstant(SplatByte(Len, 0x55), VT); +    SDValue Mask33 = DAG.getConstant(SplatByte(Len, 0x33), VT); +    SDValue Mask0F = DAG.getConstant(SplatByte(Len, 0x0F), VT); +    SDValue Mask01 = DAG.getConstant(SplatByte(Len, 0x01), VT); + +    // v = v - ((v >> 1) & 0x55555555...) +    Op = DAG.getNode(ISD::SUB, dl, VT, Op, +                     DAG.getNode(ISD::AND, dl, VT, +                                 DAG.getNode(ISD::SRL, dl, VT, Op, +                                             DAG.getConstant(1, ShVT)), +                                 Mask55)); +    // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) +    Op = DAG.getNode(ISD::ADD, dl, VT, +                     DAG.getNode(ISD::AND, dl, VT, Op, Mask33), +                     DAG.getNode(ISD::AND, dl, VT, +                                 DAG.getNode(ISD::SRL, dl, VT, Op, +                                             DAG.getConstant(2, ShVT)), +                                 Mask33)); +    // v = (v + (v >> 4)) & 0x0F0F0F0F... 
+    Op = DAG.getNode(ISD::AND, dl, VT, +                     DAG.getNode(ISD::ADD, dl, VT, Op, +                                 DAG.getNode(ISD::SRL, dl, VT, Op, +                                             DAG.getConstant(4, ShVT))), +                     Mask0F); +    // v = (v * 0x01010101...) >> (Len - 8) +    Op = DAG.getNode(ISD::SRL, dl, VT, +                     DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), +                     DAG.getConstant(Len - 8, ShVT)); +          return Op;    }    case ISD::CTLZ: { @@ -2516,9 +2645,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,    case ISD::PREFETCH:    case ISD::VAEND:    case ISD::EH_SJLJ_LONGJMP: +  case ISD::EH_SJLJ_DISPATCHSETUP: +    // If the target didn't expand these, there's nothing to do, so just +    // preserve the chain and be done.      Results.push_back(Node->getOperand(0));      break;    case ISD::EH_SJLJ_SETJMP: +    // If the target didn't expand this, just return 'zero' and preserve the +    // chain.      Results.push_back(DAG.getConstant(0, MVT::i32));      Results.push_back(Node->getOperand(0));      break; @@ -2527,7 +2661,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,      TargetLowering::ArgListTy Args;      std::pair<SDValue, SDValue> CallResult =        TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), -                      false, false, false, false, 0, CallingConv::C, false, +                      false, false, false, false, 0, CallingConv::C, +                      /*isTailCall=*/false,                        /*isReturnValueUsed=*/true,                        DAG.getExternalSymbol("__sync_synchronize",                                              TLI.getPointerTy()), @@ -2538,7 +2673,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,    // By default, atomic intrinsics are marked Legal and lowered. Targets    // which don't support them directly, however, may want libcalls, in which    // case they mark them Expand, and we get here. 
-  // FIXME: Unimplemented for now. Add libcalls.    case ISD::ATOMIC_SWAP:    case ISD::ATOMIC_LOAD_ADD:    case ISD::ATOMIC_LOAD_SUB: @@ -2578,7 +2712,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,      TargetLowering::ArgListTy Args;      std::pair<SDValue, SDValue> CallResult =        TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), -                      false, false, false, false, 0, CallingConv::C, false, +                      false, false, false, false, 0, CallingConv::C, +                      /*isTailCall=*/false,                        /*isReturnValueUsed=*/true,                        DAG.getExternalSymbol("abort", TLI.getPointerTy()),                        Args, DAG, dl); @@ -2586,7 +2721,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,      break;    }    case ISD::FP_ROUND: -  case ISD::BIT_CONVERT: +  case ISD::BITCAST:      Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),                              Node->getValueType(0), dl);      Results.push_back(Tmp1); @@ -2637,8 +2772,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,      SDValue True, False;      EVT VT =  Node->getOperand(0).getValueType();      EVT NVT = Node->getValueType(0); -    const uint64_t zero[] = {0, 0}; -    APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero)); +    APFloat apf(APInt::getNullValue(VT.getSizeInBits()));      APInt x = APInt::getSignBit(NVT.getSizeInBits());      (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);      Tmp1 = DAG.getConstantFP(apf, VT); @@ -2662,8 +2796,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,      Tmp2 = Node->getOperand(1);      unsigned Align = Node->getConstantOperandVal(3); -    SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0, -                                     false, false, 0); +    SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, +                                     
MachinePointerInfo(V), false, false, 0);      SDValue VAList = VAListLoad;      if (Align > TLI.getMinStackArgumentAlignment()) { @@ -2674,7 +2808,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,                                             TLI.getPointerTy()));        VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList, -                           DAG.getConstant(-Align, +                           DAG.getConstant(-(int64_t)Align,                                             TLI.getPointerTy()));      } @@ -2684,10 +2818,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,                            getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),                                         TLI.getPointerTy()));      // Store the incremented VAList to the legalized pointer -    Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, V, 0, -                        false, false, 0); +    Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, +                        MachinePointerInfo(V), false, false, 0);      // Load the actual argument out of the pointer VAList -    Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, +    Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),                                    false, false, 0));      Results.push_back(Results[0].getValue(1));      break; @@ -2698,16 +2832,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,      const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();      const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();      Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), -                       Node->getOperand(2), VS, 0, false, false, 0); -    Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0, -                        false, false, 0); +                       Node->getOperand(2), MachinePointerInfo(VS), +                       false, false, 0); +    Tmp1 = 
DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), +                        MachinePointerInfo(VD), false, false, 0);      Results.push_back(Tmp1);      break;    }    case ISD::EXTRACT_VECTOR_ELT:      if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)        // This must be an access of the only element.  Return it. -      Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0), +      Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0),                           Node->getOperand(0));      else        Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0)); @@ -2716,6 +2851,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,    case ISD::EXTRACT_SUBVECTOR:      Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));      break; +  case ISD::INSERT_SUBVECTOR: +    Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0))); +    break;    case ISD::CONCAT_VECTORS: {      Results.push_back(ExpandVectorBuildThroughStack(Node));      break; @@ -3094,14 +3232,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,        BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,                                 RHS);        TopHalf = BottomHalf.getValue(1); -    } else { -      // FIXME: We should be able to fall back to a libcall with an illegal -      // type in some cases. -      // Also, we can fall back to a division in some cases, but that's a big -      // performance hit in the general case. 
-      assert(TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), -                                               VT.getSizeInBits() * 2)) && -             "Don't know how to expand this operation yet!"); +    } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), +                                                 VT.getSizeInBits() * 2))) {        EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);        LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);        RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); @@ -3110,6 +3242,30 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,                                 DAG.getIntPtrConstant(0));        TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,                              DAG.getIntPtrConstant(1)); +    } else { +      // We can fall back to a libcall with an illegal type for the MUL if we +      // have a libcall big enough. +      // Also, we can fall back to a division in some cases, but that's a big +      // performance hit in the general case. 
+      EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); +      RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; +      if (WideVT == MVT::i16) +        LC = RTLIB::MUL_I16; +      else if (WideVT == MVT::i32) +        LC = RTLIB::MUL_I32; +      else if (WideVT == MVT::i64) +        LC = RTLIB::MUL_I64; +      else if (WideVT == MVT::i128) +        LC = RTLIB::MUL_I128; +      assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); +      LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); +      RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); +       +      SDValue Ret = ExpandLibCall(LC, Node, isSigned); +      BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret); +      TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret, +                       DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy())); +      TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf);      }      if (isSigned) {        Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy()); @@ -3165,8 +3321,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,      SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);      EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); -    SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, PTy, dl, Chain, Addr, -                                PseudoSourceValue::getJumpTable(), 0, MemVT, +    SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, +                                MachinePointerInfo::getJumpTable(), MemVT,                                  false, false, 0);      Addr = LD;      if (TM.getRelocationModel() == Reloc::PIC_) { @@ -3329,8 +3485,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,    case ISD::XOR: {      unsigned ExtOp, TruncOp;      if (OVT.isVector()) { -      ExtOp   = ISD::BIT_CONVERT; -      TruncOp = ISD::BIT_CONVERT; +      ExtOp   = ISD::BITCAST; +      TruncOp = ISD::BITCAST;      } else {        assert(OVT.isInteger() && 
"Cannot promote logic operation");        ExtOp   = ISD::ANY_EXTEND; @@ -3347,8 +3503,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,    case ISD::SELECT: {      unsigned ExtOp, TruncOp;      if (Node->getValueType(0).isVector()) { -      ExtOp   = ISD::BIT_CONVERT; -      TruncOp = ISD::BIT_CONVERT; +      ExtOp   = ISD::BITCAST; +      TruncOp = ISD::BITCAST;      } else if (Node->getValueType(0).isInteger()) {        ExtOp   = ISD::ANY_EXTEND;        TruncOp = ISD::TRUNCATE; @@ -3375,12 +3531,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,      cast<ShuffleVectorSDNode>(Node)->getMask(Mask);      // Cast the two input vectors. -    Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0)); -    Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1)); +    Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0)); +    Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1));      // Convert the shuffle mask to the right # elements.      Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask); -    Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1); +    Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1);      Results.push_back(Tmp1);      break;    } diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 650ee5a0721c..27752123aac4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -55,7 +55,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {  #endif      llvm_unreachable("Do not know how to soften the result of this operator!"); -    case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break; +    case ISD::BITCAST:     R = SoftenFloatRes_BITCAST(N); break;      case ISD::BUILD_PAIR:  R = SoftenFloatRes_BUILD_PAIR(N); break;      case ISD::ConstantFP:        R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N)); @@ -102,7 +102,7 @@ void 
DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {      SetSoftenedFloat(SDValue(N, ResNo), R);  } -SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {    return BitConvertToInteger(N->getOperand(0));  } @@ -133,8 +133,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {    unsigned Size = NVT.getSizeInBits();    // Mask = ~(1 << (Size-1)) -  SDValue Mask = DAG.getConstant(APInt::getAllOnesValue(Size).clear(Size-1), -                                 NVT); +  APInt API = APInt::getAllOnesValue(Size); +  API.clearBit(Size-1); +  SDValue Mask = DAG.getConstant(API, NVT);    SDValue Op = GetSoftenedFloat(N->getOperand(0));    return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask);  } @@ -455,7 +456,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {    if (L->getExtensionType() == ISD::NON_EXTLOAD) {      NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),                         NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), -                       L->getSrcValue(), L->getSrcValueOffset(), NVT, +                       L->getPointerInfo(), NVT,                         L->isVolatile(), L->isNonTemporal(), L->getAlignment());      // Legalized the chain result - switch anything that used the old chain to      // use the new one. @@ -466,8 +467,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {    // Do a non-extending load followed by FP_EXTEND.    
NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD,                       L->getMemoryVT(), dl, L->getChain(), -                     L->getBasePtr(), L->getOffset(), -                     L->getSrcValue(), L->getSrcValueOffset(), +                     L->getBasePtr(), L->getOffset(), L->getPointerInfo(),                       L->getMemoryVT(), L->isVolatile(),                       L->isNonTemporal(), L->getAlignment());    // Legalized the chain result - switch anything that used the old chain to @@ -558,7 +558,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {  #endif      llvm_unreachable("Do not know how to soften this operator's operand!"); -  case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break; +  case ISD::BITCAST:     Res = SoftenFloatOp_BITCAST(N); break;    case ISD::BR_CC:       Res = SoftenFloatOp_BR_CC(N); break;    case ISD::FP_ROUND:    Res = SoftenFloatOp_FP_ROUND(N); break;    case ISD::FP_TO_SINT:  Res = SoftenFloatOp_FP_TO_SINT(N); break; @@ -670,8 +670,8 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,    }  } -SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) { -  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0), +SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { +  return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),                       GetSoftenedFloat(N->getOperand(0)));  } @@ -780,7 +780,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {      Val = GetSoftenedFloat(Val);    return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), -                      ST->getSrcValue(), ST->getSrcValueOffset(), +                      ST->getPointerInfo(),                        ST->isVolatile(), ST->isNonTemporal(),                        ST->getAlignment());  } @@ -816,7 +816,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {    case ISD::SELECT:       
SplitRes_SELECT(N, Lo, Hi); break;    case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break; -  case ISD::BIT_CONVERT:        ExpandRes_BIT_CONVERT(N, Lo, Hi); break; +  case ISD::BITCAST:            ExpandRes_BITCAST(N, Lo, Hi); break;    case ISD::BUILD_PAIR:         ExpandRes_BUILD_PAIR(N, Lo, Hi); break;    case ISD::EXTRACT_ELEMENT:    ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;    case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; @@ -1110,9 +1110,8 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,    assert(NVT.isByteSized() && "Expanded type not byte sized!");    assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); -  Hi = DAG.getExtLoad(LD->getExtensionType(), NVT, dl, Chain, Ptr, -                      LD->getSrcValue(), LD->getSrcValueOffset(), -                      LD->getMemoryVT(), LD->isVolatile(), +  Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, +                      LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(),                        LD->isNonTemporal(), LD->getAlignment());    // Remember the chain. 
@@ -1222,7 +1221,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {    #endif        llvm_unreachable("Do not know how to expand this operator's operand!"); -    case ISD::BIT_CONVERT:     Res = ExpandOp_BIT_CONVERT(N); break; +    case ISD::BITCAST:         Res = ExpandOp_BITCAST(N); break;      case ISD::BUILD_VECTOR:    Res = ExpandOp_BUILD_VECTOR(N); break;      case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; @@ -1421,7 +1420,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {    GetExpandedOp(ST->getValue(), Lo, Hi);    return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr, -                           ST->getSrcValue(), ST->getSrcValueOffset(), +                           ST->getPointerInfo(),                             ST->getMemoryVT(), ST->isVolatile(),                             ST->isNonTemporal(), ST->getAlignment());  } diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index f8c589071921..f0752df80f12 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -49,7 +49,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {      llvm_unreachable("Do not know how to promote this operator!");    case ISD::AssertSext:  Res = PromoteIntRes_AssertSext(N); break;    case ISD::AssertZext:  Res = PromoteIntRes_AssertZext(N); break; -  case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break; +  case ISD::BITCAST:     Res = PromoteIntRes_BITCAST(N); break;    case ISD::BSWAP:       Res = PromoteIntRes_BSWAP(N); break;    case ISD::BUILD_PAIR:  Res = PromoteIntRes_BUILD_PAIR(N); break;    case ISD::Constant:    Res = PromoteIntRes_Constant(N); break; @@ -143,7 +143,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {    SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),                                
N->getMemoryVT(),                                N->getChain(), N->getBasePtr(), -                              Op2, N->getSrcValue(), N->getAlignment()); +                              Op2, N->getMemOperand());    // Legalized the chain result - switch anything that used the old chain to    // use the new one.    ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -155,14 +155,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {    SDValue Op3 = GetPromotedInteger(N->getOperand(3));    SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),                                N->getMemoryVT(), N->getChain(), N->getBasePtr(), -                              Op2, Op3, N->getSrcValue(), N->getAlignment()); +                              Op2, Op3, N->getMemOperand());    // Legalized the chain result - switch anything that used the old chain to    // use the new one.    ReplaceValueWith(SDValue(N, 1), Res.getValue(1));    return Res;  } -SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {    SDValue InOp = N->getOperand(0);    EVT InVT = InOp.getValueType();    EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); @@ -179,8 +179,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {    case PromoteInteger:      if (NOutVT.bitsEq(NInVT))        // The input promotes to the same size.  Convert the promoted value. -      return DAG.getNode(ISD::BIT_CONVERT, dl, -                         NOutVT, GetPromotedInteger(InOp)); +      return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));      break;    case SoftenFloat:      // Promote the integer operand by hand. 
@@ -193,7 +192,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {      return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,                         BitConvertToInteger(GetScalarizedVector(InOp)));    case SplitVector: { -    // For example, i32 = BIT_CONVERT v2i16 on alpha.  Convert the split +    // For example, i32 = BITCAST v2i16 on alpha.  Convert the split      // pieces of the input into integers and reassemble in the final type.      SDValue Lo, Hi;      GetSplitVector(N->getOperand(0), Lo, Hi); @@ -207,12 +206,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {                         EVT::getIntegerVT(*DAG.getContext(),                                           NOutVT.getSizeInBits()),                         JoinIntegers(Lo, Hi)); -    return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp); +    return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);    }    case WidenVector:      if (OutVT.bitsEq(NInVT))        // The input is widened to the same size.  Convert to the widened value. -      return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp)); +      return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp));    }    return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, @@ -293,7 +292,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {    // value was zero.  This can be handled by setting the bit just off    // the top of the original type.    APInt TopBit(NVT.getSizeInBits(), 0); -  TopBit.set(OVT.getSizeInBits()); +  TopBit.setBit(OVT.getSizeInBits());    Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));    return DAG.getNode(ISD::CTTZ, dl, NVT, Op);  } @@ -371,8 +370,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {    ISD::LoadExtType ExtType =      ISD::isNON_EXTLoad(N) ? 
ISD::EXTLOAD : N->getExtensionType();    DebugLoc dl = N->getDebugLoc(); -  SDValue Res = DAG.getExtLoad(ExtType, NVT, dl, N->getChain(), N->getBasePtr(), -                               N->getSrcValue(), N->getSrcValueOffset(), +  SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), +                               N->getPointerInfo(),                                 N->getMemoryVT(), N->isVolatile(),                                 N->isNonTemporal(), N->getAlignment()); @@ -549,6 +548,48 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {    return Res;  } +SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { +  // Promote the overflow bit trivially. +  if (ResNo == 1) +    return PromoteIntRes_Overflow(N); + +  SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); +  DebugLoc DL = N->getDebugLoc(); +  EVT SmallVT = LHS.getValueType(); + +  // To determine if the result overflowed in a larger type, we extend the input +  // to the larger type, do the multiply, then check the high bits of the result +  // to see if the overflow happened. +  if (N->getOpcode() == ISD::SMULO) { +    LHS = SExtPromotedInteger(LHS); +    RHS = SExtPromotedInteger(RHS); +  } else { +    LHS = ZExtPromotedInteger(LHS); +    RHS = ZExtPromotedInteger(RHS); +  } +  SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS); + +  // Overflow occurred iff the high part of the result does not zero/sign-extend +  // the low part. +  SDValue Overflow; +  if (N->getOpcode() == ISD::UMULO) { +    // Unsigned overflow occurred iff the high part is non-zero. 
+    SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, +                             DAG.getIntPtrConstant(SmallVT.getSizeInBits())); +    Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, +                            DAG.getConstant(0, Hi.getValueType()), ISD::SETNE); +  } else { +    // Signed overflow occurred iff the high part does not sign extend the low. +    SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(), +                               Mul, DAG.getValueType(SmallVT)); +    Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE); +  } + +  // Use the calculated overflow everywhere. +  ReplaceValueWith(SDValue(N, 1), Overflow); +  return Mul; +} +  SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {    // Zero extend the input.    SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); @@ -602,11 +643,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {    return Res;  } -SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { -  assert(ResNo == 1 && "Only boolean result promotion currently supported!"); -  return PromoteIntRes_Overflow(N); -} -  //===----------------------------------------------------------------------===//  //  Integer Operand Promotion  //===----------------------------------------------------------------------===// @@ -631,7 +667,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {      llvm_unreachable("Do not know how to promote this operator's operand!");    case ISD::ANY_EXTEND:   Res = PromoteIntOp_ANY_EXTEND(N); break; -  case ISD::BIT_CONVERT:  Res = PromoteIntOp_BIT_CONVERT(N); break; +  case ISD::BITCAST:      Res = PromoteIntOp_BITCAST(N); break;    case ISD::BR_CC:        Res = PromoteIntOp_BR_CC(N, OpNo); break;    case ISD::BRCOND:       Res = PromoteIntOp_BRCOND(N, OpNo); break;    case ISD::BUILD_PAIR:   Res = PromoteIntOp_BUILD_PAIR(N); break; @@ -713,7 +749,7 @@ SDValue 
DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {    return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op);  } -SDValue DAGTypeLegalizer::PromoteIntOp_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {    // This should only occur in unusual situations like bitcasting to an    // x86_fp80, so just turn it into a store+load    return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0)); @@ -889,7 +925,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {  SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){    assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");    SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); -  int SVOffset = N->getSrcValueOffset();    unsigned Alignment = N->getAlignment();    bool isVolatile = N->isVolatile();    bool isNonTemporal = N->isNonTemporal(); @@ -898,8 +933,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){    SDValue Val = GetPromotedInteger(N->getValue());  // Get promoted value.    // Truncate the value and store the result. 
-  return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(), -                           SVOffset, N->getMemoryVT(), +  return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(), +                           N->getMemoryVT(),                             isVolatile, isNonTemporal, Alignment);  } @@ -951,7 +986,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {    case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;    case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break; -  case ISD::BIT_CONVERT:        ExpandRes_BIT_CONVERT(N, Lo, Hi); break; +  case ISD::BITCAST:            ExpandRes_BITCAST(N, Lo, Hi); break;    case ISD::BUILD_PAIR:         ExpandRes_BUILD_PAIR(N, Lo, Hi); break;    case ISD::EXTRACT_ELEMENT:    ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;    case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; @@ -978,6 +1013,23 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {    case ISD::UREM:        ExpandIntRes_UREM(N, Lo, Hi); break;    case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break; +  case ISD::ATOMIC_LOAD_ADD: +  case ISD::ATOMIC_LOAD_SUB: +  case ISD::ATOMIC_LOAD_AND: +  case ISD::ATOMIC_LOAD_OR: +  case ISD::ATOMIC_LOAD_XOR: +  case ISD::ATOMIC_LOAD_NAND: +  case ISD::ATOMIC_LOAD_MIN: +  case ISD::ATOMIC_LOAD_MAX: +  case ISD::ATOMIC_LOAD_UMIN: +  case ISD::ATOMIC_LOAD_UMAX: +  case ISD::ATOMIC_SWAP: { +    std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N); +    SplitInteger(Tmp.first, Lo, Hi); +    ReplaceValueWith(SDValue(N, 1), Tmp.second); +    break; +  } +    case ISD::AND:    case ISD::OR:    case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break; @@ -999,6 +1051,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {    case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;    case ISD::UADDO:    case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; +  case ISD::UMULO: +  case ISD::SMULO: 
ExpandIntRes_UMULSMULO(N, Lo, Hi); break;    }    // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -1006,11 +1060,98 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {      SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);  } +/// Lower an atomic node to the appropriate builtin call. +std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { +  unsigned Opc = Node->getOpcode(); +  MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); +  RTLIB::Libcall LC; + +  switch (Opc) { +  default: +    llvm_unreachable("Unhandled atomic intrinsic Expand!"); +    break; +  case ISD::ATOMIC_SWAP: +    switch (VT.SimpleTy) { +    default: llvm_unreachable("Unexpected value type for atomic!"); +    case MVT::i8:  LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; +    case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; +    case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; +    case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; +    } +    break; +  case ISD::ATOMIC_CMP_SWAP: +    switch (VT.SimpleTy) { +    default: llvm_unreachable("Unexpected value type for atomic!"); +    case MVT::i8:  LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; +    case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; +    case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; +    case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; +    } +    break; +  case ISD::ATOMIC_LOAD_ADD: +    switch (VT.SimpleTy) { +    default: llvm_unreachable("Unexpected value type for atomic!"); +    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; +    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; +    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; +    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; +    } +    break; +  case ISD::ATOMIC_LOAD_SUB: +    switch (VT.SimpleTy) { +    default: llvm_unreachable("Unexpected value type for atomic!"); +    case MVT::i8:  
LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; +    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; +    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; +    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; +    } +    break; +  case ISD::ATOMIC_LOAD_AND: +    switch (VT.SimpleTy) { +    default: llvm_unreachable("Unexpected value type for atomic!"); +    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_AND_1; break; +    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; +    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; +    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; +    } +    break; +  case ISD::ATOMIC_LOAD_OR: +    switch (VT.SimpleTy) { +    default: llvm_unreachable("Unexpected value type for atomic!"); +    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_OR_1; break; +    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; +    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; +    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; +    } +    break; +  case ISD::ATOMIC_LOAD_XOR: +    switch (VT.SimpleTy) { +    default: llvm_unreachable("Unexpected value type for atomic!"); +    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; +    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; +    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; +    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; +    } +    break; +  case ISD::ATOMIC_LOAD_NAND: +    switch (VT.SimpleTy) { +    default: llvm_unreachable("Unexpected value type for atomic!"); +    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; +    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; +    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; +    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; +    } +    break; +  } + +  return ExpandChainLibCall(LC, Node, false); +} +  /// ExpandShiftByConstant - N is a shift by a value that needs to be expanded,  /// and the shift amount is a constant 'Amt'.  
Expand the operation.  void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,                                               SDValue &Lo, SDValue &Hi) { -  DebugLoc dl = N->getDebugLoc(); +  DebugLoc DL = N->getDebugLoc();    // Expand the incoming operand to be shifted, so that we have its parts    SDValue InL, InH;    GetExpandedInteger(N->getOperand(0), InL, InH); @@ -1025,8 +1166,8 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,        Lo = Hi = DAG.getConstant(0, NVT);      } else if (Amt > NVTBits) {        Lo = DAG.getConstant(0, NVT); -      Hi = DAG.getNode(ISD::SHL, dl, -                       NVT, InL, DAG.getConstant(Amt-NVTBits,ShTy)); +      Hi = DAG.getNode(ISD::SHL, DL, +                       NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy));      } else if (Amt == NVTBits) {        Lo = DAG.getConstant(0, NVT);        Hi = InL; @@ -1034,17 +1175,17 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,                 TLI.isOperationLegalOrCustom(ISD::ADDC,                                TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) {        // Emit this X << 1 as X+X. 
-      SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); +      SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);        SDValue LoOps[2] = { InL, InL }; -      Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); +      Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2);        SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; -      Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); +      Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3);      } else { -      Lo = DAG.getNode(ISD::SHL, dl, NVT, InL, DAG.getConstant(Amt, ShTy)); -      Hi = DAG.getNode(ISD::OR, dl, NVT, -                       DAG.getNode(ISD::SHL, dl, NVT, InH, +      Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy)); +      Hi = DAG.getNode(ISD::OR, DL, NVT, +                       DAG.getNode(ISD::SHL, DL, NVT, InH,                                     DAG.getConstant(Amt, ShTy)), -                       DAG.getNode(ISD::SRL, dl, NVT, InL, +                       DAG.getNode(ISD::SRL, DL, NVT, InL,                                     DAG.getConstant(NVTBits-Amt, ShTy)));      }      return; @@ -1055,43 +1196,43 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,        Lo = DAG.getConstant(0, NVT);        Hi = DAG.getConstant(0, NVT);      } else if (Amt > NVTBits) { -      Lo = DAG.getNode(ISD::SRL, dl, +      Lo = DAG.getNode(ISD::SRL, DL,                         NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy));        Hi = DAG.getConstant(0, NVT);      } else if (Amt == NVTBits) {        Lo = InH;        Hi = DAG.getConstant(0, NVT);      } else { -      Lo = DAG.getNode(ISD::OR, dl, NVT, -                       DAG.getNode(ISD::SRL, dl, NVT, InL, +      Lo = DAG.getNode(ISD::OR, DL, NVT, +                       DAG.getNode(ISD::SRL, DL, NVT, InL,                                     DAG.getConstant(Amt, ShTy)), -                       DAG.getNode(ISD::SHL, dl, NVT, InH, +                       DAG.getNode(ISD::SHL, DL, NVT, InH,                                   
  DAG.getConstant(NVTBits-Amt, ShTy))); -      Hi = DAG.getNode(ISD::SRL, dl, NVT, InH, DAG.getConstant(Amt, ShTy)); +      Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy));      }      return;    }    assert(N->getOpcode() == ISD::SRA && "Unknown shift!");    if (Amt > VTBits) { -    Hi = Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, +    Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,                            DAG.getConstant(NVTBits-1, ShTy));    } else if (Amt > NVTBits) { -    Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, +    Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,                       DAG.getConstant(Amt-NVTBits, ShTy)); -    Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, +    Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,                       DAG.getConstant(NVTBits-1, ShTy));    } else if (Amt == NVTBits) {      Lo = InH; -    Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, +    Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,                       DAG.getConstant(NVTBits-1, ShTy));    } else { -    Lo = DAG.getNode(ISD::OR, dl, NVT, -                     DAG.getNode(ISD::SRL, dl, NVT, InL, +    Lo = DAG.getNode(ISD::OR, DL, NVT, +                     DAG.getNode(ISD::SRL, DL, NVT, InL,                                   DAG.getConstant(Amt, ShTy)), -                     DAG.getNode(ISD::SHL, dl, NVT, InH, +                     DAG.getNode(ISD::SHL, DL, NVT, InH,                                   DAG.getConstant(NVTBits-Amt, ShTy))); -    Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, DAG.getConstant(Amt, ShTy)); +    Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy));    }  } @@ -1269,7 +1410,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,    // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support    // them.  TODO: Teach operation legalization how to expand unsupported    // ADDC/ADDE/SUBC/SUBE.  
The problem is that these operations generate -  // a carry of type MVT::Flag, but there doesn't seem to be any way to +  // a carry of type MVT::Glue, but there doesn't seem to be any way to    // generate a value of this type in the expanded code sequence.    bool hasCarry =      TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? @@ -1277,7 +1418,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,                                   TLI.getTypeToExpandTo(*DAG.getContext(), NVT));    if (hasCarry) { -    SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); +    SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);      if (N->getOpcode() == ISD::ADD) {        Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);        HiOps[2] = Lo.getValue(1); @@ -1287,31 +1428,32 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,        HiOps[2] = Lo.getValue(1);        Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);      } +    return;     +  } +   +  if (N->getOpcode() == ISD::ADD) { +    Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); +    Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); +    SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0], +                                ISD::SETULT); +    SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, +                                 DAG.getConstant(1, NVT), +                                 DAG.getConstant(0, NVT)); +    SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1], +                                ISD::SETULT); +    SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, +                                 DAG.getConstant(1, NVT), Carry1); +    Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);    } else { -    if (N->getOpcode() == ISD::ADD) { -      Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); -      Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); -      SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0], -                                  
ISD::SETULT); -      SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, -                                   DAG.getConstant(1, NVT), -                                   DAG.getConstant(0, NVT)); -      SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1], -                                  ISD::SETULT); -      SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, -                                   DAG.getConstant(1, NVT), Carry1); -      Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); -    } else { -      Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); -      Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); -      SDValue Cmp = -        DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()), -                     LoOps[0], LoOps[1], ISD::SETULT); -      SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, -                                   DAG.getConstant(1, NVT), -                                   DAG.getConstant(0, NVT)); -      Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); -    } +    Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); +    Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); +    SDValue Cmp = +      DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()), +                   LoOps[0], LoOps[1], ISD::SETULT); +    SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, +                                 DAG.getConstant(1, NVT), +                                 DAG.getConstant(0, NVT)); +    Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);    }  } @@ -1322,7 +1464,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,    DebugLoc dl = N->getDebugLoc();    GetExpandedInteger(N->getOperand(0), LHSL, LHSH);    GetExpandedInteger(N->getOperand(1), RHSL, RHSH); -  SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); +  SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);    SDValue LoOps[2] = { LHSL, RHSL };    SDValue HiOps[3] = { LHSH, RHSH }; @@ -1348,7 +1490,7 @@ void 
DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,    DebugLoc dl = N->getDebugLoc();    GetExpandedInteger(N->getOperand(0), LHSL, LHSH);    GetExpandedInteger(N->getOperand(1), RHSL, RHSH); -  SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); +  SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);    SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };    SDValue HiOps[3] = { LHSH, RHSH }; @@ -1437,7 +1579,7 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,    EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));    unsigned NBitWidth = NVT.getSizeInBits();    const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue(); -  Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT); +  Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT);    Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT);  } @@ -1524,7 +1666,6 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,    SDValue Ch  = N->getChain();    SDValue Ptr = N->getBasePtr();    ISD::LoadExtType ExtType = N->getExtensionType(); -  int SVOffset = N->getSrcValueOffset();    unsigned Alignment = N->getAlignment();    bool isVolatile = N->isVolatile();    bool isNonTemporal = N->isNonTemporal(); @@ -1535,7 +1676,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,    if (N->getMemoryVT().bitsLE(NVT)) {      EVT MemVT = N->getMemoryVT(); -    Lo = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, +    Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),                          MemVT, isVolatile, isNonTemporal, Alignment);      // Remember the chain. @@ -1557,7 +1698,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,      }    } else if (TLI.isLittleEndian()) {      // Little-endian - low bits are at low addresses. 
-    Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, +    Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),                       isVolatile, isNonTemporal, Alignment);      unsigned ExcessBits = @@ -1568,8 +1709,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,      unsigned IncrementSize = NVT.getSizeInBits()/8;      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,                        DAG.getIntPtrConstant(IncrementSize)); -    Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), -                        SVOffset+IncrementSize, NEVT, +    Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, +                        N->getPointerInfo().getWithOffset(IncrementSize), NEVT,                          isVolatile, isNonTemporal,                          MinAlign(Alignment, IncrementSize)); @@ -1586,7 +1727,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,      unsigned ExcessBits = (EBytes - IncrementSize)*8;      // Load both the high bits and maybe some of the low bits. -    Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, +    Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),                          EVT::getIntegerVT(*DAG.getContext(),                                            MemVT.getSizeInBits() - ExcessBits),                          isVolatile, isNonTemporal, Alignment); @@ -1595,8 +1736,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,                        DAG.getIntPtrConstant(IncrementSize));      // Load the rest of the low bits. 
-    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, NVT, dl, Ch, Ptr, N->getSrcValue(), -                        SVOffset+IncrementSize, +    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, +                        N->getPointerInfo().getWithOffset(IncrementSize),                          EVT::getIntegerVT(*DAG.getContext(), ExcessBits),                          isVolatile, isNonTemporal,                          MinAlign(Alignment, IncrementSize)); @@ -1987,6 +2128,31 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,    ReplaceValueWith(SDValue(N, 1), Ofl);  } +void DAGTypeLegalizer::ExpandIntRes_UMULSMULO(SDNode *N, +                                              SDValue &Lo, SDValue &Hi) { +  SDValue LHS = N->getOperand(0); +  SDValue RHS = N->getOperand(1); +  DebugLoc dl = N->getDebugLoc(); +  EVT VT = N->getValueType(0); +  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() / 2); +  // Expand the result by simply replacing it with the equivalent +  // non-overflow-checking operation. +  SDValue Ret = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS); +  SplitInteger(Ret, Lo, Hi); +   +  // Now calculate overflow. 
+  SDValue Ofl; +  if (N->getOpcode() == ISD::UMULO) +    Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, +                       DAG.getConstant(0, VT), ISD::SETNE); +  else { +    SDValue Tmp = DAG.getConstant(VT.getSizeInBits() - 1, HalfVT); +    Tmp = DAG.getNode(ISD::SRA, dl, HalfVT, Lo, Tmp); +    Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, Tmp, ISD::SETNE); +  } +  ReplaceValueWith(SDValue(N, 1), Ofl); +} +  void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,                                           SDValue &Lo, SDValue &Hi) {    EVT VT = N->getValueType(0); @@ -2078,7 +2244,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {    #endif      llvm_unreachable("Do not know how to expand this operator's operand!"); -  case ISD::BIT_CONVERT:       Res = ExpandOp_BIT_CONVERT(N); break; +  case ISD::BITCAST:           Res = ExpandOp_BITCAST(N); break;    case ISD::BR_CC:             Res = ExpandIntOp_BR_CC(N); break;    case ISD::BUILD_VECTOR:      Res = ExpandOp_BUILD_VECTOR(N); break;    case ISD::EXTRACT_ELEMENT:   Res = ExpandOp_EXTRACT_ELEMENT(N); break; @@ -2308,7 +2474,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {    EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);    SDValue Ch  = N->getChain();    SDValue Ptr = N->getBasePtr(); -  int SVOffset = N->getSrcValueOffset();    unsigned Alignment = N->getAlignment();    bool isVolatile = N->isVolatile();    bool isNonTemporal = N->isNonTemporal(); @@ -2319,14 +2484,16 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {    if (N->getMemoryVT().bitsLE(NVT)) {      GetExpandedInteger(N->getValue(), Lo, Hi); -    return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, +    return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),                               N->getMemoryVT(), isVolatile, isNonTemporal,                               Alignment); -  } else if (TLI.isLittleEndian()) { +  } + +  if 
(TLI.isLittleEndian()) {      // Little-endian - low bits are at low addresses.      GetExpandedInteger(N->getValue(), Lo, Hi); -    Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, +    Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),                        isVolatile, isNonTemporal, Alignment);      unsigned ExcessBits = @@ -2337,50 +2504,49 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {      unsigned IncrementSize = NVT.getSizeInBits()/8;      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,                        DAG.getIntPtrConstant(IncrementSize)); -    Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), -                           SVOffset+IncrementSize, NEVT, -                           isVolatile, isNonTemporal, +    Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, +                           N->getPointerInfo().getWithOffset(IncrementSize), +                           NEVT, isVolatile, isNonTemporal,                             MinAlign(Alignment, IncrementSize));      return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); -  } else { -    // Big-endian - high bits are at low addresses.  Favor aligned stores at -    // the cost of some bit-fiddling. -    GetExpandedInteger(N->getValue(), Lo, Hi); - -    EVT ExtVT = N->getMemoryVT(); -    unsigned EBytes = ExtVT.getStoreSize(); -    unsigned IncrementSize = NVT.getSizeInBits()/8; -    unsigned ExcessBits = (EBytes - IncrementSize)*8; -    EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), -                                 ExtVT.getSizeInBits() - ExcessBits); +  } -    if (ExcessBits < NVT.getSizeInBits()) { -      // Transfer high bits from the top of Lo to the bottom of Hi. 
-      Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, -                       DAG.getConstant(NVT.getSizeInBits() - ExcessBits, -                                       TLI.getPointerTy())); -      Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, -                       DAG.getNode(ISD::SRL, dl, NVT, Lo, -                                   DAG.getConstant(ExcessBits, -                                                   TLI.getPointerTy()))); -    } +  // Big-endian - high bits are at low addresses.  Favor aligned stores at +  // the cost of some bit-fiddling. +  GetExpandedInteger(N->getValue(), Lo, Hi); + +  EVT ExtVT = N->getMemoryVT(); +  unsigned EBytes = ExtVT.getStoreSize(); +  unsigned IncrementSize = NVT.getSizeInBits()/8; +  unsigned ExcessBits = (EBytes - IncrementSize)*8; +  EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), +                               ExtVT.getSizeInBits() - ExcessBits); + +  if (ExcessBits < NVT.getSizeInBits()) { +    // Transfer high bits from the top of Lo to the bottom of Hi. +    Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, +                     DAG.getConstant(NVT.getSizeInBits() - ExcessBits, +                                     TLI.getPointerTy())); +    Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, +                     DAG.getNode(ISD::SRL, dl, NVT, Lo, +                                 DAG.getConstant(ExcessBits, +                                                 TLI.getPointerTy()))); +  } -    // Store both the high bits and maybe some of the low bits. -    Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), -                           SVOffset, HiVT, isVolatile, isNonTemporal, -                           Alignment); +  // Store both the high bits and maybe some of the low bits. +  Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), +                         HiVT, isVolatile, isNonTemporal, Alignment); -    // Increment the pointer to the other half. 
-    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, -                      DAG.getIntPtrConstant(IncrementSize)); -    // Store the lowest ExcessBits bits in the second half. -    Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), -                           SVOffset+IncrementSize, -                           EVT::getIntegerVT(*DAG.getContext(), ExcessBits), -                           isVolatile, isNonTemporal, -                           MinAlign(Alignment, IncrementSize)); -    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); -  } +  // Increment the pointer to the other half. +  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, +                    DAG.getIntPtrConstant(IncrementSize)); +  // Store the lowest ExcessBits bits in the second half. +  Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, +                         N->getPointerInfo().getWithOffset(IncrementSize), +                         EVT::getIntegerVT(*DAG.getContext(), ExcessBits), +                         isVolatile, isNonTemporal, +                         MinAlign(Alignment, IncrementSize)); +  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);  }  SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { @@ -2460,8 +2626,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {      // Load the value out, extending it from f32 to the destination float type.      // FIXME: Avoid the extend by constructing the right constant pool? 
-    SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, DstVT, dl, DAG.getEntryNode(), -                                   FudgePtr, NULL, 0, MVT::f32, +    SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), +                                   FudgePtr, +                                   MachinePointerInfo::getConstantPool(), +                                   MVT::f32,                                     false, false, Alignment);      return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);    } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 6e56c98e9b56..cedda7e7075a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -714,6 +714,11 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {            if (M->getNodeId() == Processed)              RemapValue(NewVal);            DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL); +          // OldVal may be a target of the ReplacedValues map which was marked +          // NewNode to force reanalysis because it was updated.  Ensure that +          // anything that ReplacedValues mapped to OldVal will now be mapped +          // all the way to NewVal. +          ReplacedValues[OldVal] = NewVal;          }          // The original node continues to exist in the DAG, marked NewNode.        } @@ -858,7 +863,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {  /// BitConvertToInteger - Convert to an integer of the same size.  
SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {    unsigned BitWidth = Op.getValueType().getSizeInBits(); -  return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), +  return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),                       EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);  } @@ -869,7 +874,7 @@ SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {    unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();    EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);    unsigned NumElts = Op.getValueType().getVectorNumElements(); -  return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), +  return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),                       EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op);  } @@ -880,10 +885,11 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,    // the source and destination types.    SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);    // Emit a store to the stack slot. -  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0, -                               false, false, 0); +  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, +                               MachinePointerInfo(), false, false, 0);    // Result is a load from the stack slot. -  return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0, false, false, 0); +  return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(), +                     false, false, 0);  }  /// CustomLowerNode - Replace the node's results with custom code provided @@ -1049,6 +1055,39 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,    return CallInfo.first;  } +// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to +// ExpandLibCall except that the first operand is the in-chain. 
+std::pair<SDValue, SDValue> +DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, +                                         SDNode *Node, +                                         bool isSigned) { +  SDValue InChain = Node->getOperand(0); + +  TargetLowering::ArgListTy Args; +  TargetLowering::ArgListEntry Entry; +  for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { +    EVT ArgVT = Node->getOperand(i).getValueType(); +    const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); +    Entry.Node = Node->getOperand(i); +    Entry.Ty = ArgTy; +    Entry.isSExt = isSigned; +    Entry.isZExt = !isSigned; +    Args.push_back(Entry); +  } +  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), +                                         TLI.getPointerTy()); + +  // Splice the libcall in wherever FindInputOutputChains tells us to. +  const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); +  std::pair<SDValue, SDValue> CallInfo = +    TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, +                    0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, +                    /*isReturnValueUsed=*/true, +                    Callee, Args, DAG, Node->getDebugLoc()); + +  return CallInfo; +} +  /// PromoteTargetBoolean - Promote the given target boolean to a target boolean  /// of the given type.  A target boolean is an integer value, not necessarily of  /// type i1, the bits of which conform to getBooleanContents. 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index d56029208e61..3f81bbbe4061 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -99,7 +99,7 @@ private:            return SoftenFloat;          return ExpandFloat;        } -         +        if (VT.getVectorNumElements() == 1)          return ScalarizeVector;        return SplitVector; @@ -192,6 +192,10 @@ private:    SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT,                        const SDValue *Ops, unsigned NumOps, bool isSigned,                        DebugLoc dl); +	std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, +									                               SDNode *Node, bool isSigned); +	std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); +    SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);    void ReplaceValueWith(SDValue From, SDValue To);    void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); @@ -244,7 +248,7 @@ private:    SDValue PromoteIntRes_AssertZext(SDNode *N);    SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);    SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); -  SDValue PromoteIntRes_BIT_CONVERT(SDNode *N); +  SDValue PromoteIntRes_BITCAST(SDNode *N);    SDValue PromoteIntRes_BSWAP(SDNode *N);    SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);    SDValue PromoteIntRes_Constant(SDNode *N); @@ -278,7 +282,7 @@ private:    // Integer Operand Promotion.    
bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);    SDValue PromoteIntOp_ANY_EXTEND(SDNode *N); -  SDValue PromoteIntOp_BIT_CONVERT(SDNode *N); +  SDValue PromoteIntOp_BITCAST(SDNode *N);    SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);    SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);    SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo); @@ -344,6 +348,7 @@ private:    void ExpandIntRes_SADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi);    void ExpandIntRes_UADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi); +  void ExpandIntRes_UMULSMULO	      (SDNode *N, SDValue &Lo, SDValue &Hi);    void ExpandShiftByConstant(SDNode *N, unsigned Amt,                               SDValue &Lo, SDValue &Hi); @@ -352,7 +357,7 @@ private:    // Integer Operand Expansion.    bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); -  SDValue ExpandIntOp_BIT_CONVERT(SDNode *N); +  SDValue ExpandIntOp_BITCAST(SDNode *N);    SDValue ExpandIntOp_BR_CC(SDNode *N);    SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N);    SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N); @@ -387,7 +392,7 @@ private:    // Result Float to Integer Conversion.    void SoftenFloatResult(SDNode *N, unsigned OpNo); -  SDValue SoftenFloatRes_BIT_CONVERT(SDNode *N); +  SDValue SoftenFloatRes_BITCAST(SDNode *N);    SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);    SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);    SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); @@ -426,7 +431,7 @@ private:    // Operand Float to Integer Conversion.    
bool SoftenFloatOperand(SDNode *N, unsigned OpNo); -  SDValue SoftenFloatOp_BIT_CONVERT(SDNode *N); +  SDValue SoftenFloatOp_BITCAST(SDNode *N);    SDValue SoftenFloatOp_BR_CC(SDNode *N);    SDValue SoftenFloatOp_FP_ROUND(SDNode *N);    SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); @@ -515,7 +520,7 @@ private:    SDValue ScalarizeVecRes_UnaryOp(SDNode *N);    SDValue ScalarizeVecRes_InregOp(SDNode *N); -  SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N); +  SDValue ScalarizeVecRes_BITCAST(SDNode *N);    SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);    SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);    SDValue ScalarizeVecRes_FPOWI(SDNode *N); @@ -532,7 +537,7 @@ private:    // Vector Operand Scalarization: <1 x ty> -> ty.    bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); -  SDValue ScalarizeVecOp_BIT_CONVERT(SDNode *N); +  SDValue ScalarizeVecOp_BITCAST(SDNode *N);    SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);    SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);    SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); @@ -557,7 +562,7 @@ private:    void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); -  void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi); +  void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -577,11 +582,12 @@ private:    bool SplitVectorOperand(SDNode *N, unsigned OpNo);    SDValue SplitVecOp_UnaryOp(SDNode *N); -  SDValue SplitVecOp_BIT_CONVERT(SDNode *N); +  SDValue SplitVecOp_BITCAST(SDNode *N);    SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);    SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);    SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);    SDValue 
SplitVecOp_CONCAT_VECTORS(SDNode *N); +  SDValue SplitVecOp_FP_ROUND(SDNode *N);    //===--------------------------------------------------------------------===//    // Vector Widening Support: LegalizeVectorTypes.cpp @@ -603,7 +609,7 @@ private:    // Widen Vector Result Promotion.    void WidenVectorResult(SDNode *N, unsigned ResNo); -  SDValue WidenVecRes_BIT_CONVERT(SDNode* N); +  SDValue WidenVecRes_BITCAST(SDNode* N);    SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);    SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);    SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N); @@ -628,7 +634,7 @@ private:    // Widen Vector Operand.    bool WidenVectorOperand(SDNode *N, unsigned ResNo); -  SDValue WidenVecOp_BIT_CONVERT(SDNode *N); +  SDValue WidenVecOp_BITCAST(SDNode *N);    SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);    SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);    SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); @@ -721,7 +727,7 @@ private:    }    // Generic Result Expansion. -  void ExpandRes_BIT_CONVERT       (SDNode *N, SDValue &Lo, SDValue &Hi); +  void ExpandRes_BITCAST           (SDNode *N, SDValue &Lo, SDValue &Hi);    void ExpandRes_BUILD_PAIR        (SDNode *N, SDValue &Lo, SDValue &Hi);    void ExpandRes_EXTRACT_ELEMENT   (SDNode *N, SDValue &Lo, SDValue &Hi);    void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -729,7 +735,7 @@ private:    void ExpandRes_VAARG             (SDNode *N, SDValue &Lo, SDValue &Hi);    // Generic Operand Expansion. 
-  SDValue ExpandOp_BIT_CONVERT      (SDNode *N); +  SDValue ExpandOp_BITCAST          (SDNode *N);    SDValue ExpandOp_BUILD_VECTOR     (SDNode *N);    SDValue ExpandOp_EXTRACT_ELEMENT  (SDNode *N);    SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 9c2b1d9ed73d..a75ae87f3cbe 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -32,8 +32,7 @@ using namespace llvm;  // little/big-endian machines, followed by the Hi/Lo part.  This means that  // they cannot be used as is on vectors, for which Lo is always stored first. -void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, -                                             SDValue &Hi) { +void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {    EVT OutVT = N->getValueType(0);    EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);    SDValue InOp = N->getOperand(0); @@ -50,31 +49,31 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,      case SoftenFloat:        // Convert the integer operand instead.        SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); -      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); -      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); +      Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); +      Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);        return;      case ExpandInteger:      case ExpandFloat:        // Convert the expanded pieces of the input.        
GetExpandedOp(InOp, Lo, Hi); -      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); -      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); +      Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); +      Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);        return;      case SplitVector:        GetSplitVector(InOp, Lo, Hi);        if (TLI.isBigEndian())          std::swap(Lo, Hi); -      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); -      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); +      Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); +      Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);        return;      case ScalarizeVector:        // Convert the element instead.        SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); -      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); -      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); +      Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); +      Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);        return;      case WidenVector: { -      assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT"); +      assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");        InOp = GetWidenedVector(InOp);        EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),                                     InVT.getVectorNumElements()/2); @@ -84,19 +83,19 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,                         DAG.getIntPtrConstant(InNVT.getVectorNumElements()));        if (TLI.isBigEndian())          std::swap(Lo, Hi); -      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); -      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); +      Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); +      Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);        return;      }    }    if (InVT.isVector() && OutVT.isInteger()) { -    // Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand +    
// Handle cases like i64 = BITCAST v1i64 on x86, where the operand      // is legal but the result is not.      EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2);      if (isTypeLegal(NVT)) { -      SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp); +      SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);        Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,                         DAG.getIntPtrConstant(0));        Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, @@ -119,14 +118,14 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,                                                getTypeForEVT(*DAG.getContext()));    SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);    int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); -  const Value *SV = PseudoSourceValue::getFixedStack(SPFI); +  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);    // Emit a store to the stack slot. -  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0, +  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo,                                 false, false, 0);    // Load the first half from the stack slot. -  Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0, false, false, 0); +  Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0);    // Increment the pointer to the other half.    unsigned IncrementSize = NOutVT.getSizeInBits() / 8; @@ -134,7 +133,8 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,                           DAG.getIntPtrConstant(IncrementSize));    // Load the second half from the stack slot. 
-  Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false, +  Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, +                   PtrInfo.getWithOffset(IncrementSize), false,                     false, MinAlign(Alignment, IncrementSize));    // Handle endianness of the load. @@ -172,7 +172,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,    EVT OldVT = N->getValueType(0);    EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); -  SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, +  SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,                                 EVT::getVectorVT(*DAG.getContext(),                                                  NewVT, 2*OldElts),                                 OldVec); @@ -204,22 +204,21 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,    EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));    SDValue Chain = LD->getChain();    SDValue Ptr = LD->getBasePtr(); -  int SVOffset = LD->getSrcValueOffset();    unsigned Alignment = LD->getAlignment();    bool isVolatile = LD->isVolatile();    bool isNonTemporal = LD->isNonTemporal();    assert(NVT.isByteSized() && "Expanded type not byte sized!"); -  Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, +  Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),                     isVolatile, isNonTemporal, Alignment);    // Increment the pointer to the other half.    
unsigned IncrementSize = NVT.getSizeInBits() / 8;    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,                      DAG.getIntPtrConstant(IncrementSize)); -  Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), -                   SVOffset+IncrementSize, +  Hi = DAG.getLoad(NVT, dl, Chain, Ptr, +                   LD->getPointerInfo().getWithOffset(IncrementSize),                     isVolatile, isNonTemporal,                     MinAlign(Alignment, IncrementSize)); @@ -262,14 +261,14 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {  // Generic Operand Expansion.  //===--------------------------------------------------------------------===// -SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {    DebugLoc dl = N->getDebugLoc();    if (N->getValueType(0).isVector()) {      // An illegal expanding type is being converted to a legal vector type.      // Make a two element vector out of the expanded parts and convert that      // instead, but only if the new vector type is legal (otherwise there      // is no point, and it might create expansion loops).  For example, on -    // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32. +    // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32.      
EVT OVT = N->getOperand(0).getValueType();      EVT NVT = EVT::getVectorVT(*DAG.getContext(),                                 TLI.getTypeToTransformTo(*DAG.getContext(), OVT), @@ -283,7 +282,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {          std::swap(Parts[0], Parts[1]);        SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2); -      return DAG.getNode(ISD::BIT_CONVERT, dl, N->getValueType(0), Vec); +      return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);      }    } @@ -322,7 +321,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {                                 &NewElts[0], NewElts.size());    // Convert the new vector to the old vector type. -  return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec); +  return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);  }  SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) { @@ -347,7 +346,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {    // Bitconvert to a vector of twice the length with elements of the expanded    // type, insert the expanded vector elements, and then convert back.    EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2); -  SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, +  SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,                                 NewVecVT, N->getOperand(0));    SDValue Lo, Hi; @@ -363,7 +362,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {    NewVec =  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);    // Convert the new vector to the old vector type. 
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec); +  return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);  }  SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { @@ -390,7 +389,6 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {                                       St->getValue().getValueType());    SDValue Chain = St->getChain();    SDValue Ptr = St->getBasePtr(); -  int SVOffset = St->getSrcValueOffset();    unsigned Alignment = St->getAlignment();    bool isVolatile = St->isVolatile();    bool isNonTemporal = St->isNonTemporal(); @@ -404,14 +402,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {    if (TLI.isBigEndian())      std::swap(Lo, Hi); -  Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset, +  Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),                      isVolatile, isNonTemporal, Alignment);    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,                      DAG.getIntPtrConstant(IncrementSize));    assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!"); -  Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(), -                    SVOffset + IncrementSize, +  Hi = DAG.getStore(Chain, dl, Hi, Ptr, +                    St->getPointerInfo().getWithOffset(IncrementSize),                      isVolatile, isNonTemporal,                      MinAlign(Alignment, IncrementSize)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 621c08724210..167dbe0377b3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -241,14 +241,14 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {    for (unsigned j = 0; j != Op.getNumOperands(); ++j) {      if (Op.getOperand(j).getValueType().isVector()) -      Operands[j] = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Op.getOperand(j)); +      Operands[j] = 
DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));      else        Operands[j] = Op.getOperand(j);    }    Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size()); -  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op); +  return DAG.getNode(ISD::BITCAST, dl, VT, Op);  }  SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 93bc2d04928e..182f8fcbfbf3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -46,7 +46,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {  #endif      llvm_unreachable("Do not know how to scalarize the result of this operator!"); -  case ISD::BIT_CONVERT:       R = ScalarizeVecRes_BIT_CONVERT(N); break; +  case ISD::BITCAST:           R = ScalarizeVecRes_BITCAST(N); break;    case ISD::BUILD_VECTOR:      R = N->getOperand(0); break;    case ISD::CONVERT_RNDSAT:    R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;    case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; @@ -122,9 +122,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {                       LHS.getValueType(), LHS, RHS);  } -SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {    EVT NewVT = N->getValueType(0).getVectorElementType(); -  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), +  return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),                       NewVT, N->getOperand(0));  } @@ -171,7 +171,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {                                 N->getDebugLoc(),                                 N->getChain(), N->getBasePtr(),                                 DAG.getUNDEF(N->getBasePtr().getValueType()), -                               N->getSrcValue(), N->getSrcValueOffset(), +     
                          N->getPointerInfo(),                                 N->getMemoryVT().getVectorElementType(),                                 N->isVolatile(), N->isNonTemporal(),                                 N->getOriginalAlignment()); @@ -296,8 +296,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {        dbgs() << "\n";  #endif        llvm_unreachable("Do not know how to scalarize this operator's operand!"); -    case ISD::BIT_CONVERT: -      Res = ScalarizeVecOp_BIT_CONVERT(N); +    case ISD::BITCAST: +      Res = ScalarizeVecOp_BITCAST(N);        break;      case ISD::CONCAT_VECTORS:        Res = ScalarizeVecOp_CONCAT_VECTORS(N); @@ -326,11 +326,11 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {    return false;  } -/// ScalarizeVecOp_BIT_CONVERT - If the value to convert is a vector that needs +/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs  /// to be scalarized, it must be <1 x ty>.  Convert the element instead. 
-SDValue DAGTypeLegalizer::ScalarizeVecOp_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {    SDValue Elt = GetScalarizedVector(N->getOperand(0)); -  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), +  return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),                       N->getValueType(0), Elt);  } @@ -365,14 +365,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){    if (N->isTruncatingStore())      return DAG.getTruncStore(N->getChain(), dl,                               GetScalarizedVector(N->getOperand(1)), -                             N->getBasePtr(), -                             N->getSrcValue(), N->getSrcValueOffset(), +                             N->getBasePtr(), N->getPointerInfo(),                               N->getMemoryVT().getVectorElementType(),                               N->isVolatile(), N->isNonTemporal(),                               N->getAlignment());    return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), -                      N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), +                      N->getBasePtr(), N->getPointerInfo(),                        N->isVolatile(), N->isNonTemporal(),                        N->getOriginalAlignment());  } @@ -407,7 +406,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {    case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;    case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break; -  case ISD::BIT_CONVERT:       SplitVecRes_BIT_CONVERT(N, Lo, Hi); break; +  case ISD::BITCAST:           SplitVecRes_BITCAST(N, Lo, Hi); break;    case ISD::BUILD_VECTOR:      SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;    case ISD::CONCAT_VECTORS:    SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;    case ISD::CONVERT_RNDSAT:    SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break; @@ -497,8 +496,8 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,    Hi = 
DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);  } -void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, -                                               SDValue &Hi) { +void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, +                                           SDValue &Hi) {    // We know the result is a vector.  The input may be either a vector or a    // scalar value.    EVT LoVT, HiVT; @@ -526,8 +525,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,        GetExpandedOp(InOp, Lo, Hi);        if (TLI.isBigEndian())          std::swap(Lo, Hi); -      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); -      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); +      Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); +      Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);        return;      }      break; @@ -535,8 +534,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,      // If the input is a vector that needs to be split, convert each split      // piece of the input now.      
GetSplitVector(InOp, Lo, Hi); -    Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); -    Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); +    Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); +    Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);      return;    } @@ -550,8 +549,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,    if (TLI.isBigEndian())      std::swap(Lo, Hi); -  Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); -  Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); +  Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); +  Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);  }  void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, @@ -626,9 +625,9 @@ void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo,      EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),                                   LoVT.getVectorNumElements());      VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, -                     DAG.getIntPtrConstant(0)); +                      DAG.getIntPtrConstant(0));      VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, -                     DAG.getIntPtrConstant(InNVT.getVectorNumElements())); +                      DAG.getIntPtrConstant(InNVT.getVectorNumElements()));      break;    }    } @@ -646,16 +645,15 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,                                                       SDValue &Hi) {    SDValue Vec = N->getOperand(0);    SDValue Idx = N->getOperand(1); -  EVT IdxVT = Idx.getValueType();    DebugLoc dl = N->getDebugLoc();    EVT LoVT, HiVT;    GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);    Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); -  Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx, -                    DAG.getConstant(LoVT.getVectorNumElements(), IdxVT)); -  Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, Idx); +  uint64_t IdxVal = 
cast<ConstantSDNode>(Idx)->getZExtValue(); +  Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, +                   DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements()));  }  void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, @@ -705,8 +703,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,    EVT VecVT = Vec.getValueType();    EVT EltVT = VecVT.getVectorElementType();    SDValue StackPtr = DAG.CreateStackTemporary(VecVT); -  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0, -                               false, false, 0); +  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, +                               MachinePointerInfo(), false, false, 0);    // Store the new element.  This may be larger than the vector element type,    // so use a truncating store. @@ -714,11 +712,11 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,    const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());    unsigned Alignment =      TLI.getTargetData()->getPrefTypeAlignment(VecType); -  Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT, +  Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,                              false, false, 0);    // Load the Lo part from the stack slot. -  Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0, +  Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),                     false, false, 0);    // Increment the pointer to the other part. @@ -727,8 +725,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,                           DAG.getIntPtrConstant(IncrementSize));    // Load the Hi part from the stack slot. 
-  Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false, -                   false, MinAlign(Alignment, IncrementSize)); +  Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), +                   false, false, MinAlign(Alignment, IncrementSize));  }  void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, @@ -751,8 +749,6 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,    SDValue Ch = LD->getChain();    SDValue Ptr = LD->getBasePtr();    SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); -  const Value *SV = LD->getSrcValue(); -  int SVOffset = LD->getSrcValueOffset();    EVT MemoryVT = LD->getMemoryVT();    unsigned Alignment = LD->getOriginalAlignment();    bool isVolatile = LD->isVolatile(); @@ -762,14 +758,15 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,    GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);    Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, -                   SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment); +                   LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, +                   Alignment);    unsigned IncrementSize = LoMemVT.getSizeInBits()/8;    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,                      DAG.getIntPtrConstant(IncrementSize)); -  SVOffset += IncrementSize;    Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, -                   SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment); +                   LD->getPointerInfo().getWithOffset(IncrementSize), +                   HiMemVT, isVolatile, isNonTemporal, Alignment);    // Build a factor node to remember that this load is independent of the    // other one. 
@@ -980,10 +977,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {  #endif        llvm_unreachable("Do not know how to split this operator's operand!"); -    case ISD::BIT_CONVERT:       Res = SplitVecOp_BIT_CONVERT(N); break; +    case ISD::BITCAST:           Res = SplitVecOp_BITCAST(N); break;      case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;      case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;      case ISD::CONCAT_VECTORS:    Res = SplitVecOp_CONCAT_VECTORS(N); break; +    case ISD::FP_ROUND:          Res = SplitVecOp_FP_ROUND(N); break;      case ISD::STORE:        Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);        break; @@ -995,6 +993,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {      case ISD::FP_TO_UINT:      case ISD::SINT_TO_FP:      case ISD::UINT_TO_FP: +    case ISD::FP_EXTEND: +    case ISD::FTRUNC:      case ISD::TRUNCATE:      case ISD::SIGN_EXTEND:      case ISD::ZERO_EXTEND: @@ -1036,8 +1036,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {    return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);  } -SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) { -  // For example, i64 = BIT_CONVERT v4i16 on alpha.  Typically the vector will +SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) { +  // For example, i64 = BITCAST v4i16 on alpha.  Typically the vector will    // end up being split all the way down to individual components.  Convert the    // split pieces into integers and reassemble.    
SDValue Lo, Hi; @@ -1048,13 +1048,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) {    if (TLI.isBigEndian())      std::swap(Lo, Hi); -  return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0), +  return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),                       JoinIntegers(Lo, Hi));  }  SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { -  // We know that the extracted result type is legal.  For now, assume the index -  // is a constant. +  // We know that the extracted result type is legal.    EVT SubVT = N->getValueType(0);    SDValue Idx = N->getOperand(1);    DebugLoc dl = N->getDebugLoc(); @@ -1099,15 +1098,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {    EVT EltVT = VecVT.getVectorElementType();    DebugLoc dl = N->getDebugLoc();    SDValue StackPtr = DAG.CreateStackTemporary(VecVT); -  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); -  const Value *SV = PseudoSourceValue::getFixedStack(SPFI); -  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0, -                               false, false, 0); +  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, +                               MachinePointerInfo(), false, false, 0);    // Load back the required element.    
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); -  return DAG.getExtLoad(ISD::EXTLOAD, N->getValueType(0), dl, Store, StackPtr, -                        SV, 0, EltVT, false, false, 0); +  return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, +                        MachinePointerInfo(), EltVT, false, false, 0);  }  SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -1118,7 +1115,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {    bool isTruncating = N->isTruncatingStore();    SDValue Ch  = N->getChain();    SDValue Ptr = N->getBasePtr(); -  int SVOffset = N->getSrcValueOffset();    EVT MemoryVT = N->getMemoryVT();    unsigned Alignment = N->getOriginalAlignment();    bool isVol = N->isVolatile(); @@ -1132,22 +1128,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {    unsigned IncrementSize = LoMemVT.getSizeInBits()/8;    if (isTruncating) -    Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset, +    Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),                             LoMemVT, isVol, isNT, Alignment);    else -    Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset, +    Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),                        isVol, isNT, Alignment);    // Increment the pointer to the other half.    
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,                      DAG.getIntPtrConstant(IncrementSize)); -  SVOffset += IncrementSize;    if (isTruncating) -    Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset, +    Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, +                           N->getPointerInfo().getWithOffset(IncrementSize),                             HiMemVT, isVol, isNT, Alignment);    else -    Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset, +    Hi = DAG.getStore(Ch, DL, Hi, Ptr, +                      N->getPointerInfo().getWithOffset(IncrementSize),                        isVol, isNT, Alignment);    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); @@ -1155,7 +1152,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {  SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {    DebugLoc DL = N->getDebugLoc(); -   +    // The input operands all must have the same type, and we know the result the    // result type is valid.  Convert this to a buildvector which extracts all the    // input elements. @@ -1172,11 +1169,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {      }    } -   +    return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0),                       &Elts[0], Elts.size());  } +SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { +  // The result has a legal vector type, but the input needs splitting. 
+  EVT ResVT = N->getValueType(0); +  SDValue Lo, Hi; +  DebugLoc DL = N->getDebugLoc(); +  GetSplitVector(N->getOperand(0), Lo, Hi); +  EVT InVT = Lo.getValueType(); +   +  EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), +                               InVT.getVectorNumElements()); +   +  Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); +  Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); +   +  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); +}   + +  //===----------------------------------------------------------------------===//  //  Result Vector Widening @@ -1201,7 +1216,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {  #endif      llvm_unreachable("Do not know how to widen the result of this operator!"); -  case ISD::BIT_CONVERT:       Res = WidenVecRes_BIT_CONVERT(N); break; +  case ISD::BITCAST:           Res = WidenVecRes_BITCAST(N); break;    case ISD::BUILD_VECTOR:      Res = WidenVecRes_BUILD_VECTOR(N); break;    case ISD::CONCAT_VECTORS:    Res = WidenVecRes_CONCAT_VECTORS(N); break;    case ISD::CONVERT_RNDSAT:    Res = WidenVecRes_CONVERT_RNDSAT(N); break; @@ -1297,7 +1312,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {    EVT WidenEltVT = WidenVT.getVectorElementType();    EVT VT = WidenVT;    unsigned NumElts =  VT.getVectorNumElements(); -  while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) { +  while (!TLI.isTypeLegal(VT) && NumElts != 1) {      NumElts = NumElts / 2;      VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);    } @@ -1308,11 +1323,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {      SDValue InOp2 = GetWidenedVector(N->getOperand(1));      return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);    } -   +    // No legal vector version so unroll the vector operation and then widen.    
if (NumElts == 1)      return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); -   +    // Since the operation can trap, apply operation on the original vector.    EVT MaxVT = VT;    SDValue InOp1 = GetWidenedVector(N->getOperand(0)); @@ -1323,7 +1338,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {    unsigned ConcatEnd = 0;  // Current ConcatOps index.    int Idx = 0;        // Current Idx into input vectors. -  // NumElts := greatest synthesizable vector size (at most WidenVT) +  // NumElts := greatest legal vector size (at most WidenVT)    // while (orig. vector has unhandled elements) {    //   take munches of size NumElts from the beginning and add to ConcatOps    //   NumElts := next smaller supported vector size or 1 @@ -1341,13 +1356,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {      do {        NumElts = NumElts / 2;        VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); -    } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1); +    } while (!TLI.isTypeLegal(VT) && NumElts != 1);      if (NumElts == 1) {        for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { -        SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,  +        SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,                                     InOp1, DAG.getIntPtrConstant(Idx)); -        SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,  +        SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,                                     InOp2, DAG.getIntPtrConstant(Idx));          ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,                                               EOp1, EOp2); @@ -1378,7 +1393,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {      do {        NextSize *= 2;        NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); -    } while (!TLI.isTypeSynthesizable(NextVT)); +    } while 
(!TLI.isTypeLegal(NextVT));      if (!VT.isVector()) {        // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT @@ -1415,7 +1430,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {      if (VT == WidenVT)        return ConcatOps[0];    } -   +    // add undefs of size MaxVT until ConcatOps grows to length of WidenVT    unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();    if (NumOps != ConcatEnd ) { @@ -1428,7 +1443,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {  SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {    SDValue InOp = N->getOperand(0); -  DebugLoc dl = N->getDebugLoc(); +  DebugLoc DL = N->getDebugLoc();    EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));    unsigned WidenNumElts = WidenVT.getVectorNumElements(); @@ -1444,11 +1459,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {      InOp = GetWidenedVector(N->getOperand(0));      InVT = InOp.getValueType();      InVTNumElts = InVT.getVectorNumElements(); -    if (InVTNumElts == WidenNumElts) -      return DAG.getNode(Opcode, dl, WidenVT, InOp); +    if (InVTNumElts == WidenNumElts) { +      if (N->getNumOperands() == 1) +        return DAG.getNode(Opcode, DL, WidenVT, InOp); +      return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1)); +    }    } -  if (TLI.isTypeSynthesizable(InWidenVT)) { +  if (TLI.isTypeLegal(InWidenVT)) {      // Because the result and the input are different vector types, widening      // the result could create a legal type but widening the input might make      // it an illegal type that might lead to repeatedly splitting the input @@ -1462,16 +1480,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {        SDValue UndefVal = DAG.getUNDEF(InVT);        for (unsigned i = 1; i != NumConcat; ++i)          Ops[i] = UndefVal; -      return DAG.getNode(Opcode, dl, WidenVT, -                         DAG.getNode(ISD::CONCAT_VECTORS, dl, 
InWidenVT, -                         &Ops[0], NumConcat)); +      SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, +                                  &Ops[0], NumConcat); +      if (N->getNumOperands() == 1) +        return DAG.getNode(Opcode, DL, WidenVT, InVec); +      return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));      }      if (InVTNumElts % WidenNumElts == 0) { +      SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, +                                  InOp, DAG.getIntPtrConstant(0));        // Extract the input and convert the shorten input vector. -      return DAG.getNode(Opcode, dl, WidenVT, -                         DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, -                                     InOp, DAG.getIntPtrConstant(0))); +      if (N->getNumOperands() == 1) +        return DAG.getNode(Opcode, DL, WidenVT, InVal); +      return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1));      }    } @@ -1480,16 +1502,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {    EVT EltVT = WidenVT.getVectorElementType();    unsigned MinElts = std::min(InVTNumElts, WidenNumElts);    unsigned i; -  for (i=0; i < MinElts; ++i) -    Ops[i] = DAG.getNode(Opcode, dl, EltVT, -                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, -                                     DAG.getIntPtrConstant(i))); +  for (i=0; i < MinElts; ++i) { +    SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, +                              DAG.getIntPtrConstant(i)); +    if (N->getNumOperands() == 1) +      Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); +    else +      Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1)); +  }    SDValue UndefVal = DAG.getUNDEF(EltVT);    for (; i < WidenNumElts; ++i)      Ops[i] = UndefVal; -  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); +  return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], 
WidenNumElts);  }  SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { @@ -1536,7 +1562,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {                       WidenVT, WidenLHS, DAG.getValueType(ExtVT));  } -SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {    SDValue InOp = N->getOperand(0);    EVT InVT = InOp.getValueType();    EVT VT = N->getValueType(0); @@ -1555,7 +1581,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {      InOp = GetPromotedInteger(InOp);      InVT = InOp.getValueType();      if (WidenVT.bitsEq(InVT)) -      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp); +      return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);      break;    case SoftenFloat:    case ExpandInteger: @@ -1570,13 +1596,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {      InVT = InOp.getValueType();      if (WidenVT.bitsEq(InVT))        // The input widens to the same size. Convert to the widen value. -      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp); +      return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);      break;    }    unsigned WidenSize = WidenVT.getSizeInBits();    unsigned InSize = InVT.getSizeInBits(); -  if (WidenSize % InSize == 0) { +  // x86mmx is not an acceptable vector element type, so don't try. +  if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {      // Determine new input vector type.  The new input vector type will use      // the same element type (if its a vector) or use the input type as a      // vector.  It is the same size as the type to widen to. 
@@ -1590,7 +1617,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {        NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);      } -    if (TLI.isTypeSynthesizable(NewInVT)) { +    if (TLI.isTypeLegal(NewInVT)) {        // Because the result and the input are different vector types, widening        // the result could create a legal type but widening the input might make        // it an illegal type that might lead to repeatedly splitting the input @@ -1609,7 +1636,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {        else          NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,                               NewInVT, &Ops[0], NewNumElts); -      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, NewVec); +      return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);      }    } @@ -1730,7 +1757,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {                                    SatOp, CvtCode);    } -  if (TLI.isTypeSynthesizable(InWidenVT)) { +  if (TLI.isTypeLegal(InWidenVT)) {      // Because the result and the input are different vector types, widening      // the result could create a legal type but widening the input might make      // it an illegal type that might lead to repeatedly splitting the input @@ -1794,39 +1821,25 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {    EVT InVT = InOp.getValueType(); -  ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx); -  if (CIdx) { -    unsigned IdxVal = CIdx->getZExtValue(); -    // Check if we can just return the input vector after widening. -    if (IdxVal == 0 && InVT == WidenVT) -      return InOp; - -    // Check if we can extract from the vector. 
-    unsigned InNumElts = InVT.getVectorNumElements(); -    if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) -        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); -  } +  // Check if we can just return the input vector after widening. +  uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); +  if (IdxVal == 0 && InVT == WidenVT) +    return InOp; + +  // Check if we can extract from the vector. +  unsigned InNumElts = InVT.getVectorNumElements(); +  if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) +    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);    // We could try widening the input to the right length but for now, extract    // the original elements, fill the rest with undefs and build a vector.    SmallVector<SDValue, 16> Ops(WidenNumElts);    EVT EltVT = VT.getVectorElementType(); -  EVT IdxVT = Idx.getValueType();    unsigned NumElts = VT.getVectorNumElements();    unsigned i; -  if (CIdx) { -    unsigned IdxVal = CIdx->getZExtValue(); -    for (i=0; i < NumElts; ++i) -      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, -                           DAG.getConstant(IdxVal+i, IdxVT)); -  } else { -    Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, Idx); -    for (i=1; i < NumElts; ++i) { -      SDValue NewIdx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, -                                   DAG.getConstant(i, IdxVT)); -      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, NewIdx); -    } -  } +  for (i=0; i < NumElts; ++i) +    Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, +                         DAG.getIntPtrConstant(IdxVal+i));    SDValue UndefVal = DAG.getUNDEF(EltVT);    for (; i < WidenNumElts; ++i) @@ -1985,7 +1998,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {  #endif      llvm_unreachable("Do not know how to widen this operator's operand!"); -  case 
ISD::BIT_CONVERT:        Res = WidenVecOp_BIT_CONVERT(N); break; +  case ISD::BITCAST:            Res = WidenVecOp_BITCAST(N); break;    case ISD::CONCAT_VECTORS:     Res = WidenVecOp_CONCAT_VECTORS(N); break;    case ISD::EXTRACT_SUBVECTOR:  Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;    case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; @@ -2044,7 +2057,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);  } -SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {    EVT VT = N->getValueType(0);    SDValue InOp = GetWidenedVector(N->getOperand(0));    EVT InWidenVT = InOp.getValueType(); @@ -2053,11 +2066,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {    // Check if we can convert between two legal vector types and extract.    unsigned InWidenSize = InWidenVT.getSizeInBits();    unsigned Size = VT.getSizeInBits(); -  if (InWidenSize % Size == 0 && !VT.isVector()) { +  // x86mmx is not an acceptable vector element type, so don't try. 
+  if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) {      unsigned NewNumElts = InWidenSize / Size;      EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); -    if (TLI.isTypeSynthesizable(NewVT)) { -      SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp); +    if (TLI.isTypeLegal(NewVT)) { +      SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);        return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,                           DAG.getIntPtrConstant(0));      } @@ -2146,7 +2160,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,    if (Width == WidenEltWidth)      return RetVT; -  // See if there is larger legal integer than the element type to load/store  +  // See if there is larger legal integer than the element type to load/store    unsigned VT;    for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;         VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { @@ -2154,7 +2168,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,      unsigned MemVTWidth = MemVT.getSizeInBits();      if (MemVT.getSizeInBits() <= WidenEltWidth)        break; -    if (TLI.isTypeSynthesizable(MemVT) && (WidenWidth % MemVTWidth) == 0 && +    if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&          (MemVTWidth <= Width ||           (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {        RetVT = MemVT; @@ -2168,7 +2182,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,         VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {      EVT MemVT = (MVT::SimpleValueType) VT;      unsigned MemVTWidth = MemVT.getSizeInBits(); -    if (TLI.isTypeSynthesizable(MemVT) && WidenEltVT == MemVT.getVectorElementType() && +    if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&          (WidenWidth % MemVTWidth) == 0 &&          (MemVTWidth <= Width ||           (Align!=0 && MemVTWidth<=AlignInBits && 
MemVTWidth<=Width+WidenEx))) { @@ -2201,7 +2215,7 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,      if (NewLdTy != LdTy) {        NumElts = Width / NewLdTy.getSizeInBits();        NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts); -      VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp); +      VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);        // Readjust position and vector position based on new load type        Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();        LdTy = NewLdTy; @@ -2209,11 +2223,11 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,      VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],                          DAG.getIntPtrConstant(Idx++));    } -  return DAG.getNode(ISD::BIT_CONVERT, dl, VecTy, VecOp); +  return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);  } -SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, -                                              LoadSDNode * LD) { +SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, +                                              LoadSDNode *LD) {    // The strategy assumes that we can efficiently load powers of two widths.    
// The routines chops the vector into the largest vector loads with the same    // element type or scalar loads and then recombines it to the widen vector @@ -2228,11 +2242,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,    // Load information    SDValue   Chain = LD->getChain();    SDValue   BasePtr = LD->getBasePtr(); -  int       SVOffset = LD->getSrcValueOffset();    unsigned  Align    = LD->getAlignment();    bool      isVolatile = LD->isVolatile();    bool      isNonTemporal = LD->isNonTemporal(); -  const Value *SV = LD->getSrcValue();    int LdWidth = LdVT.getSizeInBits();    int WidthDiff = WidenWidth - LdWidth;          // Difference @@ -2241,7 +2253,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,    // Find the vector type that can load from.    EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);    int NewVTWidth = NewVT.getSizeInBits(); -  SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset, +  SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),                               isVolatile, isNonTemporal, Align);    LdChain.push_back(LdOp.getValue(1)); @@ -2251,7 +2263,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,        unsigned NumElts = WidenWidth / NewVTWidth;        EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);        SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); -      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp); +      return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);      }      if (NewVT == WidenVT)        return LdOp; @@ -2286,8 +2298,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,        NewVTWidth = NewVT.getSizeInBits();      } -    SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, -                               SVOffset+Offset, isVolatile, +    SDValue LdOp = 
DAG.getLoad(NewVT, dl, Chain, BasePtr, +                               LD->getPointerInfo().getWithOffset(Offset), +                               isVolatile,                                 isNonTemporal, MinAlign(Align, Increment));      LdChain.push_back(LdOp.getValue(1));      LdOps.push_back(LdOp); @@ -2300,7 +2313,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,    if (!LdOps[0].getValueType().isVector())      // All the loads are scalar loads.      return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); -   +    // If the load contains vectors, build the vector using concat vector.    // All of the vectors used to loads are power of 2 and the scalars load    // can be combined to make a power of 2 vector. @@ -2362,11 +2375,9 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,    // Load information    SDValue   Chain = LD->getChain();    SDValue   BasePtr = LD->getBasePtr(); -  int       SVOffset = LD->getSrcValueOffset();    unsigned  Align    = LD->getAlignment();    bool      isVolatile = LD->isVolatile();    bool      isNonTemporal = LD->isNonTemporal(); -  const Value *SV = LD->getSrcValue();    EVT EltVT = WidenVT.getVectorElementType();    EVT LdEltVT = LdVT.getVectorElementType(); @@ -2376,16 +2387,17 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,    unsigned WidenNumElts = WidenVT.getVectorNumElements();    SmallVector<SDValue, 16> Ops(WidenNumElts);    unsigned Increment = LdEltVT.getSizeInBits() / 8; -  Ops[0] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, BasePtr, SV, SVOffset, +  Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, +                          LD->getPointerInfo(),                            LdEltVT, isVolatile, isNonTemporal, Align);    LdChain.push_back(Ops[0].getValue(1));    unsigned i = 0, Offset = Increment;    for (i=1; i < NumElts; ++i, Offset += Increment) {      SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, 
BasePtr.getValueType(),                                       BasePtr, DAG.getIntPtrConstant(Offset)); -    Ops[i] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, NewBasePtr, SV, -                            SVOffset + Offset, LdEltVT, isVolatile, -                            isNonTemporal, Align); +    Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, +                            LD->getPointerInfo().getWithOffset(Offset), LdEltVT, +                            isVolatile, isNonTemporal, Align);      LdChain.push_back(Ops[i].getValue(1));    } @@ -2405,8 +2417,6 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,    // element type or scalar stores.    SDValue  Chain = ST->getChain();    SDValue  BasePtr = ST->getBasePtr(); -  const    Value *SV = ST->getSrcValue(); -  int      SVOffset = ST->getSrcValueOffset();    unsigned Align = ST->getAlignment();    bool     isVolatile = ST->isVolatile();    bool     isNonTemporal = ST->isNonTemporal(); @@ -2433,9 +2443,9 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,        do {          SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,                                     DAG.getIntPtrConstant(Idx)); -        StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, -                                       SVOffset + Offset, isVolatile, -                                       isNonTemporal, +        StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, +                                    ST->getPointerInfo().getWithOffset(Offset), +                                       isVolatile, isNonTemporal,                                         MinAlign(Align, Offset)));          StWidth -= NewVTWidth;          Offset += Increment; @@ -2447,15 +2457,16 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,        // Cast the vector to the scalar type we can store        unsigned NumElts = ValWidth / NewVTWidth;   
     EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); -      SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp); +      SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);        // Readjust index position based on new vector type        Idx = Idx * ValEltWidth / NewVTWidth;        do {          SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,                        DAG.getIntPtrConstant(Idx++)); -        StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, -                                       SVOffset + Offset, isVolatile, -                                       isNonTemporal, MinAlign(Align, Offset))); +        StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, +                                    ST->getPointerInfo().getWithOffset(Offset), +                                       isVolatile, isNonTemporal, +                                       MinAlign(Align, Offset)));          StWidth -= NewVTWidth;          Offset += Increment;          BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, @@ -2474,14 +2485,12 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,    // and then store it.  Instead, we extract each element and then store it.    
SDValue  Chain = ST->getChain();    SDValue  BasePtr = ST->getBasePtr(); -  const    Value *SV = ST->getSrcValue(); -  int      SVOffset = ST->getSrcValueOffset();    unsigned Align = ST->getAlignment();    bool     isVolatile = ST->isVolatile();    bool     isNonTemporal = ST->isNonTemporal();    SDValue  ValOp = GetWidenedVector(ST->getValue());    DebugLoc dl = ST->getDebugLoc(); -   +    EVT StVT = ST->getMemoryVT();    EVT ValVT = ValOp.getValueType(); @@ -2499,8 +2508,8 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,    unsigned NumElts = StVT.getVectorNumElements();    SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,                              DAG.getIntPtrConstant(0)); -  StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV, -                                      SVOffset, StEltVT, +  StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, +                                      ST->getPointerInfo(), StEltVT,                                        isVolatile, isNonTemporal, Align));    unsigned Offset = Increment;    for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { @@ -2508,9 +2517,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,                                       BasePtr, DAG.getIntPtrConstant(Offset));      SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,                              DAG.getIntPtrConstant(0)); -    StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV, -                                        SVOffset + Offset, StEltVT, -                                        isVolatile, isNonTemporal, +    StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, +                                      ST->getPointerInfo().getWithOffset(Offset), +                                        StEltVT, isVolatile, isNonTemporal,                                          MinAlign(Align, Offset)));    } 
 } diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index ac2d33884b26..2dcb22957325 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -16,7 +16,7 @@  #include "llvm/ADT/SmallVector.h"  #include "llvm/Support/DebugLoc.h" -#include "llvm/System/DataTypes.h" +#include "llvm/Support/DataTypes.h"  namespace llvm { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index fae27294e364..e3da2084529a 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -205,7 +205,7 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {  /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled  /// successors to the newly created node.  SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { -  if (SU->getNode()->getFlaggedNode()) +  if (SU->getNode()->getGluedNode())      return NULL;    SDNode *N = SU->getNode(); @@ -216,7 +216,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {    bool TryUnfold = false;    for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {      EVT VT = N->getValueType(i); -    if (VT == MVT::Flag) +    if (VT == MVT::Glue)        return NULL;      else if (VT == MVT::Other)        TryUnfold = true; @@ -224,7 +224,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {      const SDValue &Op = N->getOperand(i);      EVT VT = Op.getNode()->getValueType(Op.getResNo()); -    if (VT == MVT::Flag) +    if (VT == MVT::Glue)        return NULL;    } @@ -476,12 +476,12 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,      }    } -  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { +  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {      if (Node->getOpcode() == 
ISD::INLINEASM) {        // Inline asm can clobber physical defs.        unsigned NumOps = Node->getNumOperands(); -      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) -        --NumOps;  // Ignore the flag operand. +      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) +        --NumOps;  // Ignore the glue operand.        for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {          unsigned Flags = diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index 56f5ded50083..430283d5eff9 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -40,7 +40,7 @@ STATISTIC(NumStalls, "Number of pipeline stalls");  static RegisterScheduler    tdListDAGScheduler("list-td", "Top-down list scheduler",                       createTDListDAGScheduler); -    +  namespace {  //===----------------------------------------------------------------------===//  /// ScheduleDAGList - The actual list scheduler implementation.  This supports @@ -51,7 +51,7 @@ private:    /// AvailableQueue - The priority queue to use for the available SUnits.    ///    SchedulingPriorityQueue *AvailableQueue; -   +    /// PendingQueue - This contains all of the instructions whose operands have    /// been issued, but their results are not ready yet (due to the latency of    /// the operation).  
Once the operands become available, the instruction is @@ -63,11 +63,12 @@ private:  public:    ScheduleDAGList(MachineFunction &mf, -                  SchedulingPriorityQueue *availqueue, -                  ScheduleHazardRecognizer *HR) -    : ScheduleDAGSDNodes(mf), -      AvailableQueue(availqueue), HazardRec(HR) { -    } +                  SchedulingPriorityQueue *availqueue) +    : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) { + +    const TargetMachine &tm = mf.getTarget(); +    HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); +  }    ~ScheduleDAGList() {      delete HazardRec; @@ -87,14 +88,14 @@ private:  /// Schedule - Schedule the DAG using list scheduling.  void ScheduleDAGList::Schedule() {    DEBUG(dbgs() << "********** List Scheduling **********\n"); -   +    // Build the scheduling graph.    BuildSchedGraph(NULL);    AvailableQueue->initNodes(SUnits); -   +    ListScheduleTopDown(); -   +    AvailableQueue->releaseState();  } @@ -118,7 +119,7 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {    --SuccSU->NumPredsLeft;    SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); -   +    // If all the node's predecessors are scheduled, this node is ready    // to be scheduled. Ignore the special ExitSU node.    if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) @@ -142,7 +143,7 @@ void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {  void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {    DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");    DEBUG(SU->dump(this)); -   +    Sequence.push_back(SU);    assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");    SU->setDepthToAtLeast(CurCycle); @@ -168,7 +169,7 @@ void ScheduleDAGList::ListScheduleTopDown() {        SUnits[i].isAvailable = true;      }    } -   +    // While Available queue is not empty, grab the node with the highest    // priority. If it is not ready put it back.  Schedule the node.    
std::vector<SUnit*> NotReady; @@ -187,7 +188,7 @@ void ScheduleDAGList::ListScheduleTopDown() {          assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");        }      } -     +      // If there are no instructions available, don't try to issue anything, and      // don't advance the hazard recognizer.      if (AvailableQueue->empty()) { @@ -196,24 +197,24 @@ void ScheduleDAGList::ListScheduleTopDown() {      }      SUnit *FoundSUnit = 0; -     +      bool HasNoopHazards = false;      while (!AvailableQueue->empty()) {        SUnit *CurSUnit = AvailableQueue->pop(); -       +        ScheduleHazardRecognizer::HazardType HT = -        HazardRec->getHazardType(CurSUnit); +        HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);        if (HT == ScheduleHazardRecognizer::NoHazard) {          FoundSUnit = CurSUnit;          break;        } -     +        // Remember if this is a noop hazard.        HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; -       +        NotReady.push_back(CurSUnit);      } -     +      // Add the nodes that aren't ready back onto the available list.      if (!NotReady.empty()) {        AvailableQueue->push_all(NotReady); @@ -228,7 +229,7 @@ void ScheduleDAGList::ListScheduleTopDown() {        // If this is a pseudo-op node, we don't want to increment the current        // cycle.        if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops! -        ++CurCycle;         +        ++CurCycle;      } else if (!HasNoopHazards) {        // Otherwise, we have a pipeline stall, but no other problem, just advance        // the current cycle and try again. @@ -257,12 +258,8 @@ void ScheduleDAGList::ListScheduleTopDown() {  //                         Public Constructor Functions  //===----------------------------------------------------------------------===// -/// createTDListDAGScheduler - This creates a top-down list scheduler with a -/// new hazard recognizer. 
This scheduler takes ownership of the hazard -/// recognizer and deletes it when done. +/// createTDListDAGScheduler - This creates a top-down list scheduler.  ScheduleDAGSDNodes *  llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { -  return new ScheduleDAGList(*IS->MF, -                             new LatencyPriorityQueue(), -                             IS->CreateTargetHazardRecognizer()); +  return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue());  } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 4c3e4e3b0768..0b548b277f4c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -20,6 +20,7 @@  #include "llvm/InlineAsm.h"  #include "llvm/CodeGen/SchedulerRegistry.h"  #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h"  #include "llvm/Target/TargetRegisterInfo.h"  #include "llvm/Target/TargetData.h"  #include "llvm/Target/TargetMachine.h" @@ -65,6 +66,10 @@ static RegisterScheduler                        "which tries to balance ILP and register pressure",                        createILPListDAGScheduler); +static cl::opt<bool> DisableSchedCycles( +  "disable-sched-cycles", cl::Hidden, cl::init(false), +  cl::desc("Disable cycle-level precision during preRA scheduling")); +  namespace {  //===----------------------------------------------------------------------===//  /// ScheduleDAGRRList - The actual register reduction list scheduler @@ -83,31 +88,56 @@ private:    /// AvailableQueue - The priority queue to use for the available SUnits.    SchedulingPriorityQueue *AvailableQueue; +  /// PendingQueue - This contains all of the instructions whose operands have +  /// been issued, but their results are not ready yet (due to the latency of +  /// the operation).  Once the operands becomes available, the instruction is +  /// added to the AvailableQueue. 
+  std::vector<SUnit*> PendingQueue; + +  /// HazardRec - The hazard recognizer to use. +  ScheduleHazardRecognizer *HazardRec; + +  /// CurCycle - The current scheduler state corresponds to this cycle. +  unsigned CurCycle; + +  /// MinAvailableCycle - Cycle of the soonest available instruction. +  unsigned MinAvailableCycle; +    /// LiveRegDefs - A set of physical registers and their definition    /// that are "live". These nodes must be scheduled before any other nodes that    /// modifies the registers can be scheduled.    unsigned NumLiveRegs;    std::vector<SUnit*> LiveRegDefs; -  std::vector<unsigned> LiveRegCycles; +  std::vector<SUnit*> LiveRegGens;    /// Topo - A topological ordering for SUnits which permits fast IsReachable    /// and similar queries.    ScheduleDAGTopologicalSort Topo;  public: -  ScheduleDAGRRList(MachineFunction &mf, -                    bool isbottomup, bool needlatency, -                    SchedulingPriorityQueue *availqueue) -    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency), -      AvailableQueue(availqueue), Topo(SUnits) { -    } +  ScheduleDAGRRList(MachineFunction &mf, bool needlatency, +                    SchedulingPriorityQueue *availqueue, +                    CodeGenOpt::Level OptLevel) +    : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()), +      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), +      Topo(SUnits) { + +    const TargetMachine &tm = mf.getTarget(); +    if (DisableSchedCycles || !NeedLatency) +      HazardRec = new ScheduleHazardRecognizer(); +    else +      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); +  }    ~ScheduleDAGRRList() { +    delete HazardRec;      delete AvailableQueue;    }    void Schedule(); +  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; } +    /// IsReachable - Checks if SU is reachable from TargetSU.    
bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {      return Topo.IsReachable(SU, TargetSU); @@ -136,24 +166,37 @@ public:    }  private: +  bool isReady(SUnit *SU) { +    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() || +      AvailableQueue->isReady(SU); +  } +    void ReleasePred(SUnit *SU, const SDep *PredEdge); -  void ReleasePredecessors(SUnit *SU, unsigned CurCycle); +  void ReleasePredecessors(SUnit *SU);    void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);    void ReleaseSuccessors(SUnit *SU); +  void ReleasePending(); +  void AdvanceToCycle(unsigned NextCycle); +  void AdvancePastStalls(SUnit *SU); +  void EmitNode(SUnit *SU); +  void ScheduleNodeBottomUp(SUnit*);    void CapturePred(SDep *PredEdge); -  void ScheduleNodeBottomUp(SUnit*, unsigned); -  void ScheduleNodeTopDown(SUnit*, unsigned);    void UnscheduleNodeBottomUp(SUnit*); -  void BacktrackBottomUp(SUnit*, unsigned, unsigned&); +  void RestoreHazardCheckerBottomUp(); +  void BacktrackBottomUp(SUnit*, SUnit*);    SUnit *CopyAndMoveSuccessors(SUnit*);    void InsertCopiesAndMoveSuccs(SUnit*, unsigned,                                  const TargetRegisterClass*,                                  const TargetRegisterClass*,                                  SmallVector<SUnit*, 2>&);    bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); -  void ListScheduleTopDown(); + +  SUnit *PickNodeToScheduleBottomUp();    void ListScheduleBottomUp(); +  void ScheduleNodeTopDown(SUnit*); +  void ListScheduleTopDown(); +    /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.    /// Updates the topological ordering if required. @@ -190,11 +233,13 @@ private:  void ScheduleDAGRRList::Schedule() {    DEBUG(dbgs()          << "********** List Scheduling BB#" << BB->getNumber() -        << " **********\n"); +        << " '" << BB->getName() << "' **********\n"); +  CurCycle = 0; +  MinAvailableCycle = DisableSchedCycles ? 
0 : UINT_MAX;    NumLiveRegs = 0; -  LiveRegDefs.resize(TRI->getNumRegs(), NULL);   -  LiveRegCycles.resize(TRI->getNumRegs(), 0); +  LiveRegDefs.resize(TRI->getNumRegs(), NULL); +  LiveRegGens.resize(TRI->getNumRegs(), NULL);    // Build the scheduling graph.    BuildSchedGraph(NULL); @@ -204,13 +249,15 @@ void ScheduleDAGRRList::Schedule() {    Topo.InitDAGTopologicalSorting();    AvailableQueue->initNodes(SUnits); -   + +  HazardRec->Reset(); +    // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.    if (isBottomUp)      ListScheduleBottomUp();    else      ListScheduleTopDown(); -   +    AvailableQueue->releaseState();  } @@ -243,33 +290,197 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {    // to be scheduled. Ignore the special EntrySU node.    if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {      PredSU->isAvailable = true; -    AvailableQueue->push(PredSU); + +    unsigned Height = PredSU->getHeight(); +    if (Height < MinAvailableCycle) +      MinAvailableCycle = Height; + +    if (isReady(SU)) { +      AvailableQueue->push(PredSU); +    } +    // CapturePred and others may have left the node in the pending queue, avoid +    // adding it twice. +    else if (!PredSU->isPending) { +      PredSU->isPending = true; +      PendingQueue.push_back(PredSU); +    }    }  } -void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { +/// Call ReleasePred for each predecessor, then update register live def/gen. +/// Always update LiveRegDefs for a register dependence even if the current SU +/// also defines the register. This effectively create one large live range +/// across a sequence of two-address node. This is important because the +/// entire chain must be scheduled together. 
Example: +/// +/// flags = (3) add +/// flags = (2) addc flags +/// flags = (1) addc flags +/// +/// results in +/// +/// LiveRegDefs[flags] = 3 +/// LiveRegGens[flags] = 1 +/// +/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid +/// interference on flags. +void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {    // Bottom up: release predecessors    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();         I != E; ++I) {      ReleasePred(SU, &*I);      if (I->isAssignedRegDep()) {        // This is a physical register dependency and it's impossible or -      // expensive to copy the register. Make sure nothing that can  +      // expensive to copy the register. Make sure nothing that can        // clobber the register is scheduled between the predecessor and        // this node. -      if (!LiveRegDefs[I->getReg()]) { +      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef; +      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) && +             "interference on register dependence"); +      LiveRegDefs[I->getReg()] = I->getSUnit(); +      if (!LiveRegGens[I->getReg()]) {          ++NumLiveRegs; -        LiveRegDefs[I->getReg()] = I->getSUnit(); -        LiveRegCycles[I->getReg()] = CurCycle; +        LiveRegGens[I->getReg()] = SU;        }      }    }  } +/// Check to see if any of the pending instructions are ready to issue.  If +/// so, add them to the available queue. +void ScheduleDAGRRList::ReleasePending() { +  if (DisableSchedCycles) { +    assert(PendingQueue.empty() && "pending instrs not allowed in this mode"); +    return; +  } + +  // If the available queue is empty, it is safe to reset MinAvailableCycle. +  if (AvailableQueue->empty()) +    MinAvailableCycle = UINT_MAX; + +  // Check to see if any of the pending instructions are ready to issue.  If +  // so, add them to the available queue. 
+  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { +    unsigned ReadyCycle = +      isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth(); +    if (ReadyCycle < MinAvailableCycle) +      MinAvailableCycle = ReadyCycle; + +    if (PendingQueue[i]->isAvailable) { +      if (!isReady(PendingQueue[i])) +          continue; +      AvailableQueue->push(PendingQueue[i]); +    } +    PendingQueue[i]->isPending = false; +    PendingQueue[i] = PendingQueue.back(); +    PendingQueue.pop_back(); +    --i; --e; +  } +} + +/// Move the scheduler state forward by the specified number of Cycles. +void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { +  if (NextCycle <= CurCycle) +    return; + +  AvailableQueue->setCurCycle(NextCycle); +  if (!HazardRec->isEnabled()) { +    // Bypass lots of virtual calls in case of long latency. +    CurCycle = NextCycle; +  } +  else { +    for (; CurCycle != NextCycle; ++CurCycle) { +      if (isBottomUp) +        HazardRec->RecedeCycle(); +      else +        HazardRec->AdvanceCycle(); +    } +  } +  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the +  // available Q to release pending nodes at least once before popping. +  ReleasePending(); +} + +/// Move the scheduler state forward until the specified node's dependents are +/// ready and can be scheduled with no resource conflicts. +void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { +  if (DisableSchedCycles) +    return; + +  unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); + +  // Bump CurCycle to account for latency. We assume the latency of other +  // available instructions may be hidden by the stall (not a full pipe stall). +  // This updates the hazard recognizer's cycle before reserving resources for +  // this instruction. +  AdvanceToCycle(ReadyCycle); + +  // Calls are scheduled in their preceding cycle, so don't conflict with +  // hazards from instructions after the call. 
EmitNode will reset the +  // scoreboard state before emitting the call. +  if (isBottomUp && SU->isCall) +    return; + +  // FIXME: For resource conflicts in very long non-pipelined stages, we +  // should probably skip ahead here to avoid useless scoreboard checks. +  int Stalls = 0; +  while (true) { +    ScheduleHazardRecognizer::HazardType HT = +      HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls); + +    if (HT == ScheduleHazardRecognizer::NoHazard) +      break; + +    ++Stalls; +  } +  AdvanceToCycle(CurCycle + Stalls); +} + +/// Record this SUnit in the HazardRecognizer. +/// Does not update CurCycle. +void ScheduleDAGRRList::EmitNode(SUnit *SU) { +  if (!HazardRec->isEnabled()) +    return; + +  // Check for phys reg copy. +  if (!SU->getNode()) +    return; + +  switch (SU->getNode()->getOpcode()) { +  default: +    assert(SU->getNode()->isMachineOpcode() && +           "This target-independent node should not be scheduled."); +    break; +  case ISD::MERGE_VALUES: +  case ISD::TokenFactor: +  case ISD::CopyToReg: +  case ISD::CopyFromReg: +  case ISD::EH_LABEL: +    // Noops don't affect the scoreboard state. Copies are likely to be +    // removed. +    return; +  case ISD::INLINEASM: +    // For inline asm, clear the pipeline state. +    HazardRec->Reset(); +    return; +  } +  if (isBottomUp && SU->isCall) { +    // Calls are scheduled with their preceding instructions. For bottom-up +    // scheduling, clear the pipeline state before emitting. +    HazardRec->Reset(); +  } + +  HazardRec->EmitInstruction(SU); + +  if (!isBottomUp && SU->isCall) { +    HazardRec->Reset(); +  } +} +  /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending  /// count of its predecessors. If a predecessor pending count is zero, add it to  /// the Available queue. 
-void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { +void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {    DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");    DEBUG(SU->dump(this)); @@ -278,36 +489,51 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {      DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");  #endif -  // FIXME: Handle noop hazard. +  // FIXME: Do not modify node height. It may interfere with +  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the +  // node it's ready cycle can aid heuristics, and after scheduling it can +  // indicate the scheduled cycle.    SU->setHeightToAtLeast(CurCycle); + +  // Reserve resources for the scheduled intruction. +  EmitNode(SU); +    Sequence.push_back(SU);    AvailableQueue->ScheduledNode(SU); -  ReleasePredecessors(SU, CurCycle); +  // Update liveness of predecessors before successors to avoid treating a +  // two-address node as a live range def. +  ReleasePredecessors(SU);    // Release all the implicit physical register defs that are live.    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();         I != E; ++I) { -    if (I->isAssignedRegDep()) { -      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { -        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); -        assert(LiveRegDefs[I->getReg()] == SU && -               "Physical register dependency violated?"); -        --NumLiveRegs; -        LiveRegDefs[I->getReg()] = NULL; -        LiveRegCycles[I->getReg()] = 0; -      } +    // LiveRegDegs[I->getReg()] != SU when SU is a two-address node. 
+    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) { +      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); +      --NumLiveRegs; +      LiveRegDefs[I->getReg()] = NULL; +      LiveRegGens[I->getReg()] = NULL;      }    }    SU->isScheduled = true; + +  // Conditions under which the scheduler should eagerly advance the cycle: +  // (1) No available instructions +  // (2) All pipelines full, so available instructions must have hazards. +  // +  // If HazardRec is disabled, count each inst as one cycle. +  if (!HazardRec->isEnabled() || HazardRec->atIssueLimit() +      || AvailableQueue->empty()) +    AdvanceToCycle(CurCycle + 1);  }  /// CapturePred - This does the opposite of ReleasePred. Since SU is being  /// unscheduled, incrcease the succ left count of its predecessors. Remove  /// them from AvailableQueue if necessary. -void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {   +void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {    SUnit *PredSU = PredEdge->getSUnit();    if (PredSU->isAvailable) {      PredSU->isAvailable = false; @@ -328,59 +554,98 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();         I != E; ++I) {      CapturePred(&*I); -    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){ +    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");        assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&               "Physical register dependency violated?");        --NumLiveRegs;        LiveRegDefs[I->getReg()] = NULL; -      LiveRegCycles[I->getReg()] = 0; +      LiveRegGens[I->getReg()] = NULL;      }    }    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();         I != E; ++I) {      if (I->isAssignedRegDep()) { +      // This becomes the nearest def. 
Note that an earlier def may still be +      // pending if this is a two-address node. +      LiveRegDefs[I->getReg()] = SU;        if (!LiveRegDefs[I->getReg()]) { -        LiveRegDefs[I->getReg()] = SU;          ++NumLiveRegs;        } -      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) -        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); +      if (LiveRegGens[I->getReg()] == NULL || +          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight()) +        LiveRegGens[I->getReg()] = I->getSUnit();      }    } +  if (SU->getHeight() < MinAvailableCycle) +    MinAvailableCycle = SU->getHeight();    SU->setHeightDirty();    SU->isScheduled = false;    SU->isAvailable = true; -  AvailableQueue->push(SU); +  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) { +    // Don't make available until backtracking is complete. +    SU->isPending = true; +    PendingQueue.push_back(SU); +  } +  else { +    AvailableQueue->push(SU); +  }    AvailableQueue->UnscheduledNode(SU);  } +/// After backtracking, the hazard checker needs to be restored to a state +/// corresponding to the current cycle. +void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { +  HazardRec->Reset(); + +  unsigned LookAhead = std::min((unsigned)Sequence.size(), +                                HazardRec->getMaxLookAhead()); +  if (LookAhead == 0) +    return; + +  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead); +  unsigned HazardCycle = (*I)->getHeight(); +  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) { +    SUnit *SU = *I; +    for (; SU->getHeight() > HazardCycle; ++HazardCycle) { +      HazardRec->RecedeCycle(); +    } +    EmitNode(SU); +  } +} +  /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in  /// BTCycle in order to schedule a specific node. 
-void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, -                                          unsigned &CurCycle) { -  SUnit *OldSU = NULL; -  while (CurCycle > BtCycle) { -    OldSU = Sequence.back(); +void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) { +  SUnit *OldSU = Sequence.back(); +  while (true) {      Sequence.pop_back();      if (SU->isSucc(OldSU))        // Don't try to remove SU from AvailableQueue.        SU->isAvailable = false; +    // FIXME: use ready cycle instead of height +    CurCycle = OldSU->getHeight();      UnscheduleNodeBottomUp(OldSU); -    --CurCycle;      AvailableQueue->setCurCycle(CurCycle); +    if (OldSU == BtSU) +      break; +    OldSU = Sequence.back();    }    assert(!SU->isSucc(OldSU) && "Something is wrong!"); +  RestoreHazardCheckerBottomUp(); + +  ReleasePending(); +    ++NumBacktracks;  }  static bool isOperandOf(const SUnit *SU, SDNode *N) {    for (const SDNode *SUNode = SU->getNode(); SUNode; -       SUNode = SUNode->getFlaggedNode()) { +       SUNode = SUNode->getGluedNode()) {      if (SUNode->isOperandOf(N))        return true;    } @@ -390,18 +655,18 @@ static bool isOperandOf(const SUnit *SU, SDNode *N) {  /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled  /// successors to the newly created node.  
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { -  if (SU->getNode()->getFlaggedNode()) -    return NULL; -    SDNode *N = SU->getNode();    if (!N)      return NULL; +  if (SU->getNode()->getGluedNode()) +    return NULL; +    SUnit *NewSU;    bool TryUnfold = false;    for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {      EVT VT = N->getValueType(i); -    if (VT == MVT::Flag) +    if (VT == MVT::Glue)        return NULL;      else if (VT == MVT::Other)        TryUnfold = true; @@ -409,7 +674,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {      const SDValue &Op = N->getOperand(i);      EVT VT = Op.getNode()->getValueType(Op.getResNo()); -    if (VT == MVT::Flag) +    if (VT == MVT::Glue)        return NULL;    } @@ -441,13 +706,15 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {      } else {        LoadSU = CreateNewSUnit(LoadNode);        LoadNode->setNodeId(LoadSU->NodeNum); + +      InitNumRegDefsLeft(LoadSU);        ComputeLatency(LoadSU);      }      SUnit *NewSU = CreateNewSUnit(N);      assert(N->getNodeId() == -1 && "Node already inserted!");      N->setNodeId(NewSU->NodeNum); -       +      const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());      for (unsigned i = 0; i != TID.getNumOperands(); ++i) {        if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { @@ -457,6 +724,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {      }      if (TID.isCommutable())        NewSU->isCommutable = true; + +    InitNumRegDefsLeft(NewSU);      ComputeLatency(NewSU);      // Record all the edges to and from the old SU, by category. @@ -507,6 +776,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {        RemovePred(SuccDep, D);        D.setSUnit(NewSU);        AddPred(SuccDep, D); +      // Balance register pressure. 
+      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled +          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0) +        --NewSU->NumRegDefsLeft;      }      for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {        SDep D = ChainSuccs[i]; @@ -517,7 +790,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {          D.setSUnit(LoadSU);          AddPred(SuccDep, D);        } -    }  +    }      // Add a data dependency to reflect that NewSU reads the value defined      // by LoadSU. @@ -633,52 +906,52 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,  /// CheckForLiveRegDef - Return true and update live register vector if the  /// specified register def of the specified SUnit clobbers any "live" registers. -static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, +static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,                                 std::vector<SUnit*> &LiveRegDefs,                                 SmallSet<unsigned, 4> &RegAdded,                                 SmallVector<unsigned, 4> &LRegs,                                 const TargetRegisterInfo *TRI) { -  bool Added = false; -  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { -    if (RegAdded.insert(Reg)) { +  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { + +    // Check if Ref is live. +    if (!LiveRegDefs[Reg]) continue; + +    // Allow multiple uses of the same def. +    if (LiveRegDefs[Reg] == SU) continue; + +    // Add Reg to the set of interfering live regs. 
+    if (RegAdded.insert(Reg))        LRegs.push_back(Reg); -      Added = true; -    }    } -  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) -    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { -      if (RegAdded.insert(*Alias)) { -        LRegs.push_back(*Alias); -        Added = true; -      } -    } -  return Added;  }  /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay  /// scheduling of the given node to satisfy live physical register dependencies.  /// If the specific node is the last one that's available to schedule, do  /// whatever is necessary (i.e. backtracking or cloning) to make it possible. -bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, -                                                 SmallVector<unsigned, 4> &LRegs){ +bool ScheduleDAGRRList:: +DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {    if (NumLiveRegs == 0)      return false;    SmallSet<unsigned, 4> RegAdded;    // If this node would clobber any "live" register, then it's not ready. +  // +  // If SU is the currently live definition of the same register that it uses, +  // then we are free to schedule it.    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();         I != E; ++I) { -    if (I->isAssignedRegDep()) +    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)        CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,                           RegAdded, LRegs, TRI);    } -  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { +  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {      if (Node->getOpcode() == ISD::INLINEASM) {        // Inline asm can clobber physical defs.        unsigned NumOps = Node->getNumOperands(); -      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) -        --NumOps;  // Ignore the flag operand. 
+      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) +        --NumOps;  // Ignore the glue operand.        for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {          unsigned Flags = @@ -708,17 +981,151 @@ bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,      for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)        CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);    } +    return !LRegs.empty();  } +/// Return a node that can be scheduled in this cycle. Requirements: +/// (1) Ready: latency has been satisfied +/// (2) No Hazards: resources are available +/// (3) No Interferences: may unschedule to break register interferences. +SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { +  SmallVector<SUnit*, 4> Interferences; +  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; + +  SUnit *CurSU = AvailableQueue->pop(); +  while (CurSU) { +    SmallVector<unsigned, 4> LRegs; +    if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) +      break; +    LRegsMap.insert(std::make_pair(CurSU, LRegs)); + +    CurSU->isPending = true;  // This SU is not in AvailableQueue right now. +    Interferences.push_back(CurSU); +    CurSU = AvailableQueue->pop(); +  } +  if (CurSU) { +    // Add the nodes that aren't ready back onto the available list. +    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { +      Interferences[i]->isPending = false; +      assert(Interferences[i]->isAvailable && "must still be available"); +      AvailableQueue->push(Interferences[i]); +    } +    return CurSU; +  } + +  // All candidates are delayed due to live physical reg dependencies. +  // Try backtracking, code duplication, or inserting cross class copies +  // to resolve it. +  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { +    SUnit *TrySU = Interferences[i]; +    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + +    // Try unscheduling up to the point where it's safe to schedule +    // this node. 
+    SUnit *BtSU = NULL; +    unsigned LiveCycle = UINT_MAX; +    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { +      unsigned Reg = LRegs[j]; +      if (LiveRegGens[Reg]->getHeight() < LiveCycle) { +        BtSU = LiveRegGens[Reg]; +        LiveCycle = BtSU->getHeight(); +      } +    } +    if (!WillCreateCycle(TrySU, BtSU))  { +      BacktrackBottomUp(TrySU, BtSU); + +      // Force the current node to be scheduled before the node that +      // requires the physical reg dep. +      if (BtSU->isAvailable) { +        BtSU->isAvailable = false; +        if (!BtSU->isPending) +          AvailableQueue->remove(BtSU); +      } +      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1, +                          /*Reg=*/0, /*isNormalMemory=*/false, +                          /*isMustAlias=*/false, /*isArtificial=*/true)); + +      // If one or more successors has been unscheduled, then the current +      // node is no longer available. Schedule a successor that's now +      // available instead. +      if (!TrySU->isAvailable) { +        CurSU = AvailableQueue->pop(); +      } +      else { +        CurSU = TrySU; +        TrySU->isPending = false; +        Interferences.erase(Interferences.begin()+i); +      } +      break; +    } +  } + +  if (!CurSU) { +    // Can't backtrack. If it's too expensive to copy the value, then try +    // duplicate the nodes that produces these "too expensive to copy" +    // values to break the dependency. In case even that doesn't work, +    // insert cross class copies. +    // If it's not too expensive, i.e. cost != -1, issue copies. 
+    SUnit *TrySU = Interferences[0]; +    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; +    assert(LRegs.size() == 1 && "Can't handle this yet!"); +    unsigned Reg = LRegs[0]; +    SUnit *LRDef = LiveRegDefs[Reg]; +    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); +    const TargetRegisterClass *RC = +      TRI->getMinimalPhysRegClass(Reg, VT); +    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); + +    // If cross copy register class is null, then it must be possible copy +    // the value directly. Do not try duplicate the def. +    SUnit *NewDef = 0; +    if (DestRC) +      NewDef = CopyAndMoveSuccessors(LRDef); +    else +      DestRC = RC; +    if (!NewDef) { +      // Issue copies, these can be expensive cross register class copies. +      SmallVector<SUnit*, 2> Copies; +      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); +      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum +            << " to SU #" << Copies.front()->NodeNum << "\n"); +      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, +                          /*Reg=*/0, /*isNormalMemory=*/false, +                          /*isMustAlias=*/false, +                          /*isArtificial=*/true)); +      NewDef = Copies.back(); +    } + +    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum +          << " to SU #" << TrySU->NodeNum << "\n"); +    LiveRegDefs[Reg] = NewDef; +    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, +                         /*Reg=*/0, /*isNormalMemory=*/false, +                         /*isMustAlias=*/false, +                         /*isArtificial=*/true)); +    TrySU->isAvailable = false; +    CurSU = NewDef; +  } + +  assert(CurSU && "Unable to resolve live physical register dependencies!"); + +  // Add the nodes that aren't ready back onto the available list. 
+  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { +    Interferences[i]->isPending = false; +    // May no longer be available due to backtracking. +    if (Interferences[i]->isAvailable) { +      AvailableQueue->push(Interferences[i]); +    } +  } +  return CurSU; +}  /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up  /// schedulers.  void ScheduleDAGRRList::ListScheduleBottomUp() { -  unsigned CurCycle = 0; -    // Release any predecessors of the special Exit node. -  ReleasePredecessors(&ExitSU, CurCycle); +  ReleasePredecessors(&ExitSU);    // Add root to Available queue.    if (!SUnits.empty()) { @@ -730,135 +1137,29 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {    // While Available queue is not empty, grab the node with the highest    // priority. If it is not ready put it back.  Schedule the node. -  SmallVector<SUnit*, 4> NotReady; -  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;    Sequence.reserve(SUnits.size());    while (!AvailableQueue->empty()) { -    bool Delayed = false; -    LRegsMap.clear(); -    SUnit *CurSU = AvailableQueue->pop(); -    while (CurSU) { -      SmallVector<unsigned, 4> LRegs; -      if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) -        break; -      Delayed = true; -      LRegsMap.insert(std::make_pair(CurSU, LRegs)); +    DEBUG(dbgs() << "\n*** Examining Available\n"; +          AvailableQueue->dump(this)); -      CurSU->isPending = true;  // This SU is not in AvailableQueue right now. -      NotReady.push_back(CurSU); -      CurSU = AvailableQueue->pop(); -    } +    // Pick the best node to schedule taking all constraints into +    // consideration. +    SUnit *SU = PickNodeToScheduleBottomUp(); -    // All candidates are delayed due to live physical reg dependencies. -    // Try backtracking, code duplication, or inserting cross class copies -    // to resolve it. 
-    if (Delayed && !CurSU) { -      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { -        SUnit *TrySU = NotReady[i]; -        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; - -        // Try unscheduling up to the point where it's safe to schedule -        // this node. -        unsigned LiveCycle = CurCycle; -        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { -          unsigned Reg = LRegs[j]; -          unsigned LCycle = LiveRegCycles[Reg]; -          LiveCycle = std::min(LiveCycle, LCycle); -        } -        SUnit *OldSU = Sequence[LiveCycle]; -        if (!WillCreateCycle(TrySU, OldSU))  { -          BacktrackBottomUp(TrySU, LiveCycle, CurCycle); -          // Force the current node to be scheduled before the node that -          // requires the physical reg dep. -          if (OldSU->isAvailable) { -            OldSU->isAvailable = false; -            AvailableQueue->remove(OldSU); -          } -          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, -                              /*Reg=*/0, /*isNormalMemory=*/false, -                              /*isMustAlias=*/false, /*isArtificial=*/true)); -          // If one or more successors has been unscheduled, then the current -          // node is no longer avaialable. Schedule a successor that's now -          // available instead. -          if (!TrySU->isAvailable) -            CurSU = AvailableQueue->pop(); -          else { -            CurSU = TrySU; -            TrySU->isPending = false; -            NotReady.erase(NotReady.begin()+i); -          } -          break; -        } -      } +    AdvancePastStalls(SU); -      if (!CurSU) { -        // Can't backtrack. If it's too expensive to copy the value, then try -        // duplicate the nodes that produces these "too expensive to copy" -        // values to break the dependency. In case even that doesn't work, -        // insert cross class copies. -        // If it's not too expensive, i.e. 
cost != -1, issue copies. -        SUnit *TrySU = NotReady[0]; -        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; -        assert(LRegs.size() == 1 && "Can't handle this yet!"); -        unsigned Reg = LRegs[0]; -        SUnit *LRDef = LiveRegDefs[Reg]; -        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); -        const TargetRegisterClass *RC = -          TRI->getMinimalPhysRegClass(Reg, VT); -        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); - -        // If cross copy register class is null, then it must be possible copy -        // the value directly. Do not try duplicate the def. -        SUnit *NewDef = 0; -        if (DestRC) -          NewDef = CopyAndMoveSuccessors(LRDef); -        else -          DestRC = RC; -        if (!NewDef) { -          // Issue copies, these can be expensive cross register class copies. -          SmallVector<SUnit*, 2> Copies; -          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); -          DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum -                       << " to SU #" << Copies.front()->NodeNum << "\n"); -          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, -                              /*Reg=*/0, /*isNormalMemory=*/false, -                              /*isMustAlias=*/false, -                              /*isArtificial=*/true)); -          NewDef = Copies.back(); -        } +    ScheduleNodeBottomUp(SU); -        DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum -                     << " to SU #" << TrySU->NodeNum << "\n"); -        LiveRegDefs[Reg] = NewDef; -        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, -                             /*Reg=*/0, /*isNormalMemory=*/false, -                             /*isMustAlias=*/false, -                             /*isArtificial=*/true)); -        TrySU->isAvailable = false; -        CurSU = NewDef; -      } - -      assert(CurSU && "Unable to 
resolve live physical register dependencies!"); -    } - -    // Add the nodes that aren't ready back onto the available list. -    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { -      NotReady[i]->isPending = false; -      // May no longer be available due to backtracking. -      if (NotReady[i]->isAvailable) -        AvailableQueue->push(NotReady[i]); +    while (AvailableQueue->empty() && !PendingQueue.empty()) { +      // Advance the cycle to free resources. Skip ahead to the next ready SU. +      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized"); +      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));      } -    NotReady.clear(); - -    if (CurSU) -      ScheduleNodeBottomUp(CurSU, CurCycle); -    ++CurCycle; -    AvailableQueue->setCurCycle(CurCycle);    }    // Reverse the order if it is bottom up.    std::reverse(Sequence.begin(), Sequence.end()); -   +  #ifndef NDEBUG    VerifySchedule(isBottomUp);  #endif @@ -905,7 +1206,7 @@ void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {  /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending  /// count of its successors. If a successor pending count is zero, add it to  /// the Available queue. -void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { +void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {    DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");    DEBUG(SU->dump(this)); @@ -921,7 +1222,6 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {  /// ListScheduleTopDown - The main loop of list scheduling for top-down  /// schedulers.  void ScheduleDAGRRList::ListScheduleTopDown() { -  unsigned CurCycle = 0;    AvailableQueue->setCurCycle(CurCycle);    // Release any successors of the special Entry node. 
@@ -935,19 +1235,19 @@ void ScheduleDAGRRList::ListScheduleTopDown() {        SUnits[i].isAvailable = true;      }    } -   +    // While Available queue is not empty, grab the node with the highest    // priority. If it is not ready put it back.  Schedule the node.    Sequence.reserve(SUnits.size());    while (!AvailableQueue->empty()) {      SUnit *CurSU = AvailableQueue->pop(); -     +      if (CurSU) -      ScheduleNodeTopDown(CurSU, CurCycle); +      ScheduleNodeTopDown(CurSU);      ++CurCycle;      AvailableQueue->setCurCycle(CurCycle);    } -   +  #ifndef NDEBUG    VerifySchedule(isBottomUp);  #endif @@ -955,70 +1255,288 @@ void ScheduleDAGRRList::ListScheduleTopDown() {  //===----------------------------------------------------------------------===// -//                RegReductionPriorityQueue Implementation +//                RegReductionPriorityQueue Definition  //===----------------------------------------------------------------------===//  //  // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers  // to reduce register pressure. -//  +//  namespace { -  template<class SF> -  class RegReductionPriorityQueue; -   -  /// bu_ls_rr_sort - Priority function for bottom up register pressure -  // reduction scheduler. -  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { -    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; -    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} -    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} -     -    bool operator()(const SUnit* left, const SUnit* right) const; +class RegReductionPQBase; + +struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { +  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } +}; + +/// bu_ls_rr_sort - Priority function for bottom up register pressure +// reduction scheduler. 
+struct bu_ls_rr_sort : public queue_sort { +  enum { +    IsBottomUp = true, +    HasReadyFilter = false    }; -  // td_ls_rr_sort - Priority function for top down register pressure reduction -  // scheduler. -  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { -    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; -    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} -    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} -     -    bool operator()(const SUnit* left, const SUnit* right) const; +  RegReductionPQBase *SPQ; +  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} +  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + +  bool operator()(SUnit* left, SUnit* right) const; +}; + +// td_ls_rr_sort - Priority function for top down register pressure reduction +// scheduler. +struct td_ls_rr_sort : public queue_sort { +  enum { +    IsBottomUp = false, +    HasReadyFilter = false    }; -  // src_ls_rr_sort - Priority function for source order scheduler. -  struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { -    RegReductionPriorityQueue<src_ls_rr_sort> *SPQ; -    src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq) -      : SPQ(spq) {} -    src_ls_rr_sort(const src_ls_rr_sort &RHS) -      : SPQ(RHS.SPQ) {} -     -    bool operator()(const SUnit* left, const SUnit* right) const; +  RegReductionPQBase *SPQ; +  td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} +  td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + +  bool operator()(const SUnit* left, const SUnit* right) const; +}; + +// src_ls_rr_sort - Priority function for source order scheduler. +struct src_ls_rr_sort : public queue_sort { +  enum { +    IsBottomUp = true, +    HasReadyFilter = false    }; -  // hybrid_ls_rr_sort - Priority function for hybrid scheduler. 
-  struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { -    RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ; -    hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq) -      : SPQ(spq) {} -    hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) -      : SPQ(RHS.SPQ) {} +  RegReductionPQBase *SPQ; +  src_ls_rr_sort(RegReductionPQBase *spq) +    : SPQ(spq) {} +  src_ls_rr_sort(const src_ls_rr_sort &RHS) +    : SPQ(RHS.SPQ) {} + +  bool operator()(SUnit* left, SUnit* right) const; +}; -    bool operator()(const SUnit* left, const SUnit* right) const; +// hybrid_ls_rr_sort - Priority function for hybrid scheduler. +struct hybrid_ls_rr_sort : public queue_sort { +  enum { +    IsBottomUp = true, +    HasReadyFilter = true    }; -  // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) -  // scheduler. -  struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { -    RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ; -    ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq) -      : SPQ(spq) {} -    ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) -      : SPQ(RHS.SPQ) {} +  RegReductionPQBase *SPQ; +  hybrid_ls_rr_sort(RegReductionPQBase *spq) +    : SPQ(spq) {} +  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) +    : SPQ(RHS.SPQ) {} + +  bool isReady(SUnit *SU, unsigned CurCycle) const; -    bool operator()(const SUnit* left, const SUnit* right) const; +  bool operator()(SUnit* left, SUnit* right) const; +}; + +// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) +// scheduler. 
+struct ilp_ls_rr_sort : public queue_sort { +  enum { +    IsBottomUp = true, +    HasReadyFilter = true    }; -}  // end anonymous namespace + +  RegReductionPQBase *SPQ; +  ilp_ls_rr_sort(RegReductionPQBase *spq) +    : SPQ(spq) {} +  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) +    : SPQ(RHS.SPQ) {} + +  bool isReady(SUnit *SU, unsigned CurCycle) const; + +  bool operator()(SUnit* left, SUnit* right) const; +}; + +class RegReductionPQBase : public SchedulingPriorityQueue { +protected: +  std::vector<SUnit*> Queue; +  unsigned CurQueueId; +  bool TracksRegPressure; + +  // SUnits - The SUnits for the current graph. +  std::vector<SUnit> *SUnits; + +  MachineFunction &MF; +  const TargetInstrInfo *TII; +  const TargetRegisterInfo *TRI; +  const TargetLowering *TLI; +  ScheduleDAGRRList *scheduleDAG; + +  // SethiUllmanNumbers - The SethiUllman number for each node. +  std::vector<unsigned> SethiUllmanNumbers; + +  /// RegPressure - Tracking current reg pressure per register class. +  /// +  std::vector<unsigned> RegPressure; + +  /// RegLimit - Tracking the number of allocatable registers per register +  /// class. 
+  std::vector<unsigned> RegLimit; + +public: +  RegReductionPQBase(MachineFunction &mf, +                     bool hasReadyFilter, +                     bool tracksrp, +                     const TargetInstrInfo *tii, +                     const TargetRegisterInfo *tri, +                     const TargetLowering *tli) +    : SchedulingPriorityQueue(hasReadyFilter), +      CurQueueId(0), TracksRegPressure(tracksrp), +      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { +    if (TracksRegPressure) { +      unsigned NumRC = TRI->getNumRegClasses(); +      RegLimit.resize(NumRC); +      RegPressure.resize(NumRC); +      std::fill(RegLimit.begin(), RegLimit.end(), 0); +      std::fill(RegPressure.begin(), RegPressure.end(), 0); +      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), +             E = TRI->regclass_end(); I != E; ++I) +        RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF); +    } +  } + +  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { +    scheduleDAG = scheduleDag; +  } + +  ScheduleHazardRecognizer* getHazardRec() { +    return scheduleDAG->getHazardRec(); +  } + +  void initNodes(std::vector<SUnit> &sunits); + +  void addNode(const SUnit *SU); + +  void updateNode(const SUnit *SU); + +  void releaseState() { +    SUnits = 0; +    SethiUllmanNumbers.clear(); +    std::fill(RegPressure.begin(), RegPressure.end(), 0); +  } + +  unsigned getNodePriority(const SUnit *SU) const; + +  unsigned getNodeOrdering(const SUnit *SU) const { +    return scheduleDAG->DAG->GetOrdering(SU->getNode()); +  } + +  bool empty() const { return Queue.empty(); } + +  void push(SUnit *U) { +    assert(!U->NodeQueueId && "Node in the queue already"); +    U->NodeQueueId = ++CurQueueId; +    Queue.push_back(U); +  } + +  void remove(SUnit *SU) { +    assert(!Queue.empty() && "Queue is empty!"); +    assert(SU->NodeQueueId != 0 && "Not in queue!"); +    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), + 
                                                SU); +    if (I != prior(Queue.end())) +      std::swap(*I, Queue.back()); +    Queue.pop_back(); +    SU->NodeQueueId = 0; +  } + +  bool tracksRegPressure() const { return TracksRegPressure; } + +  void dumpRegPressure() const; + +  bool HighRegPressure(const SUnit *SU) const; + +  bool MayReduceRegPressure(SUnit *SU); + +  void ScheduledNode(SUnit *SU); + +  void UnscheduledNode(SUnit *SU); + +protected: +  bool canClobber(const SUnit *SU, const SUnit *Op); +  void AddPseudoTwoAddrDeps(); +  void PrescheduleNodesWithMultipleUses(); +  void CalculateSethiUllmanNumbers(); +}; + +template<class SF> +class RegReductionPriorityQueue : public RegReductionPQBase { +  static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) { +    std::vector<SUnit *>::iterator Best = Q.begin(); +    for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), +           E = Q.end(); I != E; ++I) +      if (Picker(*Best, *I)) +        Best = I; +    SUnit *V = *Best; +    if (Best != prior(Q.end())) +      std::swap(*Best, Q.back()); +    Q.pop_back(); +    return V; +  } + +  SF Picker; + +public: +  RegReductionPriorityQueue(MachineFunction &mf, +                            bool tracksrp, +                            const TargetInstrInfo *tii, +                            const TargetRegisterInfo *tri, +                            const TargetLowering *tli) +    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli), +      Picker(this) {} + +  bool isBottomUp() const { return SF::IsBottomUp; } + +  bool isReady(SUnit *U) const { +    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); +  } + +  SUnit *pop() { +    if (Queue.empty()) return NULL; + +    SUnit *V = popFromQueue(Queue, Picker); +    V->NodeQueueId = 0; +    return V; +  } + +  void dump(ScheduleDAG *DAG) const { +    // Emulate pop() without clobbering NodeQueueIds. 
+    std::vector<SUnit*> DumpQueue = Queue; +    SF DumpPicker = Picker; +    while (!DumpQueue.empty()) { +      SUnit *SU = popFromQueue(DumpQueue, DumpPicker); +      if (isBottomUp()) +        dbgs() << "Height " << SU->getHeight() << ": "; +      else +        dbgs() << "Depth " << SU->getDepth() << ": "; +      SU->dump(DAG); +    } +  } +}; + +typedef RegReductionPriorityQueue<bu_ls_rr_sort> +BURegReductionPriorityQueue; + +typedef RegReductionPriorityQueue<td_ls_rr_sort> +TDRegReductionPriorityQueue; + +typedef RegReductionPriorityQueue<src_ls_rr_sort> +SrcRegReductionPriorityQueue; + +typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> +HybridBURRPriorityQueue; + +typedef RegReductionPriorityQueue<ilp_ls_rr_sort> +ILPBURRPriorityQueue; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +//           Static Node Priority for Register Pressure Reduction +//===----------------------------------------------------------------------===//  /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.  /// Smaller number is the higher priority. @@ -1045,413 +1563,283 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {    if (SethiUllmanNumber == 0)      SethiUllmanNumber = 1; -   +    return SethiUllmanNumber;  } -namespace { -  template<class SF> -  class RegReductionPriorityQueue : public SchedulingPriorityQueue { -    std::vector<SUnit*> Queue; -    SF Picker; -    unsigned CurQueueId; -    bool TracksRegPressure; - -  protected: -    // SUnits - The SUnits for the current graph. -    std::vector<SUnit> *SUnits; - -    MachineFunction &MF; -    const TargetInstrInfo *TII; -    const TargetRegisterInfo *TRI; -    const TargetLowering *TLI; -    ScheduleDAGRRList *scheduleDAG; - -    // SethiUllmanNumbers - The SethiUllman number for each node. -    std::vector<unsigned> SethiUllmanNumbers; - -    /// RegPressure - Tracking current reg pressure per register class. 
-    /// -    std::vector<unsigned> RegPressure; - -    /// RegLimit - Tracking the number of allocatable registers per register -    /// class. -    std::vector<unsigned> RegLimit; - -  public: -    RegReductionPriorityQueue(MachineFunction &mf, -                              bool tracksrp, -                              const TargetInstrInfo *tii, -                              const TargetRegisterInfo *tri, -                              const TargetLowering *tli) -      : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp), -        MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { -      if (TracksRegPressure) { -        unsigned NumRC = TRI->getNumRegClasses(); -        RegLimit.resize(NumRC); -        RegPressure.resize(NumRC); -        std::fill(RegLimit.begin(), RegLimit.end(), 0); -        std::fill(RegPressure.begin(), RegPressure.end(), 0); -        for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), -               E = TRI->regclass_end(); I != E; ++I) -          RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF); -      } -    } -     -    void initNodes(std::vector<SUnit> &sunits) { -      SUnits = &sunits; -      // Add pseudo dependency edges for two-address nodes. -      AddPseudoTwoAddrDeps(); -      // Reroute edges to nodes with multiple uses. -      PrescheduleNodesWithMultipleUses(); -      // Calculate node priorities. -      CalculateSethiUllmanNumbers(); -    } - -    void addNode(const SUnit *SU) { -      unsigned SUSize = SethiUllmanNumbers.size(); -      if (SUnits->size() > SUSize) -        SethiUllmanNumbers.resize(SUSize*2, 0); -      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); -    } - -    void updateNode(const SUnit *SU) { -      SethiUllmanNumbers[SU->NodeNum] = 0; -      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); -    } +/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all +/// scheduling units. 
+void RegReductionPQBase::CalculateSethiUllmanNumbers() { +  SethiUllmanNumbers.assign(SUnits->size(), 0); -    void releaseState() { -      SUnits = 0; -      SethiUllmanNumbers.clear(); -      std::fill(RegPressure.begin(), RegPressure.end(), 0); -    } +  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) +    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); +} -    unsigned getNodePriority(const SUnit *SU) const { -      assert(SU->NodeNum < SethiUllmanNumbers.size()); -      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; -      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) -        // CopyToReg should be close to its uses to facilitate coalescing and -        // avoid spilling. -        return 0; -      if (Opc == TargetOpcode::EXTRACT_SUBREG || -          Opc == TargetOpcode::SUBREG_TO_REG || -          Opc == TargetOpcode::INSERT_SUBREG) -        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be -        // close to their uses to facilitate coalescing. -        return 0; -      if (SU->NumSuccs == 0 && SU->NumPreds != 0) -        // If SU does not have a register use, i.e. it doesn't produce a value -        // that would be consumed (e.g. store), then it terminates a chain of -        // computation.  Give it a large SethiUllman number so it will be -        // scheduled right before its predecessors that it doesn't lengthen -        // their live ranges. -        return 0xffff; -      if (SU->NumPreds == 0 && SU->NumSuccs != 0) -        // If SU does not have a register def, schedule it close to its uses -        // because it does not lengthen any live ranges. -        return 0; -      return SethiUllmanNumbers[SU->NodeNum]; -    } +void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { +  SUnits = &sunits; +  // Add pseudo dependency edges for two-address nodes. +  AddPseudoTwoAddrDeps(); +  // Reroute edges to nodes with multiple uses. 
+  if (!TracksRegPressure) +    PrescheduleNodesWithMultipleUses(); +  // Calculate node priorities. +  CalculateSethiUllmanNumbers(); +} -    unsigned getNodeOrdering(const SUnit *SU) const { -      return scheduleDAG->DAG->GetOrdering(SU->getNode()); -    } +void RegReductionPQBase::addNode(const SUnit *SU) { +  unsigned SUSize = SethiUllmanNumbers.size(); +  if (SUnits->size() > SUSize) +    SethiUllmanNumbers.resize(SUSize*2, 0); +  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); +} -    bool empty() const { return Queue.empty(); } -     -    void push(SUnit *U) { -      assert(!U->NodeQueueId && "Node in the queue already"); -      U->NodeQueueId = ++CurQueueId; -      Queue.push_back(U); -    } +void RegReductionPQBase::updateNode(const SUnit *SU) { +  SethiUllmanNumbers[SU->NodeNum] = 0; +  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); +} -    SUnit *pop() { -      if (empty()) return NULL; -      std::vector<SUnit *>::iterator Best = Queue.begin(); -      for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), -           E = Queue.end(); I != E; ++I) -        if (Picker(*Best, *I)) -          Best = I; -      SUnit *V = *Best; -      if (Best != prior(Queue.end())) -        std::swap(*Best, Queue.back()); -      Queue.pop_back(); -      V->NodeQueueId = 0; -      return V; -    } +// Lower priority means schedule further down. For bottom-up scheduling, lower +// priority SUs are scheduled before higher priority SUs. +unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { +  assert(SU->NodeNum < SethiUllmanNumbers.size()); +  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; +  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) +    // CopyToReg should be close to its uses to facilitate coalescing and +    // avoid spilling. 
+    return 0; +  if (Opc == TargetOpcode::EXTRACT_SUBREG || +      Opc == TargetOpcode::SUBREG_TO_REG || +      Opc == TargetOpcode::INSERT_SUBREG) +    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be +    // close to their uses to facilitate coalescing. +    return 0; +  if (SU->NumSuccs == 0 && SU->NumPreds != 0) +    // If SU does not have a register use, i.e. it doesn't produce a value +    // that would be consumed (e.g. store), then it terminates a chain of +    // computation.  Give it a large SethiUllman number so it will be +    // scheduled right before its predecessors that it doesn't lengthen +    // their live ranges. +    return 0xffff; +  if (SU->NumPreds == 0 && SU->NumSuccs != 0) +    // If SU does not have a register def, schedule it close to its uses +    // because it does not lengthen any live ranges. +    return 0; +  return SethiUllmanNumbers[SU->NodeNum]; +} -    void remove(SUnit *SU) { -      assert(!Queue.empty() && "Queue is empty!"); -      assert(SU->NodeQueueId != 0 && "Not in queue!"); -      std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), -                                                   SU); -      if (I != prior(Queue.end())) -        std::swap(*I, Queue.back()); -      Queue.pop_back(); -      SU->NodeQueueId = 0; -    } +//===----------------------------------------------------------------------===// +//                     Register Pressure Tracking +//===----------------------------------------------------------------------===// -    bool HighRegPressure(const SUnit *SU) const { -      if (!TLI) -        return false; +void RegReductionPQBase::dumpRegPressure() const { +  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), +         E = TRI->regclass_end(); I != E; ++I) { +    const TargetRegisterClass *RC = *I; +    unsigned Id = RC->getID(); +    unsigned RP = RegPressure[Id]; +    if (!RP) continue; +    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << 
RegLimit[Id] +          << '\n'); +  } +} -      for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); -           I != E; ++I) { -        if (I->isCtrl()) -          continue; -        SUnit *PredSU = I->getSUnit(); -        const SDNode *PN = PredSU->getNode(); -        if (!PN->isMachineOpcode()) { -          if (PN->getOpcode() == ISD::CopyFromReg) { -            EVT VT = PN->getValueType(0); -            unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); -            unsigned Cost = TLI->getRepRegClassCostFor(VT); -            if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) -              return true; -          } -          continue; -        } -        unsigned POpc = PN->getMachineOpcode(); -        if (POpc == TargetOpcode::IMPLICIT_DEF) -          continue; -        if (POpc == TargetOpcode::EXTRACT_SUBREG) { -          EVT VT = PN->getOperand(0).getValueType(); -          unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); -          unsigned Cost = TLI->getRepRegClassCostFor(VT); -          // Check if this increases register pressure of the specific register -          // class to the point where it would cause spills. -          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) -            return true; -          continue;             -        } else if (POpc == TargetOpcode::INSERT_SUBREG || -                   POpc == TargetOpcode::SUBREG_TO_REG) { -          EVT VT = PN->getValueType(0); -          unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); -          unsigned Cost = TLI->getRepRegClassCostFor(VT); -          // Check if this increases register pressure of the specific register -          // class to the point where it would cause spills. 
-          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) -            return true; -          continue; -        } -        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); -        for (unsigned i = 0; i != NumDefs; ++i) { -          EVT VT = PN->getValueType(i); -          unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); -          if (RegPressure[RCId] >= RegLimit[RCId]) -            return true; // Reg pressure already high. -          unsigned Cost = TLI->getRepRegClassCostFor(VT); -          if (!PN->hasAnyUseOfValue(i)) -            continue; -          // Check if this increases register pressure of the specific register -          // class to the point where it would cause spills. -          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) -            return true; -        } -      } +bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { +  if (!TLI) +    return false; -      return false; +  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); +       I != E; ++I) { +    if (I->isCtrl()) +      continue; +    SUnit *PredSU = I->getSUnit(); +    // NumRegDefsLeft is zero when enough uses of this node have been scheduled +    // to cover the number of registers defined (they are all live). 
+    if (PredSU->NumRegDefsLeft == 0) { +      continue; +    } +    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); +         RegDefPos.IsValid(); RegDefPos.Advance()) { +      EVT VT = RegDefPos.GetValue(); +      unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); +      unsigned Cost = TLI->getRepRegClassCostFor(VT); +      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) +        return true;      } +  } +  return false; +} -    void ScheduledNode(SUnit *SU) { -      if (!TracksRegPressure) -        return; - -      const SDNode *N = SU->getNode(); -      if (!N->isMachineOpcode()) { -        if (N->getOpcode() != ISD::CopyToReg) -          return; -      } else { -        unsigned Opc = N->getMachineOpcode(); -        if (Opc == TargetOpcode::EXTRACT_SUBREG || -            Opc == TargetOpcode::INSERT_SUBREG || -            Opc == TargetOpcode::SUBREG_TO_REG || -            Opc == TargetOpcode::REG_SEQUENCE || -            Opc == TargetOpcode::IMPLICIT_DEF) -          return; -      } +bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) { +  const SDNode *N = SU->getNode(); -      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); -           I != E; ++I) { -        if (I->isCtrl()) -          continue; -        SUnit *PredSU = I->getSUnit(); -        if (PredSU->NumSuccsLeft != PredSU->NumSuccs) -          continue; -        const SDNode *PN = PredSU->getNode(); -        if (!PN->isMachineOpcode()) { -          if (PN->getOpcode() == ISD::CopyFromReg) { -            EVT VT = PN->getValueType(0); -            unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); -            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); -          } -          continue; -        } -        unsigned POpc = PN->getMachineOpcode(); -        if (POpc == TargetOpcode::IMPLICIT_DEF) -          continue; -        if (POpc == TargetOpcode::EXTRACT_SUBREG) { -          EVT VT = PN->getOperand(0).getValueType(); -          unsigned 
RCId = TLI->getRepRegClassFor(VT)->getID(); -          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); -          continue;             -        } else if (POpc == TargetOpcode::INSERT_SUBREG || -                   POpc == TargetOpcode::SUBREG_TO_REG) { -          EVT VT = PN->getValueType(0); -          unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); -          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); -          continue; -        } -        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); -        for (unsigned i = 0; i != NumDefs; ++i) { -          EVT VT = PN->getValueType(i); -          if (!PN->hasAnyUseOfValue(i)) -            continue; -          unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); -          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); -        } -      } +  if (!N->isMachineOpcode() || !SU->NumSuccs) +    return false; -      // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() -      // may transfer data dependencies to CopyToReg. -      if (SU->NumSuccs && N->isMachineOpcode()) { -        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); -        for (unsigned i = 0; i != NumDefs; ++i) { -          EVT VT = N->getValueType(i); -          if (!N->hasAnyUseOfValue(i)) -            continue; -          unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); -          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) -            // Register pressure tracking is imprecise. This can happen. 
-            RegPressure[RCId] = 0; -          else -            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); -        } -      } +  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); +  for (unsigned i = 0; i != NumDefs; ++i) { +    EVT VT = N->getValueType(i); +    if (!N->hasAnyUseOfValue(i)) +      continue; +    unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); +    if (RegPressure[RCId] >= RegLimit[RCId]) +      return true; +  } +  return false; +} + +void RegReductionPQBase::ScheduledNode(SUnit *SU) { +  if (!TracksRegPressure) +    return; -      dumpRegPressure(); +  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); +       I != E; ++I) { +    if (I->isCtrl()) +      continue; +    SUnit *PredSU = I->getSUnit(); +    // NumRegDefsLeft is zero when enough uses of this node have been scheduled +    // to cover the number of registers defined (they are all live). +    if (PredSU->NumRegDefsLeft == 0) { +      continue; +    } +    // FIXME: The ScheduleDAG currently loses information about which of a +    // node's values is consumed by each dependence. Consequently, if the node +    // defines multiple register classes, we don't know which to pressurize +    // here. Instead the following loop consumes the register defs in an +    // arbitrary order. At least it handles the common case of clustered loads +    // to the same class. For precise liveness, each SDep needs to indicate the +    // result number. But that tightly couples the ScheduleDAG with the +    // SelectionDAG making updates tricky. A simpler hack would be to attach a +    // value type or register class to SDep. +    // +    // The most important aspect of register tracking is balancing the increase +    // here with the reduction further below. Note that this SU may use multiple +    // defs in PredSU. 
The can't be determined here, but we've already +    // compensated by reducing NumRegDefsLeft in PredSU during +    // ScheduleDAGSDNodes::AddSchedEdges. +    --PredSU->NumRegDefsLeft; +    unsigned SkipRegDefs = PredSU->NumRegDefsLeft; +    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); +         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { +      if (SkipRegDefs) +        continue; +      EVT VT = RegDefPos.GetValue(); +      unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); +      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); +      break;      } +  } -    void UnscheduledNode(SUnit *SU) { -      if (!TracksRegPressure) -        return; - -      const SDNode *N = SU->getNode(); -      if (!N->isMachineOpcode()) { -        if (N->getOpcode() != ISD::CopyToReg) -          return; -      } else { -        unsigned Opc = N->getMachineOpcode(); -        if (Opc == TargetOpcode::EXTRACT_SUBREG || -            Opc == TargetOpcode::INSERT_SUBREG || -            Opc == TargetOpcode::SUBREG_TO_REG || -            Opc == TargetOpcode::REG_SEQUENCE || -            Opc == TargetOpcode::IMPLICIT_DEF) -          return; -      } +  // We should have this assert, but there may be dead SDNodes that never +  // materialize as SUnits, so they don't appear to generate liveness. +  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses"); +  int SkipRegDefs = (int)SU->NumRegDefsLeft; +  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG); +       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { +    if (SkipRegDefs > 0) +      continue; +    EVT VT = RegDefPos.GetValue(); +    unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); +    if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) { +      // Register pressure tracking is imprecise. This can happen. But we try +      // hard not to let it happen because it likely results in poor scheduling. 
+      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n"); +      RegPressure[RCId] = 0; +    } +    else { +      RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); +    } +  } +  dumpRegPressure(); +} -      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); -           I != E; ++I) { -        if (I->isCtrl()) -          continue; -        SUnit *PredSU = I->getSUnit(); -        if (PredSU->NumSuccsLeft != PredSU->NumSuccs) -          continue; -        const SDNode *PN = PredSU->getNode(); -        if (!PN->isMachineOpcode()) { -          if (PN->getOpcode() == ISD::CopyFromReg) { -            EVT VT = PN->getValueType(0); -            unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); -            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); -          } -          continue; -        } -        unsigned POpc = PN->getMachineOpcode(); -        if (POpc == TargetOpcode::IMPLICIT_DEF) -          continue; -        if (POpc == TargetOpcode::EXTRACT_SUBREG) { -          EVT VT = PN->getOperand(0).getValueType(); -          unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); -          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); -          continue;             -        } else if (POpc == TargetOpcode::INSERT_SUBREG || -                   POpc == TargetOpcode::SUBREG_TO_REG) { -          EVT VT = PN->getValueType(0); -          unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); -          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); -          continue; -        } -        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); -        for (unsigned i = 0; i != NumDefs; ++i) { -          EVT VT = PN->getValueType(i); -          if (!PN->hasAnyUseOfValue(i)) -            continue; -          unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); -          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) -            // Register pressure tracking is imprecise. This can happen. 
-            RegPressure[RCId] = 0; -          else -            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); -        } -      } +void RegReductionPQBase::UnscheduledNode(SUnit *SU) { +  if (!TracksRegPressure) +    return; + +  const SDNode *N = SU->getNode(); +  if (!N->isMachineOpcode()) { +    if (N->getOpcode() != ISD::CopyToReg) +      return; +  } else { +    unsigned Opc = N->getMachineOpcode(); +    if (Opc == TargetOpcode::EXTRACT_SUBREG || +        Opc == TargetOpcode::INSERT_SUBREG || +        Opc == TargetOpcode::SUBREG_TO_REG || +        Opc == TargetOpcode::REG_SEQUENCE || +        Opc == TargetOpcode::IMPLICIT_DEF) +      return; +  } -      // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() -      // may transfer data dependencies to CopyToReg. -      if (SU->NumSuccs && N->isMachineOpcode()) { -        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); -        for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { -          EVT VT = N->getValueType(i); -          if (VT == MVT::Flag || VT == MVT::Other) -            continue; -          if (!N->hasAnyUseOfValue(i)) -            continue; -          unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); -          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); -        } +  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); +       I != E; ++I) { +    if (I->isCtrl()) +      continue; +    SUnit *PredSU = I->getSUnit(); +    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only +    // counts data deps. 
+    if (PredSU->NumSuccsLeft != PredSU->Succs.size()) +      continue; +    const SDNode *PN = PredSU->getNode(); +    if (!PN->isMachineOpcode()) { +      if (PN->getOpcode() == ISD::CopyFromReg) { +        EVT VT = PN->getValueType(0); +        unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); +        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);        } - -      dumpRegPressure(); +      continue;      } - -    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {  -      scheduleDAG = scheduleDag;  +    unsigned POpc = PN->getMachineOpcode(); +    if (POpc == TargetOpcode::IMPLICIT_DEF) +      continue; +    if (POpc == TargetOpcode::EXTRACT_SUBREG) { +      EVT VT = PN->getOperand(0).getValueType(); +      unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); +      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); +      continue; +    } else if (POpc == TargetOpcode::INSERT_SUBREG || +               POpc == TargetOpcode::SUBREG_TO_REG) { +      EVT VT = PN->getValueType(0); +      unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); +      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); +      continue;      } - -    void dumpRegPressure() const { -      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), -             E = TRI->regclass_end(); I != E; ++I) { -        const TargetRegisterClass *RC = *I; -        unsigned Id = RC->getID(); -        unsigned RP = RegPressure[Id]; -        if (!RP) continue; -        DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] -              << '\n'); -      } +    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); +    for (unsigned i = 0; i != NumDefs; ++i) { +      EVT VT = PN->getValueType(i); +      if (!PN->hasAnyUseOfValue(i)) +        continue; +      unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); +      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) +        // Register pressure tracking is imprecise. This can happen. 
+        RegPressure[RCId] = 0; +      else +        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);      } +  } -  protected: -    bool canClobber(const SUnit *SU, const SUnit *Op); -    void AddPseudoTwoAddrDeps(); -    void PrescheduleNodesWithMultipleUses(); -    void CalculateSethiUllmanNumbers(); -  }; - -  typedef RegReductionPriorityQueue<bu_ls_rr_sort> -    BURegReductionPriorityQueue; - -  typedef RegReductionPriorityQueue<td_ls_rr_sort> -    TDRegReductionPriorityQueue; - -  typedef RegReductionPriorityQueue<src_ls_rr_sort> -    SrcRegReductionPriorityQueue; - -  typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> -    HybridBURRPriorityQueue; +  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() +  // may transfer data dependencies to CopyToReg. +  if (SU->NumSuccs && N->isMachineOpcode()) { +    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); +    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { +      EVT VT = N->getValueType(i); +      if (VT == MVT::Glue || VT == MVT::Other) +        continue; +      if (!N->hasAnyUseOfValue(i)) +        continue; +      unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); +      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); +    } +  } -  typedef RegReductionPriorityQueue<ilp_ls_rr_sort> -    ILPBURRPriorityQueue; +  dumpRegPressure();  } +//===----------------------------------------------------------------------===// +//           Dynamic Node Priority for Register Pressure Reduction +//===----------------------------------------------------------------------===// +  /// closestSucc - Returns the scheduled cycle of the successor which is  /// closest to the current cycle.  
static unsigned closestSucc(const SUnit *SU) { @@ -1483,9 +1871,123 @@ static unsigned calcMaxScratches(const SUnit *SU) {    return Scratches;  } -template <typename RRSort> -static bool BURRSort(const SUnit *left, const SUnit *right, -                     const RegReductionPriorityQueue<RRSort> *SPQ) { +/// hasOnlyLiveOutUse - Return true if SU has a single value successor that is a +/// CopyToReg to a virtual register. This SU def is probably a liveout and +/// it has no other use. It should be scheduled closer to the terminator. +static bool hasOnlyLiveOutUses(const SUnit *SU) { +  bool RetVal = false; +  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); +       I != E; ++I) { +    if (I->isCtrl()) continue; +    const SUnit *SuccSU = I->getSUnit(); +    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { +      unsigned Reg = +        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); +      if (TargetRegisterInfo::isVirtualRegister(Reg)) { +        RetVal = true; +        continue; +      } +    } +    return false; +  } +  return RetVal; +} + +/// UnitsSharePred - Return true if the two scheduling units share a common +/// data predecessor. +static bool UnitsSharePred(const SUnit *left, const SUnit *right) { +  SmallSet<const SUnit*, 4> Preds; +  for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end(); +       I != E; ++I) { +    if (I->isCtrl()) continue;  // ignore chain preds +    Preds.insert(I->getSUnit()); +  } +  for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end(); +       I != E; ++I) { +    if (I->isCtrl()) continue;  // ignore chain preds +    if (Preds.count(I->getSUnit())) +      return true; +  } +  return false; +} + +// Check for either a dependence (latency) or resource (hazard) stall. +// +// Note: The ScheduleHazardRecognizer interface requires a non-const SU. 
+static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { +  if ((int)SPQ->getCurCycle() < Height) return true; +  if (SPQ->getHazardRec()->getHazardType(SU, 0) +      != ScheduleHazardRecognizer::NoHazard) +    return true; +  return false; +} + +// Return -1 if left has higher priority, 1 if right has higher priority. +// Return 0 if latency-based priority is equivalent. +static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, +                            RegReductionPQBase *SPQ) { +  // If the two nodes share an operand and one of them has a single +  // use that is a live out copy, favor the one that is live out. Otherwise +  // it will be difficult to eliminate the copy if the instruction is a +  // loop induction variable update. e.g. +  // BB: +  // sub r1, r3, #1 +  // str r0, [r2, r3] +  // mov r3, r1 +  // cmp +  // bne BB +  bool SharePred = UnitsSharePred(left, right); +  // FIXME: Only adjust if BB is a loop back edge. +  // FIXME: What's the cost of a copy? +  int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0; +  int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0; +  int LHeight = (int)left->getHeight() - LBonus; +  int RHeight = (int)right->getHeight() - RBonus; + +  bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) && +    BUHasStall(left, LHeight, SPQ); +  bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) && +    BUHasStall(right, RHeight, SPQ); + +  // If scheduling one of the node will cause a pipeline stall, delay it. +  // If scheduling either one of the node will cause a pipeline stall, sort +  // them according to their height. +  if (LStall) { +    if (!RStall) +      return 1; +    if (LHeight != RHeight) +      return LHeight > RHeight ? 1 : -1; +  } else if (RStall) +    return -1; + +  // If either node is scheduling for latency, sort them by height/depth +  // and latency. 
+  if (!checkPref || (left->SchedulingPref == Sched::Latency || +                     right->SchedulingPref == Sched::Latency)) { +    if (DisableSchedCycles) { +      if (LHeight != RHeight) +        return LHeight > RHeight ? 1 : -1; +    } +    else { +      // If neither instruction stalls (!LStall && !RStall) then +      // it's height is already covered so only its depth matters. We also reach +      // this if both stall but have the same height. +      unsigned LDepth = left->getDepth(); +      unsigned RDepth = right->getDepth(); +      if (LDepth != RDepth) { +        DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum +              << ") depth " << LDepth << " vs SU (" << right->NodeNum +              << ") depth " << RDepth << "\n"); +        return LDepth < RDepth ? 1 : -1; +      } +    } +    if (left->Latency != right->Latency) +      return left->Latency > right->Latency ? 1 : -1; +  } +  return 0; +} + +static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {    unsigned LPriority = SPQ->getNodePriority(left);    unsigned RPriority = SPQ->getNodePriority(right);    if (LPriority != RPriority) @@ -1519,24 +2021,31 @@ static bool BURRSort(const SUnit *left, const SUnit *right,    if (LScratch != RScratch)      return LScratch > RScratch; -  if (left->getHeight() != right->getHeight()) -    return left->getHeight() > right->getHeight(); -   -  if (left->getDepth() != right->getDepth()) -    return left->getDepth() < right->getDepth(); +  if (!DisableSchedCycles) { +    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); +    if (result != 0) +      return result > 0; +  } +  else { +    if (left->getHeight() != right->getHeight()) +      return left->getHeight() > right->getHeight(); -  assert(left->NodeQueueId && right->NodeQueueId &&  +    if (left->getDepth() != right->getDepth()) +      return left->getDepth() < right->getDepth(); +  } + +  assert(left->NodeQueueId && right->NodeQueueId &&           
"NodeQueueId cannot be zero");    return (left->NodeQueueId > right->NodeQueueId);  }  // Bottom up -bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { +bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {    return BURRSort(left, right, SPQ);  }  // Source order, otherwise bottom up. -bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { +bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {    unsigned LOrder = SPQ->getNodeOrdering(left);    unsigned ROrder = SPQ->getNodeOrdering(right); @@ -1548,49 +2057,69 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {    return BURRSort(left, right, SPQ);  } -bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ +// If the time between now and when the instruction will be ready can cover +// the spill code, then avoid adding it to the ready queue. This gives long +// stalls highest priority and allows hoisting across calls. It should also +// speed up processing the available queue. +bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { +  static const unsigned ReadyDelay = 3; + +  if (SPQ->MayReduceRegPressure(SU)) return true; + +  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false; + +  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) +      != ScheduleHazardRecognizer::NoHazard) +    return false; + +  return true; +} + +// Return true if right should be scheduled with higher priority than left. +bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { +  if (left->isCall || right->isCall) +    // No way to compute latency of calls. +    return BURRSort(left, right, SPQ); +    bool LHigh = SPQ->HighRegPressure(left);    bool RHigh = SPQ->HighRegPressure(right);    // Avoid causing spills. If register pressure is high, schedule for    // register pressure reduction. 
-  if (LHigh && !RHigh) +  if (LHigh && !RHigh) { +    DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU(" +          << right->NodeNum << ")\n");      return true; -  else if (!LHigh && RHigh) +  } +  else if (!LHigh && RHigh) { +    DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU(" +          << left->NodeNum << ")\n");      return false; +  }    else if (!LHigh && !RHigh) { -    // Low register pressure situation, schedule for latency if possible. -    bool LStall = left->SchedulingPref == Sched::Latency && -      SPQ->getCurCycle() < left->getHeight(); -    bool RStall = right->SchedulingPref == Sched::Latency && -      SPQ->getCurCycle() < right->getHeight(); -    // If scheduling one of the node will cause a pipeline stall, delay it. -    // If scheduling either one of the node will cause a pipeline stall, sort -    // them according to their height. -    // If neither will cause a pipeline stall, try to reduce register pressure. -    if (LStall) { -      if (!RStall) -        return true; -      if (left->getHeight() != right->getHeight()) -        return left->getHeight() > right->getHeight(); -    } else if (RStall) -      return false; - -    // If either node is scheduling for latency, sort them by height and latency -    // first. -    if (left->SchedulingPref == Sched::Latency || -        right->SchedulingPref == Sched::Latency) { -      if (left->getHeight() != right->getHeight()) -        return left->getHeight() > right->getHeight(); -      if (left->Latency != right->Latency) -        return left->Latency > right->Latency; -    } +    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); +    if (result != 0) +      return result > 0;    } -    return BURRSort(left, right, SPQ);  } -bool ilp_ls_rr_sort::operator()(const SUnit *left, -                                const SUnit *right) const { +// Schedule as many instructions in each cycle as possible. 
So don't make an +// instruction available unless it is ready in the current cycle. +bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { +  if (SU->getHeight() > CurCycle) return false; + +  if (SPQ->getHazardRec()->getHazardType(SU, 0) +      != ScheduleHazardRecognizer::NoHazard) +    return false; + +  return SU->getHeight() <= CurCycle; +} + +bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { +  if (left->isCall || right->isCall) +    // No way to compute latency of calls. +    return BURRSort(left, right, SPQ); +    bool LHigh = SPQ->HighRegPressure(left);    bool RHigh = SPQ->HighRegPressure(right);    // Avoid causing spills. If register pressure is high, schedule for @@ -1611,9 +2140,11 @@ bool ilp_ls_rr_sort::operator()(const SUnit *left,    return BURRSort(left, right, SPQ);  } -template<class SF> -bool -RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { +//===----------------------------------------------------------------------===// +//                    Preschedule for Register Pressure +//===----------------------------------------------------------------------===// + +bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {    if (SU->isTwoAddress) {      unsigned Opc = SU->getNode()->getMachineOpcode();      const TargetInstrDesc &TID = TII->get(Opc); @@ -1631,19 +2162,6 @@ RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {    return false;  } -/// hasCopyToRegUse - Return true if SU has a value successor that is a -/// CopyToReg node. 
-static bool hasCopyToRegUse(const SUnit *SU) { -  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); -       I != E; ++I) { -    if (I->isCtrl()) continue; -    const SUnit *SuccSU = I->getSUnit(); -    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) -      return true; -  } -  return false; -} -  /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's  /// physical register defs.  static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, @@ -1654,7 +2172,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,    const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();    assert(ImpDefs && "Caller should check hasPhysRegDefs");    for (const SDNode *SUNode = SU->getNode(); SUNode; -       SUNode = SUNode->getFlaggedNode()) { +       SUNode = SUNode->getGluedNode()) {      if (!SUNode->isMachineOpcode())        continue;      const unsigned *SUImpDefs = @@ -1663,7 +2181,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,        return false;      for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {        EVT VT = N->getValueType(i); -      if (VT == MVT::Flag || VT == MVT::Other) +      if (VT == MVT::Glue || VT == MVT::Other)          continue;        if (!N->hasAnyUseOfValue(i))          continue; @@ -1709,8 +2227,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,  /// after N, which shortens the U->N live range, reducing  /// register pressure.  /// -template<class SF> -void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { +void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {    // Visit all the nodes in topological order, working top-down.    
for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {      SUnit *SU = &(*SUnits)[i]; @@ -1748,7 +2265,7 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {      if (PredSU->NumSuccs == 1)        continue;      // Avoid prescheduling to copies from virtual registers, which don't behave -    // like other nodes from the perspective of scheduling // heuristics. +    // like other nodes from the perspective of scheduling heuristics.      if (SDNode *N = SU->getNode())        if (N->getOpcode() == ISD::CopyFromReg &&            TargetRegisterInfo::isVirtualRegister @@ -1802,17 +2319,17 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {  /// one that has a CopyToReg use (more likely to be a loop induction update).  /// If both are two-address, but one is commutable while the other is not  /// commutable, favor the one that's not commutable. -template<class SF> -void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { +void RegReductionPQBase::AddPseudoTwoAddrDeps() {    for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {      SUnit *SU = &(*SUnits)[i];      if (!SU->isTwoAddress)        continue;      SDNode *Node = SU->getNode(); -    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) +    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())        continue; +    bool isLiveOut = hasOnlyLiveOutUses(SU);      unsigned Opc = Node->getMachineOpcode();      const TargetInstrDesc &TID = TII->get(Opc);      unsigned NumRes = TID.getNumDefs(); @@ -1862,7 +2379,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {              SuccOpc == TargetOpcode::SUBREG_TO_REG)            continue;          if ((!canClobber(SuccSU, DUSU) || -             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || +             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||               (!SU->isCommutable && SuccSU->isCommutable)) &&              !scheduleDAG->IsReachable(SuccSU, SU)) 
{            DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #" @@ -1877,20 +2394,10 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {    }  } -/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all -/// scheduling units. -template<class SF> -void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { -  SethiUllmanNumbers.assign(SUnits->size(), 0); -   -  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) -    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); -} -  /// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled  /// predecessors of the successors of the SUnit SU. Stop when the provided  /// limit is exceeded. -static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,  +static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,                                                      unsigned Limit) {    unsigned Sum = 0;    for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); @@ -1942,7 +2449,7 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {    if (left->NumSuccsLeft != right->NumSuccsLeft)      return left->NumSuccsLeft > right->NumSuccsLeft; -  assert(left->NodeQueueId && right->NodeQueueId &&  +  assert(left->NodeQueueId && right->NodeQueueId &&           "NodeQueueId cannot be zero");    return (left->NodeQueueId > right->NodeQueueId);  } @@ -1952,68 +2459,74 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {  //===----------------------------------------------------------------------===//  llvm::ScheduleDAGSDNodes * -llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, +                                 CodeGenOpt::Level OptLevel) {    const TargetMachine &TM = IS->TM;    const TargetInstrInfo *TII = TM.getInstrInfo();    const TargetRegisterInfo *TRI = TM.getRegisterInfo(); -   +    
BURegReductionPriorityQueue *PQ =      new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); -  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); +  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);    PQ->setScheduleDAG(SD); -  return SD;   +  return SD;  }  llvm::ScheduleDAGSDNodes * -llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, +                                 CodeGenOpt::Level OptLevel) {    const TargetMachine &TM = IS->TM;    const TargetInstrInfo *TII = TM.getInstrInfo();    const TargetRegisterInfo *TRI = TM.getRegisterInfo(); -   +    TDRegReductionPriorityQueue *PQ =      new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); -  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ); +  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);    PQ->setScheduleDAG(SD);    return SD;  }  llvm::ScheduleDAGSDNodes * -llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, +                                   CodeGenOpt::Level OptLevel) {    const TargetMachine &TM = IS->TM;    const TargetInstrInfo *TII = TM.getInstrInfo();    const TargetRegisterInfo *TRI = TM.getRegisterInfo(); -   +    SrcRegReductionPriorityQueue *PQ =      new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); -  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); +  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);    PQ->setScheduleDAG(SD); -  return SD;   +  return SD;  }  llvm::ScheduleDAGSDNodes * -llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, +                                   CodeGenOpt::Level OptLevel) {    const TargetMachine &TM = IS->TM;    const TargetInstrInfo *TII 
= TM.getInstrInfo();    const TargetRegisterInfo *TRI = TM.getRegisterInfo();    const TargetLowering *TLI = &IS->getTargetLowering(); -   +    HybridBURRPriorityQueue *PQ =      new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); -  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); + +  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);    PQ->setScheduleDAG(SD); -  return SD;   +  return SD;  }  llvm::ScheduleDAGSDNodes * -llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createILPListDAGScheduler(SelectionDAGISel *IS, +                                CodeGenOpt::Level OptLevel) {    const TargetMachine &TM = IS->TM;    const TargetInstrInfo *TII = TM.getInstrInfo();    const TargetRegisterInfo *TRI = TM.getRegisterInfo();    const TargetLowering *TLI = &IS->getTargetLowering(); -   +    ILPBURRPriorityQueue *PQ =      new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); -  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); +  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);    PQ->setScheduleDAG(SD); -  return SD;   +  return SD;  } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index f1bf82ab145a..477c1ffe65d3 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -34,8 +34,8 @@ using namespace llvm;  STATISTIC(LoadsClustered, "Number of loads clustered together");  ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) -  : ScheduleDAG(mf) { -} +  : ScheduleDAG(mf), +    InstrItins(mf.getTarget().getInstrItineraryData()) {}  /// Run - perform scheduling.  
/// @@ -72,6 +72,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {    SUnit *SU = NewSUnit(Old->getNode());    SU->OrigNode = Old->OrigNode;    SU->Latency = Old->Latency; +  SU->isCall = Old->isCall;    SU->isTwoAddress = Old->isTwoAddress;    SU->isCommutable = Old->isCommutable;    SU->hasPhysRegDefs = Old->hasPhysRegDefs; @@ -85,7 +86,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {  /// a specified operand is a physical register dependency. If so, returns the  /// register and the cost of copying the register.  static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, -                                      const TargetRegisterInfo *TRI,  +                                      const TargetRegisterInfo *TRI,                                        const TargetInstrInfo *TII,                                        unsigned &PhysReg, int &Cost) {    if (Op != 2 || User->getOpcode() != ISD::CopyToReg) @@ -108,29 +109,28 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,    }  } -static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag, -                     SelectionDAG *DAG) { +static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {    SmallVector<EVT, 4> VTs; -  SDNode *FlagDestNode = Flag.getNode(); +  SDNode *GlueDestNode = Glue.getNode(); -  // Don't add a flag from a node to itself. -  if (FlagDestNode == N) return; +  // Don't add glue from a node to itself. +  if (GlueDestNode == N) return; -  // Don't add a flag to something which already has a flag. -  if (N->getValueType(N->getNumValues() - 1) == MVT::Flag) return; +  // Don't add glue to something which already has glue. 
+  if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return;    for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)      VTs.push_back(N->getValueType(I)); -  if (AddFlag) -    VTs.push_back(MVT::Flag); +  if (AddGlue) +    VTs.push_back(MVT::Glue);    SmallVector<SDValue, 4> Ops;    for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)      Ops.push_back(N->getOperand(I)); -  if (FlagDestNode) -    Ops.push_back(Flag); +  if (GlueDestNode) +    Ops.push_back(Glue);    SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());    MachineSDNode::mmo_iterator Begin = 0, End = 0; @@ -149,9 +149,9 @@ static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,      MN->setMemRefs(Begin, End);  } -/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them. +/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.  /// This function finds loads of the same base and different offsets. If the -/// offsets are not far apart (target specific), it add MVT::Flag inputs and +/// offsets are not far apart (target specific), it add MVT::Glue inputs and  /// outputs to ensure they are scheduled together and in order. This  /// optimization may benefit some targets by improving cache locality.  void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { @@ -213,20 +213,20 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {    if (NumLoads == 0)      return; -  // Cluster loads by adding MVT::Flag outputs and inputs. This also +  // Cluster loads by adding MVT::Glue outputs and inputs. This also    // ensure they are scheduled in order of increasing addresses.    
SDNode *Lead = Loads[0]; -  AddFlags(Lead, SDValue(0, 0), true, DAG); +  AddGlue(Lead, SDValue(0, 0), true, DAG); -  SDValue InFlag = SDValue(Lead, Lead->getNumValues() - 1); +  SDValue InGlue = SDValue(Lead, Lead->getNumValues() - 1);    for (unsigned I = 1, E = Loads.size(); I != E; ++I) { -    bool OutFlag = I < E - 1; +    bool OutGlue = I < E - 1;      SDNode *Load = Loads[I]; -    AddFlags(Load, InFlag, OutFlag, DAG); +    AddGlue(Load, InGlue, OutGlue, DAG); -    if (OutFlag) -      InFlag = SDValue(Load, Load->getNumValues() - 1); +    if (OutGlue) +      InGlue = SDValue(Load, Load->getNumValues() - 1);      ++LoadsClustered;    } @@ -266,68 +266,75 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {    // FIXME: Multiply by 2 because we may clone nodes during scheduling.    // This is a temporary workaround.    SUnits.reserve(NumNodes * 2); -   +    // Add all nodes in depth first order.    SmallVector<SDNode*, 64> Worklist;    SmallPtrSet<SDNode*, 64> Visited;    Worklist.push_back(DAG->getRoot().getNode());    Visited.insert(DAG->getRoot().getNode()); -   +    while (!Worklist.empty()) {      SDNode *NI = Worklist.pop_back_val(); -     +      // Add all operands to the worklist unless they've already been added.      for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)        if (Visited.insert(NI->getOperand(i).getNode()))          Worklist.push_back(NI->getOperand(i).getNode()); -   +      if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.        continue; -     +      // If this node has already been processed, stop now.      if (NI->getNodeId() != -1) continue; -     +      SUnit *NodeSUnit = NewSUnit(NI); -     -    // See if anything is flagged to this node, if so, add them to flagged -    // nodes.  Nodes can have at most one flag input and one flag output.  Flags -    // are required to be the last operand and result of a node. -     -    // Scan up to find flagged preds. 
+ +    // See if anything is glued to this node, if so, add them to glued +    // nodes.  Nodes can have at most one glue input and one glue output.  Glue +    // is required to be the last operand and result of a node. + +    // Scan up to find glued preds.      SDNode *N = NI;      while (N->getNumOperands() && -           N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) { +           N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {        N = N->getOperand(N->getNumOperands()-1).getNode();        assert(N->getNodeId() == -1 && "Node already inserted!");        N->setNodeId(NodeSUnit->NodeNum); +      if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) +        NodeSUnit->isCall = true;      } -     -    // Scan down to find any flagged succs. + +    // Scan down to find any glued succs.      N = NI; -    while (N->getValueType(N->getNumValues()-1) == MVT::Flag) { -      SDValue FlagVal(N, N->getNumValues()-1); -       -      // There are either zero or one users of the Flag result. -      bool HasFlagUse = false; -      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();  +    while (N->getValueType(N->getNumValues()-1) == MVT::Glue) { +      SDValue GlueVal(N, N->getNumValues()-1); + +      // There are either zero or one users of the Glue result. 
+      bool HasGlueUse = false; +      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();             UI != E; ++UI) -        if (FlagVal.isOperandOf(*UI)) { -          HasFlagUse = true; +        if (GlueVal.isOperandOf(*UI)) { +          HasGlueUse = true;            assert(N->getNodeId() == -1 && "Node already inserted!");            N->setNodeId(NodeSUnit->NodeNum);            N = *UI; +          if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) +            NodeSUnit->isCall = true;            break;          } -      if (!HasFlagUse) break; +      if (!HasGlueUse) break;      } -     -    // If there are flag operands involved, N is now the bottom-most node -    // of the sequence of nodes that are flagged together. + +    // If there are glue operands involved, N is now the bottom-most node +    // of the sequence of nodes that are glued together.      // Update the SUnit.      NodeSUnit->setNode(N);      assert(N->getNodeId() == -1 && "Node already inserted!");      N->setNodeId(NodeSUnit->NodeNum); +    // Compute NumRegDefsLeft. This must be done before AddSchedEdges. +    InitNumRegDefsLeft(NodeSUnit); +      // Assign the Latency field of NodeSUnit using target-provided information.      ComputeLatency(NodeSUnit);    } @@ -343,7 +350,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {    for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {      SUnit *SU = &SUnits[su];      SDNode *MainNode = SU->getNode(); -     +      if (MainNode->isMachineOpcode()) {        unsigned Opc = MainNode->getMachineOpcode();        const TargetInstrDesc &TID = TII->get(Opc); @@ -356,9 +363,9 @@ void ScheduleDAGSDNodes::AddSchedEdges() {        if (TID.isCommutable())          SU->isCommutable = true;      } -     +      // Find all predecessors and successors of the group. 
-    for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) { +    for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {        if (N->isMachineOpcode() &&            TII->get(N->getMachineOpcode()).getImplicitDefs()) {          SU->hasPhysRegClobbers = true; @@ -368,7 +375,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {          if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())            SU->hasPhysRegDefs = true;        } -       +        for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {          SDNode *OpN = N->getOperand(i).getNode();          if (isPassiveNode(OpN)) continue;   // Not scheduled. @@ -377,7 +384,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {          if (OpSU == SU) continue;           // In the same group.          EVT OpVT = N->getOperand(i).getValueType(); -        assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!"); +        assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");          bool isChain = OpVT == MVT::Other;          unsigned PhysReg = 0; @@ -403,7 +410,13 @@ void ScheduleDAGSDNodes::AddSchedEdges() {            ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));          } -        SU->addPred(dep); +        if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 0) { +          // Multiple register uses are combined in the same SUnit. For example, +          // we could have a set of glued nodes with all their defs consumed by +          // another set of glued nodes. Register pressure tracking sees this as +          // a single use, so to keep pressure balanced we reduce the defs. +          --OpSU->NumRegDefsLeft; +        }        }      }    } @@ -412,7 +425,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {  /// BuildSchedGraph - Build the SUnit graph from the selection dag that we  /// are input.  
This SUnit graph is similar to the SelectionDAG, but  /// excludes nodes that aren't interesting to scheduling, and represents -/// flagged together nodes with a single SUnit. +/// glued together nodes with a single SUnit.  void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {    // Cluster certain nodes which should be scheduled together.    ClusterNodes(); @@ -422,6 +435,69 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {    AddSchedEdges();  } +// Initialize NumNodeDefs for the current Node's opcode. +void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() { +  if (!Node->isMachineOpcode()) { +    if (Node->getOpcode() == ISD::CopyFromReg) +      NodeNumDefs = 1; +    else +      NodeNumDefs = 0; +    return; +  } +  unsigned POpc = Node->getMachineOpcode(); +  if (POpc == TargetOpcode::IMPLICIT_DEF) { +    // No register need be allocated for this. +    NodeNumDefs = 0; +    return; +  } +  unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs(); +  // Some instructions define regs that are not represented in the selection DAG +  // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues. +  NodeNumDefs = std::min(Node->getNumValues(), NRegDefs); +  DefIdx = 0; +} + +// Construct a RegDefIter for this SUnit and find the first valid value. +ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU, +                                           const ScheduleDAGSDNodes *SD) +  : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) { +  InitNodeNumDefs(); +  Advance(); +} + +// Advance to the next valid value defined by the SUnit. +void ScheduleDAGSDNodes::RegDefIter::Advance() { +  for (;Node;) { // Visit all glued nodes. +    for (;DefIdx < NodeNumDefs; ++DefIdx) { +      if (!Node->hasAnyUseOfValue(DefIdx)) +        continue; +      if (Node->isMachineOpcode() && +          Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) { +        // Propagate the incoming (full-register) type. 
I doubt it's needed. +        ValueType = Node->getOperand(0).getValueType(); +      } +      else { +        ValueType = Node->getValueType(DefIdx); +      } +      ++DefIdx; +      return; // Found a normal regdef. +    } +    Node = Node->getGluedNode(); +    if (Node == NULL) { +      return; // No values left to visit. +    } +    InitNodeNumDefs(); +  } +} + +void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { +  assert(SU->NumRegDefsLeft == 0 && "expect a new node"); +  for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) { +    assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected"); +    ++SU->NumRegDefsLeft; +  } +} +  void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {    // Check to see if the scheduler cares about latencies.    if (ForceUnitLatencies()) { @@ -429,20 +505,17 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {      return;    } -  const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); -  if (InstrItins.isEmpty()) { +  if (!InstrItins || InstrItins->isEmpty()) {      SU->Latency = 1;      return;    } -   +    // Compute the latency for the node.  We use the sum of the latencies for -  // all nodes flagged together into this SUnit. +  // all nodes glued together into this SUnit.    SU->Latency = 0; -  for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) -    if (N->isMachineOpcode()) { -      SU->Latency += InstrItins. 
-        getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass()); -    } +  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) +    if (N->isMachineOpcode()) +      SU->Latency += TII->getInstrLatency(InstrItins, N);  }  void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, @@ -451,32 +524,25 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,    if (ForceUnitLatencies())      return; -  const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); -  if (InstrItins.isEmpty()) -    return; -      if (dep.getKind() != SDep::Data)      return;    unsigned DefIdx = Use->getOperand(OpIdx).getResNo(); -  if (Def->isMachineOpcode()) { -    const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); -    if (DefIdx >= II.getNumDefs()) -      return; -    int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx); -    if (DefCycle < 0) -      return; -    int UseCycle = 1; -    if (Use->isMachineOpcode()) { -      const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass(); -      UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx); -    } -    if (UseCycle >= 0) { -      int Latency = DefCycle - UseCycle + 1; -      if (Latency >= 0) -        dep.setLatency(Latency); -    } +  if (Use->isMachineOpcode()) +    // Adjust the use operand index by num of defs. +    OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs(); +  int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx); +  if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg && +      !BB->succ_empty()) { +    unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg(); +    if (TargetRegisterInfo::isVirtualRegister(Reg)) +      // This copy is a liveout value. It is likely coalesced, so reduce the +      // latency so not to penalize the def. +      // FIXME: need target specific adjustment here? +      Latency = (Latency > 1) ? 
Latency - 1 : 1;    } +  if (Latency >= 0) +    dep.setLatency(Latency);  }  void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { @@ -487,14 +553,14 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {    SU->getNode()->dump(DAG);    dbgs() << "\n"; -  SmallVector<SDNode *, 4> FlaggedNodes; -  for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) -    FlaggedNodes.push_back(N); -  while (!FlaggedNodes.empty()) { +  SmallVector<SDNode *, 4> GluedNodes; +  for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode()) +    GluedNodes.push_back(N); +  while (!GluedNodes.empty()) {      dbgs() << "    "; -    FlaggedNodes.back()->dump(DAG); +    GluedNodes.back()->dump(DAG);      dbgs() << "\n"; -    FlaggedNodes.pop_back(); +    GluedNodes.pop_back();    }  } @@ -507,37 +573,25 @@ namespace {    };  } -// ProcessSourceNode - Process nodes with source order numbers. These are added -// to a vector which EmitSchedule uses to determine how to insert dbg_value -// instructions in the right order. -static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, -                           InstrEmitter &Emitter, -                           DenseMap<SDValue, unsigned> &VRBaseMap, +/// ProcessSDDbgValues - Process SDDbgValues assoicated with this node. +static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, +                               InstrEmitter &Emitter,                      SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, -                           SmallSet<unsigned, 8> &Seen) { -  unsigned Order = DAG->GetOrdering(N); -  if (!Order || !Seen.insert(Order)) -    return; - -  MachineBasicBlock *BB = Emitter.getBlock(); -  if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) { -    // Did not insert any instruction. 
-    Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); -    return; -  } - -  Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos()))); +                            DenseMap<SDValue, unsigned> &VRBaseMap, +                            unsigned Order) {    if (!N->getHasDebugValue())      return; +    // Opportunistically insert immediate dbg_value uses, i.e. those with source    // order number right after the N. +  MachineBasicBlock *BB = Emitter.getBlock();    MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();    SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N);    for (unsigned i = 0, e = DVs.size(); i != e; ++i) {      if (DVs[i]->isInvalidated())        continue;      unsigned DVOrder = DVs[i]->getOrder(); -    if (DVOrder == ++Order) { +    if (!Order || DVOrder == ++Order) {        MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);        if (DbgMI) {          Orders.push_back(std::make_pair(DVOrder, DbgMI)); @@ -548,6 +602,33 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,    }  } +// ProcessSourceNode - Process nodes with source order numbers. These are added +// to a vector which EmitSchedule uses to determine how to insert dbg_value +// instructions in the right order. +static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, +                           InstrEmitter &Emitter, +                           DenseMap<SDValue, unsigned> &VRBaseMap, +                    SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, +                           SmallSet<unsigned, 8> &Seen) { +  unsigned Order = DAG->GetOrdering(N); +  if (!Order || !Seen.insert(Order)) { +    // Process any valid SDDbgValues even if node does not have any order +    // assigned. 
+    ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0); +    return; +  } + +  MachineBasicBlock *BB = Emitter.getBlock(); +  if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) { +    // Did not insert any instruction. +    Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); +    return; +  } + +  Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos()))); +  ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); +} +  /// EmitSchedule - Emit the machine code in scheduled order.  MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { @@ -578,25 +659,25 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {      }      // For pre-regalloc scheduling, create instructions corresponding to the -    // SDNode and any flagged SDNodes and append them to the block. +    // SDNode and any glued SDNodes and append them to the block.      if (!SU->getNode()) {        // Emit a copy.        EmitPhysRegCopy(SU, CopyVRBaseMap);        continue;      } -    SmallVector<SDNode *, 4> FlaggedNodes; -    for (SDNode *N = SU->getNode()->getFlaggedNode(); N; -         N = N->getFlaggedNode()) -      FlaggedNodes.push_back(N); -    while (!FlaggedNodes.empty()) { -      SDNode *N = FlaggedNodes.back(); -      Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned, +    SmallVector<SDNode *, 4> GluedNodes; +    for (SDNode *N = SU->getNode()->getGluedNode(); N; +         N = N->getGluedNode()) +      GluedNodes.push_back(N); +    while (!GluedNodes.empty()) { +      SDNode *N = GluedNodes.back(); +      Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,                         VRBaseMap);        // Remember the source order of the inserted instruction.        
if (HasDbg)          ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen); -      FlaggedNodes.pop_back(); +      GluedNodes.pop_back();      }      Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,                       VRBaseMap); @@ -625,16 +706,8 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {        // Insert all SDDbgValue's whose order(s) are before "Order".        if (!MI)          continue; -#ifndef NDEBUG -      unsigned LastDIOrder = 0; -#endif        for (; DI != DE &&               (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) { -#ifndef NDEBUG -        assert((*DI)->getOrder() >= LastDIOrder && -               "SDDbgValue nodes must be in source order!"); -        LastDIOrder = (*DI)->getOrder(); -#endif          if ((*DI)->isInvalidated())            continue;          MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 842fc8c72703..cc7310e4ca42 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -20,13 +20,13 @@  namespace llvm {    /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. -  ///  +  ///    /// Edges between SUnits are initially based on edges in the SelectionDAG,    /// and additional edges can be added by the schedulers as heuristics.    /// SDNodes such as Constants, Registers, and a few others that are not    /// interesting to schedulers are not allocated SUnits.    /// -  /// SDNodes with MVT::Flag operands are grouped along with the flagged +  /// SDNodes with MVT::Glue operands are grouped along with the flagged    /// nodes into a single SUnit so that they are scheduled together.    
///    /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output @@ -36,6 +36,7 @@ namespace llvm {    class ScheduleDAGSDNodes : public ScheduleDAG {    public:      SelectionDAG *DAG;                    // DAG of the current basic block +    const InstrItineraryData *InstrItins;      explicit ScheduleDAGSDNodes(MachineFunction &mf); @@ -72,13 +73,17 @@ namespace llvm {      /// predecessors / successors info nor the temporary scheduling states.      ///      SUnit *Clone(SUnit *N); -     +      /// BuildSchedGraph - Build the SUnit graph from the selection dag that we      /// are input.  This SUnit graph is similar to the SelectionDAG, but      /// excludes nodes that aren't interesting to scheduling, and represents      /// flagged together nodes with a single SUnit.      virtual void BuildSchedGraph(AliasAnalysis *AA); +    /// InitNumRegDefsLeft - Determine the # of regs defined by this node. +    /// +    void InitNumRegDefsLeft(SUnit *SU); +      /// ComputeLatency - Compute node latency.      ///      virtual void ComputeLatency(SUnit *SU); @@ -105,6 +110,30 @@ namespace llvm {      virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; +    /// RegDefIter - In place iteration over the values defined by an +    /// SUnit. This does not need copies of the iterator or any other STLisms. +    /// The iterator creates itself, rather than being provided by the SchedDAG. 
+    class RegDefIter { +      const ScheduleDAGSDNodes *SchedDAG; +      const SDNode *Node; +      unsigned DefIdx; +      unsigned NodeNumDefs; +      EVT ValueType; +    public: +      RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD); + +      bool IsValid() const { return Node != NULL; } + +      EVT GetValue() const { +        assert(IsValid() && "bad iterator"); +        return ValueType; +      } + +      void Advance(); +    private: +      void InitNodeNumDefs(); +    }; +    private:      /// ClusterNeighboringLoads - Cluster loads from "near" addresses into      /// combined SUnits. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index ad06ebda5b00..2fb2f2d8aa1e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -31,7 +31,6 @@  #include "llvm/CodeGen/PseudoSourceValue.h"  #include "llvm/Target/TargetRegisterInfo.h"  #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h"  #include "llvm/Target/TargetLowering.h"  #include "llvm/Target/TargetSelectionDAGInfo.h"  #include "llvm/Target/TargetOptions.h" @@ -44,7 +43,7 @@  #include "llvm/Support/ManagedStatic.h"  #include "llvm/Support/MathExtras.h"  #include "llvm/Support/raw_ostream.h" -#include "llvm/System/Mutex.h" +#include "llvm/Support/Mutex.h"  #include "llvm/ADT/SetVector.h"  #include "llvm/ADT/SmallPtrSet.h"  #include "llvm/ADT/SmallSet.h" @@ -111,7 +110,7 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,  /// BUILD_VECTOR where all of the elements are ~0 or undef.  bool ISD::isBuildVectorAllOnes(const SDNode *N) {    // Look through a bit convert. -  if (N->getOpcode() == ISD::BIT_CONVERT) +  if (N->getOpcode() == ISD::BITCAST)      N = N->getOperand(0).getNode();    if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -152,7 +151,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {  /// BUILD_VECTOR where all of the elements are 0 or undef.  
bool ISD::isBuildVectorAllZeros(const SDNode *N) {    // Look through a bit convert. -  if (N->getOpcode() == ISD::BIT_CONVERT) +  if (N->getOpcode() == ISD::BITCAST)      N = N->getOperand(0).getNode();    if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -199,6 +198,8 @@ bool ISD::isScalarToVector(const SDNode *N) {    if (N->getOperand(0).getOpcode() == ISD::UNDEF)      return false;    unsigned NumElems = N->getNumOperands(); +  if (NumElems == 1) +    return false;    for (unsigned i = 1; i < NumElems; ++i) {      SDValue V = N->getOperand(i);      if (V.getOpcode() != ISD::UNDEF) @@ -489,7 +490,7 @@ encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,  /// doNotCSE - Return true if CSE should not be performed for this node.  static bool doNotCSE(SDNode *N) { -  if (N->getValueType(0) == MVT::Flag) +  if (N->getValueType(0) == MVT::Glue)      return true; // Never CSE anything that produces a flag.    switch (N->getOpcode()) { @@ -501,7 +502,7 @@ static bool doNotCSE(SDNode *N) {    // Check that remaining values produced are not flags.    for (unsigned i = 1, e = N->getNumValues(); i != e; ++i) -    if (N->getValueType(i) == MVT::Flag) +    if (N->getValueType(i) == MVT::Glue)        return true; // Never CSE anything that produces a flag.    return false; @@ -609,9 +610,6 @@ void SelectionDAG::DeallocateNode(SDNode *N) {  bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {    bool Erased = false;    switch (N->getOpcode()) { -  case ISD::EntryToken: -    llvm_unreachable("EntryToken should not be in CSEMaps!"); -    return false;    case ISD::HANDLENODE: return false;  // noop.    case ISD::CONDCODE:      assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] && @@ -641,6 +639,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {    }    default:      // Remove it from the CSE Map. 
+    assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!"); +    assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!");      Erased = CSEMap.RemoveNode(N);      break;    } @@ -648,7 +648,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {    // Verify that the node was actually in one of the CSE maps, unless it has a    // flag result (which cannot be CSE'd) or is one of the special cases that are    // not subject to CSE. -  if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag && +  if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&        !N->isMachineOpcode() && !doNotCSE(N)) {      N->dump(this);      dbgs() << "\n"; @@ -743,8 +743,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,    return Node;  } -/// VerifyNode - Sanity check the given node.  Aborts if it is invalid. -void SelectionDAG::VerifyNode(SDNode *N) { +#ifndef NDEBUG +/// VerifyNodeCommon - Sanity check the given node.  Aborts if it is invalid. +static void VerifyNodeCommon(SDNode *N) {    switch (N->getOpcode()) {    default:      break; @@ -778,6 +779,44 @@ void SelectionDAG::VerifyNode(SDNode *N) {    }  } +/// VerifySDNode - Sanity check the given SDNode.  Aborts if it is invalid. +static void VerifySDNode(SDNode *N) { +  // The SDNode allocators cannot be used to allocate nodes with fields that are +  // not present in an SDNode! 
+  assert(!isa<MemSDNode>(N) && "Bad MemSDNode!"); +  assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!"); +  assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!"); +  assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!"); +  assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!"); +  assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!"); +  assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!"); +  assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!"); +  assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!"); +  assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!"); +  assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!"); +  assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!"); +  assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!"); +  assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!"); +  assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!"); +  assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!"); +  assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!"); +  assert(!isa<VTSDNode>(N) && "Bad VTSDNode!"); +  assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!"); + +  VerifyNodeCommon(N); +} + +/// VerifyMachineNode - Sanity check the given MachineNode.  Aborts if it is +/// invalid. +static void VerifyMachineNode(SDNode *N) { +  // The MachineNode allocators cannot be used to allocate nodes with fields +  // that are not present in a MachineNode! +  // Currently there are no such nodes. + +  VerifyNodeCommon(N); +} +#endif // NDEBUG +  /// getEVTAlignment - Compute the default alignment value for the  /// given type.  
/// @@ -1315,7 +1354,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {    void *IP = 0;    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))      return SDValue(E, 0); -   +    SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label);    CSEMap.InsertNode(N, IP);    AllNodes.push_back(N); @@ -1365,11 +1404,11 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {    FoldingSetNodeID ID;    AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0);    ID.AddPointer(MD); -   +    void *IP = 0;    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))      return SDValue(E, 0); -   +    SDNode *N = new (NodeAllocator) MDNodeSDNode(MD);    CSEMap.InsertNode(N, IP);    AllNodes.push_back(N); @@ -1613,7 +1652,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,      // Also compute a conserative estimate for high known-0 bits.      // More trickiness is possible, but this is sufficient for the      // interesting case of alignment computation. -    KnownOne.clear(); +    KnownOne.clearAllBits();      unsigned TrailZ = KnownZero.countTrailingOnes() +                        KnownZero2.countTrailingOnes();      unsigned LeadZ =  std::max(KnownZero.countLeadingOnes() + @@ -1636,8 +1675,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,                        AllOnes, KnownZero2, KnownOne2, Depth+1);      unsigned LeadZ = KnownZero2.countLeadingOnes(); -    KnownOne2.clear(); -    KnownZero2.clear(); +    KnownOne2.clearAllBits(); +    KnownZero2.clearAllBits();      ComputeMaskedBits(Op.getOperand(1),                        AllOnes, KnownZero2, KnownOne2, Depth+1);      unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); @@ -1765,7 +1804,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,      // If the sign extended bits are demanded, we know that the sign      // bit is demanded. 
-    InSignBit.zext(BitWidth); +    InSignBit = InSignBit.zext(BitWidth);      if (NewBits.getBoolValue())        InputDemandedBits |= InSignBit; @@ -1792,7 +1831,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,    case ISD::CTPOP: {      unsigned LowBits = Log2_32(BitWidth)+1;      KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); -    KnownOne.clear(); +    KnownOne.clearAllBits();      return;    }    case ISD::LOAD: { @@ -1808,13 +1847,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,      EVT InVT = Op.getOperand(0).getValueType();      unsigned InBits = InVT.getScalarType().getSizeInBits();      APInt NewBits   = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; -    APInt InMask    = Mask; -    InMask.trunc(InBits); -    KnownZero.trunc(InBits); -    KnownOne.trunc(InBits); +    APInt InMask    = Mask.trunc(InBits); +    KnownZero = KnownZero.trunc(InBits); +    KnownOne = KnownOne.trunc(InBits);      ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); -    KnownZero.zext(BitWidth); -    KnownOne.zext(BitWidth); +    KnownZero = KnownZero.zext(BitWidth); +    KnownOne = KnownOne.zext(BitWidth);      KnownZero |= NewBits;      return;    } @@ -1823,16 +1861,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,      unsigned InBits = InVT.getScalarType().getSizeInBits();      APInt InSignBit = APInt::getSignBit(InBits);      APInt NewBits   = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; -    APInt InMask = Mask; -    InMask.trunc(InBits); +    APInt InMask = Mask.trunc(InBits);      // If any of the sign extended bits are demanded, we know that the sign      // bit is demanded. Temporarily set this bit in the mask for our callee.      
if (NewBits.getBoolValue())        InMask |= InSignBit; -    KnownZero.trunc(InBits); -    KnownOne.trunc(InBits); +    KnownZero = KnownZero.trunc(InBits); +    KnownOne = KnownOne.trunc(InBits);      ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);      // Note if the sign bit is known to be zero or one. @@ -1844,13 +1881,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,      // If the sign bit wasn't actually demanded by our caller, we don't      // want it set in the KnownZero and KnownOne result values. Reset the      // mask and reapply it to the result values. -    InMask = Mask; -    InMask.trunc(InBits); +    InMask = Mask.trunc(InBits);      KnownZero &= InMask;      KnownOne  &= InMask; -    KnownZero.zext(BitWidth); -    KnownOne.zext(BitWidth); +    KnownZero = KnownZero.zext(BitWidth); +    KnownOne = KnownOne.zext(BitWidth);      // If the sign bit is known zero or one, the top bits match.      if (SignBitKnownZero) @@ -1862,26 +1898,24 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,    case ISD::ANY_EXTEND: {      EVT InVT = Op.getOperand(0).getValueType();      unsigned InBits = InVT.getScalarType().getSizeInBits(); -    APInt InMask = Mask; -    InMask.trunc(InBits); -    KnownZero.trunc(InBits); -    KnownOne.trunc(InBits); +    APInt InMask = Mask.trunc(InBits); +    KnownZero = KnownZero.trunc(InBits); +    KnownOne = KnownOne.trunc(InBits);      ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); -    KnownZero.zext(BitWidth); -    KnownOne.zext(BitWidth); +    KnownZero = KnownZero.zext(BitWidth); +    KnownOne = KnownOne.zext(BitWidth);      return;    }    case ISD::TRUNCATE: {      EVT InVT = Op.getOperand(0).getValueType();      unsigned InBits = InVT.getScalarType().getSizeInBits(); -    APInt InMask = Mask; -    InMask.zext(InBits); -    KnownZero.zext(InBits); -    KnownOne.zext(InBits); +    APInt InMask = Mask.zext(InBits); +    KnownZero 
= KnownZero.zext(InBits); +    KnownOne = KnownOne.zext(InBits);      ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); -    KnownZero.trunc(BitWidth); -    KnownOne.trunc(BitWidth); +    KnownZero = KnownZero.trunc(BitWidth); +    KnownOne = KnownOne.trunc(BitWidth);      break;    }    case ISD::AssertZext: { @@ -1921,7 +1955,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,      }    }    // fall through -  case ISD::ADD: { +  case ISD::ADD: +  case ISD::ADDE: {      // Output known-0 bits are known if clear or set in both the low clear bits      // common to both LHS & RHS.  For example, 8+(X<<3) is known to have the      // low 3 bits clear. @@ -1936,7 +1971,17 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,      KnownZeroOut = std::min(KnownZeroOut,                              KnownZero2.countTrailingOnes()); -    KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); +    if (Op.getOpcode() == ISD::ADD) { +      KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); +      return; +    } + +    // With ADDE, a carry bit may be added in, so we can only use this +    // information if we know (at least) that the low two bits are clear.  We +    // then return to the caller that the low bit is unknown but that other bits +    // are known zero. 
+    if (KnownZeroOut >= 2) // ADDE +      KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut);      return;    }    case ISD::SREM: @@ -1991,10 +2036,19 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,      uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),                                  KnownZero2.countLeadingOnes()); -    KnownOne.clear(); +    KnownOne.clearAllBits();      KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;      return;    } +  case ISD::FrameIndex: +  case ISD::TargetFrameIndex: +    if (unsigned Align = InferPtrAlignment(Op)) { +      // The low bits are known zero if the pointer is aligned. +      KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align)); +      return; +    } +    break; +          default:      // Allow the target to implement this method for its nodes.      if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { @@ -2234,6 +2288,25 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{    return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));  } +/// isBaseWithConstantOffset - Return true if the specified operand is an +/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an +/// ISD::OR with a ConstantSDNode that is guaranteed to have the same +/// semantics as an ADD.  This handles the equivalence: +///     X|Cst == X+Cst iff X&Cst = 0. +bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { +  if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) || +      !isa<ConstantSDNode>(Op.getOperand(1))) +    return false; +   +  if (Op.getOpcode() == ISD::OR &&  +      !MaskedValueIsZero(Op.getOperand(0), +                     cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue())) +    return false; +   +  return true; +} + +  bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {    // If we're told that NaNs won't happen, assume they won't.    
if (NoNaNsFPMath) @@ -2295,7 +2368,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {    AllNodes.push_back(N);  #ifndef NDEBUG -  VerifyNode(N); +  VerifySDNode(N);  #endif    return SDValue(N, 0);  } @@ -2308,23 +2381,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,      switch (Opcode) {      default: break;      case ISD::SIGN_EXTEND: -      return getConstant(APInt(Val).sextOrTrunc(VT.getSizeInBits()), VT); +      return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT);      case ISD::ANY_EXTEND:      case ISD::ZERO_EXTEND:      case ISD::TRUNCATE: -      return getConstant(APInt(Val).zextOrTrunc(VT.getSizeInBits()), VT); +      return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);      case ISD::UINT_TO_FP:      case ISD::SINT_TO_FP: { -      const uint64_t zero[] = {0, 0};        // No compile time operations on ppcf128.        if (VT == MVT::ppcf128) break; -      APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero)); +      APFloat apf(APInt::getNullValue(VT.getSizeInBits()));        (void)apf.convertFromAPInt(Val,                                   Opcode==ISD::SINT_TO_FP,                                   APFloat::rmNearestTiesToEven);        return getConstantFP(apf, VT);      } -    case ISD::BIT_CONVERT: +    case ISD::BITCAST:        if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)          return getConstantFP(Val.bitsToFloat(), VT);        else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) @@ -2375,7 +2447,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,          APInt api(VT.getSizeInBits(), 2, x);          return getConstant(api, VT);        } -      case ISD::BIT_CONVERT: +      case ISD::BITCAST:          if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)            return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);          else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) @@ -2477,13 +2549,13 @@ SDValue 
SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,          return Operand.getNode()->getOperand(0);      }      break; -  case ISD::BIT_CONVERT: +  case ISD::BITCAST:      // Basic sanity checking.      assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits() -           && "Cannot BIT_CONVERT between types of different sizes!"); +           && "Cannot BITCAST between types of different sizes!");      if (VT == Operand.getValueType()) return Operand;  // noop conversion. -    if (OpOpcode == ISD::BIT_CONVERT)  // bitconv(bitconv(x)) -> bitconv(x) -      return getNode(ISD::BIT_CONVERT, DL, VT, Operand.getOperand(0)); +    if (OpOpcode == ISD::BITCAST)  // bitconv(bitconv(x)) -> bitconv(x) +      return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));      if (OpOpcode == ISD::UNDEF)        return getUNDEF(VT);      break; @@ -2519,7 +2591,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,    SDNode *N;    SDVTList VTs = getVTList(VT); -  if (VT != MVT::Flag) { // Don't CSE flag producing nodes +  if (VT != MVT::Glue) { // Don't CSE flag producing nodes      FoldingSetNodeID ID;      SDValue Ops[1] = { Operand };      AddNodeIDNode(ID, Opcode, VTs, Ops, 1); @@ -2535,7 +2607,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,    AllNodes.push_back(N);  #ifndef NDEBUG -  VerifyNode(N); +  VerifySDNode(N);  #endif    return SDValue(N, 0);  } @@ -2676,6 +2748,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,             "Shift operators return type must be the same as their first arg");      assert(VT.isInteger() && N2.getValueType().isInteger() &&             "Shifts only work on integers"); +    // Verify that the shift amount VT is bit enough to hold valid shift +    // amounts.  This catches things like trying to shift an i1024 value by an +    // i8, which is easy to fall into in generic code that uses +    // TLI.getShiftAmount(). 
+    assert(N2.getValueType().getSizeInBits() >= +                   Log2_32_Ceil(N1.getValueType().getSizeInBits()) &&  +           "Invalid use of small shift amount with oversized value!");      // Always fold shifts of i1 values so the code generator doesn't need to      // handle them.  Since we know the size of the shift has to be less than the @@ -2820,11 +2899,30 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,        return getConstant(ShiftedVal.trunc(ElementSize), VT);      }      break; -  case ISD::EXTRACT_SUBVECTOR: -    if (N1.getValueType() == VT) // Trivial extraction. -      return N1; +  case ISD::EXTRACT_SUBVECTOR: { +    SDValue Index = N2; +    if (VT.isSimple() && N1.getValueType().isSimple()) { +      assert(VT.isVector() && N1.getValueType().isVector() && +             "Extract subvector VTs must be a vectors!"); +      assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() && +             "Extract subvector VTs must have the same element type!"); +      assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() && +             "Extract subvector must be from larger vector to smaller vector!"); + +      if (isa<ConstantSDNode>(Index.getNode())) { +        assert((VT.getVectorNumElements() + +                cast<ConstantSDNode>(Index.getNode())->getZExtValue() +                <= N1.getValueType().getVectorNumElements()) +               && "Extract subvector overflow!"); +      } + +      // Trivial extraction. +      if (VT.getSimpleVT() == N1.getValueType().getSimpleVT()) +        return N1; +    }      break;    } +  }    if (N1C) {      if (N2C) { @@ -2961,7 +3059,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,    // Memoize this node if possible.    
SDNode *N;    SDVTList VTs = getVTList(VT); -  if (VT != MVT::Flag) { +  if (VT != MVT::Glue) {      SDValue Ops[] = { N1, N2 };      FoldingSetNodeID ID;      AddNodeIDNode(ID, Opcode, VTs, Ops, 2); @@ -2977,7 +3075,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,    AllNodes.push_back(N);  #ifndef NDEBUG -  VerifyNode(N); +  VerifySDNode(N);  #endif    return SDValue(N, 0);  } @@ -3019,7 +3117,31 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,    case ISD::VECTOR_SHUFFLE:      llvm_unreachable("should use getVectorShuffle constructor!");      break; -  case ISD::BIT_CONVERT: +  case ISD::INSERT_SUBVECTOR: { +    SDValue Index = N3; +    if (VT.isSimple() && N1.getValueType().isSimple() +        && N2.getValueType().isSimple()) { +      assert(VT.isVector() && N1.getValueType().isVector() && +             N2.getValueType().isVector() && +             "Insert subvector VTs must be a vectors"); +      assert(VT == N1.getValueType() && +             "Dest and insert subvector source types must match!"); +      assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() && +             "Insert subvector must be from smaller vector to larger vector!"); +      if (isa<ConstantSDNode>(Index.getNode())) { +        assert((N2.getValueType().getVectorNumElements() + +                cast<ConstantSDNode>(Index.getNode())->getZExtValue() +                <= VT.getVectorNumElements()) +               && "Insert subvector overflow!"); +      } + +      // Trivial insertion. +      if (VT.getSimpleVT() == N2.getValueType().getSimpleVT()) +        return N2; +    } +    break; +  } +  case ISD::BITCAST:      // Fold bit_convert nodes from a type to themselves.      if (N1.getValueType() == VT)        return N1; @@ -3029,7 +3151,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,    // Memoize node if it doesn't produce a flag.    
SDNode *N;    SDVTList VTs = getVTList(VT); -  if (VT != MVT::Flag) { +  if (VT != MVT::Glue) {      SDValue Ops[] = { N1, N2, N3 };      FoldingSetNodeID ID;      AddNodeIDNode(ID, Opcode, VTs, Ops, 3); @@ -3045,7 +3167,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,    AllNodes.push_back(N);  #ifndef NDEBUG -  VerifyNode(N); +  VerifySDNode(N);  #endif    return SDValue(N, 0);  } @@ -3087,6 +3209,17 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {                   &ArgChains[0], ArgChains.size());  } +/// SplatByte - Distribute ByteVal over NumBits bits. +static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { +  APInt Val = APInt(NumBits, ByteVal); +  unsigned Shift = 8; +  for (unsigned i = NumBits; i > 8; i >>= 1) { +    Val = (Val << Shift) | Val; +    Shift <<= 1; +  } +  return Val; +} +  /// getMemsetValue - Vectorized representation of the memset value  /// operand.  static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, @@ -3095,27 +3228,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,    unsigned NumBits = VT.getScalarType().getSizeInBits();    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { -    APInt Val = APInt(NumBits, C->getZExtValue() & 255); -    unsigned Shift = 8; -    for (unsigned i = NumBits; i > 8; i >>= 1) { -      Val = (Val << Shift) | Val; -      Shift <<= 1; -    } +    APInt Val = SplatByte(NumBits, C->getZExtValue() & 255);      if (VT.isInteger())        return DAG.getConstant(Val, VT);      return DAG.getConstantFP(APFloat(Val), VT);    } -  const TargetLowering &TLI = DAG.getTargetLoweringInfo();    Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value); -  unsigned Shift = 8; -  for (unsigned i = NumBits; i > 8; i >>= 1) { -    Value = DAG.getNode(ISD::OR, dl, VT, -                        DAG.getNode(ISD::SHL, dl, VT, Value, -                                    DAG.getConstant(Shift, -                                          
          TLI.getShiftAmountTy())), -                        Value); -    Shift <<= 1; +  if (NumBits > 8) { +    // Use a multiplication with 0x010101... to extend the input to the +    // required length. +    APInt Magic = SplatByte(NumBits, 0x01); +    Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));    }    return Value; @@ -3131,13 +3255,12 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,    if (Str.empty()) {      if (VT.isInteger())        return DAG.getConstant(0, VT); -    else if (VT.getSimpleVT().SimpleTy == MVT::f32 || -             VT.getSimpleVT().SimpleTy == MVT::f64) +    else if (VT == MVT::f32 || VT == MVT::f64)        return DAG.getConstantFP(0.0, VT);      else if (VT.isVector()) {        unsigned NumElts = VT.getVectorNumElements();        MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64; -      return DAG.getNode(ISD::BIT_CONVERT, dl, VT, +      return DAG.getNode(ISD::BITCAST, dl, VT,                           DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),                                                               EltVT, NumElts)));      } else @@ -3234,15 +3357,6 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,      if (VT.bitsGT(LVT))        VT = LVT;    } -   -  // If we're optimizing for size, and there is a limit, bump the maximum number -  // of operations inserted down to 4.  This is a wild guess that approximates -  // the size of a call to memcpy or memset (3 arguments + call). 
-  if (Limit != ~0U) { -    const Function *F = DAG.getMachineFunction().getFunction(); -    if (F->hasFnAttr(Attribute::OptimizeForSize)) -      Limit = 4; -  }    unsigned NumMemOps = 0;    while (Size != 0) { @@ -3276,18 +3390,22 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,                                         SDValue Src, uint64_t Size,                                         unsigned Align, bool isVol,                                         bool AlwaysInline, -                                       const Value *DstSV, uint64_t DstSVOff, -                                       const Value *SrcSV, uint64_t SrcSVOff) { +                                       MachinePointerInfo DstPtrInfo, +                                       MachinePointerInfo SrcPtrInfo) {    // Turn a memcpy of undef to nop.    if (Src.getOpcode() == ISD::UNDEF)      return Chain;    // Expand memcpy to a series of load and store ops if the size operand falls    // below a certain threshold. +  // TODO: In the AlwaysInline case, if the size is big then generate a loop +  // rather than maybe a humongous number of loads and stores.    const TargetLowering &TLI = DAG.getTargetLoweringInfo();    std::vector<EVT> MemOps;    bool DstAlignCanChange = false; -  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); +  MachineFunction &MF = DAG.getMachineFunction(); +  MachineFrameInfo *MFI = MF.getFrameInfo(); +  bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);    if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))      DstAlignCanChange = true; @@ -3297,8 +3415,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,    std::string Str;    bool CopyFromStr = isMemSrcFromString(Src, Str);    bool isZeroStr = CopyFromStr && Str.empty(); -  unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(); -   +  unsigned Limit = AlwaysInline ? 
~0U : TLI.getMaxStoresPerMemcpy(OptSize); +    if (!FindOptimalMemOpLowering(MemOps, Limit, Size,                                  (DstAlignCanChange ? 0 : Align),                                  (isZeroStr ? 0 : SrcAlign), @@ -3334,7 +3452,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,        Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);        Store = DAG.getStore(Chain, dl, Value,                             getMemBasePlusOffset(Dst, DstOff, DAG), -                           DstSV, DstSVOff + DstOff, isVol, false, Align); +                           DstPtrInfo.getWithOffset(DstOff), isVol, +                           false, Align);      } else {        // The type might not be legal for the target.  This should only happen        // if the type is smaller than a legal type, as on PPC, so the right @@ -3343,14 +3462,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,        // FIXME does the case above also need this?        
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);        assert(NVT.bitsGE(VT)); -      Value = DAG.getExtLoad(ISD::EXTLOAD, NVT, dl, Chain, +      Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,                               getMemBasePlusOffset(Src, SrcOff, DAG), -                             SrcSV, SrcSVOff + SrcOff, VT, isVol, false, +                             SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false,                               MinAlign(SrcAlign, SrcOff));        Store = DAG.getTruncStore(Chain, dl, Value,                                  getMemBasePlusOffset(Dst, DstOff, DAG), -                                DstSV, DstSVOff + DstOff, VT, isVol, false, -                                Align); +                                DstPtrInfo.getWithOffset(DstOff), VT, isVol, +                                false, Align);      }      OutChains.push_back(Store);      SrcOff += VTSize; @@ -3366,8 +3485,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,                                          SDValue Src, uint64_t Size,                                          unsigned Align,  bool isVol,                                          bool AlwaysInline, -                                        const Value *DstSV, uint64_t DstSVOff, -                                        const Value *SrcSV, uint64_t SrcSVOff) { +                                        MachinePointerInfo DstPtrInfo, +                                        MachinePointerInfo SrcPtrInfo) {    // Turn a memmove of undef to nop.    
if (Src.getOpcode() == ISD::UNDEF)      return Chain; @@ -3377,14 +3496,16 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,    const TargetLowering &TLI = DAG.getTargetLoweringInfo();    std::vector<EVT> MemOps;    bool DstAlignCanChange = false; -  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); +  MachineFunction &MF = DAG.getMachineFunction(); +  MachineFrameInfo *MFI = MF.getFrameInfo(); +  bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);    if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))      DstAlignCanChange = true;    unsigned SrcAlign = DAG.InferPtrAlignment(Src);    if (Align > SrcAlign)      SrcAlign = Align; -  unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(); +  unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);    if (!FindOptimalMemOpLowering(MemOps, Limit, Size,                                  (DstAlignCanChange ? 
0 : Align), @@ -3414,7 +3535,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,      Value = DAG.getLoad(VT, dl, Chain,                          getMemBasePlusOffset(Src, SrcOff, DAG), -                        SrcSV, SrcSVOff + SrcOff, isVol, false, SrcAlign); +                        SrcPtrInfo.getWithOffset(SrcOff), isVol, +                        false, SrcAlign);      LoadValues.push_back(Value);      LoadChains.push_back(Value.getValue(1));      SrcOff += VTSize; @@ -3429,7 +3551,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,      Store = DAG.getStore(Chain, dl, LoadValues[i],                           getMemBasePlusOffset(Dst, DstOff, DAG), -                         DstSV, DstSVOff + DstOff, isVol, false, Align); +                         DstPtrInfo.getWithOffset(DstOff), isVol, false, Align);      OutChains.push_back(Store);      DstOff += VTSize;    } @@ -3442,7 +3564,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,                                 SDValue Chain, SDValue Dst,                                 SDValue Src, uint64_t Size,                                 unsigned Align, bool isVol, -                               const Value *DstSV, uint64_t DstSVOff) { +                               MachinePointerInfo DstPtrInfo) {    // Turn a memset of undef to nop.    
if (Src.getOpcode() == ISD::UNDEF)      return Chain; @@ -3452,13 +3574,15 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,    const TargetLowering &TLI = DAG.getTargetLoweringInfo();    std::vector<EVT> MemOps;    bool DstAlignCanChange = false; -  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); +  MachineFunction &MF = DAG.getMachineFunction(); +  MachineFrameInfo *MFI = MF.getFrameInfo(); +  bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);    if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))      DstAlignCanChange = true;    bool NonScalarIntSafe =      isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); -  if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(), +  if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),                                  Size, (DstAlignCanChange ? 0 : Align), 0,                                  NonScalarIntSafe, false, DAG, TLI))      return SDValue(); @@ -3477,15 +3601,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,    SmallVector<SDValue, 8> OutChains;    uint64_t DstOff = 0;    unsigned NumMemOps = MemOps.size(); + +  // Find the largest store and generate the bit pattern for it. +  EVT LargestVT = MemOps[0]; +  for (unsigned i = 1; i < NumMemOps; i++) +    if (MemOps[i].bitsGT(LargestVT)) +      LargestVT = MemOps[i]; +  SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl); +    for (unsigned i = 0; i < NumMemOps; i++) {      EVT VT = MemOps[i]; -    unsigned VTSize = VT.getSizeInBits() / 8; -    SDValue Value = getMemsetValue(Src, VT, DAG, dl); + +    // If this store is smaller than the largest store see whether we can get +    // the smaller value for free with a truncate. 
+    SDValue Value = MemSetValue; +    if (VT.bitsLT(LargestVT)) { +      if (!LargestVT.isVector() && !VT.isVector() && +          TLI.isTruncateFree(LargestVT, VT)) +        Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue); +      else +        Value = getMemsetValue(Src, VT, DAG, dl); +    } +    assert(Value.getValueType() == VT && "Value with wrong type.");      SDValue Store = DAG.getStore(Chain, dl, Value,                                   getMemBasePlusOffset(Dst, DstOff, DAG), -                                 DstSV, DstSVOff + DstOff, isVol, false, 0); +                                 DstPtrInfo.getWithOffset(DstOff), +                                 isVol, false, Align);      OutChains.push_back(Store); -    DstOff += VTSize; +    DstOff += VT.getSizeInBits() / 8;    }    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, @@ -3495,8 +3638,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,  SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,                                  SDValue Src, SDValue Size,                                  unsigned Align, bool isVol, bool AlwaysInline, -                                const Value *DstSV, uint64_t DstSVOff, -                                const Value *SrcSV, uint64_t SrcSVOff) { +                                MachinePointerInfo DstPtrInfo, +                                MachinePointerInfo SrcPtrInfo) {    // Check to see if we should lower the memcpy to loads and stores first.    // For cases within the target-specified limits, this is the best choice. 
@@ -3508,7 +3651,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,      SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,                                               ConstantSize->getZExtValue(),Align, -                                isVol, false, DstSV, DstSVOff, SrcSV, SrcSVOff); +                                isVol, false, DstPtrInfo, SrcPtrInfo);      if (Result.getNode())        return Result;    } @@ -3518,7 +3661,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,    SDValue Result =      TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,                                  isVol, AlwaysInline, -                                DstSV, DstSVOff, SrcSV, SrcSVOff); +                                DstPtrInfo, SrcPtrInfo);    if (Result.getNode())      return Result; @@ -3528,7 +3671,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,      assert(ConstantSize && "AlwaysInline requires a constant size!");      return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,                                     ConstantSize->getZExtValue(), Align, isVol, -                                   true, DstSV, DstSVOff, SrcSV, SrcSVOff); +                                   true, DstPtrInfo, SrcPtrInfo);    }    // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc @@ -3559,8 +3702,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,  SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,                                   SDValue Src, SDValue Size,                                   unsigned Align, bool isVol, -                                 const Value *DstSV, uint64_t DstSVOff, -                                 const Value *SrcSV, uint64_t SrcSVOff) { +                                 MachinePointerInfo DstPtrInfo, +                                 MachinePointerInfo SrcPtrInfo) {    // Check to see 
if we should lower the memmove to loads and stores first.    // For cases within the target-specified limits, this is the best choice. @@ -3573,7 +3716,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,      SDValue Result =        getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,                                 ConstantSize->getZExtValue(), Align, isVol, -                               false, DstSV, DstSVOff, SrcSV, SrcSVOff); +                               false, DstPtrInfo, SrcPtrInfo);      if (Result.getNode())        return Result;    } @@ -3582,7 +3725,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,    // code. If the target chooses to do this, this is the next best.    SDValue Result =      TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol, -                                 DstSV, DstSVOff, SrcSV, SrcSVOff); +                                 DstPtrInfo, SrcPtrInfo);    if (Result.getNode())      return Result; @@ -3611,7 +3754,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,  SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,                                  SDValue Src, SDValue Size,                                  unsigned Align, bool isVol, -                                const Value *DstSV, uint64_t DstSVOff) { +                                MachinePointerInfo DstPtrInfo) {    // Check to see if we should lower the memset to stores first.    // For cases within the target-specified limits, this is the best choice. 
@@ -3623,7 +3766,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,      SDValue Result =        getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), -                      Align, isVol, DstSV, DstSVOff); +                      Align, isVol, DstPtrInfo);      if (Result.getNode())        return Result; @@ -3633,11 +3776,11 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,    // code. If the target chooses to do this, this is the next best.    SDValue Result =      TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol, -                                DstSV, DstSVOff); +                                DstPtrInfo);    if (Result.getNode())      return Result; -  // Emit a library call.   +  // Emit a library call.    const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());    TargetLowering::ArgListTy Args;    TargetLowering::ArgListEntry Entry; @@ -3669,19 +3812,12 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,  }  SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, -                                SDValue Chain, -                                SDValue Ptr, SDValue Cmp, -                                SDValue Swp, const Value* PtrVal, +                                SDValue Chain, SDValue Ptr, SDValue Cmp, +                                SDValue Swp, MachinePointerInfo PtrInfo,                                  unsigned Alignment) {    if (Alignment == 0)  // Ensure that codegen never sees alignment 0      Alignment = getEVTAlignment(MemVT); -  // Check if the memory reference references a frame index -  if (!PtrVal) -    if (const FrameIndexSDNode *FI = -          dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) -      PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex()); -    MachineFunction &MF = getMachineFunction();    unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; @@ 
-3689,8 +3825,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,    Flags |= MachineMemOperand::MOVolatile;    MachineMemOperand *MMO = -    MF.getMachineMemOperand(PtrVal, Flags, 0, -                            MemVT.getStoreSize(), Alignment); +    MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);    return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO);  } @@ -3729,12 +3864,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,    if (Alignment == 0)  // Ensure that codegen never sees alignment 0      Alignment = getEVTAlignment(MemVT); -  // Check if the memory reference references a frame index -  if (!PtrVal) -    if (const FrameIndexSDNode *FI = -          dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) -      PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex()); -    MachineFunction &MF = getMachineFunction();    unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; @@ -3742,7 +3871,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,    Flags |= MachineMemOperand::MOVolatile;    MachineMemOperand *MMO = -    MF.getMachineMemOperand(PtrVal, Flags, 0, +    MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,                              MemVT.getStoreSize(), Alignment);    return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO); @@ -3785,7 +3914,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,  }  /// getMergeValues - Create a MERGE_VALUES node from the given operands. -/// Allowed to return something different (and simpler) if Simplify is true.  
SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,                                       DebugLoc dl) {    if (NumOps == 1) @@ -3803,18 +3931,18 @@ SDValue  SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,                                    const EVT *VTs, unsigned NumVTs,                                    const SDValue *Ops, unsigned NumOps, -                                  EVT MemVT, const Value *srcValue, int SVOff, +                                  EVT MemVT, MachinePointerInfo PtrInfo,                                    unsigned Align, bool Vol,                                    bool ReadMem, bool WriteMem) {    return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps, -                             MemVT, srcValue, SVOff, Align, Vol, +                             MemVT, PtrInfo, Align, Vol,                               ReadMem, WriteMem);  }  SDValue  SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,                                    const SDValue *Ops, unsigned NumOps, -                                  EVT MemVT, const Value *srcValue, int SVOff, +                                  EVT MemVT, MachinePointerInfo PtrInfo,                                    unsigned Align, bool Vol,                                    bool ReadMem, bool WriteMem) {    if (Align == 0)  // Ensure that codegen never sees alignment 0 @@ -3829,8 +3957,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,    if (Vol)      Flags |= MachineMemOperand::MOVolatile;    MachineMemOperand *MMO = -    MF.getMachineMemOperand(srcValue, Flags, SVOff, -                            MemVT.getStoreSize(), Align); +    MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align);    return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);  } @@ -3841,13 +3968,14 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,          
                          EVT MemVT, MachineMemOperand *MMO) {    assert((Opcode == ISD::INTRINSIC_VOID ||            Opcode == ISD::INTRINSIC_W_CHAIN || +          Opcode == ISD::PREFETCH ||            (Opcode <= INT_MAX &&             (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&           "Opcode is not a memory-accessing opcode!");    // Memoize the node unless it returns a flag.    MemIntrinsicSDNode *N; -  if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { +  if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {      FoldingSetNodeID ID;      AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);      void *IP = 0; @@ -3867,36 +3995,70 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,    return SDValue(N, 0);  } +/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a +/// MachinePointerInfo record from it.  This is particularly useful because the +/// code generator has many cases where it doesn't bother passing in a +/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". +static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) { +  // If this is FI+Offset, we can model it. +  if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) +    return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset); + +  // If this is (FI+Offset1)+Offset2, we can model it. +  if (Ptr.getOpcode() != ISD::ADD || +      !isa<ConstantSDNode>(Ptr.getOperand(1)) || +      !isa<FrameIndexSDNode>(Ptr.getOperand(0))) +    return MachinePointerInfo(); + +  int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); +  return MachinePointerInfo::getFixedStack(FI, Offset+ +                       cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue()); +} + +/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a +/// MachinePointerInfo record from it.  
This is particularly useful because the +/// code generator has many cases where it doesn't bother passing in a +/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". +static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) { +  // If the 'Offset' value isn't a constant, we can't handle this. +  if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp)) +    return InferPointerInfo(Ptr, OffsetNode->getSExtValue()); +  if (OffsetOp.getOpcode() == ISD::UNDEF) +    return InferPointerInfo(Ptr); +  return MachinePointerInfo(); +} + +  SDValue  SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,                        EVT VT, DebugLoc dl, SDValue Chain,                        SDValue Ptr, SDValue Offset, -                      const Value *SV, int SVOffset, EVT MemVT, +                      MachinePointerInfo PtrInfo, EVT MemVT,                        bool isVolatile, bool isNonTemporal, -                      unsigned Alignment) { +                      unsigned Alignment, const MDNode *TBAAInfo) {    if (Alignment == 0)  // Ensure that codegen never sees alignment 0      Alignment = getEVTAlignment(VT); -  // Check if the memory reference references a frame index -  if (!SV) -    if (const FrameIndexSDNode *FI = -          dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) -      SV = PseudoSourceValue::getFixedStack(FI->getIndex()); - -  MachineFunction &MF = getMachineFunction();    unsigned Flags = MachineMemOperand::MOLoad;    if (isVolatile)      Flags |= MachineMemOperand::MOVolatile;    if (isNonTemporal)      Flags |= MachineMemOperand::MONonTemporal; + +  // If we don't have a PtrInfo, infer the trivial frame index case to simplify +  // clients. 
+  if (PtrInfo.V == 0) +    PtrInfo = InferPointerInfo(Ptr, Offset); + +  MachineFunction &MF = getMachineFunction();    MachineMemOperand *MMO = -    MF.getMachineMemOperand(SV, Flags, SVOffset, -                            MemVT.getStoreSize(), Alignment); +    MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, +                            TBAAInfo);    return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);  }  SDValue -SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,  +SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,                        EVT VT, DebugLoc dl, SDValue Chain,                        SDValue Ptr, SDValue Offset, EVT MemVT,                        MachineMemOperand *MMO) { @@ -3943,25 +4105,26 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,  SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,                                SDValue Chain, SDValue Ptr, -                              const Value *SV, int SVOffset, +                              MachinePointerInfo PtrInfo,                                bool isVolatile, bool isNonTemporal, -                              unsigned Alignment) { +                              unsigned Alignment, const MDNode *TBAAInfo) {    SDValue Undef = getUNDEF(Ptr.getValueType());    return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, -                 SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment); +                 PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo);  } -SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,                                   SDValue Chain, SDValue Ptr, -                                 const Value *SV, -                                 int SVOffset, EVT MemVT, +                                 MachinePointerInfo PtrInfo, EVT MemVT,   
                                bool isVolatile, bool isNonTemporal, -                                 unsigned Alignment) { +                                 unsigned Alignment, const MDNode *TBAAInfo) {    SDValue Undef = getUNDEF(Ptr.getValueType());    return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, -                 SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment); +                 PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment, +                 TBAAInfo);  } +  SDValue  SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,                               SDValue Offset, ISD::MemIndexedMode AM) { @@ -3969,33 +4132,32 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,    assert(LD->getOffset().getOpcode() == ISD::UNDEF &&           "Load is already a indexed load!");    return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, -                 LD->getChain(), Base, Offset, LD->getSrcValue(), -                 LD->getSrcValueOffset(), LD->getMemoryVT(), +                 LD->getChain(), Base, Offset, LD->getPointerInfo(), +                 LD->getMemoryVT(),                   LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment());  }  SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, -                               SDValue Ptr, const Value *SV, int SVOffset, +                               SDValue Ptr, MachinePointerInfo PtrInfo,                                 bool isVolatile, bool isNonTemporal, -                               unsigned Alignment) { +                               unsigned Alignment, const MDNode *TBAAInfo) {    if (Alignment == 0)  // Ensure that codegen never sees alignment 0      Alignment = getEVTAlignment(Val.getValueType()); -  // Check if the memory reference references a frame index -  if (!SV) -    if (const FrameIndexSDNode *FI = -          dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) -      SV = 
PseudoSourceValue::getFixedStack(FI->getIndex()); - -  MachineFunction &MF = getMachineFunction();    unsigned Flags = MachineMemOperand::MOStore;    if (isVolatile)      Flags |= MachineMemOperand::MOVolatile;    if (isNonTemporal)      Flags |= MachineMemOperand::MONonTemporal; + +  if (PtrInfo.V == 0) +    PtrInfo = InferPointerInfo(Ptr); + +  MachineFunction &MF = getMachineFunction();    MachineMemOperand *MMO = -    MF.getMachineMemOperand(SV, Flags, SVOffset, -                            Val.getValueType().getStoreSize(), Alignment); +    MF.getMachineMemOperand(PtrInfo, Flags, +                            Val.getValueType().getStoreSize(), Alignment, +                            TBAAInfo);    return getStore(Chain, dl, Val, Ptr, MMO);  } @@ -4024,27 +4186,26 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,  }  SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, -                                    SDValue Ptr, const Value *SV, -                                    int SVOffset, EVT SVT, -                                    bool isVolatile, bool isNonTemporal, -                                    unsigned Alignment) { +                                    SDValue Ptr, MachinePointerInfo PtrInfo, +                                    EVT SVT,bool isVolatile, bool isNonTemporal, +                                    unsigned Alignment, +                                    const MDNode *TBAAInfo) {    if (Alignment == 0)  // Ensure that codegen never sees alignment 0      Alignment = getEVTAlignment(SVT); -  // Check if the memory reference references a frame index -  if (!SV) -    if (const FrameIndexSDNode *FI = -          dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) -      SV = PseudoSourceValue::getFixedStack(FI->getIndex()); - -  MachineFunction &MF = getMachineFunction();    unsigned Flags = MachineMemOperand::MOStore;    if (isVolatile)      Flags |= MachineMemOperand::MOVolatile;    if (isNonTemporal)   
   Flags |= MachineMemOperand::MONonTemporal; + +  if (PtrInfo.V == 0) +    PtrInfo = InferPointerInfo(Ptr); + +  MachineFunction &MF = getMachineFunction();    MachineMemOperand *MMO = -    MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment); +    MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment, +                            TBAAInfo);    return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);  } @@ -4170,7 +4331,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,    SDNode *N;    SDVTList VTs = getVTList(VT); -  if (VT != MVT::Flag) { +  if (VT != MVT::Glue) {      FoldingSetNodeID ID;      AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);      void *IP = 0; @@ -4186,7 +4347,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,    AllNodes.push_back(N);  #ifndef NDEBUG -  VerifyNode(N); +  VerifySDNode(N);  #endif    return SDValue(N, 0);  } @@ -4236,7 +4397,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,    // Memoize the node unless it returns a flag.    SDNode *N; -  if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { +  if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {      FoldingSetNodeID ID;      AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);      void *IP = 0; @@ -4268,7 +4429,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,    }    AllNodes.push_back(N);  #ifndef NDEBUG -  VerifyNode(N); +  VerifySDNode(N);  #endif    return SDValue(N, 0);  } @@ -4645,7 +4806,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,                                    unsigned NumOps) {    // If an identical node already exists, use it.    
void *IP = 0; -  if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) { +  if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {      FoldingSetNodeID ID;      AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);      if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) @@ -4845,9 +5006,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,  MachineSDNode *  SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,                               const SDValue *Ops, unsigned NumOps) { -  bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Flag; +  bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;    MachineSDNode *N; -  void *IP; +  void *IP = 0;    if (DoCSE) {      FoldingSetNodeID ID; @@ -4876,7 +5037,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,    AllNodes.push_back(N);  #ifndef NDEBUG -  VerifyNode(N); +  VerifyMachineNode(N);  #endif    return N;  } @@ -4907,7 +5068,7 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT,  /// else return NULL.  SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,                                        const SDValue *Ops, unsigned NumOps) { -  if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { +  if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {      FoldingSetNodeID ID;      AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);      void *IP = 0; @@ -5340,6 +5501,29 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {      SD->setHasDebugValue(true);  } +/// TransferDbgValues - Transfer SDDbgValues. 
+void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { +  if (From == To || !From.getNode()->getHasDebugValue()) +    return; +  SDNode *FromNode = From.getNode(); +  SDNode *ToNode = To.getNode(); +  SmallVector<SDDbgValue *, 2> &DVs = GetDbgValues(FromNode); +  SmallVector<SDDbgValue *, 2> ClonedDVs; +  for (SmallVector<SDDbgValue *, 2>::iterator I = DVs.begin(), E = DVs.end(); +       I != E; ++I) { +    SDDbgValue *Dbg = *I; +    if (Dbg->getKind() == SDDbgValue::SDNODE) { +      SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(), +                                      Dbg->getOffset(), Dbg->getDebugLoc(), +                                      Dbg->getOrder()); +      ClonedDVs.push_back(Clone); +    } +  } +  for (SmallVector<SDDbgValue *, 2>::iterator I = ClonedDVs.begin(), +         E = ClonedDVs.end(); I != E; ++I) +    AddDbgValue(*I, ToNode, false); +} +  //===----------------------------------------------------------------------===//  //                              SDNode Class  //===----------------------------------------------------------------------===// @@ -5367,7 +5551,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,  }  MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, -                     const SDValue *Ops, unsigned NumOps, EVT memvt,  +                     const SDValue *Ops, unsigned NumOps, EVT memvt,                       MachineMemOperand *mmo)     : SDNode(Opc, dl, VTs, Ops, NumOps),       MemoryVT(memvt), MMO(mmo) { @@ -5386,7 +5570,7 @@ void SDNode::Profile(FoldingSetNodeID &ID) const {  namespace {    struct EVTArray {      std::vector<EVT> VTs; -     +      EVTArray() {        VTs.reserve(MVT::LAST_VALUETYPE);        for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i) @@ -5406,7 +5590,7 @@ const EVT *SDNode::getValueTypeList(EVT VT) {      sys::SmartScopedLock<true> Lock(*VTMutex);      return &(*EVTs->insert(VT).first);    } else { -    
assert(VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE && +    assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE &&             "Value type out of range!");      return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];    } @@ -5478,9 +5662,9 @@ bool SDNode::isOperandOf(SDNode *N) const {  /// reachesChainWithoutSideEffects - Return true if this operand (which must  /// be a chain) reaches the specified operand without crossing any -/// side-effecting instructions.  In practice, this looks through token -/// factors and non-volatile loads.  In order to remain efficient, this only -/// looks a couple of nodes in, it does not do an exhaustive search. +/// side-effecting instructions on any chain path.  In practice, this looks +/// through token factors and non-volatile loads.  In order to remain efficient, +/// this only looks a couple of nodes in, it does not do an exhaustive search.  bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,                                                 unsigned Depth) const {    if (*this == Dest) return true; @@ -5490,12 +5674,12 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,    if (Depth == 0) return false;    // If this is a token factor, all inputs to the TF happen in parallel.  If any -  // of the operands of the TF reach dest, then we can do the xform. +  // of the operands of the TF does not reach dest, then we cannot do the xform.    if (getOpcode() == ISD::TokenFactor) {      for (unsigned i = 0, e = getNumOperands(); i != e; ++i) -      if (getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1)) -        return true; -    return false; +      if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1)) +        return false; +    return true;    }    // Loads don't have side effects, look through them. 
@@ -5600,6 +5784,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {    case ISD::EH_RETURN: return "EH_RETURN";    case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";    case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; +  case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP";    case ISD::ConstantPool:  return "ConstantPool";    case ISD::ExternalSymbol: return "ExternalSymbol";    case ISD::BlockAddress:  return "BlockAddress"; @@ -5690,6 +5875,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {    case ISD::INSERT_VECTOR_ELT:   return "insert_vector_elt";    case ISD::EXTRACT_VECTOR_ELT:  return "extract_vector_elt";    case ISD::CONCAT_VECTORS:      return "concat_vectors"; +  case ISD::INSERT_SUBVECTOR:    return "insert_subvector";    case ISD::EXTRACT_SUBVECTOR:   return "extract_subvector";    case ISD::SCALAR_TO_VECTOR:    return "scalar_to_vector";    case ISD::VECTOR_SHUFFLE:      return "vector_shuffle"; @@ -5723,7 +5909,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {    case ISD::UINT_TO_FP:  return "uint_to_fp";    case ISD::FP_TO_SINT:  return "fp_to_sint";    case ISD::FP_TO_UINT:  return "fp_to_uint"; -  case ISD::BIT_CONVERT: return "bit_convert"; +  case ISD::BITCAST:     return "bit_convert";    case ISD::FP16_TO_FP32: return "fp16_to_fp32";    case ISD::FP32_TO_FP16: return "fp32_to_fp16"; @@ -5935,12 +6121,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {        OS << LBB->getName() << " ";      OS << (const void*)BBDN->getBasicBlock() << ">";    } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { -    if (G && R->getReg() && -        TargetRegisterInfo::isPhysicalRegister(R->getReg())) { -      OS << " %" << G->getTarget().getRegisterInfo()->getName(R->getReg()); -    } else { -      OS << " %reg" << R->getReg(); -    } +    OS << ' ' << PrintReg(R->getReg(), G ? 
G->getTarget().getRegisterInfo() :0);    } else if (const ExternalSymbolSDNode *ES =               dyn_cast<ExternalSymbolSDNode>(this)) {      OS << "'" << ES->getSymbol() << "'"; @@ -5986,7 +6167,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {      const char *AM = getIndexedModeName(ST->getAddressingMode());      if (*AM)        OS << ", " << AM; -     +      OS << ">";    } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {      OS << "<" << *M->getMemOperand() << ">"; @@ -6037,7 +6218,7 @@ void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {  static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,                                    const SelectionDAG *G, unsigned depth, -                                  unsigned indent)  +                                  unsigned indent)  {    if (depth == 0)      return; @@ -6058,7 +6239,7 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,  void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,                              unsigned depth) const {    printrWithDepthHelper(OS, this, G, depth, 0); -}  +}  void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {    // Don't print impossibly deep things. @@ -6072,7 +6253,7 @@ void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {  void SDNode::dumprFull(const SelectionDAG *G) const {    // Don't print impossibly deep things.    
dumprWithDepth(G, 100); -}  +}  static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) @@ -6156,10 +6337,10 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {  } -/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a  -/// location that is 'Dist' units away from the location that the 'Base' load  +/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a +/// location that is 'Dist' units away from the location that the 'Base' load  /// is loading from. -bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,  +bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,                                       unsigned Bytes, int Dist) const {    if (LD->getChain() != Base->getChain())      return false; @@ -6180,11 +6361,11 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,      if (FS != BFS || FS != (int)Bytes) return false;      return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);    } -  if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) { -    ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1)); -    if (V && (V->getSExtValue() == Dist*Bytes)) -      return true; -  } + +  // Handle X+C +  if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && +      cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) +    return true;    const GlobalValue *GV1 = NULL;    const GlobalValue *GV2 = NULL; @@ -6225,15 +6406,14 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {    int64_t FrameOffset = 0;    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {      FrameIdx = FI->getIndex(); -  } else if (Ptr.getOpcode() == ISD::ADD && -             isa<ConstantSDNode>(Ptr.getOperand(1)) && +  } else if (isBaseWithConstantOffset(Ptr) &&               isa<FrameIndexSDNode>(Ptr.getOperand(0))) 
{ +    // Handle FI+Cst      FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();      FrameOffset = Ptr.getConstantOperandVal(1);    }    if (FrameIdx != (1 << 31)) { -    // FIXME: Handle FI+CST.      const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();      unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),                                      FrameOffset); @@ -6354,7 +6534,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,      if (OpVal.getOpcode() == ISD::UNDEF)        SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);      else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) -      SplatValue |= APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize). +      SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize).                      zextOrTrunc(sz) << BitPos;      else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))        SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos; @@ -6369,10 +6549,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,    while (sz > 8) {      unsigned HalfSize = sz / 2; -    APInt HighValue = APInt(SplatValue).lshr(HalfSize).trunc(HalfSize); -    APInt LowValue = APInt(SplatValue).trunc(HalfSize); -    APInt HighUndef = APInt(SplatUndef).lshr(HalfSize).trunc(HalfSize); -    APInt LowUndef = APInt(SplatUndef).trunc(HalfSize); +    APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize); +    APInt LowValue = SplatValue.trunc(HalfSize); +    APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize); +    APInt LowUndef = SplatUndef.trunc(HalfSize);      // If the two halves do not match (ignoring undef bits), stop here.      if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) || @@ -6412,7 +6592,7 @@ static void checkForCyclesHelper(const SDNode *N,    // If this node has already been checked, don't check it again.    
if (Checked.count(N))      return; -   +    // If a node has already been visited on this depth-first walk, reject it as    // a cycle.    if (!Visited.insert(N)) { @@ -6421,10 +6601,10 @@ static void checkForCyclesHelper(const SDNode *N,      errs() << "Detected cycle in SelectionDAG\n";      abort();    } -   +    for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i)      checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked); -   +    Checked.insert(N);    Visited.erase(N);  } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e65744592c8b..452f5614b7bf 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -15,6 +15,7 @@  #include "SDNodeDbgValue.h"  #include "SelectionDAGBuilder.h"  #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PostOrderIterator.h"  #include "llvm/ADT/SmallSet.h"  #include "llvm/Analysis/AliasAnalysis.h"  #include "llvm/Analysis/ConstantFolding.h" @@ -43,9 +44,8 @@  #include "llvm/CodeGen/PseudoSourceValue.h"  #include "llvm/CodeGen/SelectionDAG.h"  #include "llvm/Analysis/DebugInfo.h" -#include "llvm/Target/TargetRegisterInfo.h"  #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h"  #include "llvm/Target/TargetInstrInfo.h"  #include "llvm/Target/TargetIntrinsicInfo.h"  #include "llvm/Target/TargetLowering.h" @@ -70,10 +70,28 @@ LimitFPPrecision("limit-float-precision",                   cl::location(LimitFloatPrecision),                   cl::init(0)); +// Limit the width of DAG chains. This is important in general to prevent +// prevent DAG-based analysis from blowing up. For example, alias analysis and +// load clustering may not complete in reasonable time. It is difficult to +// recognize and avoid this situation within each individual analysis, and +// future analyses are likely to have the same behavior. 
Limiting DAG width is +// the safe approach, and will be especially important with global DAGs. +// +// MaxParallelChains default is arbitrarily high to avoid affecting +// optimization, but could be lowered to improve compile time. Any ld-ld-st-st +// sequence over this should have been converted to llvm.memcpy by the +// frontend. It easy to induce this behavior with .ll code such as: +// %buffer = alloca [4096 x i8] +// %data = load [4096 x i8]* %argPtr +// store [4096 x i8] %data, [4096 x i8]* %buffer +static cl::opt<unsigned> +MaxParallelChains("dag-chain-limit", cl::desc("Max parallel isel dag chains"), +                  cl::init(64), cl::Hidden); +  static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,                                        const SDValue *Parts, unsigned NumParts,                                        EVT PartVT, EVT ValueVT); -   +  /// getCopyFromParts - Create a value that contains the specified legal parts  /// combined into the value they represent.  
If the parts combine to a type  /// larger then ValueVT then AssertOp can be used to specify whether the extra @@ -85,7 +103,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,                                  ISD::NodeType AssertOp = ISD::DELETED_NODE) {    if (ValueVT.isVector())      return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT); -   +    assert(NumParts > 0 && "No parts to assemble!");    const TargetLowering &TLI = DAG.getTargetLoweringInfo();    SDValue Val = Parts[0]; @@ -112,8 +130,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,          Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,                                RoundParts / 2, PartVT, HalfVT);        } else { -        Lo = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[0]); -        Hi = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[1]); +        Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); +        Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);        }        if (TLI.isBigEndian()) @@ -145,8 +163,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,        assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&               "Unexpected split");        SDValue Lo, Hi; -      Lo = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[0]); -      Hi = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[1]); +      Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); +      Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);        if (TLI.isBigEndian())          std::swap(Lo, Hi);        Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); @@ -188,7 +206,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,    }    if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) -    return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val); +    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);    llvm_unreachable("Unknown mismatch!");    return SDValue(); @@ 
-206,7 +224,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,    assert(NumParts > 0 && "No parts to assemble!");    const TargetLowering &TLI = DAG.getTargetLoweringInfo();    SDValue Val = Parts[0]; -   +    // Handle a multi-element vector.    if (NumParts > 1) {      EVT IntermediateVT, RegisterVT; @@ -219,7 +237,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,      assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");      assert(RegisterVT == Parts[0].getValueType() &&             "Part type doesn't match part!"); -     +      // Assemble the parts into intermediate operands.      SmallVector<SDValue, 8> Ops(NumIntermediates);      if (NumIntermediates == NumParts) { @@ -238,20 +256,20 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,          Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,                                    PartVT, IntermediateVT);      } -     +      // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the      // intermediate operands.      Val = DAG.getNode(IntermediateVT.isVector() ?                        ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL,                        ValueVT, &Ops[0], NumIntermediates);    } -   +    // There is now one part, held in Val.  Correct it to match ValueVT.    PartVT = Val.getValueType(); -   +    if (PartVT == ValueVT)      return Val; -   +    if (PartVT.isVector()) {      // If the element type of the source/dest vectors are the same, but the      // parts vector has more elements than the value vector, then we have a @@ -262,12 +280,12 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,               "Cannot narrow, it would be a lossy transformation");        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,                           DAG.getIntPtrConstant(0)); -    }                                       -     +    } +      // Vector/Vector bitcast. 
-    return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val); +    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);    } -   +    assert(ValueVT.getVectorElementType() == PartVT &&           ValueVT.getVectorNumElements() == 1 &&           "Only trivial scalar-to-vector conversions should get here!"); @@ -280,7 +298,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,  static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,                                   SDValue Val, SDValue *Parts, unsigned NumParts,                                   EVT PartVT); -   +  /// getCopyToParts - Create a series of nodes that contain the specified value  /// split into legal parts.  If the parts contain more bits than Val, then, for  /// integers, ExtendKind can be used to specify how to generate the extra bits. @@ -289,11 +307,11 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,                             EVT PartVT,                             ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {    EVT ValueVT = Val.getValueType(); -   +    // Handle the vector case separately.    if (ValueVT.isVector())      return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT); -   +    const TargetLowering &TLI = DAG.getTargetLoweringInfo();    unsigned PartBits = PartVT.getSizeInBits();    unsigned OrigNumParts = NumParts; @@ -316,14 +334,14 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,        Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);      } else {        assert(PartVT.isInteger() && ValueVT.isInteger() && -             "Unknown mismatch!");              +             "Unknown mismatch!");        ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);        Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);      }    } else if (PartBits == ValueVT.getSizeInBits()) {      // Different types of the same size.      
assert(NumParts == 1 && PartVT != ValueVT); -    Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); +    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);    } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {      // If the parts cover less bits than value has, truncate the value.      assert(PartVT.isInteger() && ValueVT.isInteger() && @@ -366,7 +384,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,    // The number of parts is a power of 2.  Repeatedly bisect the value using    // EXTRACT_ELEMENT. -  Parts[0] = DAG.getNode(ISD::BIT_CONVERT, DL, +  Parts[0] = DAG.getNode(ISD::BITCAST, DL,                           EVT::getIntegerVT(*DAG.getContext(),                                             ValueVT.getSizeInBits()),                           Val); @@ -384,8 +402,8 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,                            ThisVT, Part0, DAG.getIntPtrConstant(0));        if (ThisBits == PartBits && ThisVT != PartVT) { -        Part0 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part0); -        Part1 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part1); +        Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); +        Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);        }      }    } @@ -403,13 +421,13 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,    EVT ValueVT = Val.getValueType();    assert(ValueVT.isVector() && "Not a vector");    const TargetLowering &TLI = DAG.getTargetLoweringInfo(); -   +    if (NumParts == 1) {      if (PartVT == ValueVT) {        // Nothing to do.      } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {        // Bitconvert vector->vector case. 
-      Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); +      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);      } else if (PartVT.isVector() &&                 PartVT.getVectorElementType() == ValueVT.getVectorElementType()&&                 PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { @@ -420,7 +438,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,        for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)          Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,                                    ElementVT, Val, DAG.getIntPtrConstant(i))); -       +        for (unsigned i = ValueVT.getVectorNumElements(),             e = PartVT.getVectorNumElements(); i != e; ++i)          Ops.push_back(DAG.getUNDEF(ElementVT)); @@ -428,7 +446,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,        Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());        // FIXME: Use CONCAT for 2x -> 4x. -       +        //SDValue UndefElts = DAG.getUNDEF(VectorTy);        //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);      } else { @@ -439,11 +457,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,        Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,                          PartVT, Val, DAG.getIntPtrConstant(0));      } -     +      Parts[0] = Val;      return;    } -   +    // Handle a multi-element vector.    EVT IntermediateVT, RegisterVT;    unsigned NumIntermediates; @@ -451,11 +469,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,                                                  IntermediateVT,                                                  NumIntermediates, RegisterVT);    unsigned NumElements = ValueVT.getVectorNumElements(); -   +    assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");    NumParts = NumRegs; // Silence a compiler warning.    
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); -   +    // Split the vector into intermediate operands.    SmallVector<SDValue, 8> Ops(NumIntermediates);    for (unsigned i = 0; i != NumIntermediates; ++i) { @@ -467,7 +485,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,        Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,                             IntermediateVT, Val, DAG.getIntPtrConstant(i));    } -   +    // Split the intermediate operands into legal parts.    if (NumParts == NumIntermediates) {      // If the register was not expanded, promote or copy the value, @@ -618,48 +636,49 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,        }        Chain = P.getValue(1); +      Parts[i] = P;        // If the source register was virtual and if we know something about it,        // add an assert node. -      if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && -          RegisterVT.isInteger() && !RegisterVT.isVector()) { -        unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; -        if (FuncInfo.LiveOutRegInfo.size() > SlotNo) { -          const FunctionLoweringInfo::LiveOutInfo &LOI = -            FuncInfo.LiveOutRegInfo[SlotNo]; - -          unsigned RegSize = RegisterVT.getSizeInBits(); -          unsigned NumSignBits = LOI.NumSignBits; -          unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); - -          // FIXME: We capture more information than the dag can represent.  For -          // now, just use the tightest assertzext/assertsext possible. 
-          bool isSExt = true; -          EVT FromVT(MVT::Other); -          if (NumSignBits == RegSize) -            isSExt = true, FromVT = MVT::i1;   // ASSERT SEXT 1 -          else if (NumZeroBits >= RegSize-1) -            isSExt = false, FromVT = MVT::i1;  // ASSERT ZEXT 1 -          else if (NumSignBits > RegSize-8) -            isSExt = true, FromVT = MVT::i8;   // ASSERT SEXT 8 -          else if (NumZeroBits >= RegSize-8) -            isSExt = false, FromVT = MVT::i8;  // ASSERT ZEXT 8 -          else if (NumSignBits > RegSize-16) -            isSExt = true, FromVT = MVT::i16;  // ASSERT SEXT 16 -          else if (NumZeroBits >= RegSize-16) -            isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 -          else if (NumSignBits > RegSize-32) -            isSExt = true, FromVT = MVT::i32;  // ASSERT SEXT 32 -          else if (NumZeroBits >= RegSize-32) -            isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 - -          if (FromVT != MVT::Other) -            P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, -                            RegisterVT, P, DAG.getValueType(FromVT)); -        } -      } +      if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || +          !RegisterVT.isInteger() || RegisterVT.isVector() || +          !FuncInfo.LiveOutRegInfo.inBounds(Regs[Part+i])) +        continue; +       +      const FunctionLoweringInfo::LiveOutInfo &LOI = +        FuncInfo.LiveOutRegInfo[Regs[Part+i]]; + +      unsigned RegSize = RegisterVT.getSizeInBits(); +      unsigned NumSignBits = LOI.NumSignBits; +      unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); + +      // FIXME: We capture more information than the dag can represent.  For +      // now, just use the tightest assertzext/assertsext possible. 
+      bool isSExt = true; +      EVT FromVT(MVT::Other); +      if (NumSignBits == RegSize) +        isSExt = true, FromVT = MVT::i1;   // ASSERT SEXT 1 +      else if (NumZeroBits >= RegSize-1) +        isSExt = false, FromVT = MVT::i1;  // ASSERT ZEXT 1 +      else if (NumSignBits > RegSize-8) +        isSExt = true, FromVT = MVT::i8;   // ASSERT SEXT 8 +      else if (NumZeroBits >= RegSize-8) +        isSExt = false, FromVT = MVT::i8;  // ASSERT ZEXT 8 +      else if (NumSignBits > RegSize-16) +        isSExt = true, FromVT = MVT::i16;  // ASSERT SEXT 16 +      else if (NumZeroBits >= RegSize-16) +        isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 +      else if (NumSignBits > RegSize-32) +        isSExt = true, FromVT = MVT::i32;  // ASSERT SEXT 32 +      else if (NumZeroBits >= RegSize-32) +        isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 +      else +        continue; -      Parts[i] = P; +      // Add an assertion node. +      assert(FromVT != MVT::Other); +      Parts[i] = DAG.getNode(isSExt ? 
ISD::AssertSext : ISD::AssertZext, dl, +                             RegisterVT, P, DAG.getValueType(FromVT));      }      Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), @@ -889,11 +908,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,                                Val.getResNo(), Offset, dl, DbgSDNodeOrder);          DAG.AddDbgValue(SDV, Val.getNode(), false);        } -    } else { -      SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), -                            Offset, dl, SDNodeOrder); -      DAG.AddDbgValue(SDV, 0, false); -    } +    } else  +      DEBUG(dbgs() << "Dropping debug info for " << DI);      DanglingDebugInfoMap[V] = DanglingDebugInfo();    }  } @@ -913,7 +929,9 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {      unsigned InReg = It->second;      RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());      SDValue Chain = DAG.getEntryNode(); -    return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL); +    N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL); +    resolveDanglingDebugInfo(V, N); +    return N;    }    // Otherwise create a new SDValue and remember it. @@ -1088,7 +1106,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {        Chains[i] =          DAG.getStore(Chain, getCurDebugLoc(),                       SDValue(RetOp.getNode(), RetOp.getResNo() + i), -                     Add, NULL, Offsets[i], false, false, 0); +                     // FIXME: better loc info would be nice. 
+                     Add, MachinePointerInfo(), false, false, 0);      }      Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), @@ -1347,7 +1366,7 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){      if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)        return false;    } -   +    return true;  } @@ -1383,6 +1402,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {    // If this is a series of conditions that are or'd or and'd together, emit    // this as a sequence of branches instead of setcc's with and/or operations. +  // As long as jumps are not expensive, this should improve performance.    // For example, instead of something like:    //     cmp A, B    //     C = seteq @@ -1397,7 +1417,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {    //     jle foo    //    if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { -    if (BOp->hasOneUse() && +    if (!TLI.isJumpExpensive() &&  +        BOp->hasOneUse() &&          (BOp->getOpcode() == Instruction::And ||           BOp->getOpcode() == Instruction::Or)) {        FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, @@ -1502,10 +1523,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,                                 MVT::Other, getControlRoot(), Cond,                                 DAG.getBasicBlock(CB.TrueBB)); -  // Insert the false branch. -  if (CB.FalseBB != NextBlock) -    BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, -                         DAG.getBasicBlock(CB.FalseBB)); +  // Insert the false branch. Do this even if it's a fall through branch, +  // this makes it easier to do DAG optimizations which require inverting +  // the branch condition. 
+  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, +                       DAG.getBasicBlock(CB.FalseBB));    DAG.setRoot(BrCond);  } @@ -1592,12 +1614,28 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,                                    Sub, DAG.getConstant(B.Range, VT),                                    ISD::SETUGT); -  SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), -                                       TLI.getPointerTy()); +  // Determine the type of the test operands. +  bool UsePtrType = false; +  if (!TLI.isTypeLegal(VT)) +    UsePtrType = true; +  else { +    for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) +      if ((uint64_t)((int64_t)B.Cases[i].Mask >> VT.getSizeInBits()) + 1 >= 2) { +        // Switch table case range are encoded into series of masks. +        // Just use pointer type, it's guaranteed to fit. +        UsePtrType = true; +        break; +      } +  } +  if (UsePtrType) { +    VT = TLI.getPointerTy(); +    Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT); +  } -  B.Reg = FuncInfo.CreateReg(TLI.getPointerTy()); +  B.RegVT = VT; +  B.Reg = FuncInfo.CreateReg(VT);    SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), -                                    B.Reg, ShiftOp); +                                    B.Reg, Sub);    // Set NextBlock to be the MBB immediately after the current one, if any.    // This is used to avoid emitting unnecessary branches to the next block. 
@@ -1623,36 +1661,34 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,  }  /// visitBitTestCase - this function produces one "bit test" -void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, +void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, +                                           MachineBasicBlock* NextMBB,                                             unsigned Reg,                                             BitTestCase &B,                                             MachineBasicBlock *SwitchBB) { -  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, -                                       TLI.getPointerTy()); +  EVT VT = BB.RegVT; +  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), +                                       Reg, VT);    SDValue Cmp;    if (CountPopulation_64(B.Mask) == 1) {      // Testing for a single bit; just compare the shift count with what it      // would need to be to shift a 1 bit in that position.      
Cmp = DAG.getSetCC(getCurDebugLoc(), -                       TLI.getSetCCResultType(ShiftOp.getValueType()), +                       TLI.getSetCCResultType(VT),                         ShiftOp, -                       DAG.getConstant(CountTrailingZeros_64(B.Mask), -                                       TLI.getPointerTy()), +                       DAG.getConstant(CountTrailingZeros_64(B.Mask), VT),                         ISD::SETEQ);    } else {      // Make desired shift -    SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), -                                    TLI.getPointerTy(), -                                    DAG.getConstant(1, TLI.getPointerTy()), -                                    ShiftOp); +    SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT, +                                    DAG.getConstant(1, VT), ShiftOp);      // Emit bit tests and jumps      SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), -                                TLI.getPointerTy(), SwitchVal, -                                DAG.getConstant(B.Mask, TLI.getPointerTy())); +                                VT, SwitchVal, DAG.getConstant(B.Mask, VT));      Cmp = DAG.getSetCC(getCurDebugLoc(), -                       TLI.getSetCCResultType(AndOp.getValueType()), -                       AndOp, DAG.getConstant(0, TLI.getPointerTy()), +                       TLI.getSetCCResultType(VT), +                       AndOp, DAG.getConstant(0, VT),                         ISD::SETNE);    } @@ -1732,10 +1768,56 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,    if (++BBI != FuncInfo.MF->end())      NextBlock = BBI; -  // TODO: If any two of the cases has the same destination, and if one value +  // If any two of the cases has the same destination, and if one value    // is the same as the other, but has one bit unset that the other has set,    // use bit manipulation to do two compares at once.  
For example:    // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" +  // TODO: This could be extended to merge any 2 cases in switches with 3 cases. +  // TODO: Handle cases where CR.CaseBB != SwitchBB. +  if (Size == 2 && CR.CaseBB == SwitchBB) { +    Case &Small = *CR.Range.first; +    Case &Big = *(CR.Range.second-1); + +    if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) { +      const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue(); +      const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue(); + +      // Check that there is only one bit different. +      if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 && +          (SmallValue | BigValue) == BigValue) { +        // Isolate the common bit. +        APInt CommonBit = BigValue & ~SmallValue; +        assert((SmallValue | CommonBit) == BigValue && +               CommonBit.countPopulation() == 1 && "Not a common bit?"); + +        SDValue CondLHS = getValue(SV); +        EVT VT = CondLHS.getValueType(); +        DebugLoc DL = getCurDebugLoc(); + +        SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, +                                 DAG.getConstant(CommonBit, VT)); +        SDValue Cond = DAG.getSetCC(DL, MVT::i1, +                                    Or, DAG.getConstant(BigValue, VT), +                                    ISD::SETEQ); + +        // Update successor info. +        SwitchBB->addSuccessor(Small.BB); +        SwitchBB->addSuccessor(Default); + +        // Insert the true branch. +        SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, +                                     getControlRoot(), Cond, +                                     DAG.getBasicBlock(Small.BB)); + +        // Insert the false branch. 
+        BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, +                             DAG.getBasicBlock(Default)); + +        DAG.setRoot(BrCond); +        return true; +      } +    } +  }    // Rearrange the case blocks so that the last one falls through if possible.    if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { @@ -1800,9 +1882,8 @@ static inline bool areJTsAllowed(const TargetLowering &TLI) {  }  static APInt ComputeRange(const APInt &First, const APInt &Last) { -  APInt LastExt(Last), FirstExt(First);    uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; -  LastExt.sext(BitWidth); FirstExt.sext(BitWidth); +  APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);    return (LastExt - FirstExt + 1ULL);  } @@ -2151,7 +2232,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,    }    BitTestBlock BTB(lowBound, cmpRange, SV, -                   -1U, (CR.CaseBB == SwitchBB), +                   -1U, MVT::Other, (CR.CaseBB == SwitchBB),                     CR.CaseBB, Default, BTC);    if (CR.CaseBB == SwitchBB) @@ -2180,7 +2261,8 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,    if (Cases.size() >= 2)      // Must recompute end() each iteration because it may be      // invalidated by erase if we hold on to it -    for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) { +    for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin()); +         J != Cases.end(); ) {        const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();        const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();        MachineBasicBlock* nextBB = J->BB; @@ -2205,6 +2287,19 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,    return numCmps;  } +void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, +                                           MachineBasicBlock *Last) { +  // Update JTCases. 
+  for (unsigned i = 0, e = JTCases.size(); i != e; ++i) +    if (JTCases[i].first.HeaderBB == First) +      JTCases[i].first.HeaderBB = Last; + +  // Update BitTestCases. +  for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) +    if (BitTestCases[i].Parent == First) +      BitTestCases[i].Parent = Last; +} +  void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {    MachineBasicBlock *SwitchMBB = FuncInfo.MBB; @@ -2292,30 +2387,14 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {  void SelectionDAGBuilder::visitFSub(const User &I) {    // -0.0 - X --> fneg    const Type *Ty = I.getType(); -  if (Ty->isVectorTy()) { -    if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) { -      const VectorType *DestTy = cast<VectorType>(I.getType()); -      const Type *ElTy = DestTy->getElementType(); -      unsigned VL = DestTy->getNumElements(); -      std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy)); -      Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size()); -      if (CV == CNZ) { -        SDValue Op2 = getValue(I.getOperand(1)); -        setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), -                                 Op2.getValueType(), Op2)); -        return; -      } -    } +  if (isa<Constant>(I.getOperand(0)) && +      I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { +    SDValue Op2 = getValue(I.getOperand(1)); +    setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), +                             Op2.getValueType(), Op2)); +    return;    } -  if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0))) -    if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) { -      SDValue Op2 = getValue(I.getOperand(1)); -      setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), -                               Op2.getValueType(), Op2)); -      return; -    } -    visitBinary(I, ISD::FSUB);  } @@ -2329,31 +2408,29 @@ void SelectionDAGBuilder::visitBinary(const User 
&I, unsigned OpCode) {  void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {    SDValue Op1 = getValue(I.getOperand(0));    SDValue Op2 = getValue(I.getOperand(1)); -  if (!I.getType()->isVectorTy() && -      Op2.getValueType() != TLI.getShiftAmountTy()) { +   +  MVT ShiftTy = TLI.getShiftAmountTy(); +   +  // Coerce the shift amount to the right type if we can. +  if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { +    unsigned ShiftSize = ShiftTy.getSizeInBits(); +    unsigned Op2Size = Op2.getValueType().getSizeInBits(); +    DebugLoc DL = getCurDebugLoc(); +          // If the operand is smaller than the shift count type, promote it. -    EVT PTy = TLI.getPointerTy(); -    EVT STy = TLI.getShiftAmountTy(); -    if (STy.bitsGT(Op2.getValueType())) -      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), -                        TLI.getShiftAmountTy(), Op2); +    if (ShiftSize > Op2Size) +      Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2); +          // If the operand is larger than the shift count type but the shift      // count type has enough bits to represent any shift value, truncate      // it now. This is a common case and it exposes the truncate to      // optimization early. -    else if (STy.getSizeInBits() >= -             Log2_32_Ceil(Op2.getValueType().getSizeInBits())) -      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), -                        TLI.getShiftAmountTy(), Op2); -    // Otherwise we'll need to temporarily settle for some other -    // convenient type; type legalization will make adjustments as -    // needed. 
-    else if (PTy.bitsLT(Op2.getValueType())) -      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), -                        TLI.getPointerTy(), Op2); -    else if (PTy.bitsGT(Op2.getValueType())) -      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), -                        TLI.getPointerTy(), Op2); +    else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits())) +      Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); +    // Otherwise we'll need to temporarily settle for some other convenient +    // type.  Type legalization will make adjustments once the shiftee is split. +    else +      Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);    }    setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), @@ -2499,9 +2576,9 @@ void SelectionDAGBuilder::visitBitCast(const User &I) {    EVT DestVT = TLI.getValueType(I.getType());    // BitCast assures us that source and destination are the same size so this is -  // either a BIT_CONVERT or a no-op. +  // either a BITCAST or a no-op.    if (DestVT != N.getValueType()) -    setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), +    setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(),                               DestVT, N)); // convert types.    else      setValue(&I, N);            // noop cast. @@ -2650,7 +2727,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {          } else {            StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;            if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && -              StartIdx[Input] + MaskNumElts < SrcNumElts) +              StartIdx[Input] + MaskNumElts <= SrcNumElts)              RangeUse[Input] = 1; // Extract from a multiple of the mask length.          
}        } @@ -2726,8 +2803,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {    bool IntoUndef = isa<UndefValue>(Op0);    bool FromUndef = isa<UndefValue>(Op1); -  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, -                                            I.idx_begin(), I.idx_end()); +  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());    SmallVector<EVT, 4> AggValueVTs;    ComputeValueVTs(TLI, AggTy, AggValueVTs); @@ -2765,8 +2841,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {    const Type *ValTy = I.getType();    bool OutOfUndef = isa<UndefValue>(Op0); -  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, -                                            I.idx_begin(), I.idx_end()); +  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());    SmallVector<EVT, 4> ValValueVTs;    ComputeValueVTs(TLI, ValTy, ValValueVTs); @@ -2884,7 +2959,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {    // Handle alignment.  If the requested alignment is less than or equal to    // the stack alignment, ignore it.  If the size is greater than or equal to    // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. 
-  unsigned StackAlign = TM.getFrameInfo()->getStackAlignment(); +  unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();    if (Align <= StackAlign)      Align = 0; @@ -2920,6 +2995,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {    bool isVolatile = I.isVolatile();    bool isNonTemporal = I.getMetadata("nontemporal") != 0;    unsigned Alignment = I.getAlignment(); +  const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);    SmallVector<EVT, 4> ValueVTs;    SmallVector<uint64_t, 4> Offsets; @@ -2930,10 +3006,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {    SDValue Root;    bool ConstantMemory = false; -  if (I.isVolatile()) +  if (I.isVolatile() || NumValues > MaxParallelChains)      // Serialize volatile loads with other side effects.      Root = getRoot(); -  else if (AA->pointsToConstantMemory(SV)) { +  else if (AA->pointsToConstantMemory( +             AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) {      // Do not serialize (non-volatile) loads of constant memory with anything.      Root = DAG.getEntryNode();      ConstantMemory = true; @@ -2943,23 +3020,38 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {    }    SmallVector<SDValue, 4> Values(NumValues); -  SmallVector<SDValue, 4> Chains(NumValues); +  SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), +                                          NumValues));    EVT PtrVT = Ptr.getValueType(); -  for (unsigned i = 0; i != NumValues; ++i) { +  unsigned ChainI = 0; +  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { +    // Serializing loads here may result in excessive register pressure, and +    // TokenFactor places arbitrary choke points on the scheduler. SD scheduling +    // could recover a bit by hoisting nodes upward in the chain by recognizing +    // they are side-effect free or do not alias. 
The optimizer should really +    // avoid this case by converting large object/array copies to llvm.memcpy +    // (MaxParallelChains should always remain as failsafe). +    if (ChainI == MaxParallelChains) { +      assert(PendingLoads.empty() && "PendingLoads must be serialized first"); +      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), +                                  MVT::Other, &Chains[0], ChainI); +      Root = Chain; +      ChainI = 0; +    }      SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),                              PtrVT, Ptr,                              DAG.getConstant(Offsets[i], PtrVT));      SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, -                            A, SV, Offsets[i], isVolatile,  -                            isNonTemporal, Alignment); +                            A, MachinePointerInfo(SV, Offsets[i]), isVolatile, +                            isNonTemporal, Alignment, TBAAInfo);      Values[i] = L; -    Chains[i] = L.getValue(1); +    Chains[ChainI] = L.getValue(1);    }    if (!ConstantMemory) {      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), -                                MVT::Other, &Chains[0], NumValues); +                                MVT::Other, &Chains[0], ChainI);      if (isVolatile)        DAG.setRoot(Chain);      else @@ -2989,23 +3081,37 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {    SDValue Ptr = getValue(PtrV);    SDValue Root = getRoot(); -  SmallVector<SDValue, 4> Chains(NumValues); +  SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), +                                          NumValues));    EVT PtrVT = Ptr.getValueType();    bool isVolatile = I.isVolatile();    bool isNonTemporal = I.getMetadata("nontemporal") != 0;    unsigned Alignment = I.getAlignment(); - -  for (unsigned i = 0; i != NumValues; ++i) { +  const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + +  unsigned ChainI = 0; +  for (unsigned 
i = 0; i != NumValues; ++i, ++ChainI) { +    // See visitLoad comments. +    if (ChainI == MaxParallelChains) { +      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), +                                  MVT::Other, &Chains[0], ChainI); +      Root = Chain; +      ChainI = 0; +    }      SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,                                DAG.getConstant(Offsets[i], PtrVT)); -    Chains[i] = DAG.getStore(Root, getCurDebugLoc(), -                             SDValue(Src.getNode(), Src.getResNo() + i), -                             Add, PtrV, Offsets[i], isVolatile,  -                             isNonTemporal, Alignment); -  } - -  DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), -                          MVT::Other, &Chains[0], NumValues)); +    SDValue St = DAG.getStore(Root, getCurDebugLoc(), +                              SDValue(Src.getNode(), Src.getResNo() + i), +                              Add, MachinePointerInfo(PtrV, Offsets[i]), +                              isVolatile, isNonTemporal, Alignment, TBAAInfo); +    Chains[ChainI] = St; +  } + +  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), +                                  MVT::Other, &Chains[0], ChainI); +  ++SDNodeOrder; +  AssignOrderingToNode(StoreNode.getNode()); +  DAG.setRoot(StoreNode);  }  /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC @@ -3031,7 +3137,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,    bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);    // Add the intrinsic ID as an integer operand if it's not a target intrinsic. -  if (!IsTgtIntrinsic) +  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || +      Info.opc == ISD::INTRINSIC_W_CHAIN)      Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));    // Add all operands of the call to the operand list. 
@@ -3062,7 +3169,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,      // This is target intrinsic that touches memory      Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),                                       VTs, &Ops[0], Ops.size(), -                                     Info.memVT, Info.ptrVal, Info.offset, +                                     Info.memVT, +                                   MachinePointerInfo(Info.ptrVal, Info.offset),                                       Info.align, Info.vol,                                       Info.readMem, Info.writeMem);    } else if (!HasChain) { @@ -3087,7 +3195,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,    if (!I.getType()->isVoidTy()) {      if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {        EVT VT = TLI.getValueType(PTy); -      Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result); +      Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);      }      setValue(&I, Result); @@ -3106,7 +3214,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {                             DAG.getConstant(0x007fffff, MVT::i32));    SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,                             DAG.getConstant(0x3f800000, MVT::i32)); -  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2); +  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);  }  /// GetExponent - Get the exponent: @@ -3205,13 +3313,13 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {        SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);        SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,                                 getF32Constant(DAG, 0x3f7f5e7e)); -      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5); +      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5);        // Add the exponent into the result in integer domain.        
SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,                                 TwoToFracPartOfX, IntegerPartOfX); -      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6); +      result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6);      } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {        // For floating-point precision of 12:        // @@ -3231,13 +3339,13 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {        SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);        SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,                                 getF32Constant(DAG, 0x3f7ff8fd)); -      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7); +      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7);        // Add the exponent into the result in integer domain.        SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,                                 TwoToFracPartOfX, IntegerPartOfX); -      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8); +      result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8);      } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18        // For floating-point precision of 18:        // @@ -3269,14 +3377,14 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {        SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);        SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,                                  getF32Constant(DAG, 0x3f800000)); -      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl, +      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,                                               MVT::i32, t13);        // Add the exponent into the result in integer domain.        
SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,                                  TwoToFracPartOfX, IntegerPartOfX); -      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14); +      result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14);      }    } else {      // No special expansion. @@ -3298,7 +3406,7 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {    if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&        LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {      SDValue Op = getValue(I.getArgOperand(0)); -    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); +    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);      // Scale the exponent by log(2) [0.69314718f].      SDValue Exp = GetExponent(DAG, Op1, TLI, dl); @@ -3408,7 +3516,7 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {    if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&        LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {      SDValue Op = getValue(I.getArgOperand(0)); -    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); +    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);      // Get the exponent.      SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); @@ -3517,7 +3625,7 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {    if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&        LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {      SDValue Op = getValue(I.getArgOperand(0)); -    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); +    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);      // Scale the exponent by log10(2) [0.30102999f].      
SDValue Exp = GetExponent(DAG, Op1, TLI, dl); @@ -3645,11 +3753,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {        SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);        SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,                                 getF32Constant(DAG, 0x3f7f5e7e)); -      SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); +      SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);        SDValue TwoToFractionalPartOfX =          DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); -      result = DAG.getNode(ISD::BIT_CONVERT, dl, +      result = DAG.getNode(ISD::BITCAST, dl,                             MVT::f32, TwoToFractionalPartOfX);      } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {        // For floating-point precision of 12: @@ -3670,11 +3778,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {        SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);        SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,                                 getF32Constant(DAG, 0x3f7ff8fd)); -      SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); +      SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);        SDValue TwoToFractionalPartOfX =          DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); -      result = DAG.getNode(ISD::BIT_CONVERT, dl, +      result = DAG.getNode(ISD::BITCAST, dl,                             MVT::f32, TwoToFractionalPartOfX);      } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18        // For floating-point precision of 18: @@ -3706,11 +3814,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {        SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);        SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,                                  getF32Constant(DAG, 0x3f800000)); -      SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13); +      SDValue t14 = 
DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);        SDValue TwoToFractionalPartOfX =          DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); -      result = DAG.getNode(ISD::BIT_CONVERT, dl, +      result = DAG.getNode(ISD::BITCAST, dl,                             MVT::f32, TwoToFractionalPartOfX);      }    } else { @@ -3778,11 +3886,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {        SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);        SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,                                 getF32Constant(DAG, 0x3f7f5e7e)); -      SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); +      SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);        SDValue TwoToFractionalPartOfX =          DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); -      result = DAG.getNode(ISD::BIT_CONVERT, dl, +      result = DAG.getNode(ISD::BITCAST, dl,                             MVT::f32, TwoToFractionalPartOfX);      } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {        // For floating-point precision of 12: @@ -3803,11 +3911,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {        SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);        SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,                                 getF32Constant(DAG, 0x3f7ff8fd)); -      SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); +      SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);        SDValue TwoToFractionalPartOfX =          DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); -      result = DAG.getNode(ISD::BIT_CONVERT, dl, +      result = DAG.getNode(ISD::BITCAST, dl,                             MVT::f32, TwoToFractionalPartOfX);      } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18        // For floating-point precision of 18: @@ -3839,11 +3947,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {        SDValue t12 = 
DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);        SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,                                  getF32Constant(DAG, 0x3f800000)); -      SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13); +      SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);        SDValue TwoToFractionalPartOfX =          DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); -      result = DAG.getNode(ISD::BIT_CONVERT, dl, +      result = DAG.getNode(ISD::BITCAST, dl,                             MVT::f32, TwoToFractionalPartOfX);      }    } else { @@ -3915,13 +4023,16 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,  /// At the end of instruction selection, they will be inserted to the entry BB.  bool  SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, -                                              int64_t Offset,  +                                              int64_t Offset,                                                const SDValue &N) {    const Argument *Arg = dyn_cast<Argument>(V);    if (!Arg)      return false;    MachineFunction &MF = DAG.getMachineFunction(); +  const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); +  const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); +    // Ignore inlined function arguments here.    DIVariable DV(Variable);    if (DV.isInlinedFnArgument(MF.getFunction())) @@ -3935,14 +4046,16 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,    if (Arg->hasByValAttr()) {      // Byval arguments' frame index is recorded during argument lowering.      // Use this info directly. -    const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();      Reg = TRI->getFrameRegister(MF);      Offset = FuncInfo.getByValArgumentFrameIndex(Arg); +    // If byval argument ofset is not recorded then ignore this. 
+    if (!Offset) +      Reg = 0;    }    if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) {      Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); -    if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { +    if (TargetRegisterInfo::isVirtualRegister(Reg)) {        MachineRegisterInfo &RegInfo = MF.getRegInfo();        unsigned PR = RegInfo.getLiveInPhysReg(Reg);        if (PR) @@ -3951,13 +4064,25 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,    }    if (!Reg) { +    // Check if ValueMap has reg number.      DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); -    if (VMI == FuncInfo.ValueMap.end()) -      return false; -    Reg = VMI->second; +    if (VMI != FuncInfo.ValueMap.end()) +      Reg = VMI->second;    } -  const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); +  if (!Reg && N.getNode()) { +    // Check if frame index is available. +    if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode())) +      if (FrameIndexSDNode *FINode = +          dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) { +        Reg = TRI->getFrameRegister(MF); +        Offset = FINode->getIndex(); +      } +  } + +  if (!Reg) +    return false; +    MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(),                                      TII->get(TargetOpcode::DBG_VALUE))      .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable); @@ -3966,9 +4091,11 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,  }  // VisualStudio defines setjmp as _setjmp -#if defined(_MSC_VER) && defined(setjmp) -#define setjmp_undefined_for_visual_studio -#undef setjmp +#if defined(_MSC_VER) && defined(setjmp) && \ +                         !defined(setjmp_undefined_for_msvc) +#  pragma push_macro("setjmp") +#  undef setjmp +#  define setjmp_undefined_for_msvc  #endif  /// visitIntrinsicCall - Lower the call to the specified intrinsic function.  
If @@ -4013,7 +4140,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {      unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();      bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();      DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false, -                              I.getArgOperand(0), 0, I.getArgOperand(1), 0)); +                              MachinePointerInfo(I.getArgOperand(0)), +                              MachinePointerInfo(I.getArgOperand(1))));      return 0;    }    case Intrinsic::memset: { @@ -4028,7 +4156,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {      unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();      bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();      DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol, -                              I.getArgOperand(0), 0)); +                              MachinePointerInfo(I.getArgOperand(0))));      return 0;    }    case Intrinsic::memmove: { @@ -4044,22 +4172,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {      SDValue Op3 = getValue(I.getArgOperand(2));      unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();      bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - -    // If the source and destination are known to not be aliases, we can -    // lower memmove as memcpy. 
-    uint64_t Size = -1ULL; -    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3)) -      Size = C->getZExtValue(); -    if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) == -        AliasAnalysis::NoAlias) { -      DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol,  -                                false, I.getArgOperand(0), 0, -                                I.getArgOperand(1), 0)); -      return 0; -    } -      DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol, -                               I.getArgOperand(0), 0, I.getArgOperand(1), 0)); +                               MachinePointerInfo(I.getArgOperand(0)), +                               MachinePointerInfo(I.getArgOperand(1))));      return 0;    }    case Intrinsic::dbg_declare: { @@ -4078,10 +4193,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {      // Check if address has undef value.      if (isa<UndefValue>(Address) ||          (Address->use_empty() && !isa<Argument>(Address))) { -      SDDbgValue*SDV =  -        DAG.getDbgValue(Variable, UndefValue::get(Address->getType()), -                        0, dl, SDNodeOrder); -      DAG.AddDbgValue(SDV, 0, false); +      DEBUG(dbgs() << "Dropping debug info for " << DI);        return 0;      } @@ -4092,7 +4204,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {      SDDbgValue *SDV;      if (N.getNode()) {        // Parameters are handled specially. -      bool isParameter =  +      bool isParameter =          DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;        if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))          Address = BCI->getOperand(0); @@ -4104,25 +4216,40 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {            // Byval parameter.  We have a frame index at this point.            
SDV = DAG.getDbgValue(Variable, FINode->getIndex(),                                  0, dl, SDNodeOrder); -        else +        else {            // Can't do anything with other non-AI cases yet.  This might be a            // parameter of a callee function that got inlined, for example. +          DEBUG(dbgs() << "Dropping debug info for " << DI);            return 0; +        }        } else if (AI)          SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),                                0, dl, SDNodeOrder); -      else +      else {          // Can't do anything with other non-AI cases yet. +        DEBUG(dbgs() << "Dropping debug info for " << DI);          return 0; +      }        DAG.AddDbgValue(SDV, N.getNode(), isParameter);      } else { -      // If Address is an arugment then try to emits its dbg value using -      // virtual register info from the FuncInfo.ValueMap. Otherwise add undef -      // to help track missing debug info. +      // If Address is an argument then try to emit its dbg value using +      // virtual register info from the FuncInfo.ValueMap.        if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) { -        SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()), -                              0, dl, SDNodeOrder); -        DAG.AddDbgValue(SDV, 0, false); +        // If variable is pinned by a alloca in dominating bb then +        // use StaticAllocaMap. 
+        if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { +          if (AI->getParent() != DI.getParent()) { +            DenseMap<const AllocaInst*, int>::iterator SI = +              FuncInfo.StaticAllocaMap.find(AI); +            if (SI != FuncInfo.StaticAllocaMap.end()) { +              SDV = DAG.getDbgValue(Variable, SI->second, +                                    0, dl, SDNodeOrder); +              DAG.AddDbgValue(SDV, 0, false); +              return 0; +            } +          } +        } +        DEBUG(dbgs() << "Dropping debug info for " << DI);        }      }      return 0; @@ -4160,17 +4287,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {                                  N.getResNo(), Offset, dl, SDNodeOrder);            DAG.AddDbgValue(SDV, N.getNode(), false);          } -      } else if (isa<PHINode>(V) && !V->use_empty() ) { +      } else if (!V->use_empty() ) {          // Do not call getValue(V) yet, as we don't want to generate code.          // Remember it for later.          DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);          DanglingDebugInfoMap[V] = DDI;        } else {          // We may expand this to cover more cases.  One case where we have no -        // data available is an unreferenced parameter; we need this fallback. -        SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), -                              Offset, dl, SDNodeOrder); -        DAG.AddDbgValue(SDV, 0, false); +        // data available is an unreferenced parameter. +        DEBUG(dbgs() << "Dropping debug info for " << DI);        }      } @@ -4186,7 +4311,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {      if (SI == FuncInfo.StaticAllocaMap.end())        return 0; // VLAs.      
int FI = SI->second; -     +      MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();      if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())        MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); @@ -4282,11 +4407,75 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {    }    case Intrinsic::eh_sjlj_longjmp: {      DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other, -                            getRoot(), -                            getValue(I.getArgOperand(0)))); +                            getRoot(), getValue(I.getArgOperand(0)))); +    return 0; +  } +  case Intrinsic::eh_sjlj_dispatch_setup: { +    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, +                            getRoot(), getValue(I.getArgOperand(0))));      return 0;    } +  case Intrinsic::x86_mmx_pslli_w: +  case Intrinsic::x86_mmx_pslli_d: +  case Intrinsic::x86_mmx_pslli_q: +  case Intrinsic::x86_mmx_psrli_w: +  case Intrinsic::x86_mmx_psrli_d: +  case Intrinsic::x86_mmx_psrli_q: +  case Intrinsic::x86_mmx_psrai_w: +  case Intrinsic::x86_mmx_psrai_d: { +    SDValue ShAmt = getValue(I.getArgOperand(1)); +    if (isa<ConstantSDNode>(ShAmt)) { +      visitTargetIntrinsic(I, Intrinsic); +      return 0; +    } +    unsigned NewIntrinsic = 0; +    EVT ShAmtVT = MVT::v2i32; +    switch (Intrinsic) { +    case Intrinsic::x86_mmx_pslli_w: +      NewIntrinsic = Intrinsic::x86_mmx_psll_w; +      break; +    case Intrinsic::x86_mmx_pslli_d: +      NewIntrinsic = Intrinsic::x86_mmx_psll_d; +      break; +    case Intrinsic::x86_mmx_pslli_q: +      NewIntrinsic = Intrinsic::x86_mmx_psll_q; +      break; +    case Intrinsic::x86_mmx_psrli_w: +      NewIntrinsic = Intrinsic::x86_mmx_psrl_w; +      break; +    case Intrinsic::x86_mmx_psrli_d: +      NewIntrinsic = Intrinsic::x86_mmx_psrl_d; +      break; +    case Intrinsic::x86_mmx_psrli_q: +      NewIntrinsic = Intrinsic::x86_mmx_psrl_q; +      break; +    case 
Intrinsic::x86_mmx_psrai_w: +      NewIntrinsic = Intrinsic::x86_mmx_psra_w; +      break; +    case Intrinsic::x86_mmx_psrai_d: +      NewIntrinsic = Intrinsic::x86_mmx_psra_d; +      break; +    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here. +    } + +    // The vector shift intrinsics with scalars uses 32b shift amounts but +    // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits +    // to be zero. +    // We must do this early because v2i32 is not a legal type. +    DebugLoc dl = getCurDebugLoc(); +    SDValue ShOps[2]; +    ShOps[0] = ShAmt; +    ShOps[1] = DAG.getConstant(0, MVT::i32); +    ShAmt =  DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); +    EVT DestVT = TLI.getValueType(I.getType()); +    ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt); +    Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, +                       DAG.getConstant(NewIntrinsic, MVT::i32), +                       getValue(I.getArgOperand(0)), ShAmt); +    setValue(&I, Res); +    return 0; +  }    case Intrinsic::convertff:    case Intrinsic::convertfsi:    case Intrinsic::convertfui: @@ -4430,8 +4619,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {      // Store the stack protector onto the stack.      
Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN, -                       PseudoSourceValue::getFixedStack(FI), -                       0, true, false, 0); +                       MachinePointerInfo::getFixedStack(FI), +                       true, false, 0);      setValue(&I, Res);      DAG.setRoot(Res);      return 0; @@ -4510,14 +4699,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {    case Intrinsic::prefetch: {      SDValue Ops[4]; +    unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();      Ops[0] = getRoot();      Ops[1] = getValue(I.getArgOperand(0));      Ops[2] = getValue(I.getArgOperand(1));      Ops[3] = getValue(I.getArgOperand(2)); -    DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4)); +    DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl, +                                        DAG.getVTList(MVT::Other), +                                        &Ops[0], 4, +                                        EVT::getIntegerVT(*Context, 8), +                                        MachinePointerInfo(I.getArgOperand(0)), +                                        0, /* align */ +                                        false, /* volatile */ +                                        rw==0, /* read */ +                                        rw==1)); /* write */      return 0;    } -    case Intrinsic::memory_barrier: {      SDValue Ops[6];      Ops[0] = getRoot(); @@ -4536,7 +4733,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {                      getValue(I.getArgOperand(0)),                      getValue(I.getArgOperand(1)),                      getValue(I.getArgOperand(2)), -                    I.getArgOperand(0)); +                    MachinePointerInfo(I.getArgOperand(0)));      setValue(&I, L);      DAG.setRoot(L.getValue(1));      return 0; @@ -4599,6 +4796,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, 
                         FTy->isVarArg(), Outs, FTy->getContext());    SDValue DemoteStackSlot; +  int DemoteStackIdx = -100;    if (!CanLowerReturn) {      uint64_t TySize = TLI.getTargetData()->getTypeAllocSize( @@ -4606,10 +4804,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,      unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(                        FTy->getReturnType());      MachineFunction &MF = DAG.getMachineFunction(); -    int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); +    DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);      const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); -    DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); +    DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy());      Entry.Node = DemoteStackSlot;      Entry.Ty = StackSlotPtrType;      Entry.isSExt = false; @@ -4703,7 +4901,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,                                  DemoteStackSlot,                                  DAG.getConstant(Offsets[i], PtrVT));        SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second, -                              Add, NULL, Offsets[i], false, false, 1); +                              Add, +                  MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), +                              false, false, 1);        Values[i] = L;        Chains[i] = L.getValue(1);      } @@ -4711,7 +4911,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),                                  MVT::Other, &Chains[0], NumValues);      PendingLoads.push_back(Chain); -     +      // Collect the legal value parts into potentially illegal values      // that correspond to the original function's return values.      
SmallVector<EVT, 4> RetTys; @@ -4724,7 +4924,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,        EVT VT = RetTys[I];        EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);        unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT); -   +        SDValue ReturnValue =          getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,                           RegisterVT, VT, AssertOp); @@ -4806,7 +5006,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,    SDValue Ptr = Builder.getValue(PtrVal);    SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, -                                        Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/, +                                        Ptr, MachinePointerInfo(PtrVal),                                          false /*volatile*/,                                          false /*nontemporal*/, 1 /* align=1 */); @@ -4902,7 +5102,25 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {      visitInlineAsm(&I);      return;    } -   + +  // See if any floating point values are being passed to this function. This is +  // used to emit an undefined reference to fltused on Windows. 
+  const FunctionType *FT = +    cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0)); +  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); +  if (FT->isVarArg() && +      !MMI.callsExternalVAFunctionWithFloatingPointArguments()) { +    for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { +      const Type* T = I.getArgOperand(i)->getType(); +      for (po_iterator<const Type*> i = po_begin(T), e = po_end(T); +           i != e; ++i) { +        if (!i->isFloatingPointTy()) continue; +        MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true); +        break; +      } +    } +  } +    const char *RenameFn = 0;    if (Function *F = I.getCalledFunction()) {      if (F->isDeclaration()) { @@ -4980,7 +5198,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {        }      }    } -   +    SDValue Callee;    if (!RenameFn)      Callee = getValue(I.getCalledValue()); @@ -5008,7 +5226,7 @@ public:    /// contains the set of register corresponding to the operand.    RegsForValue AssignedRegs; -  explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info) +  explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)      : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {    } @@ -5083,6 +5301,8 @@ private:    }  }; +typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; +  } // end llvm namespace.  /// isAllocatableRegister - If the specified register is safe to allocate, @@ -5192,7 +5412,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,        // vector types).        
EVT RegVT = *PhysReg.second->vt_begin();        if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { -        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), +        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),                                           RegVT, OpInfo.CallOperand);          OpInfo.ConstraintVT = RegVT;        } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { @@ -5202,7 +5422,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,          // machine.          RegVT = EVT::getIntegerVT(Context,                                    OpInfo.ConstraintVT.getSizeInBits()); -        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), +        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),                                           RegVT, OpInfo.CallOperand);          OpInfo.ConstraintVT = RegVT;        } @@ -5320,30 +5540,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {    const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());    /// ConstraintOperands - Information about all of the constraints. -  std::vector<SDISelAsmOperandInfo> ConstraintOperands; +  SDISelAsmOperandInfoVector ConstraintOperands;    std::set<unsigned> OutputRegs, InputRegs; -  // Do a prepass over the constraints, canonicalizing them, and building up the -  // ConstraintOperands list. -  std::vector<InlineAsm::ConstraintInfo> -    ConstraintInfos = IA->ParseConstraints(); - -  bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI); - -  SDValue Chain, Flag; - -  // We won't need to flush pending loads if this asm doesn't touch -  // memory and is nonvolatile. -  if (hasMemory || IA->hasSideEffects()) -    Chain = getRoot(); -  else -    Chain = DAG.getRoot(); +  TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS); +  bool hasMemory = false;    unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.    
unsigned ResNo = 0;   // ResNo - The result number of the next output. -  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { -    ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i])); +  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { +    ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));      SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();      EVT OpVT = MVT::Other; @@ -5380,9 +5587,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {      // If this is an input or an indirect output, process the call argument.      // BasicBlocks are labels, currently appearing only in asm's.      if (OpInfo.CallOperandVal) { -      // Strip bitcasts, if any.  This mostly comes up for functions. -      OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts(); -        if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {          OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);        } else { @@ -5393,11 +5597,33 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {      }      OpInfo.ConstraintVT = OpVT; + +    // Indirect operand accesses access memory. +    if (OpInfo.isIndirect) +      hasMemory = true; +    else { +      for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { +        TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[j]); +        if (CType == TargetLowering::C_Memory) { +          hasMemory = true; +          break; +        } +      } +    }    } +  SDValue Chain, Flag; + +  // We won't need to flush pending loads if this asm doesn't touch +  // memory and is nonvolatile. +  if (hasMemory || IA->hasSideEffects()) +    Chain = getRoot(); +  else +    Chain = DAG.getRoot(); +    // Second pass over the constraints: compute which constraint option to use    // and assign registers to constraints that want a specific physreg. 
-  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { +  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {      SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];      // If this is an output operand with a matching input operand, look up the @@ -5406,7 +5632,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {      // error.      if (OpInfo.hasMatchingInput()) {        SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; -       +        if (OpInfo.ConstraintVT != Input.ConstraintVT) {          if ((OpInfo.ConstraintVT.isInteger() !=               Input.ConstraintVT.isInteger()) || @@ -5427,7 +5653,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {      // need to to provide an address for the memory input.      if (OpInfo.ConstraintType == TargetLowering::C_Memory &&          !OpInfo.isIndirect) { -      assert(OpInfo.Type == InlineAsm::isInput && +      assert((OpInfo.isMultipleAlternative || (OpInfo.Type == InlineAsm::isInput)) &&               "Can only indirectify direct input operands!");        // Memory operands really want the address of the value.  
If we don't have @@ -5451,7 +5677,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {          int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);          SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());          Chain = DAG.getStore(Chain, getCurDebugLoc(), -                             OpInfo.CallOperand, StackSlot, NULL, 0, +                             OpInfo.CallOperand, StackSlot, +                             MachinePointerInfo::getFixedStack(SSFI),                               false, false, 0);          OpInfo.CallOperand = StackSlot;        } @@ -5469,8 +5696,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {        GetRegistersForValue(OpInfo, OutputRegs, InputRegs);    } -  ConstraintInfos.clear(); -    // Second pass - Loop over all of the operands, assigning virtual or physregs    // to register class operands.    for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { @@ -5495,9 +5720,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {    const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");    AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); -  // Remember the AlignStack bit as operand 3. -  AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0, -                                            MVT::i1)); +  // Remember the HasSideEffect and AlignStack bits as operand 3. +  unsigned ExtraInfo = 0; +  if (IA->hasSideEffects()) +    ExtraInfo |= InlineAsm::Extra_HasSideEffects; +  if (IA->isAlignStack()) +    ExtraInfo |= InlineAsm::Extra_IsAlignStack; +  AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, +                                                  TLI.getPointerTy()));    // Loop over all of the inputs, copying the operand values into the    // appropriate registers and processing the output regs. 
@@ -5588,7 +5818,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {                            " don't know how to handle tied "                            "indirect register inputs");            } -           +            RegsForValue MatchedRegs;            MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());            EVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); @@ -5607,7 +5837,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {                                             DAG, AsmNodeOperands);            break;          } -         +          assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");          assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&                 "Unexpected number of operands"); @@ -5622,8 +5852,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {        }        // Treat indirect 'X' constraint as memory. -      if (OpInfo.ConstraintType == TargetLowering::C_Other &&  -          OpInfo.isIndirect)  +      if (OpInfo.ConstraintType == TargetLowering::C_Other && +          OpInfo.isIndirect)          OpInfo.ConstraintType = TargetLowering::C_Memory;        if (OpInfo.ConstraintType == TargetLowering::C_Other) { @@ -5642,7 +5872,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {          AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());          break;        } -       +        if (OpInfo.ConstraintType == TargetLowering::C_Memory) {          assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");          assert(InOperandVal.getValueType() == TLI.getPointerTy() && @@ -5693,7 +5923,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {    if (Flag.getNode()) AsmNodeOperands.push_back(Flag);    Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(), -                      DAG.getVTList(MVT::Other, MVT::Flag), +                      DAG.getVTList(MVT::Other, MVT::Glue),            
            &AsmNodeOperands[0], AsmNodeOperands.size());    Flag = Chain.getValue(1); @@ -5713,7 +5943,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {        // not have the same VT as was expected.  Convert it to the right type        // with bit_convert.        if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { -        Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), +        Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),                            ResultType, Val);        } else if (ResultType != Val.getValueType() && @@ -5751,7 +5981,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {      SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),                                 StoresToEmit[i].first,                                 getValue(StoresToEmit[i].second), -                               StoresToEmit[i].second, 0, +                               MachinePointerInfo(StoresToEmit[i].second),                                 false, false, 0);      OutChains.push_back(Val);    } @@ -5888,7 +6118,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,      unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);      for (unsigned i = 0; i != NumRegs; ++i) {        ISD::InputArg MyFlags; -      MyFlags.VT = RegisterVT; +      MyFlags.VT = RegisterVT.getSimpleVT();        MyFlags.Used = isReturnValueUsed;        if (RetSExt)          MyFlags.Flags.setSExt(); @@ -5924,7 +6154,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,    DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {            assert(InVals[i].getNode() &&                   "LowerCall emitted a null value!"); -          assert(Ins[i].VT == InVals[i].getValueType() && +          assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&                   "LowerCall emitted a value with the wrong type!");          }); @@ -6085,7 +6315,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { 
       for (unsigned i = 0, e = Ins.size(); i != e; ++i) {          assert(InVals[i].getNode() &&                 "LowerFormalArguments emitted a null value!"); -        assert(Ins[i].VT == InVals[i].getValueType() && +        assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&                 "LowerFormalArguments emitted a value with the wrong type!");        }      }); @@ -6154,7 +6384,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {      // Note down frame index for byval arguments.      if (I->hasByValAttr() && !ArgValues.empty()) -      if (FrameIndexSDNode *FI =  +      if (FrameIndexSDNode *FI =            dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))          FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex()); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 5f400e9c83ac..a1a70c394a51 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -258,15 +258,16 @@ private:    struct BitTestBlock {      BitTestBlock(APInt F, APInt R, const Value* SV, -                 unsigned Rg, bool E, +                 unsigned Rg, EVT RgVT, bool E,                   MachineBasicBlock* P, MachineBasicBlock* D,                   const BitTestInfo& C): -      First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E), +      First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),        Parent(P), Default(D), Cases(C) { }      APInt First;      APInt Range;      const Value *SValue;      unsigned Reg; +    EVT RegVT;      bool Emitted;      MachineBasicBlock *Parent;      MachineBasicBlock *Default; @@ -347,7 +348,7 @@ public:    SDValue getControlRoot();    DebugLoc getCurDebugLoc() const { return CurDebugLoc; } - +  void setCurDebugLoc(DebugLoc dl){ CurDebugLoc = dl; }    unsigned getSDNodeOrder() const { return SDNodeOrder; }    void CopyValueToVirtualRegister(const Value *V, unsigned Reg); @@ -398,6 +399,10 @@ 
public:    void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,                     MachineBasicBlock *LandingPad = NULL); +  /// UpdateSplitBlock - When an MBB was split during scheduling, update the +  /// references that ned to refer to the last resulting block. +  void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); +  private:    // Terminator instructions.    void visitRet(const ReturnInst &I); @@ -431,7 +436,8 @@ public:    void visitSwitchCase(CaseBlock &CB,                         MachineBasicBlock *SwitchBB);    void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); -  void visitBitTestCase(MachineBasicBlock* NextMBB, +  void visitBitTestCase(BitTestBlock &BB, +                        MachineBasicBlock* NextMBB,                          unsigned Reg,                          BitTestCase &B,                          MachineBasicBlock *SwitchBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 66cb5ceb09e5..62ebc81ef86e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -43,6 +43,7 @@  #include "llvm/Target/TargetLowering.h"  #include "llvm/Target/TargetMachine.h"  #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h" @@ -53,8 +54,17 @@  using namespace llvm;  STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); +STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); +STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");  STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); +#ifndef NDEBUG +STATISTIC(NumBBWithOutOfOrderLineInfo, +          "Number of blocks with out of order line number info"); +STATISTIC(NumMBBWithOutOfOrderLineInfo, +          
"Number of machine blocks with out of order line number info"); +#endif +  static cl::opt<bool>  EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,            cl::desc("Enable verbose messages in the \"fast\" " @@ -170,15 +180,18 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,  // SelectionDAGISel code  //===----------------------------------------------------------------------===// -SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : +SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, +                                   CodeGenOpt::Level OL) :    MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),    FuncInfo(new FunctionLoweringInfo(TLI)),    CurDAG(new SelectionDAG(tm)),    SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),    GFI(),    OptLevel(OL), -  DAGSize(0) -{} +  DAGSize(0) { +    initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); +    initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); +  }  SelectionDAGISel::~SelectionDAGISel() {    delete SDB; @@ -202,6 +215,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {  static bool FunctionCallsSetJmp(const Function *F) {    const Module *M = F->getParent();    static const char *ReturnsTwiceFns[] = { +    "_setjmp",      "setjmp",      "sigsetjmp",      "setjmp_syscall", @@ -227,6 +241,44 @@ static bool FunctionCallsSetJmp(const Function *F) {  #undef NUM_RETURNS_TWICE_FNS  } +/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that +/// may trap on it.  In this case we have to split the edge so that the path +/// through the predecessor block that doesn't go to the phi block doesn't +/// execute the possibly trapping instruction. +/// +/// This is required for correctness, so it must be done at -O0. +/// +static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { +  // Loop for blocks with phi nodes. 
+  for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { +    PHINode *PN = dyn_cast<PHINode>(BB->begin()); +    if (PN == 0) continue; + +  ReprocessBlock: +    // For each block with a PHI node, check to see if any of the input values +    // are potentially trapping constant expressions.  Constant expressions are +    // the only potentially trapping value that can occur as the argument to a +    // PHI. +    for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I) +      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { +        ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i)); +        if (CE == 0 || !CE->canTrap()) continue; + +        // The only case we have to worry about is when the edge is critical. +        // Since this block has a PHI Node, we assume it has multiple input +        // edges: check to see if the pred has multiple successors. +        BasicBlock *Pred = PN->getIncomingBlock(i); +        if (Pred->getTerminator()->getNumSuccessors() == 1) +          continue; + +        // Okay, we have to split this edge. +        SplitCriticalEdge(Pred->getTerminator(), +                          GetSuccessorNumber(Pred, BB), SDISel, true); +        goto ReprocessBlock; +      } +  } +} +  bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {    // Do some sanity-checking on the command-line options.    
assert((!EnableFastISelVerbose || EnableFastISel) && @@ -245,6 +297,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {    DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); +  SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); +    CurDAG->init(*MF);    FuncInfo->set(Fn, *MF);    SDB->init(GFI, *AA); @@ -261,7 +315,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {    if (!FuncInfo->ArgDbgValues.empty())      for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),             E = RegInfo->livein_end(); LI != E; ++LI) -      if (LI->second)  +      if (LI->second)          LiveInMap.insert(std::make_pair(LI->first, LI->second));    // Insert DBG_VALUE instructions for function arguments to the entry block. @@ -282,14 +336,37 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {      if (LDI != LiveInMap.end()) {        MachineInstr *Def = RegInfo->getVRegDef(LDI->second);        MachineBasicBlock::iterator InsertPos = Def; -      const MDNode *Variable =  +      const MDNode *Variable =          MI->getOperand(MI->getNumOperands()-1).getMetadata();        unsigned Offset = MI->getOperand(1).getImm();        // Def is never a terminator here, so it is ok to increment InsertPos. -      BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),  +      BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),                TII.get(TargetOpcode::DBG_VALUE))          .addReg(LDI->second, RegState::Debug)          .addImm(Offset).addMetadata(Variable); + +      // If this vreg is directly copied into an exported register then +      // that COPY instructions also need DBG_VALUE, if it is the only +      // user of LDI->second. 
+      MachineInstr *CopyUseMI = NULL; +      for (MachineRegisterInfo::use_iterator +             UI = RegInfo->use_begin(LDI->second); +           MachineInstr *UseMI = UI.skipInstruction();) { +        if (UseMI->isDebugValue()) continue; +        if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) { +          CopyUseMI = UseMI; continue; +        } +        // Otherwise this is another use or second copy use. +        CopyUseMI = NULL; break; +      } +      if (CopyUseMI) { +        MachineInstr *NewMI = +          BuildMI(*MF, CopyUseMI->getDebugLoc(), +                  TII.get(TargetOpcode::DBG_VALUE)) +          .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug) +          .addImm(Offset).addMetadata(Variable); +        EntryMBB->insertAfter(CopyUseMI, NewMI); +      }      }    } @@ -303,10 +380,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {               II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {          const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode()); -        // Operand 1 of an inline asm instruction indicates whether the asm -        // needs stack or not. -        if ((II->isInlineAsm() && II->getOperand(1).getImm()) || -            (TID.isCall() && !TID.isReturn())) { +        if ((TID.isCall() && !TID.isReturn()) || +            II->isStackAligningInlineAsm()) {            MFI->setHasCalls(true);            goto done;          } @@ -362,6 +437,7 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,    // Final step, emit the lowered DAG as machine code.    CodeGenAndEmitDAG(); +  return;  }  void SelectionDAGISel::ComputeLiveOutVRegInfo() { @@ -406,9 +482,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {      // Only install this information if it tells us something.      
if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) { -      DestReg -= TargetRegisterInfo::FirstVirtualRegister; -      if (DestReg >= FuncInfo->LiveOutRegInfo.size()) -        FuncInfo->LiveOutRegInfo.resize(DestReg+1); +      FuncInfo->LiveOutRegInfo.grow(DestReg);        FunctionLoweringInfo::LiveOutInfo &LOI =          FuncInfo->LiveOutRegInfo[DestReg];        LOI.NumSignBits = NumSignBits; @@ -541,13 +615,19 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {    // Emit machine code to BB.  This can change 'BB' to the last block being    // inserted into. +  MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB;    {      NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); -    FuncInfo->MBB = Scheduler->EmitSchedule(); +    LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule();      FuncInfo->InsertPt = Scheduler->InsertPos;    } +  // If the block was split, make sure we update any references that are used to +  // update PHI nodes later on. +  if (FirstMBB != LastMBB) +    SDB->UpdateSplitBlock(FirstMBB, LastMBB); +    // Free the scheduler state.    {      NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName, @@ -563,19 +643,19 @@ void SelectionDAGISel::DoInstructionSelection() {    DEBUG(errs() << "===== Instruction selection begins:\n");    PreprocessISelDAG(); -   +    // Select target instructions for the DAG.    {      // Number all nodes with a topological order and set DAGSize.      DAGSize = CurDAG->AssignTopologicalOrder(); -     +      // Create a dummy node (which is not added to allnodes), that adds      // a reference to the root node, preventing it from being deleted,      // and tracking any changes of the root.      HandleSDNode Dummy(CurDAG->getRoot());      ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode());      ++ISelPosition; -     +      // The AllNodes list is now topological-sorted. 
Visit the      // nodes by starting at the end of the list (the root of the      // graph) and preceding back toward the beginning (the entry @@ -587,19 +667,19 @@ void SelectionDAGISel::DoInstructionSelection() {        // makes it theoretically possible to disable the DAGCombiner.        if (Node->use_empty())          continue; -       +        SDNode *ResNode = Select(Node); -       +        // FIXME: This is pretty gross.  'Select' should be changed to not return        // anything at all and this code should be nuked with a tactical strike. -       +        // If node should not be replaced, continue with the next one.        if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)          continue;        // Replace node.        if (ResNode)          ReplaceUses(Node, ResNode); -       +        // If after the replacement this node is not used any more,        // remove this dead node.        if (Node->use_empty()) { // Don't delete EntryToken, etc. @@ -607,9 +687,9 @@ void SelectionDAGISel::DoInstructionSelection() {          CurDAG->RemoveDeadNode(Node, &ISU);        }      } -     +      CurDAG->setRoot(Dummy.getValue()); -  }     +  }    DEBUG(errs() << "===== Instruction selection ends:\n"); @@ -661,6 +741,90 @@ void SelectionDAGISel::PrepareEHLandingPad() {    }  } + + + +bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, +                                             FastISel *FastIS) { +  // Don't try to fold volatile loads.  Target has to deal with alignment +  // constraints. +  if (LI->isVolatile()) return false; + +  // Figure out which vreg this is going into. +  unsigned LoadReg = FastIS->getRegForValue(LI); +  assert(LoadReg && "Load isn't already assigned a vreg? "); + +  // Check to see what the uses of this vreg are.  If it has no uses, or more +  // than one use (at the machine instr level) then we can't fold it. 
+  MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg); +  if (RI == RegInfo->reg_end()) +    return false; + +  // See if there is exactly one use of the vreg.  If there are multiple uses, +  // then the instruction got lowered to multiple machine instructions or the +  // use of the loaded value ended up being multiple operands of the result, in +  // either case, we can't fold this. +  MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI; +  if (PostRI != RegInfo->reg_end()) +    return false; + +  assert(RI.getOperand().isUse() && +         "The only use of the vreg must be a use, we haven't emitted the def!"); + +  MachineInstr *User = &*RI; +   +  // Set the insertion point properly.  Folding the load can cause generation of +  // other random instructions (like sign extends) for addressing modes, make +  // sure they get inserted in a logical place before the new instruction. +  FuncInfo->InsertPt = User; +  FuncInfo->MBB = User->getParent(); + +  // Ask the target to try folding the load. +  return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI); +} + +#ifndef NDEBUG +/// CheckLineNumbers - Check if basic block instructions follow source order +/// or not. +static void CheckLineNumbers(const BasicBlock *BB) { +  unsigned Line = 0; +  unsigned Col = 0; +  for (BasicBlock::const_iterator BI = BB->begin(), +         BE = BB->end(); BI != BE; ++BI) { +    const DebugLoc DL = BI->getDebugLoc(); +    if (DL.isUnknown()) continue; +    unsigned L = DL.getLine(); +    unsigned C = DL.getCol(); +    if (L < Line || (L == Line && C < Col)) { +      ++NumBBWithOutOfOrderLineInfo; +      return; +    } +    Line = L; +    Col = C; +  } +} + +/// CheckLineNumbers - Check if machine basic block instructions follow source +/// order or not. 
+static void CheckLineNumbers(const MachineBasicBlock *MBB) { +  unsigned Line = 0; +  unsigned Col = 0; +  for (MachineBasicBlock::const_iterator MBI = MBB->begin(), +         MBE = MBB->end(); MBI != MBE; ++MBI) { +    const DebugLoc DL = MBI->getDebugLoc(); +    if (DL.isUnknown()) continue; +    unsigned L = DL.getLine(); +    unsigned C = DL.getCol(); +    if (L < Line || (L == Line && C < Col)) { +      ++NumMBBWithOutOfOrderLineInfo; +      return; +    } +    Line = L; +    Col = C; +  } +} +#endif +  void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {    // Initialize the Fast-ISel state, if needed.    FastISel *FastIS = 0; @@ -670,6 +834,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {    // Iterate over all basic blocks in the function.    for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {      const BasicBlock *LLVMBB = &*I; +#ifndef NDEBUG +    CheckLineNumbers(LLVMBB); +#endif      FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];      FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); @@ -682,10 +849,19 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {      // Setup an EH landing-pad block.      if (FuncInfo->MBB->isLandingPad())        PrepareEHLandingPad(); -     +      // Lower any arguments needed in this block if this is the entry block. -    if (LLVMBB == &Fn.getEntryBlock()) +    if (LLVMBB == &Fn.getEntryBlock()) { +      for (BasicBlock::const_iterator DBI = LLVMBB->begin(), DBE = LLVMBB->end(); +           DBI != DBE; ++DBI) { +        if (const DbgInfoIntrinsic *DI = dyn_cast<DbgInfoIntrinsic>(DBI)) { +          const DebugLoc DL = DI->getDebugLoc(); +          SDB->setCurDebugLoc(DL); +          break; +        } +      }        LowerArguments(LLVMBB); +    }      // Before doing SelectionDAG ISel, see if FastISel has been requested.      
if (FastIS) { @@ -723,8 +899,19 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {          FastIS->recomputeInsertPt();          // Try to select the instruction with FastISel. -        if (FastIS->SelectInstruction(Inst)) +        if (FastIS->SelectInstruction(Inst)) { +          // If fast isel succeeded, check to see if there is a single-use +          // non-volatile load right before the selected instruction, and see if +          // the load is used by the instruction.  If so, try to fold it. +          const Instruction *BeforeInst = 0; +          if (Inst != Begin) +            BeforeInst = llvm::prior(llvm::prior(BI)); +          if (BeforeInst && isa<LoadInst>(BeforeInst) && +              BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst && +              TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS)) +            --BI; // If we succeeded, don't re-select the load.            continue; +        }          // Then handle certain instructions as single-LLVM-Instruction blocks.          if (isa<CallInst>(Inst)) { @@ -771,6 +958,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {        FastIS->recomputeInsertPt();      } +    if (Begin != BI) +      ++NumDAGBlocks; +    else +      ++NumFastIselBlocks; +      // Run SelectionDAG instruction selection on the remainder of the block      // not handled by FastISel. If FastISel is not run, this is the entire      // block. 
@@ -782,6 +974,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {    }    delete FastIS; +#ifndef NDEBUG +  for (MachineFunction::const_iterator MBI = MF->begin(), MBE = MF->end(); +       MBI != MBE; ++MBI) +    CheckLineNumbers(MBI); +#endif  }  void @@ -831,12 +1028,14 @@ SelectionDAGISel::FinishBasicBlock() {        FuncInfo->InsertPt = FuncInfo->MBB->end();        // Emit the code        if (j+1 != ej) -        SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB, +        SDB->visitBitTestCase(SDB->BitTestCases[i], +                              SDB->BitTestCases[i].Cases[j+1].ThisBB,                                SDB->BitTestCases[i].Reg,                                SDB->BitTestCases[i].Cases[j],                                FuncInfo->MBB);        else -        SDB->visitBitTestCase(SDB->BitTestCases[i].Default, +        SDB->visitBitTestCase(SDB->BitTestCases[i], +                              SDB->BitTestCases[i].Default,                                SDB->BitTestCases[i].Reg,                                SDB->BitTestCases[i].Cases[j],                                FuncInfo->MBB); @@ -951,7 +1150,7 @@ SelectionDAGISel::FinishBasicBlock() {    // additional DAGs necessary.    for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {      // Set the current basic block to the mbb we wish to insert the code into -    MachineBasicBlock *ThisBB = FuncInfo->MBB = SDB->SwitchCases[i].ThisBB; +    FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;      FuncInfo->InsertPt = FuncInfo->MBB->end();      // Determine the unique successors. @@ -960,13 +1159,15 @@ SelectionDAGISel::FinishBasicBlock() {      if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB)        Succs.push_back(SDB->SwitchCases[i].FalseBB); -    // Emit the code. Note that this could result in ThisBB being split, so -    // we need to check for updates. +    // Emit the code. Note that this could result in FuncInfo->MBB being split.      
SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB);      CurDAG->setRoot(SDB->getRoot());      SDB->clear();      CodeGenAndEmitDAG(); -    ThisBB = FuncInfo->MBB; + +    // Remember the last block, now that any splitting is done, for use in +    // populating PHI nodes in successors. +    MachineBasicBlock *ThisBB = FuncInfo->MBB;      // Handle any PHI nodes in successors of this chunk, as if we were coming      // from the original BB before switch expansion.  Note that PHI nodes can @@ -1016,10 +1217,6 @@ ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {    return Ctor(this, OptLevel);  } -ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() { -  return new ScheduleHazardRecognizer(); -} -  //===----------------------------------------------------------------------===//  // Helper functions used by the generated instruction selector.  //===----------------------------------------------------------------------===// @@ -1099,11 +1296,11 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {    Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0    Ops.push_back(InOps[InlineAsm::Op_AsmString]);  // 1    Ops.push_back(InOps[InlineAsm::Op_MDNode]);     // 2, !srcloc -  Ops.push_back(InOps[InlineAsm::Op_IsAlignStack]);  // 3 +  Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]);  // 3 (SideEffect, AlignStack)    unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size(); -  if (InOps[e-1].getValueType() == MVT::Flag) -    --e;  // Don't process a flag operand if it is here. +  if (InOps[e-1].getValueType() == MVT::Glue) +    --e;  // Don't process a glue operand if it is here.    while (i != e) {      unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); @@ -1130,15 +1327,15 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {      }    } -  // Add the flag input back if present. +  // Add the glue input back if present.    
if (e != InOps.size())      Ops.push_back(InOps.back());  } -/// findFlagUse - Return use of EVT::Flag value produced by the specified +/// findGlueUse - Return use of MVT::Glue value produced by the specified  /// SDNode.  /// -static SDNode *findFlagUse(SDNode *N) { +static SDNode *findGlueUse(SDNode *N) {    unsigned FlagResNo = N->getNumValues()-1;    for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {      SDUse &Use = I.getUse(); @@ -1160,11 +1357,11 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,    // never find it.    //    // The Use may be -1 (unassigned) if it is a newly allocated node.  This can -  // happen because we scan down to newly selected nodes in the case of flag +  // happen because we scan down to newly selected nodes in the case of glue    // uses.    if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))      return false; -   +    // Don't revisit nodes if we already scanned it and didn't fail, we know we    // won't fail if we scan it again.    if (!Visited.insert(Use)) @@ -1174,7 +1371,7 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,      // Ignore chain uses, they are validated by HandleMergeInputChains.      if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains)        continue; -     +      SDNode *N = Use->getOperand(i).getNode();      if (N == Def) {        if (Use == ImmedUse || Use == Root) @@ -1221,8 +1418,8 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,    //    // * indicates nodes to be folded together.    // -  // If Root produces a flag, then it gets (even more) interesting. Since it -  // will be "glued" together with its flag use in the scheduler, we need to +  // If Root produces glue, then it gets (even more) interesting. Since it +  // will be "glued" together with its glue use in the scheduler, we need to    // check if it might reach N.    
//    //          [N*]           // @@ -1240,30 +1437,30 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,    //           ^   /         //    //           f  /          //    //           | /           // -  //          [FU]           // +  //          [GU]           //    // -  // If FU (flag use) indirectly reaches N (the load), and Root folds N -  // (call it Fold), then X is a predecessor of FU and a successor of -  // Fold. But since Fold and FU are flagged together, this will create +  // If GU (glue use) indirectly reaches N (the load), and Root folds N +  // (call it Fold), then X is a predecessor of GU and a successor of +  // Fold. But since Fold and GU are glued together, this will create    // a cycle in the scheduling graph. -  // If the node has flags, walk down the graph to the "lowest" node in the -  // flagged set. +  // If the node has glue, walk down the graph to the "lowest" node in the +  // glueged set.    EVT VT = Root->getValueType(Root->getNumValues()-1); -  while (VT == MVT::Flag) { -    SDNode *FU = findFlagUse(Root); -    if (FU == NULL) +  while (VT == MVT::Glue) { +    SDNode *GU = findGlueUse(Root); +    if (GU == NULL)        break; -    Root = FU; +    Root = GU;      VT = Root->getValueType(Root->getNumValues()-1); -     -    // If our query node has a flag result with a use, we've walked up it.  If + +    // If our query node has a glue result with a use, we've walked up it.  If      // the user (which has already been selected) has a chain or indirectly uses      // the chain, our WalkChainUsers predicate will not consider it.  Because of      // this, we cannot ignore chains in this predicate.      
IgnoreChains = false;    } -   +    SmallPtrSet<SDNode*, 16> Visited;    return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); @@ -1272,10 +1469,10 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,  SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {    std::vector<SDValue> Ops(N->op_begin(), N->op_end());    SelectInlineAsmMemoryOperands(Ops); -     +    std::vector<EVT> VTs;    VTs.push_back(MVT::Other); -  VTs.push_back(MVT::Flag); +  VTs.push_back(MVT::Glue);    SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),                                  VTs, &Ops[0], Ops.size());    New->setNodeId(-1); @@ -1287,11 +1484,11 @@ SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {  }  /// GetVBR - decode a vbr encoding whose top bit is set. -ALWAYS_INLINE static uint64_t +LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t  GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {    assert(Val >= 128 && "Not a VBR");    Val &= 127;  // Remove first vbr bit. -   +    unsigned Shift = 7;    uint64_t NextBits;    do { @@ -1299,25 +1496,25 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {      Val |= (NextBits&127) << Shift;      Shift += 7;    } while (NextBits & 128); -   +    return Val;  } -/// UpdateChainsAndFlags - When a match is complete, this method updates uses of -/// interior flag and chain results to use the new flag and chain results. +/// UpdateChainsAndGlue - When a match is complete, this method updates uses of +/// interior glue and chain results to use the new glue and chain results.  
void SelectionDAGISel:: -UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain, -                     const SmallVectorImpl<SDNode*> &ChainNodesMatched, -                     SDValue InputFlag, -                     const SmallVectorImpl<SDNode*> &FlagResultNodesMatched, -                     bool isMorphNodeTo) { +UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, +                    const SmallVectorImpl<SDNode*> &ChainNodesMatched, +                    SDValue InputGlue, +                    const SmallVectorImpl<SDNode*> &GlueResultNodesMatched, +                    bool isMorphNodeTo) {    SmallVector<SDNode*, 4> NowDeadNodes; -   +    ISelUpdater ISU(ISelPosition);    // Now that all the normal results are replaced, we replace the chain and -  // flag results if present. +  // glue results if present.    if (!ChainNodesMatched.empty()) {      assert(InputChain.getNode() != 0 &&             "Matched input chains but didn't produce a chain"); @@ -1325,55 +1522,55 @@ UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain,      // Replace all the chain results with the final chain we ended up with.      for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {        SDNode *ChainNode = ChainNodesMatched[i]; -       +        // If this node was already deleted, don't look at it.        if (ChainNode->getOpcode() == ISD::DELETED_NODE)          continue; -       +        // Don't replace the results of the root node if we're doing a        // MorphNodeTo.        
if (ChainNode == NodeToMatch && isMorphNodeTo)          continue; -       +        SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1); -      if (ChainVal.getValueType() == MVT::Flag) +      if (ChainVal.getValueType() == MVT::Glue)          ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);        assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");        CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU); -       +        // If the node became dead and we haven't already seen it, delete it.        if (ChainNode->use_empty() &&            !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode))          NowDeadNodes.push_back(ChainNode);      }    } -   -  // If the result produces a flag, update any flag results in the matched -  // pattern with the flag result. -  if (InputFlag.getNode() != 0) { + +  // If the result produces glue, update any glue results in the matched +  // pattern with the glue result. +  if (InputGlue.getNode() != 0) {      // Handle any interior nodes explicitly marked. -    for (unsigned i = 0, e = FlagResultNodesMatched.size(); i != e; ++i) { -      SDNode *FRN = FlagResultNodesMatched[i]; -       +    for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) { +      SDNode *FRN = GlueResultNodesMatched[i]; +        // If this node was already deleted, don't look at it.        if (FRN->getOpcode() == ISD::DELETED_NODE)          continue; -       -      assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Flag && -             "Doesn't have a flag result"); + +      assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue && +             "Doesn't have a glue result");        CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1), -                                        InputFlag, &ISU); -       +                                        InputGlue, &ISU); +        // If the node became dead and we haven't already seen it, delete it.        
if (FRN->use_empty() &&            !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN))          NowDeadNodes.push_back(FRN);      }    } -   +    if (!NowDeadNodes.empty())      CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU); -   +    DEBUG(errs() << "ISEL: Match complete!\n");  } @@ -1392,17 +1589,17 @@ enum ChainResult {  ///  /// The walk we do here is guaranteed to be small because we quickly get down to  /// already selected nodes "below" us. -static ChainResult  +static ChainResult  WalkChainUsers(SDNode *ChainedNode,                 SmallVectorImpl<SDNode*> &ChainedNodesInPattern,                 SmallVectorImpl<SDNode*> &InteriorChainedNodes) {    ChainResult Result = CR_Simple; -   +    for (SDNode::use_iterator UI = ChainedNode->use_begin(),           E = ChainedNode->use_end(); UI != E; ++UI) {      // Make sure the use is of the chain, not some other value we produce.      if (UI.getUse().getValueType() != MVT::Other) continue; -     +      SDNode *User = *UI;      // If we see an already-selected machine node, then we've gone beyond the @@ -1411,7 +1608,7 @@ WalkChainUsers(SDNode *ChainedNode,      if (User->isMachineOpcode() ||          User->getOpcode() == ISD::HANDLENODE)  // Root of the graph.        continue; -     +      if (User->getOpcode() == ISD::CopyToReg ||          User->getOpcode() == ISD::CopyFromReg ||          User->getOpcode() == ISD::INLINEASM || @@ -1437,7 +1634,7 @@ WalkChainUsers(SDNode *ChainedNode,        if (!std::count(ChainedNodesInPattern.begin(),                        ChainedNodesInPattern.end(), User))          return CR_InducesCycle; -       +        // Otherwise we found a node that is part of our pattern.  
For example in:        //   x = load ptr        //   y = x+4 @@ -1449,7 +1646,7 @@ WalkChainUsers(SDNode *ChainedNode,        InteriorChainedNodes.push_back(User);        continue;      } -     +      // If we found a TokenFactor, there are two cases to consider: first if the      // TokenFactor is just hanging "below" the pattern we're matching (i.e. no      // uses of the TF are in our pattern) we just want to ignore it.  Second, @@ -1486,7 +1683,7 @@ WalkChainUsers(SDNode *ChainedNode,      case CR_LeadsToInteriorNode:        break;  // Otherwise, keep processing.      } -     +      // Okay, we know we're in the interesting interior case.  The TokenFactor      // is now going to be considered part of the pattern so that we rewrite its      // uses (it may have uses that are not part of the pattern) with the @@ -1497,7 +1694,7 @@ WalkChainUsers(SDNode *ChainedNode,      InteriorChainedNodes.push_back(User);      continue;    } -   +    return Result;  } @@ -1519,7 +1716,7 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,                         InteriorChainedNodes) == CR_InducesCycle)        return SDValue(); // Would induce a cycle.    } -   +    // Okay, we have walked all the matched nodes and collected TokenFactor nodes    // that we are interested in.  Form our input TokenFactor node.    SmallVector<SDValue, 3> InputChains; @@ -1530,14 +1727,14 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,      if (N->getOpcode() != ISD::TokenFactor) {        if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))          continue; -       +        // Otherwise, add the input chain.        
SDValue InChain = ChainNodesMatched[i]->getOperand(0);        assert(InChain.getValueType() == MVT::Other && "Not a chain");        InputChains.push_back(InChain);        continue;      } -     +      // If we have a token factor, we want to add all inputs of the token factor      // that are not part of the pattern we're matching.      for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) { @@ -1546,13 +1743,13 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,          InputChains.push_back(N->getOperand(op));      }    } -   +    SDValue Res;    if (InputChains.size() == 1)      return InputChains[0];    return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(),                           MVT::Other, &InputChains[0], InputChains.size()); -}   +}  /// MorphNode - Handle morphing a node in place for the selector.  SDNode *SelectionDAGISel:: @@ -1560,15 +1757,15 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,            const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) {    // It is possible we're using MorphNodeTo to replace a node with no    // normal results with one that has a normal result (or we could be -  // adding a chain) and the input could have flags and chains as well. +  // adding a chain) and the input could have glue and chains as well.    // In this case we need to shift the operands down.    // FIXME: This is a horrible hack and broken in obscure cases, no worse    // than the old isel though. 
-  int OldFlagResultNo = -1, OldChainResultNo = -1; +  int OldGlueResultNo = -1, OldChainResultNo = -1;    unsigned NTMNumResults = Node->getNumValues(); -  if (Node->getValueType(NTMNumResults-1) == MVT::Flag) { -    OldFlagResultNo = NTMNumResults-1; +  if (Node->getValueType(NTMNumResults-1) == MVT::Glue) { +    OldGlueResultNo = NTMNumResults-1;      if (NTMNumResults != 1 &&          Node->getValueType(NTMNumResults-2) == MVT::Other)        OldChainResultNo = NTMNumResults-2; @@ -1589,54 +1786,55 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,    }    unsigned ResNumResults = Res->getNumValues(); -  // Move the flag if needed. -  if ((EmitNodeInfo & OPFL_FlagOutput) && OldFlagResultNo != -1 && -      (unsigned)OldFlagResultNo != ResNumResults-1) -    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldFlagResultNo),  +  // Move the glue if needed. +  if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 && +      (unsigned)OldGlueResultNo != ResNumResults-1) +    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo),                                        SDValue(Res, ResNumResults-1)); -  if ((EmitNodeInfo & OPFL_FlagOutput) != 0) +  if ((EmitNodeInfo & OPFL_GlueOutput) != 0)      --ResNumResults;    // Move the chain reference if needed.    if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&        (unsigned)OldChainResultNo != ResNumResults-1) -    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),  +    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),                                        SDValue(Res, ResNumResults-1));    // Otherwise, no replacement happened because the node already exists. Replace    // Uses of the old node with the new one.    if (Res != Node)      CurDAG->ReplaceAllUsesWith(Node, Res); -   +    return Res;  }  /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. 
-ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool  CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, -          SDValue N, const SmallVectorImpl<SDValue> &RecordedNodes) { +          SDValue N, +          const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {    // Accept if it is exactly the same as a previously recorded node.    unsigned RecNo = MatcherTable[MatcherIndex++];    assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); -  return N == RecordedNodes[RecNo]; +  return N == RecordedNodes[RecNo].first;  } -   +  /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool  CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,                        SelectionDAGISel &SDISel) {    return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);  }  /// CheckNodePredicate - Implements OP_CheckNodePredicate. -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool  CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,                     SelectionDAGISel &SDISel, SDNode *N) {    return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);  } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool  CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,              SDNode *N) {    uint16_t Opc = MatcherTable[MatcherIndex++]; @@ -1644,17 +1842,17 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,    return N->getOpcode() == Opc;  } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool  CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex,            SDValue N, const TargetLowering &TLI) {    MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];    if (N.getValueType() == VT) return true; -   +    // Handle the case when VT is iPTR.    
return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy();  } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool  CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,                 SDValue N, const TargetLowering &TLI,                 unsigned ChildNo) { @@ -1664,57 +1862,57 @@ CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,  } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool  CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,                SDValue N) {    return cast<CondCodeSDNode>(N)->get() ==        (ISD::CondCode)MatcherTable[MatcherIndex++];  } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool  CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,                 SDValue N, const TargetLowering &TLI) {    MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];    if (cast<VTSDNode>(N)->getVT() == VT)      return true; -   +    // Handle the case when VT is iPTR.    
return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy();  } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool  CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,               SDValue N) {    int64_t Val = MatcherTable[MatcherIndex++];    if (Val & 128)      Val = GetVBR(Val, MatcherTable, MatcherIndex); -   +    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);    return C != 0 && C->getSExtValue() == Val;  } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool  CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,              SDValue N, SelectionDAGISel &SDISel) {    int64_t Val = MatcherTable[MatcherIndex++];    if (Val & 128)      Val = GetVBR(Val, MatcherTable, MatcherIndex); -   +    if (N->getOpcode() != ISD::AND) return false; -   +    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));    return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val);  } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool  CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,             SDValue N, SelectionDAGISel &SDISel) {    int64_t Val = MatcherTable[MatcherIndex++];    if (Val & 128)      Val = GetVBR(Val, MatcherTable, MatcherIndex); -   +    if (N->getOpcode() != ISD::OR) return false; -   +    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));    return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val);  } @@ -1724,11 +1922,11 @@ CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,  /// fail, set Result=true and return anything.  If the current predicate is  /// known to pass, set Result=false and return the MatcherIndex to continue  /// with.  If the current predicate is unknown, set Result=false and return the -/// MatcherIndex to continue with.  +/// MatcherIndex to continue with.  
static unsigned IsPredicateKnownToFail(const unsigned char *Table,                                         unsigned Index, SDValue N,                                         bool &Result, SelectionDAGISel &SDISel, -                                       SmallVectorImpl<SDValue> &RecordedNodes){ +                 SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {    switch (Table[Index++]) {    default:      Result = false; @@ -1782,21 +1980,21 @@ namespace {  struct MatchScope {    /// FailIndex - If this match fails, this is the index to continue with.    unsigned FailIndex; -   +    /// NodeStack - The node stack when the scope was formed.    SmallVector<SDValue, 4> NodeStack; -   +    /// NumRecordedNodes - The number of recorded nodes when the scope was formed.    unsigned NumRecordedNodes; -   +    /// NumMatchedMemRefs - The number of matched memref entries.    unsigned NumMatchedMemRefs; -   -  /// InputChain/InputFlag - The current chain/flag  -  SDValue InputChain, InputFlag; + +  /// InputChain/InputGlue - The current chain/glue +  SDValue InputChain, InputGlue;    /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty. -  bool HasChainNodesMatched, HasFlagResultNodesMatched; +  bool HasChainNodesMatched, HasGlueResultNodesMatched;  };  } @@ -1838,7 +2036,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,    case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch);    case ISD::UNDEF:     return Select_UNDEF(NodeToMatch);    } -   +    assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");    // Set up the node stack with NodeToMatch as the only node on the stack. @@ -1849,37 +2047,38 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,    // MatchScopes - Scopes used when matching, if a match failure happens, this    // indicates where to continue checking.    
SmallVector<MatchScope, 8> MatchScopes; -   +    // RecordedNodes - This is the set of nodes that have been recorded by the -  // state machine. -  SmallVector<SDValue, 8> RecordedNodes; -   +  // state machine.  The second value is the parent of the node, or null if the +  // root is recorded. +  SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes; +    // MatchedMemRefs - This is the set of MemRef's we've seen in the input    // pattern.    SmallVector<MachineMemOperand*, 2> MatchedMemRefs; -   -  // These are the current input chain and flag for use when generating nodes. + +  // These are the current input chain and glue for use when generating nodes.    // Various Emit operations change these.  For example, emitting a copytoreg    // uses and updates these. -  SDValue InputChain, InputFlag; -   +  SDValue InputChain, InputGlue; +    // ChainNodesMatched - If a pattern matches nodes that have input/output    // chains, the OPC_EmitMergeInputChains operation is emitted which indicates    // which ones they are.  The result is captured into this list so that we can    // update the chain results when the pattern is complete.    SmallVector<SDNode*, 3> ChainNodesMatched; -  SmallVector<SDNode*, 3> FlagResultNodesMatched; -   +  SmallVector<SDNode*, 3> GlueResultNodesMatched; +    DEBUG(errs() << "ISEL: Starting pattern match on root node: ";          NodeToMatch->dump(CurDAG);          errs() << '\n'); -   +    // Determine where to start the interpreter.  Normally we start at opcode #0,    // but if the state machine starts with an OPC_SwitchOpcode, then we    // accelerate the first lookup (which is guaranteed to be hot) with the    // OpcodeOffset table.    unsigned MatcherIndex = 0; -   +    if (!OpcodeOffset.empty()) {      // Already computed the OpcodeOffset table, just index into it.      
if (N.getOpcode() < OpcodeOffset.size()) @@ -1911,7 +2110,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,      if (N.getOpcode() < OpcodeOffset.size())        MatcherIndex = OpcodeOffset[N.getOpcode()];    } -   +    while (1) {      assert(MatcherIndex < TableSize && "Invalid index");  #ifndef NDEBUG @@ -1926,7 +2125,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,        // determine immediately that the first check (or first several) will        // immediately fail, don't even bother pushing a scope for them.        unsigned FailIndex; -       +        while (1) {          unsigned NumToSkip = MatcherTable[MatcherIndex++];          if (NumToSkip & 128) @@ -1936,12 +2135,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,            FailIndex = 0;            break;          } -         +          FailIndex = MatcherIndex+NumToSkip; -         +          unsigned MatcherIndexOfPredicate = MatcherIndex;          (void)MatcherIndexOfPredicate; // silence warning. -         +          // If we can't evaluate this predicate without pushing a scope (e.g. if          // it is a 'MoveParent') or if the predicate succeeds on this node, we          // push the scope and evaluate the full predicate chain. @@ -1950,20 +2149,20 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,                                                Result, *this, RecordedNodes);          if (!Result)            break; -         +          DEBUG(errs() << "  Skipped scope entry (due to false predicate) at "                       << "index " << MatcherIndexOfPredicate                       << ", continuing at " << FailIndex << "\n");          ++NumDAGIselRetries; -         +          // Otherwise, we know that this case of the Scope is guaranteed to fail,          // move to the next case.          MatcherIndex = FailIndex;        } -       +        // If the whole scope failed to match, bail.  
      if (FailIndex == 0) break; -       +        // Push a MatchScope which indicates where to go if the first child fails        // to match.        MatchScope NewEntry; @@ -1972,17 +2171,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,        NewEntry.NumRecordedNodes = RecordedNodes.size();        NewEntry.NumMatchedMemRefs = MatchedMemRefs.size();        NewEntry.InputChain = InputChain; -      NewEntry.InputFlag = InputFlag; +      NewEntry.InputGlue = InputGlue;        NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty(); -      NewEntry.HasFlagResultNodesMatched = !FlagResultNodesMatched.empty(); +      NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty();        MatchScopes.push_back(NewEntry);        continue;      } -    case OPC_RecordNode: +    case OPC_RecordNode: {        // Remember this node, it may end up being an operand in the pattern. -      RecordedNodes.push_back(N); +      SDNode *Parent = 0; +      if (NodeStack.size() > 1) +        Parent = NodeStack[NodeStack.size()-2].getNode(); +      RecordedNodes.push_back(std::make_pair(N, Parent));        continue; -         +    } +      case OPC_RecordChild0: case OPC_RecordChild1:      case OPC_RecordChild2: case OPC_RecordChild3:      case OPC_RecordChild4: case OPC_RecordChild5: @@ -1991,20 +2194,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,        if (ChildNo >= N.getNumOperands())          break;  // Match fails if out of range child #. -      RecordedNodes.push_back(N->getOperand(ChildNo)); +      RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo), +                                             N.getNode()));        continue;      }      case OPC_RecordMemRef:        MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand());        continue; -         -    case OPC_CaptureFlagInput: -      // If the current node has an input flag, capture it in InputFlag. 
+ +    case OPC_CaptureGlueInput: +      // If the current node has an input glue, capture it in InputGlue.        if (N->getNumOperands() != 0 && -          N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) -        InputFlag = N->getOperand(N->getNumOperands()-1); +          N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) +        InputGlue = N->getOperand(N->getNumOperands()-1);        continue; -         +      case OPC_MoveChild: {        unsigned ChildNo = MatcherTable[MatcherIndex++];        if (ChildNo >= N.getNumOperands()) @@ -2013,14 +2217,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,        NodeStack.push_back(N);        continue;      } -         +      case OPC_MoveParent:        // Pop the current node off the NodeStack.        NodeStack.pop_back();        assert(!NodeStack.empty() && "Node stack imbalance!"); -      N = NodeStack.back();   +      N = NodeStack.back();        continue; -      +      case OPC_CheckSame:        if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;        continue; @@ -2036,7 +2240,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,        unsigned CPNum = MatcherTable[MatcherIndex++];        unsigned RecNo = MatcherTable[MatcherIndex++];        assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat"); -      if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo], CPNum, +      if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second, +                               RecordedNodes[RecNo].first, CPNum,                                 RecordedNodes))          break;        continue; @@ -2044,11 +2249,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,      case OPC_CheckOpcode:        if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;        continue; -         +      case OPC_CheckType:        if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) 
break;        continue; -         +      case OPC_SwitchOpcode: {        unsigned CurNodeOpcode = N.getOpcode();        unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; @@ -2066,22 +2271,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,          // If the opcode matches, then we will execute this case.          if (CurNodeOpcode == Opc)            break; -       +          // Otherwise, skip over this case.          MatcherIndex += CaseSize;        } -       +        // If no cases matched, bail out.        if (CaseSize == 0) break; -       +        // Otherwise, execute the case we found.        DEBUG(errs() << "  OpcodeSwitch from " << SwitchStart                     << " to " << MatcherIndex << "\n");        continue;      } -         +      case OPC_SwitchType: { -      MVT::SimpleValueType CurNodeVT = N.getValueType().getSimpleVT().SimpleTy; +      MVT CurNodeVT = N.getValueType().getSimpleVT();        unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;        unsigned CaseSize;        while (1) { @@ -2090,23 +2295,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,          if (CaseSize & 128)            CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);          if (CaseSize == 0) break; -         -        MVT::SimpleValueType CaseVT = -          (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + +        MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];          if (CaseVT == MVT::iPTR) -          CaseVT = TLI.getPointerTy().SimpleTy; -         +          CaseVT = TLI.getPointerTy(); +          // If the VT matches, then we will execute this case.          if (CurNodeVT == CaseVT)            break; -         +          // Otherwise, skip over this case.          MatcherIndex += CaseSize;        } -       +        // If no cases matched, bail out.        if (CaseSize == 0) break; -       +        // Otherwise, execute the case we found.        
DEBUG(errs() << "  TypeSwitch[" << EVT(CurNodeVT).getEVTString()                     << "] from " << SwitchStart << " to " << MatcherIndex<<'\n'); @@ -2135,7 +2339,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,      case OPC_CheckOrImm:        if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;        continue; -         +      case OPC_CheckFoldableChainNode: {        assert(NodeStack.size() != 1 && "No parent node");        // Verify that all intermediate nodes between the root and this one have @@ -2156,7 +2360,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,                           NodeToMatch, OptLevel,                           true/*We validate our own chains*/))          break; -       +        continue;      }      case OPC_EmitInteger: { @@ -2165,22 +2369,24 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,        int64_t Val = MatcherTable[MatcherIndex++];        if (Val & 128)          Val = GetVBR(Val, MatcherTable, MatcherIndex); -      RecordedNodes.push_back(CurDAG->getTargetConstant(Val, VT)); +      RecordedNodes.push_back(std::pair<SDValue, SDNode*>( +                              CurDAG->getTargetConstant(Val, VT), (SDNode*)0));        continue;      }      case OPC_EmitRegister: {        MVT::SimpleValueType VT =          (MVT::SimpleValueType)MatcherTable[MatcherIndex++];        unsigned RegNo = MatcherTable[MatcherIndex++]; -      RecordedNodes.push_back(CurDAG->getRegister(RegNo, VT)); +      RecordedNodes.push_back(std::pair<SDValue, SDNode*>( +                              CurDAG->getRegister(RegNo, VT), (SDNode*)0));        continue;      } -         +      case OPC_EmitConvertToTarget:  {        // Convert from IMM/FPIMM to target version.        
unsigned RecNo = MatcherTable[MatcherIndex++];        assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); -      SDValue Imm = RecordedNodes[RecNo]; +      SDValue Imm = RecordedNodes[RecNo].first;        if (Imm->getOpcode() == ISD::Constant) {          int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue(); @@ -2189,11 +2395,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,          const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();          Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType());        } -       -      RecordedNodes.push_back(Imm); + +      RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));        continue;      } -         +      case OPC_EmitMergeInputChains1_0:    // OPC_EmitMergeInputChains, 1, 0      case OPC_EmitMergeInputChains1_1: {  // OPC_EmitMergeInputChains, 1, 1        // These are space-optimized forms of OPC_EmitMergeInputChains. @@ -2201,28 +2407,28 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,               "EmitMergeInputChains should be the first chain producing node");        assert(ChainNodesMatched.empty() &&               "Should only have one EmitMergeInputChains per match"); -       +        // Read all of the chained nodes.        unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1;        assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); -      ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode()); -         +      ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); +        // FIXME: What if other value results of the node have uses not matched        // by this pattern?        
if (ChainNodesMatched.back() != NodeToMatch && -          !RecordedNodes[RecNo].hasOneUse()) { +          !RecordedNodes[RecNo].first.hasOneUse()) {          ChainNodesMatched.clear();          break;        } -       +        // Merge the input chains if they are not intra-pattern references.        InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); -       +        if (InputChain.getNode() == 0)          break;  // Failed to merge.        continue;      } -         +      case OPC_EmitMergeInputChains: {        assert(InputChain.getNode() == 0 &&               "EmitMergeInputChains should be the first chain producing node"); @@ -2242,54 +2448,55 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,        for (unsigned i = 0; i != NumChains; ++i) {          unsigned RecNo = MatcherTable[MatcherIndex++];          assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); -        ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode()); -         +        ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); +          // FIXME: What if other value results of the node have uses not matched          // by this pattern?          if (ChainNodesMatched.back() != NodeToMatch && -            !RecordedNodes[RecNo].hasOneUse()) { +            !RecordedNodes[RecNo].first.hasOneUse()) {            ChainNodesMatched.clear();            break;          }        } -       +        // If the inner loop broke out, the match fails.        if (ChainNodesMatched.empty())          break;        // Merge the input chains if they are not intra-pattern references.        InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); -       +        if (InputChain.getNode() == 0)          break;  // Failed to merge.        
continue;      } -         +      case OPC_EmitCopyToReg: {        unsigned RecNo = MatcherTable[MatcherIndex++];        assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");        unsigned DestPhysReg = MatcherTable[MatcherIndex++]; -       +        if (InputChain.getNode() == 0)          InputChain = CurDAG->getEntryNode(); -       +        InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(), -                                        DestPhysReg, RecordedNodes[RecNo], -                                        InputFlag); -       -      InputFlag = InputChain.getValue(1); +                                        DestPhysReg, RecordedNodes[RecNo].first, +                                        InputGlue); + +      InputGlue = InputChain.getValue(1);        continue;      } -         +      case OPC_EmitNodeXForm: {        unsigned XFormNo = MatcherTable[MatcherIndex++];        unsigned RecNo = MatcherTable[MatcherIndex++];        assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); -      RecordedNodes.push_back(RunSDNodeXForm(RecordedNodes[RecNo], XFormNo)); +      SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo); +      RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0));        continue;      } -         +      case OPC_EmitNode:      case OPC_MorphNodeTo: {        uint16_t TargetOpc = MatcherTable[MatcherIndex++]; @@ -2304,12 +2511,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,          if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy;          VTs.push_back(VT);        } -       +        if (EmitNodeInfo & OPFL_Chain)          VTs.push_back(MVT::Other); -      if (EmitNodeInfo & OPFL_FlagOutput) -        VTs.push_back(MVT::Flag); -       +      if (EmitNodeInfo & OPFL_GlueOutput) +        VTs.push_back(MVT::Glue); +        // This is hot code, so optimize the two most common cases of 1 and 2        // results.        
SDVTList VTList; @@ -2327,11 +2534,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,          unsigned RecNo = MatcherTable[MatcherIndex++];          if (RecNo & 128)            RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); -         +          assert(RecNo < RecordedNodes.size() && "Invalid EmitNode"); -        Ops.push_back(RecordedNodes[RecNo]); +        Ops.push_back(RecordedNodes[RecNo].first);        } -       +        // If there are variadic operands to add, handle them now.        if (EmitNodeInfo & OPFL_VariadicInfo) {          // Determine the start index to copy from. @@ -2339,22 +2546,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,          FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;          assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&                 "Invalid variadic node"); -        // Copy all of the variadic operands, not including a potential flag +        // Copy all of the variadic operands, not including a potential glue          // input.          for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();               i != e; ++i) {            SDValue V = NodeToMatch->getOperand(i); -          if (V.getValueType() == MVT::Flag) break; +          if (V.getValueType() == MVT::Glue) break;            Ops.push_back(V);          }        } -       -      // If this has chain/flag inputs, add them. + +      // If this has chain/glue inputs, add them.        if (EmitNodeInfo & OPFL_Chain)          Ops.push_back(InputChain); -      if ((EmitNodeInfo & OPFL_FlagInput) && InputFlag.getNode() != 0) -        Ops.push_back(InputFlag); -       +      if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0) +        Ops.push_back(InputGlue); +        // Create the node.        
SDNode *Res = 0;        if (Opcode != OPC_MorphNodeTo) { @@ -2362,28 +2569,29 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,          // add the results to the RecordedNodes list.          Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),                                       VTList, Ops.data(), Ops.size()); -         -        // Add all the non-flag/non-chain results to the RecordedNodes list. + +        // Add all the non-glue/non-chain results to the RecordedNodes list.          for (unsigned i = 0, e = VTs.size(); i != e; ++i) { -          if (VTs[i] == MVT::Other || VTs[i] == MVT::Flag) break; -          RecordedNodes.push_back(SDValue(Res, i)); +          if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break; +          RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i), +                                                             (SDNode*) 0));          } -         +        } else {          Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),                          EmitNodeInfo);        } -       -      // If the node had chain/flag results, update our notion of the current -      // chain and flag. -      if (EmitNodeInfo & OPFL_FlagOutput) { -        InputFlag = SDValue(Res, VTs.size()-1); + +      // If the node had chain/glue results, update our notion of the current +      // chain and glue. +      if (EmitNodeInfo & OPFL_GlueOutput) { +        InputGlue = SDValue(Res, VTs.size()-1);          if (EmitNodeInfo & OPFL_Chain)            InputChain = SDValue(Res, VTs.size()-2);        } else if (EmitNodeInfo & OPFL_Chain)          InputChain = SDValue(Res, VTs.size()-1); -      // If the OPFL_MemRefs flag is set on this node, slap all of the +      // If the OPFL_MemRefs glue is set on this node, slap all of the        // accumulated memrefs onto it.        
//        // FIXME: This is vastly incorrect for patterns with multiple outputs @@ -2396,37 +2604,37 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,          cast<MachineSDNode>(Res)            ->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size());        } -       +        DEBUG(errs() << "  "                     << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")                     << " node: "; Res->dump(CurDAG); errs() << "\n"); -       +        // If this was a MorphNodeTo then we're completely done!        if (Opcode == OPC_MorphNodeTo) { -        // Update chain and flag uses. -        UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched, -                             InputFlag, FlagResultNodesMatched, true); +        // Update chain and glue uses. +        UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched, +                            InputGlue, GlueResultNodesMatched, true);          return Res;        } -       +        continue;      } -         -    case OPC_MarkFlagResults: { + +    case OPC_MarkGlueResults: {        unsigned NumNodes = MatcherTable[MatcherIndex++]; -       -      // Read and remember all the flag-result nodes. + +      // Read and remember all the glue-result nodes.        for (unsigned i = 0; i != NumNodes; ++i) {          unsigned RecNo = MatcherTable[MatcherIndex++];          if (RecNo & 128)            RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);          assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); -        FlagResultNodesMatched.push_back(RecordedNodes[RecNo].getNode()); +        GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());        }        continue;      } -       +      case OPC_CompleteMatch: {        // The match has been completed, and any new nodes (if any) have been        // created.  
Patch up references to the matched dag to use the newly @@ -2437,13 +2645,13 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,          unsigned ResSlot = MatcherTable[MatcherIndex++];          if (ResSlot & 128)            ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex); -         +          assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame"); -        SDValue Res = RecordedNodes[ResSlot]; -         +        SDValue Res = RecordedNodes[ResSlot].first; +          assert(i < NodeToMatch->getNumValues() &&                 NodeToMatch->getValueType(i) != MVT::Other && -               NodeToMatch->getValueType(i) != MVT::Flag && +               NodeToMatch->getValueType(i) != MVT::Glue &&                 "Invalid number of results to complete!");          assert((NodeToMatch->getValueType(i) == Res.getValueType() ||                  NodeToMatch->getValueType(i) == MVT::iPTR || @@ -2454,24 +2662,23 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,          CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);        } -      // If the root node defines a flag, add it to the flag nodes to update -      // list. -      if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Flag) -        FlagResultNodesMatched.push_back(NodeToMatch); -       -      // Update chain and flag uses. -      UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched, -                           InputFlag, FlagResultNodesMatched, false); -       +      // If the root node defines glue, add it to the glue nodes to update list. +      if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue) +        GlueResultNodesMatched.push_back(NodeToMatch); + +      // Update chain and glue uses. 
+      UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched, +                          InputGlue, GlueResultNodesMatched, false); +        assert(NodeToMatch->use_empty() &&               "Didn't replace all uses of the node?"); -       +        // FIXME: We just return here, which interacts correctly with SelectRoot        // above.  We should fix this to not return an SDNode* anymore.        return 0;      }      } -     +      // If the code reached this point, then the match failed.  See if there is      // another child to try in the current 'Scope', otherwise pop it until we      // find a case to check. @@ -2494,15 +2701,15 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,        if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())          MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);        MatcherIndex = LastScope.FailIndex; -       +        DEBUG(errs() << "  Continuing at " << MatcherIndex << "\n"); -     +        InputChain = LastScope.InputChain; -      InputFlag = LastScope.InputFlag; +      InputGlue = LastScope.InputGlue;        if (!LastScope.HasChainNodesMatched)          ChainNodesMatched.clear(); -      if (!LastScope.HasFlagResultNodesMatched) -        FlagResultNodesMatched.clear(); +      if (!LastScope.HasGlueResultNodesMatched) +        GlueResultNodesMatched.clear();        // Check to see what the offset is at the new MatcherIndex.  If it is zero        // we have reached the end of this scope, otherwise we have another child @@ -2517,21 +2724,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,          LastScope.FailIndex = MatcherIndex+NumToSkip;          break;        } -       +        // End of this scope, pop it and try the next child in the containing        // scope.        
MatchScopes.pop_back();      }    }  } -     +  void SelectionDAGISel::CannotYetSelect(SDNode *N) {    std::string msg;    raw_string_ostream Msg(msg); -  Msg << "Cannot yet select: "; -   +  Msg << "Cannot select: "; +    if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&        N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&        N->getOpcode() != ISD::INTRINSIC_VOID) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 8313de5e32bb..76eb9453561e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -93,7 +93,7 @@ namespace llvm {      static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {        SDValue Op = EI.getNode()->getOperand(EI.getOperand());        EVT VT = Op.getValueType(); -      if (VT == MVT::Flag) +      if (VT == MVT::Glue)          return "color=red,style=bold";        else if (VT == MVT::Other)          return "color=blue,style=dashed"; @@ -273,14 +273,14 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {    raw_string_ostream O(s);    O << "SU(" << SU->NodeNum << "): ";    if (SU->getNode()) { -    SmallVector<SDNode *, 4> FlaggedNodes; -    for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) -      FlaggedNodes.push_back(N); -    while (!FlaggedNodes.empty()) { +    SmallVector<SDNode *, 4> GluedNodes; +    for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) +      GluedNodes.push_back(N); +    while (!GluedNodes.empty()) {        O << DOTGraphTraits<SelectionDAG*> -        ::getSimpleNodeLabel(FlaggedNodes.back(), DAG); -      FlaggedNodes.pop_back(); -      if (!FlaggedNodes.empty()) +        ::getSimpleNodeLabel(GluedNodes.back(), DAG); +      GluedNodes.pop_back(); +      if (!GluedNodes.empty())          O << "\n    ";      }    } else { diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 
b74f600cfa2d..691390e2a0e4 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -28,6 +28,7 @@  #include "llvm/ADT/STLExtras.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/MathExtras.h" +#include <cctype>  using namespace llvm;  namespace llvm { @@ -530,7 +531,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,        setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);        setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);      } -     +      // These operations default to expand.      setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);      setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); @@ -538,8 +539,8 @@ TargetLowering::TargetLowering(const TargetMachine &tm,    // Most targets ignore the @llvm.prefetch intrinsic.    setOperationAction(ISD::PREFETCH, MVT::Other, Expand); -   -  // ConstantFP nodes default to expand.  Targets can either change this to  + +  // ConstantFP nodes default to expand.  Targets can either change this to    // Legal, in which case all fp constants are legal, or use isFPImmLegal()    // to optimize expansions for certain constants.    setOperationAction(ISD::ConstantFP, MVT::f32, Expand); @@ -560,18 +561,21 @@ TargetLowering::TargetLowering(const TargetMachine &tm,    // Default ISD::TRAP to expand (which turns it into abort).    
setOperationAction(ISD::TRAP, MVT::Other, Expand); -     +    IsLittleEndian = TD->isLittleEndian();    ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());    memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));    memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));    maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; +  maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize +    = maxStoresPerMemmoveOptSize = 4;    benefitFromCodePlacementOpt = false;    UseUnderscoreSetJmp = false;    UseUnderscoreLongJmp = false;    SelectIsExpensive = false;    IntDivIsCheap = false;    Pow2DivIsCheap = false; +  JumpIsExpensive = false;    StackPointerRegisterToSaveRestore = 0;    ExceptionPointerRegister = 0;    ExceptionSelectorRegister = 0; @@ -617,16 +621,16 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,    // Figure out the right, legal destination reg to copy into.    unsigned NumElts = VT.getVectorNumElements();    MVT EltTy = VT.getVectorElementType(); -   +    unsigned NumVectorRegs = 1; -   -  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we  + +  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we    // could break down into LHS/RHS like LegalizeDAG does.    if (!isPowerOf2_32(NumElts)) {      NumVectorRegs = NumElts;      NumElts = 1;    } -   +    // Divide the input until we get to a supported size.  This will always    // end with a scalar if the target doesn't support vectors.    
while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { @@ -635,7 +639,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,    }    NumIntermediates = NumVectorRegs; -   +    MVT NewVT = MVT::getVectorVT(EltTy, NumElts);    if (!TLI->isTypeLegal(NewVT))      NewVT = EltTy; @@ -645,7 +649,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,    RegisterVT = DestVT;    if (EVT(DestVT).bitsLT(NewVT))    // Value is expanded, e.g. i64 -> i16.      return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); -   +    // Otherwise, promotion or legal types use the same number of registers as    // the vector decimated to the appropriate level.    return NumVectorRegs; @@ -750,7 +754,7 @@ void TargetLowering::computeRegisterProperties() {      RegisterTypeForVT[MVT::ppcf128] = MVT::f64;      TransformToType[MVT::ppcf128] = MVT::f64;      ValueTypeActions.setTypeAction(MVT::ppcf128, Expand); -  }     +  }    // Decide how to handle f64. If the target does not have native f64 support,    // expand it to i64 and we will be generating soft float library calls. @@ -776,13 +780,13 @@ void TargetLowering::computeRegisterProperties() {        ValueTypeActions.setTypeAction(MVT::f32, Expand);      }    } -   +    // Loop over all of the vector value types to see which need transformations.    for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {      MVT VT = (MVT::SimpleValueType)i;      if (isTypeLegal(VT)) continue; -     +      // Determine if there is a legal wider type.  If so, we should promote to      // that wider vector type.      
EVT EltVT = VT.getVectorElementType(); @@ -792,8 +796,8 @@ void TargetLowering::computeRegisterProperties() {        for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {          EVT SVT = (MVT::SimpleValueType)nVT;          if (SVT.getVectorElementType() == EltVT && -            SVT.getVectorNumElements() > NElts &&  -            isTypeSynthesizable(SVT)) { +            SVT.getVectorNumElements() > NElts && +            isTypeLegal(SVT)) {            TransformToType[i] = SVT;            RegisterTypeForVT[i] = SVT;            NumRegistersForVT[i] = 1; @@ -804,7 +808,7 @@ void TargetLowering::computeRegisterProperties() {        }        if (IsLegalWiderType) continue;      } -     +      MVT IntermediateVT;      EVT RegisterVT;      unsigned NumIntermediates; @@ -812,7 +816,7 @@ void TargetLowering::computeRegisterProperties() {        getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,                                  RegisterVT, this);      RegisterTypeForVT[i] = RegisterVT; -     +      EVT NVT = VT.getPow2VectorType();      if (NVT == VT) {        // Type is already a power of 2.  The default action is to split. @@ -865,7 +869,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,                                                  unsigned &NumIntermediates,                                                  EVT &RegisterVT) const {    unsigned NumElts = VT.getVectorNumElements(); -   +    // If there is a wider vector type with the same element type as this one,    // we should widen to that legal vector type.  This handles things like    // <2 x float> -> <4 x float>. @@ -877,19 +881,19 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,        return 1;      }    } -   +    // Figure out the right, legal destination reg to copy into.    
EVT EltTy = VT.getVectorElementType(); -   +    unsigned NumVectorRegs = 1; -   -  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we  + +  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we    // could break down into LHS/RHS like LegalizeDAG does.    if (!isPowerOf2_32(NumElts)) {      NumVectorRegs = NumElts;      NumElts = 1;    } -   +    // Divide the input until we get to a supported size.  This will always    // end with a scalar if the target doesn't support vectors.    while (NumElts > 1 && !isTypeLegal( @@ -899,7 +903,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,    }    NumIntermediates = NumVectorRegs; -   +    EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);    if (!isTypeLegal(NewVT))      NewVT = EltTy; @@ -909,13 +913,13 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,    RegisterVT = DestVT;    if (DestVT.bitsLT(NewVT))   // Value is expanded, e.g. i64 -> i16.      return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); -   +    // Otherwise, promotion or legal types use the same number of registers as    // the vector decimated to the appropriate level.    return NumVectorRegs;  } -/// Get the EVTs and ArgFlags collections that represent the legalized return  +/// Get the EVTs and ArgFlags collections that represent the legalized return  /// type of the given function.  This does not require a DAG or a return value,  /// and is suitable for use before any DAGs for the function are constructed.  /// TODO: Move this out of TargetLowering.cpp. @@ -988,11 +992,11 @@ unsigned TargetLowering::getJumpTableEncoding() const {    // In non-pic modes, just use the address of a block.    if (getTargetMachine().getRelocationModel() != Reloc::PIC_)      return MachineJumpTableInfo::EK_BlockAddress; -   +    // In PIC mode, if the target supports a GPRel32 directive, use it.    
if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0)      return MachineJumpTableInfo::EK_GPRel32BlockAddress; -   +    // Otherwise, use a label difference.    return MachineJumpTableInfo::EK_LabelDifference32;  } @@ -1036,11 +1040,11 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {  //  Optimization Methods  //===----------------------------------------------------------------------===// -/// ShrinkDemandedConstant - Check to see if the specified operand of the  +/// ShrinkDemandedConstant - Check to see if the specified operand of the  /// specified instruction is a constant integer.  If so, check to see if there  /// are any bits set in the constant that are not demanded.  If so, shrink the  /// constant and return true. -bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,  +bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,                                                          const APInt &Demanded) {    DebugLoc dl = Op.getDebugLoc(); @@ -1062,7 +1066,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,        EVT VT = Op.getValueType();        SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),                                  DAG.getConstant(Demanded & -                                                C->getAPIntValue(),  +                                                C->getAPIntValue(),                                                  VT));        return CombineTo(Op, New);      } @@ -1139,9 +1143,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,    KnownZero = KnownOne = APInt(BitWidth, 0);    // Other users may use these bits. -  if (!Op.getNode()->hasOneUse()) {  +  if (!Op.getNode()->hasOneUse()) {      if (Depth != 0) { -      // If not at the root, Just compute the KnownZero/KnownOne bits to  +      // If not at the root, Just compute the KnownZero/KnownOne bits to        // simplify things downstream.     
   TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);        return false; @@ -1149,7 +1153,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      // If this is the root being simplified, allow it to have multiple uses,      // just set the NewMask to all bits.      NewMask = APInt::getAllOnesValue(BitWidth); -  } else if (DemandedMask == 0) {    +  } else if (DemandedMask == 0) {      // Not demanding any bits from Op.      if (Op.getOpcode() != ISD::UNDEF)        return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType())); @@ -1172,8 +1176,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      // the RHS.      if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {        APInt LHSZero, LHSOne; +      // Do not increment Depth here; that can cause an infinite loop.        TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask, -                                LHSZero, LHSOne, Depth+1); +                                LHSZero, LHSOne, Depth);        // If the LHS already has zeros where RHSC does, this and is dead.        
if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))          return TLO.CombineTo(Op, Op.getOperand(0)); @@ -1182,16 +1187,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,        if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))          return true;      } -     +      if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,                               KnownOne, TLO, Depth+1))        return true; -    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");  +    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");      if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,                               KnownZero2, KnownOne2, TLO, Depth+1))        return true; -    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");  -       +    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); +      // If all of the demanded bits are known one on one side, return the other.      // These bits cannot contribute to the result of the 'and'.      
if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) @@ -1214,15 +1219,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      KnownZero |= KnownZero2;      break;    case ISD::OR: -    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,  +    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,                               KnownOne, TLO, Depth+1))        return true; -    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");  +    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");      if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,                               KnownZero2, KnownOne2, TLO, Depth+1))        return true; -    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");  -     +    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); +      // If all of the demanded bits are known zero on one side, return the other.      // These bits cannot contribute to the result of the 'or'.      
if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask)) @@ -1248,15 +1253,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      KnownOne |= KnownOne2;      break;    case ISD::XOR: -    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,  +    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,                               KnownOne, TLO, Depth+1))        return true; -    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");  +    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");      if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,                               KnownOne2, TLO, Depth+1))        return true; -    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");  -     +    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); +      // If all of the demanded bits are known zero on one side, return the other.      // These bits cannot contribute to the result of the 'xor'.      if ((KnownZero & NewMask) == NewMask) @@ -1274,12 +1279,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),                                                 Op.getOperand(0),                                                 Op.getOperand(1))); -     +      // Output known-0 bits are known if clear or set in both the LHS & RHS.      KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);      // Output known-1 are known to be set if set in only one of the LHS, RHS.      KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); -     +      // If all of the demanded bits on one side are known, and all of the set      // bits on that side are also known to be set on the other side, turn this      // into an AND, as we know the bits will be cleared. 
@@ -1288,11 +1293,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,        if ((KnownOne & KnownOne2) == KnownOne) {          EVT VT = Op.getValueType();          SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); -        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,  +        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,                                                   Op.getOperand(0), ANDC));        }      } -     +      // If the RHS is a constant, see if we can simplify it.      // for XOR, we prefer to force bits to 1 if they will make a -1.      // if we can't force bits, try to shrink constant @@ -1317,37 +1322,37 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      KnownOne  = KnownOneOut;      break;    case ISD::SELECT: -    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,  +    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,                               KnownOne, TLO, Depth+1))        return true;      if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,                               KnownOne2, TLO, Depth+1))        return true; -    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");  -    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");  -     +    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); +    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); +      // If the operands are constants, see if we can simplify them.      if (TLO.ShrinkDemandedConstant(Op, NewMask))        return true; -     +      // Only known if known in both the LHS and RHS.      
KnownOne &= KnownOne2;      KnownZero &= KnownZero2;      break;    case ISD::SELECT_CC: -    if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,  +    if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,                               KnownOne, TLO, Depth+1))        return true;      if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,                               KnownOne2, TLO, Depth+1))        return true; -    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");  -    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");  -     +    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); +    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); +      // If the operands are constants, see if we can simplify them.      if (TLO.ShrinkDemandedConstant(Op, NewMask))        return true; -       +      // Only known if known in both the LHS and RHS.      KnownOne &= KnownOne2;      KnownZero &= KnownZero2; @@ -1373,16 +1378,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,            if (Diff < 0) {              Diff = -Diff;              Opc = ISD::SRL; -          }           -           -          SDValue NewSA =  +          } + +          SDValue NewSA =              TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());            EVT VT = Op.getValueType();            return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,                                                     InOp.getOperand(0), NewSA));          } -      }       -       +      } +        if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt),                                 KnownZero, KnownOne, TLO, Depth+1))          return true; @@ -1421,7 +1426,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,        unsigned ShAmt = SA->getZExtValue();        unsigned VTSize = VT.getSizeInBits();        SDValue InOp = Op.getOperand(0); -       +        // If the shift count is an 
invalid immediate, don't do anything.        if (ShAmt >= BitWidth)          break; @@ -1438,20 +1443,20 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,            if (Diff < 0) {              Diff = -Diff;              Opc = ISD::SHL; -          }           -           +          } +            SDValue NewSA =              TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());            return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,                                                     InOp.getOperand(0), NewSA));          } -      }       -       +      } +        // Compute the new bits that are at the top now.        if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),                                 KnownZero, KnownOne, TLO, Depth+1))          return true; -      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");  +      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");        KnownZero = KnownZero.lshr(ShAmt);        KnownOne  = KnownOne.lshr(ShAmt); @@ -1472,7 +1477,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {        EVT VT = Op.getValueType();        unsigned ShAmt = SA->getZExtValue(); -       +        // If the shift count is an invalid immediate, don't do anything.        
if (ShAmt >= BitWidth)          break; @@ -1484,21 +1489,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,        APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);        if (HighBits.intersects(NewMask))          InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits()); -       +        if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,                                 KnownZero, KnownOne, TLO, Depth+1))          return true; -      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");  +      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");        KnownZero = KnownZero.lshr(ShAmt);        KnownOne  = KnownOne.lshr(ShAmt); -       +        // Handle the sign bit, adjusted to where it is now in the mask.        APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt); -       +        // If the input sign bit is known to be zero, or if none of the top bits        // are demanded, turn this into an unsigned shift right.        if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { -        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,  +        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,                                                   Op.getOperand(0),                                                   Op.getOperand(1)));        } else if (KnownOne.intersects(SignBit)) { // New bits are known one. @@ -1509,23 +1514,23 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,    case ISD::SIGN_EXTEND_INREG: {      EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); -    // Sign extension.  Compute the demanded bits in the result that are not  +    // Sign extension.  Compute the demanded bits in the result that are not      // present in the input.      
APInt NewBits =        APInt::getHighBitsSet(BitWidth,                              BitWidth - EVT.getScalarType().getSizeInBits()); -     +      // If none of the extended bits are demanded, eliminate the sextinreg.      if ((NewBits & NewMask) == 0)        return TLO.CombineTo(Op, Op.getOperand(0)); -    APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits()); -    InSignBit.zext(BitWidth); +    APInt InSignBit = +      APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth);      APInt InputDemandedBits =        APInt::getLowBitsSet(BitWidth,                             EVT.getScalarType().getSizeInBits()) &        NewMask; -     +      // Since the sign extended bits are demanded, we know that the sign      // bit is demanded.      InputDemandedBits |= InSignBit; @@ -1533,16 +1538,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,                               KnownZero, KnownOne, TLO, Depth+1))        return true; -    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");  +    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");      // If the sign bit of the input is known set or clear, then we know the      // top bits of the result. -     +      // If the input sign bit is known zero, convert this into a zero extension.      
if (KnownZero.intersects(InSignBit)) -      return TLO.CombineTo(Op,  +      return TLO.CombineTo(Op,                             TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT)); -     +      if (KnownOne.intersects(InSignBit)) {    // Input sign bit known set        KnownOne |= NewBits;        KnownZero &= ~NewBits; @@ -1555,23 +1560,22 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,    case ISD::ZERO_EXTEND: {      unsigned OperandBitWidth =        Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); -    APInt InMask = NewMask; -    InMask.trunc(OperandBitWidth); -     +    APInt InMask = NewMask.trunc(OperandBitWidth); +      // If none of the top bits are demanded, convert this into an any_extend.      APInt NewBits =        APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;      if (!NewBits.intersects(NewMask))        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, -                                               Op.getValueType(),  +                                               Op.getValueType(),                                                 Op.getOperand(0))); -     +      if (SimplifyDemandedBits(Op.getOperand(0), InMask,                               KnownZero, KnownOne, TLO, Depth+1))        return true; -    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");  -    KnownZero.zext(BitWidth); -    KnownOne.zext(BitWidth); +    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); +    KnownZero = KnownZero.zext(BitWidth); +    KnownOne = KnownOne.zext(BitWidth);      KnownZero |= NewBits;      break;    } @@ -1581,31 +1585,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      APInt InMask    = APInt::getLowBitsSet(BitWidth, InBits);      APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);      APInt NewBits   = ~InMask & NewMask; -     +      // If none of the top bits are demanded, convert this into an any_extend.      
if (NewBits == 0)        return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,                                                Op.getValueType(),                                                Op.getOperand(0))); -     +      // Since some of the sign extended bits are demanded, we know that the sign      // bit is demanded.      APInt InDemandedBits = InMask & NewMask;      InDemandedBits |= InSignBit; -    InDemandedBits.trunc(InBits); -     -    if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,  +    InDemandedBits = InDemandedBits.trunc(InBits); + +    if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,                               KnownOne, TLO, Depth+1))        return true; -    KnownZero.zext(BitWidth); -    KnownOne.zext(BitWidth); -     +    KnownZero = KnownZero.zext(BitWidth); +    KnownOne = KnownOne.zext(BitWidth); +      // If the sign bit is known zero, convert this to a zero extend.      if (KnownZero.intersects(InSignBit))        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, -                                               Op.getValueType(),  +                                               Op.getValueType(),                                                 Op.getOperand(0))); -     +      // If the sign bit is known one, the top bits match.      
if (KnownOne.intersects(InSignBit)) {        KnownOne  |= NewBits; @@ -1619,14 +1623,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,    case ISD::ANY_EXTEND: {      unsigned OperandBitWidth =        Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); -    APInt InMask = NewMask; -    InMask.trunc(OperandBitWidth); +    APInt InMask = NewMask.trunc(OperandBitWidth);      if (SimplifyDemandedBits(Op.getOperand(0), InMask,                               KnownZero, KnownOne, TLO, Depth+1))        return true; -    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");  -    KnownZero.zext(BitWidth); -    KnownOne.zext(BitWidth); +    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); +    KnownZero = KnownZero.zext(BitWidth); +    KnownOne = KnownOne.zext(BitWidth);      break;    }    case ISD::TRUNCATE: { @@ -1634,14 +1637,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      // zero/one bits live out.      unsigned OperandBitWidth =        Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); -    APInt TruncMask = NewMask; -    TruncMask.zext(OperandBitWidth); +    APInt TruncMask = NewMask.zext(OperandBitWidth);      if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,                               KnownZero, KnownOne, TLO, Depth+1))        return true; -    KnownZero.trunc(BitWidth); -    KnownOne.trunc(BitWidth); -     +    KnownZero = KnownZero.trunc(BitWidth); +    KnownOne = KnownOne.trunc(BitWidth); +      // If the input is only used by this truncate, see if we can shrink it based      // on the known demanded bits.      
if (Op.getOperand(0).getNode()->hasOneUse()) { @@ -1661,25 +1663,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,            break;          APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,                                                 OperandBitWidth - BitWidth); -        HighBits = HighBits.lshr(ShAmt->getZExtValue()); -        HighBits.trunc(BitWidth); +        HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);          if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {            // None of the shifted in bits are needed.  Add a truncate of the            // shift input, then shift it.            SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, -                                             Op.getValueType(),  +                                             Op.getValueType(),                                               In.getOperand(0));            return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,                                                     Op.getValueType(), -                                                   NewTrunc,  +                                                   NewTrunc,                                                     In.getOperand(1)));          }          break;        }      } -     -    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");  + +    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");      break;    }    case ISD::AssertZext: { @@ -1689,7 +1690,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      if (SimplifyDemandedBits(Op.getOperand(0), NewMask,                               KnownZero, KnownOne, TLO, Depth+1))        return true; -    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");  +    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");      EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();      APInt InMask = APInt::getLowBitsSet(BitWidth, @@ -1697,7 +1698,7 @@ 
bool TargetLowering::SimplifyDemandedBits(SDValue Op,      KnownZero |= ~InMask & NewMask;      break;    } -  case ISD::BIT_CONVERT: +  case ISD::BITCAST:  #if 0      // If this is an FP->Int bitcast and if the sign bit is the only thing that      // is demanded, turn this into a FGETSIGN. @@ -1709,7 +1710,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,            isOperationLegal(ISD::FGETSIGN, Op.getValueType())) {          // Make a FGETSIGN + SHL to move the sign bit into the appropriate          // place.  We expect the SHL to be eliminated by other optimizations. -        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),  +        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),                                           Op.getOperand(0));          unsigned ShVal = Op.getValueType().getSizeInBits()-1;          SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy()); @@ -1742,21 +1743,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);      break;    } -   +    // If we know the value of all of the demanded bits, return this as a    // constant.    if ((NewMask & (KnownZero|KnownOne)) == NewMask)      return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType())); -   +    return false;  } -/// computeMaskedBitsForTargetNode - Determine which of the bits specified  -/// in Mask are known to be either zero or one and return them in the  +/// computeMaskedBitsForTargetNode - Determine which of the bits specified +/// in Mask are known to be either zero or one and return them in the  /// KnownZero/KnownOne bitsets. 
-void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,  +void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,                                                      const APInt &Mask, -                                                    APInt &KnownZero,  +                                                    APInt &KnownZero,                                                      APInt &KnownOne,                                                      const SelectionDAG &DAG,                                                      unsigned Depth) const { @@ -1817,7 +1818,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {           (KnownOne.countPopulation() == 1);  } -/// SimplifySetCC - Try to simplify a setcc built with the specified operands  +/// SimplifySetCC - Try to simplify a setcc built with the specified operands  /// and cc. If it is unable to simplify it, return a null SDValue.  SDValue  TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, @@ -1869,6 +1870,30 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,        }      } +    SDValue CTPOP = N0; +    // Look through truncs that don't change the value of a ctpop. 
+    if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE) +      CTPOP = N0.getOperand(0); + +    if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP && +        (N0 == CTPOP || N0.getValueType().getSizeInBits() > +                        Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) { +      EVT CTVT = CTPOP.getValueType(); +      SDValue CTOp = CTPOP.getOperand(0); + +      // (ctpop x) u< 2 -> (x & x-1) == 0 +      // (ctpop x) u> 1 -> (x & x-1) != 0 +      if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ +        SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp, +                                  DAG.getConstant(1, CTVT)); +        SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub); +        ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; +        return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC); +      } + +      // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal. +    } +      // If the LHS is '(and load, const)', the RHS is 0,      // the test is for equality or unsigned, and all 1 bits of the const are      // in the same partial word, see if we can shorten the load. @@ -1884,7 +1909,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,        if (!Lod->isVolatile() && Lod->isUnindexed()) {          unsigned origWidth = N0.getValueType().getSizeInBits();          unsigned maskWidth = origWidth; -        // We can narrow (e.g.) 16-bit extending loads on 32-bit target to  +        // We can narrow (e.g.) 16-bit extending loads on 32-bit target to          // 8 bits, but have to be careful...          
if (Lod->getExtensionType() != ISD::NON_EXTLOAD)            origWidth = Lod->getMemoryVT().getSizeInBits(); @@ -1916,10 +1941,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,                                DAG.getConstant(bestOffset, PtrType));            unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);            SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, -                                        Lod->getSrcValue(),  -                                        Lod->getSrcValueOffset() + bestOffset, +                                Lod->getPointerInfo().getWithOffset(bestOffset),                                          false, false, NewAlign); -          return DAG.getSetCC(dl, VT,  +          return DAG.getSetCC(dl, VT,                                DAG.getNode(ISD::AND, dl, newVT, NewLoad,                                        DAG.getConstant(bestMask.trunc(bestWidth),                                                        newVT)), @@ -1969,7 +1993,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,              (isOperationLegal(ISD::SETCC, newVT) &&                getCondCodeAction(Cond, newVT)==Legal))            return DAG.getSetCC(dl, VT, N0.getOperand(0), -                              DAG.getConstant(APInt(C1).trunc(InSize), newVT), +                              DAG.getConstant(C1.trunc(InSize), newVT),                                Cond);          break;        } @@ -1987,7 +2011,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,        // the sign extension, it is impossible for both sides to be equal.        
if (C1.getMinSignedBits() > ExtSrcTyBits)          return DAG.getConstant(Cond == ISD::SETNE, VT); -       +        SDValue ZextOp;        EVT Op0Ty = N0.getOperand(0).getValueType();        if (Op0Ty == ExtSrcTy) { @@ -2000,10 +2024,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,        if (!DCI.isCalledByLegalizer())          DCI.AddToWorklist(ZextOp.getNode());        // Otherwise, make this a use of a zext. -      return DAG.getSetCC(dl, VT, ZextOp,  +      return DAG.getSetCC(dl, VT, ZextOp,                            DAG.getConstant(C1 & APInt::getLowBitsSet(                                                                ExtDstTyBits, -                                                              ExtSrcTyBits),  +                                                              ExtSrcTyBits),                                            ExtDstTy),                            Cond);      } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && @@ -2013,16 +2037,16 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,            isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {          bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);          if (TrueWhenTrue) -          return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);         +          return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);          // Invert the condition.          
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); -        CC = ISD::getSetCCInverse(CC,  +        CC = ISD::getSetCCInverse(CC,                                    N0.getOperand(0).getValueType().isInteger());          return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);        }        if ((N0.getOpcode() == ISD::XOR || -           (N0.getOpcode() == ISD::AND &&  +           (N0.getOpcode() == ISD::AND &&              N0.getOperand(0).getOpcode() == ISD::XOR &&              N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&            isa<ConstantSDNode>(N0.getOperand(1)) && @@ -2038,7 +2062,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,            if (N0.getOpcode() == ISD::XOR)              Val = N0.getOperand(0);            else { -            assert(N0.getOpcode() == ISD::AND &&  +            assert(N0.getOpcode() == ISD::AND &&                      N0.getOperand(0).getOpcode() == ISD::XOR);              // ((X^1)&1)^1 -> X & 1              Val = DAG.getNode(ISD::AND, dl, N0.getValueType(), @@ -2082,7 +2106,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,          }        }      } -     +      APInt MinVal, MaxVal;      unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();      if (ISD::isSignedIntSetCC(Cond)) { @@ -2097,7 +2121,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,      if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {        if (C1 == MinVal) return DAG.getConstant(1, VT);   // X >= MIN --> true        // X >= C0 --> X > (C0-1) -      return DAG.getSetCC(dl, VT, N0,  +      return DAG.getSetCC(dl, VT, N0,                            DAG.getConstant(C1-1, N1.getValueType()),                            (Cond == ISD::SETGE) ? 
ISD::SETGT : ISD::SETUGT);      } @@ -2105,7 +2129,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,      if (Cond == ISD::SETLE || Cond == ISD::SETULE) {        if (C1 == MaxVal) return DAG.getConstant(1, VT);   // X <= MAX --> true        // X <= C0 --> X < (C0+1) -      return DAG.getSetCC(dl, VT, N0,  +      return DAG.getSetCC(dl, VT, N0,                            DAG.getConstant(C1+1, N1.getValueType()),                            (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);      } @@ -2128,12 +2152,12 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,      // If we have setult X, 1, turn it into seteq X, 0      if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) -      return DAG.getSetCC(dl, VT, N0,  -                          DAG.getConstant(MinVal, N0.getValueType()),  +      return DAG.getSetCC(dl, VT, N0, +                          DAG.getConstant(MinVal, N0.getValueType()),                            ISD::SETEQ);      // If we have setugt X, Max-1, turn it into seteq X, Max      else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) -      return DAG.getSetCC(dl, VT, N0,  +      return DAG.getSetCC(dl, VT, N0,                            DAG.getConstant(MaxVal, N0.getValueType()),                            ISD::SETEQ); @@ -2141,9 +2165,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,      // by changing cc.      // SETUGT X, SINTMAX  -> SETLT X, 0 -    if (Cond == ISD::SETUGT &&  +    if (Cond == ISD::SETUGT &&          C1 == APInt::getSignedMaxValue(OperandBitSize)) -      return DAG.getSetCC(dl, VT, N0,  +      return DAG.getSetCC(dl, VT, N0,                            DAG.getConstant(0, N1.getValueType()),                            ISD::SETLT); @@ -2203,7 +2227,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,          return DAG.getUNDEF(VT);        }      } -     +      // Otherwise, we know the RHS is not a NaN.  
Simplify the node to drop the      // constant if knowing that the operand is non-nan is enough.  We prefer to      // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to @@ -2278,14 +2302,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,          if (DAG.isCommutativeBinOp(N0.getOpcode())) {            // If X op Y == Y op X, try other combinations.            if (N0.getOperand(0) == N1.getOperand(1)) -            return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),  +            return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),                                  Cond);            if (N0.getOperand(1) == N1.getOperand(0)) -            return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),  +            return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),                                  Cond);          }        } -       +        if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {          if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {            // Turn (X+C1) == C2 --> X == C2-C1 @@ -2295,7 +2319,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,                                                  LHSR->getAPIntValue(),                                  N0.getValueType()), Cond);            } -           +            // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.            
if (N0.getOpcode() == ISD::XOR)              // If we know that all of the inverted bits are zero, don't bother @@ -2308,7 +2332,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,                                               N0.getValueType()),                               Cond);          } -         +          // Turn (C1-X) == C2 --> X == C1-C2          if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {            if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { @@ -2319,7 +2343,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,                                             N0.getValueType()),                             Cond);            } -        }           +        }        }        // Simplify (X+Z) == X -->  Z == 0 @@ -2334,7 +2358,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,            assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");            // (Z-X) == X  --> Z == X<<1            SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), -                                     N1,  +                                     N1,                                       DAG.getConstant(1, getShiftAmountTy()));            if (!DCI.isCalledByLegalizer())              DCI.AddToWorklist(SH.getNode()); @@ -2356,7 +2380,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,          } else if (N1.getNode()->hasOneUse()) {            assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");            // X == (Z-X)  --> X<<1 == Z -          SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,  +          SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,                                       DAG.getConstant(1, getShiftAmountTy()));            if (!DCI.isCalledByLegalizer())              DCI.AddToWorklist(SH.getNode()); @@ -2443,7 +2467,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,  /// isGAPlusOffset - Returns 
true (and the GlobalValue and the offset) if the  /// node is a GlobalAddress + offset. -bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA, +bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,                                      int64_t &Offset) const {    if (isa<GlobalAddressSDNode>(N)) {      GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N); @@ -2469,6 +2493,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA,        }      }    } +      return false;  } @@ -2497,7 +2522,10 @@ TargetLowering::getConstraintType(const std::string &Constraint) const {        return C_Memory;      case 'i':    // Simple Integer or Relocatable Constant      case 'n':    // Simple Integer +    case 'E':    // Floating Point Constant +    case 'F':    // Floating Point Constant      case 's':    // Relocatable Constant +    case 'p':    // Address.      case 'X':    // Allow ANY value.      case 'I':    // Target registers.      case 'J': @@ -2507,11 +2535,13 @@ TargetLowering::getConstraintType(const std::string &Constraint) const {      case 'N':      case 'O':      case 'P': +    case '<': +    case '>':        return C_Other;      }    } -   -  if (Constraint.size() > 1 && Constraint[0] == '{' &&  + +  if (Constraint.size() > 1 && Constraint[0] == '{' &&        Constraint[Constraint.size()-1] == '}')      return C_Register;    return C_Unknown; @@ -2550,7 +2580,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,      // is possible and fine if either GV or C are missing.      
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);      GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); -     +      // If we have "(add GV, C)", pull out GV/C      if (Op.getOpcode() == ISD::ADD) {        C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); @@ -2562,14 +2592,14 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,        if (C == 0 || GA == 0)          C = 0, GA = 0;      } -     +      // If we find a valid operand, map to the TargetXXX version so that the      // value itself doesn't get selected.      if (GA) {   // Either &GV   or   &GV+C        if (ConstraintLetter != 'n') {          int64_t Offs = GA->getOffset();          if (C) Offs += C->getZExtValue(); -        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),  +        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),                                                   C ? C->getDebugLoc() : DebugLoc(),                                                   Op.getValueType(), Offs));          return; @@ -2613,8 +2643,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint,    for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),         E = RI->regclass_end(); RCI != E; ++RCI) {      const TargetRegisterClass *RC = *RCI; -     -    // If none of the value types for this register class are valid, we  + +    // If none of the value types for this register class are valid, we      // can't use it.  For example, 64-bit reg classes on 32-bit targets.      
bool isLegal = false;      for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); @@ -2624,16 +2654,16 @@ getRegForInlineAsmConstraint(const std::string &Constraint,          break;        }      } -     +      if (!isLegal) continue; -     -    for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();  + +    for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();           I != E; ++I) {        if (RegName.equals_lower(RI->getName(*I)))          return std::make_pair(*I, RC);      }    } -   +    return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));  } @@ -2655,6 +2685,186 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {  } +/// ParseConstraints - Split up the constraint string from the inline +/// assembly value into the specific constraints and their prefixes, +/// and also tie in the associated operand values. +/// If this returns an empty vector, and if the constraint string itself +/// isn't empty, there was an error parsing. +TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( +    ImmutableCallSite CS) const { +  /// ConstraintOperands - Information about all of the constraints. +  AsmOperandInfoVector ConstraintOperands; +  const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); +  unsigned maCount = 0; // Largest number of multiple alternative constraints. + +  // Do a prepass over the constraints, canonicalizing them, and building up the +  // ConstraintOperands list. +  InlineAsm::ConstraintInfoVector +    ConstraintInfos = IA->ParseConstraints(); + +  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst. +  unsigned ResNo = 0;   // ResNo - The result number of the next output. + +  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { +    ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i])); +    AsmOperandInfo &OpInfo = ConstraintOperands.back(); + +    // Update multiple alternative constraint count. 
+    if (OpInfo.multipleAlternatives.size() > maCount) +      maCount = OpInfo.multipleAlternatives.size(); + +    OpInfo.ConstraintVT = MVT::Other; + +    // Compute the value type for each operand. +    switch (OpInfo.Type) { +    case InlineAsm::isOutput: +      // Indirect outputs just consume an argument. +      if (OpInfo.isIndirect) { +        OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); +        break; +      } + +      // The return value of the call is this value.  As such, there is no +      // corresponding argument. +      assert(!CS.getType()->isVoidTy() && +             "Bad inline asm!"); +      if (const StructType *STy = dyn_cast<StructType>(CS.getType())) { +        OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo)); +      } else { +        assert(ResNo == 0 && "Asm only has one result!"); +        OpInfo.ConstraintVT = getValueType(CS.getType()); +      } +      ++ResNo; +      break; +    case InlineAsm::isInput: +      OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); +      break; +    case InlineAsm::isClobber: +      // Nothing to do. +      break; +    } + +    if (OpInfo.CallOperandVal) { +      const llvm::Type *OpTy = OpInfo.CallOperandVal->getType(); +      if (OpInfo.isIndirect) { +        const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); +        if (!PtrTy) +          report_fatal_error("Indirect operand for inline asm not a pointer!"); +        OpTy = PtrTy->getElementType(); +      } +      // If OpTy is not a single value, it may be a struct/union that we +      // can tile with integers. 
+      if (!OpTy->isSingleValueType() && OpTy->isSized()) { +        unsigned BitSize = TD->getTypeSizeInBits(OpTy); +        switch (BitSize) { +        default: break; +        case 1: +        case 8: +        case 16: +        case 32: +        case 64: +        case 128: +          OpInfo.ConstraintVT = +              EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true); +          break; +        } +      } else if (dyn_cast<PointerType>(OpTy)) { +        OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize()); +      } else { +        OpInfo.ConstraintVT = EVT::getEVT(OpTy, true); +      } +    } +  } + +  // If we have multiple alternative constraints, select the best alternative. +  if (ConstraintInfos.size()) { +    if (maCount) { +      unsigned bestMAIndex = 0; +      int bestWeight = -1; +      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match. +      int weight = -1; +      unsigned maIndex; +      // Compute the sums of the weights for each alternative, keeping track +      // of the best (highest weight) one so far. +      for (maIndex = 0; maIndex < maCount; ++maIndex) { +        int weightSum = 0; +        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); +            cIndex != eIndex; ++cIndex) { +          AsmOperandInfo& OpInfo = ConstraintOperands[cIndex]; +          if (OpInfo.Type == InlineAsm::isClobber) +            continue; + +          // If this is an output operand with a matching input operand, +          // look up the matching input. If their types mismatch, e.g. one +          // is an integer, the other is floating point, or their sizes are +          // different, flag it as an maCantMatch. 
+          if (OpInfo.hasMatchingInput()) { +            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; +            if (OpInfo.ConstraintVT != Input.ConstraintVT) { +              if ((OpInfo.ConstraintVT.isInteger() != +                   Input.ConstraintVT.isInteger()) || +                  (OpInfo.ConstraintVT.getSizeInBits() != +                   Input.ConstraintVT.getSizeInBits())) { +                weightSum = -1;  // Can't match. +                break; +              } +            } +          } +          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex); +          if (weight == -1) { +            weightSum = -1; +            break; +          } +          weightSum += weight; +        } +        // Update best. +        if (weightSum > bestWeight) { +          bestWeight = weightSum; +          bestMAIndex = maIndex; +        } +      } + +      // Now select chosen alternative in each constraint. +      for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); +          cIndex != eIndex; ++cIndex) { +        AsmOperandInfo& cInfo = ConstraintOperands[cIndex]; +        if (cInfo.Type == InlineAsm::isClobber) +          continue; +        cInfo.selectAlternative(bestMAIndex); +      } +    } +  } + +  // Check and hook up tied operands, choose constraint code to use. +  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); +      cIndex != eIndex; ++cIndex) { +    AsmOperandInfo& OpInfo = ConstraintOperands[cIndex]; + +    // If this is an output operand with a matching input operand, look up the +    // matching input. If their types mismatch, e.g. one is an integer, the +    // other is floating point, or their sizes are different, flag it as an +    // error. 
+    if (OpInfo.hasMatchingInput()) { +      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; + +      if (OpInfo.ConstraintVT != Input.ConstraintVT) { +        if ((OpInfo.ConstraintVT.isInteger() != +             Input.ConstraintVT.isInteger()) || +            (OpInfo.ConstraintVT.getSizeInBits() != +             Input.ConstraintVT.getSizeInBits())) { +          report_fatal_error("Unsupported asm: input constraint" +                             " with a matching output constraint of" +                             " incompatible type!"); +        } +      } + +    } +  } + +  return ConstraintOperands; +} + +  /// getConstraintGenerality - Return an integer indicating how general CT  /// is.  static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { @@ -2672,6 +2882,79 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {    }  } +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight +  TargetLowering::getMultipleConstraintMatchWeight( +    AsmOperandInfo &info, int maIndex) const { +  InlineAsm::ConstraintCodeVector *rCodes; +  if (maIndex >= (int)info.multipleAlternatives.size()) +    rCodes = &info.Codes; +  else +    rCodes = &info.multipleAlternatives[maIndex].Codes; +  ConstraintWeight BestWeight = CW_Invalid; + +  // Loop over the options, keeping track of the most general one. +  for (unsigned i = 0, e = rCodes->size(); i != e; ++i) { +    ConstraintWeight weight = +      getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str()); +    if (weight > BestWeight) +      BestWeight = weight; +  } + +  return BestWeight; +} + +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. 
+TargetLowering::ConstraintWeight +  TargetLowering::getSingleConstraintMatchWeight( +    AsmOperandInfo &info, const char *constraint) const { +  ConstraintWeight weight = CW_Invalid; +  Value *CallOperandVal = info.CallOperandVal; +    // If we don't have a value, we can't do a match, +    // but allow it at the lowest weight. +  if (CallOperandVal == NULL) +    return CW_Default; +  // Look at the constraint type. +  switch (*constraint) { +    case 'i': // immediate integer. +    case 'n': // immediate integer with a known value. +      if (isa<ConstantInt>(CallOperandVal)) +        weight = CW_Constant; +      break; +    case 's': // non-explicit intregal immediate. +      if (isa<GlobalValue>(CallOperandVal)) +        weight = CW_Constant; +      break; +    case 'E': // immediate float if host format. +    case 'F': // immediate float. +      if (isa<ConstantFP>(CallOperandVal)) +        weight = CW_Constant; +      break; +    case '<': // memory operand with autodecrement. +    case '>': // memory operand with autoincrement. +    case 'm': // memory operand. +    case 'o': // offsettable memory operand +    case 'V': // non-offsettable memory operand +      weight = CW_Memory; +      break; +    case 'r': // general register. +    case 'g': // general register, memory operand or immediate integer. +              // note: Clang converts "g" to "imr". +      if (CallOperandVal->getType()->isIntegerTy()) +        weight = CW_Register; +      break; +    case 'X': // any operand. +    default: +      weight = CW_Default; +      break; +  } +  return weight; +} +  /// ChooseConstraint - If there are multiple different constraints that we  /// could pick for this operand (e.g. "imr") try to pick the 'best' one.  
/// This is somewhat tricky: constraints fall into four classes: @@ -2721,12 +3004,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,          break;        }      } -     +      // Things with matching constraints can only be registers, per gcc      // documentation.  This mainly affects "g" constraints.      if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())        continue; -     +      // This constraint letter is more general than the previous one, use it.      int Generality = getConstraintGenerality(CType);      if (Generality > BestGenerality) { @@ -2735,7 +3018,7 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,        BestGenerality = Generality;      }    } -   +    OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];    OpInfo.ConstraintType = BestType;  } @@ -2744,10 +3027,10 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,  /// type to use for the specific AsmOperandInfo, setting  /// OpInfo.ConstraintCode and OpInfo.ConstraintType.  void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, -                                            SDValue Op,  +                                            SDValue Op,                                              SelectionDAG *DAG) const {    assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); -   +    // Single-letter constraints ('r') are very common.    if (OpInfo.Codes.size() == 1) {      OpInfo.ConstraintCode = OpInfo.Codes[0]; @@ -2755,7 +3038,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,    } else {      ChooseConstraint(OpInfo, *this, Op, DAG);    } -   +    // 'X' matches anything.    
if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {      // Labels and constants are handled elsewhere ('X' is the only thing @@ -2766,7 +3049,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,        OpInfo.CallOperandVal = v;        return;      } -     +      // Otherwise, try to resolve it to something we know about by looking at      // the actual operand type.      if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) { @@ -2782,7 +3065,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,  /// isLegalAddressingMode - Return true if the addressing mode represented  /// by AM is legal for this target, for a load/store of the specified type. -bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,  +bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,                                             const Type *Ty) const {    // The default implementation of this implements a conservative RISCy, r+r and    // r+i addr mode. @@ -2790,12 +3073,12 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,    // Allows a sign-extended 16-bit immediate field.    if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)      return false; -   +    // No global is ever allowed as a base.    if (AM.BaseGV)      return false; -   -  // Only support r+r,  + +  // Only support r+r,    switch (AM.Scale) {    case 0:  // "r+i" or just "i", depending on HasBaseReg.      break; @@ -2810,7 +3093,7 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,      // Allow 2*r as r+r.      break;    } -   +    return true;  } @@ -2818,19 +3101,19 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,  /// return a DAG expression to select that will generate the same value by  /// multiplying by a magic number.  
See:  /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> -SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,  +SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,                                    std::vector<SDNode*>* Created) const {    EVT VT = N->getValueType(0);    DebugLoc dl= N->getDebugLoc(); -   +    // Check to see if we can do this.    // FIXME: We should be more aggressive here.    if (!isTypeLegal(VT))      return SDValue(); -   +    APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();    APInt::ms magics = d.magic(); -   +    // Multiply the numerator (operand 0) by the magic value    // FIXME: We should support doing a MUL in a wider type    SDValue Q; @@ -2844,7 +3127,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,    else      return SDValue();       // No mulhs or equvialent    // If d > 0 and m < 0, add the numerator -  if (d.isStrictlyPositive() && magics.m.isNegative()) {  +  if (d.isStrictlyPositive() && magics.m.isNegative()) {      Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));      if (Created)        Created->push_back(Q.getNode()); @@ -2857,7 +3140,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,    }    // Shift right algebraic if shift value is nonzero    if (magics.s > 0) { -    Q = DAG.getNode(ISD::SRA, dl, VT, Q,  +    Q = DAG.getNode(ISD::SRA, dl, VT, Q,                      DAG.getConstant(magics.s, getShiftAmountTy()));      if (Created)        Created->push_back(Q.getNode()); @@ -2908,20 +3191,20 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,    if (magics.a == 0) {      assert(magics.s < N1C->getAPIntValue().getBitWidth() &&             "We shouldn't generate an undefined shift!"); -    return DAG.getNode(ISD::SRL, dl, VT, Q,  +    return DAG.getNode(ISD::SRL, dl, VT, Q,                         DAG.getConstant(magics.s, getShiftAmountTy()));    } else {      SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, 
N->getOperand(0), Q);      if (Created)        Created->push_back(NPQ.getNode()); -    NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,  +    NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,                        DAG.getConstant(1, getShiftAmountTy()));      if (Created)        Created->push_back(NPQ.getNode());      NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);      if (Created)        Created->push_back(NPQ.getNode()); -    return DAG.getNode(ISD::SRL, dl, VT, NPQ,  +    return DAG.getNode(ISD::SRL, dl, VT, NPQ,                         DAG.getConstant(magics.s-1, getShiftAmountTy()));    }  }  | 
