diff options
Diffstat (limited to 'lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
| -rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1211 | 
1 files changed, 785 insertions, 426 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c9c4d91e9736..90356021f602 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -25,7 +25,6 @@  #include "llvm/CodeGen/PseudoSourceValue.h"  #include "llvm/Analysis/AliasAnalysis.h"  #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h"  #include "llvm/Target/TargetLowering.h"  #include "llvm/Target/TargetMachine.h"  #include "llvm/Target/TargetOptions.h" @@ -43,6 +42,7 @@ STATISTIC(NodesCombined   , "Number of dag nodes combined");  STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");  STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");  STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed"); +STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");  namespace {    static cl::opt<bool> @@ -185,7 +185,7 @@ namespace {      SDValue visitANY_EXTEND(SDNode *N);      SDValue visitSIGN_EXTEND_INREG(SDNode *N);      SDValue visitTRUNCATE(SDNode *N); -    SDValue visitBIT_CONVERT(SDNode *N); +    SDValue visitBITCAST(SDNode *N);      SDValue visitBUILD_PAIR(SDNode *N);      SDValue visitFADD(SDNode *N);      SDValue visitFSUB(SDNode *N); @@ -229,12 +229,13 @@ namespace {      SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,                                           unsigned HiOp);      SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); -    SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT); +    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);      SDValue BuildSDIV(SDNode *N);      SDValue BuildUDIV(SDNode *N);      SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);      SDValue ReduceLoadWidth(SDNode *N);      SDValue ReduceLoadOpStoreWidth(SDNode *N); +    SDValue TransformFPLoadStorePair(SDNode *N);      SDValue GetDemandedBits(SDValue V, const APInt &Mask); @@ -248,16 +249,19 @@ namespace {      bool isAlias(SDValue Ptr1, int64_t Size1,                   const Value *SrcValue1, int SrcValueOffset1,                   unsigned SrcValueAlign1, +                 const MDNode *TBAAInfo1,                   SDValue Ptr2, int64_t Size2,                   const Value *SrcValue2, int SrcValueOffset2, -                 unsigned SrcValueAlign2) const; +                 unsigned SrcValueAlign2, +                 const MDNode *TBAAInfo2) const;      /// FindAliasInfo - Extracts the relevant alias information from the memory      /// node.  Returns true if the operand was a load.      bool FindAliasInfo(SDNode *N,                         SDValue &Ptr, int64_t &Size,                         const Value *&SrcValue, int &SrcValueOffset, -                       unsigned &SrcValueAlignment) const; +                       unsigned &SrcValueAlignment, +                       const MDNode *&TBAAInfo) const;      /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,      /// looking for a better chain (aliasing node.) @@ -270,15 +274,15 @@ namespace {      /// Run - runs the dag combiner on all nodes in the work list      void Run(CombineLevel AtLevel); -     +      SelectionDAG &getDAG() const { return DAG; } -     +      /// getShiftAmountTy - Returns a type large enough to hold any valid      /// shift amount - before type legalization these can be huge.      EVT getShiftAmountTy() {        return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();      } -     +      /// isTypeLegal - This method returns true if we are running before type      /// legalization or if the specified VT is legal.      bool isTypeLegal(const EVT &VT) { @@ -631,7 +635,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {    // Replace the old value with the new one.    ++NodesCombined; -  DEBUG(dbgs() << "\nReplacing.2 ";  +  DEBUG(dbgs() << "\nReplacing.2 ";          TLO.Old.getNode()->dump(&DAG);          dbgs() << "\nWith: ";          TLO.New.getNode()->dump(&DAG); @@ -666,12 +670,13 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {      EVT MemVT = LD->getMemoryVT();      ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) -      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) +      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD  +                                                  : ISD::EXTLOAD)        : LD->getExtensionType();      Replace = true; -    return DAG.getExtLoad(ExtType, PVT, dl, +    return DAG.getExtLoad(ExtType, dl, PVT,                            LD->getChain(), LD->getBasePtr(), -                          LD->getSrcValue(), LD->getSrcValueOffset(), +                          LD->getPointerInfo(),                            MemVT, LD->isVolatile(),                            LD->isNonTemporal(), LD->getAlignment());    } @@ -691,7 +696,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {      unsigned ExtOpc =        Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;      return DAG.getNode(ExtOpc, dl, PVT, Op); -  }     +  }    }    if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) @@ -889,11 +894,12 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {      LoadSDNode *LD = cast<LoadSDNode>(N);      EVT MemVT = LD->getMemoryVT();      ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) -      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) +      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD  +                                                  : ISD::EXTLOAD)        : LD->getExtensionType(); -    SDValue NewLD = DAG.getExtLoad(ExtType, PVT, dl, +    SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,                                     LD->getChain(), LD->getBasePtr(), -                                   LD->getSrcValue(), LD->getSrcValueOffset(), +                                   LD->getPointerInfo(),                                     MemVT, LD->isVolatile(),                                     LD->isNonTemporal(), LD->getAlignment());      SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); @@ -975,7 +981,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {             RV.getNode()->getOpcode() != ISD::DELETED_NODE &&             "Node was deleted but visit returned new node!"); -    DEBUG(dbgs() << "\nReplacing.3 ";  +    DEBUG(dbgs() << "\nReplacing.3 ";            N->dump(&DAG);            dbgs() << "\nWith: ";            RV.getNode()->dump(&DAG); @@ -1054,7 +1060,7 @@ SDValue DAGCombiner::visit(SDNode *N) {    case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);    case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);    case ISD::TRUNCATE:           return visitTRUNCATE(N); -  case ISD::BIT_CONVERT:        return visitBIT_CONVERT(N); +  case ISD::BITCAST:            return visitBITCAST(N);    case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);    case ISD::FADD:               return visitFADD(N);    case ISD::FSUB:               return visitFSUB(N); @@ -1225,7 +1231,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {        }      }    } -   +    SDValue Result;    // If we've change things around then replace token factor. @@ -1424,6 +1430,29 @@ SDValue DAGCombiner::visitADD(SDNode *N) {                                         N0.getOperand(0).getOperand(1),                                         N0.getOperand(1))); +  if (N1.getOpcode() == ISD::AND) { +    SDValue AndOp0 = N1.getOperand(0); +    ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1)); +    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); +    unsigned DestBits = VT.getScalarType().getSizeInBits(); + +    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) +    // and similar xforms where the inner op is either ~0 or 0. +    if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { +      DebugLoc DL = N->getDebugLoc(); +      return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); +    } +  } + +  // add (sext i1), X -> sub X, (zext i1) +  if (N0.getOpcode() == ISD::SIGN_EXTEND && +      N0.getOperand(0).getValueType() == MVT::i1 && +      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { +    DebugLoc DL = N->getDebugLoc(); +    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); +    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); +  } +    return SDValue();  } @@ -1438,7 +1467,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {    if (N->hasNUsesOfValue(0, 1))      return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),                       DAG.getNode(ISD::CARRY_FALSE, -                                 N->getDebugLoc(), MVT::Flag)); +                                 N->getDebugLoc(), MVT::Glue));    // canonicalize constant to RHS.    if (N0C && !N1C) @@ -1447,7 +1476,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {    // fold (addc x, 0) -> x + no carry out    if (N1C && N1C->isNullValue())      return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, -                                        N->getDebugLoc(), MVT::Flag)); +                                        N->getDebugLoc(), MVT::Glue));    // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.    APInt LHSZero, LHSOne; @@ -1464,7 +1493,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {          (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))        return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),                         DAG.getNode(ISD::CARRY_FALSE, -                                   N->getDebugLoc(), MVT::Flag)); +                                   N->getDebugLoc(), MVT::Glue));    }    return SDValue(); @@ -1489,6 +1518,22 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {    return SDValue();  } +// Since it may not be valid to emit a fold to zero for vector initializers +// check if we can before folding. +static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT, +                             SelectionDAG &DAG, bool LegalOperations) {                             +  if (!VT.isVector()) { +    return DAG.getConstant(0, VT); +  } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { +    // Produce a vector of zeros. +    SDValue El = DAG.getConstant(0, VT.getVectorElementType()); +    std::vector<SDValue> Ops(VT.getVectorNumElements(), El); +    return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, +      &Ops[0], Ops.size()); +  } +  return SDValue(); +} +  SDValue DAGCombiner::visitSUB(SDNode *N) {    SDValue N0 = N->getOperand(0);    SDValue N1 = N->getOperand(1); @@ -1503,8 +1548,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {    }    // fold (sub x, x) -> 0 +  // FIXME: Refactor this and xor and other similar operations together.    if (N0 == N1) -    return DAG.getConstant(0, N->getValueType(0)); +    return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);    // fold (sub c1, c2) -> c1-c2    if (N0C && N1C)      return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); @@ -1515,6 +1561,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {    // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)    if (N0C && N0C->isAllOnesValue())      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); +  // fold A-(A-B) -> B +  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) +    return N1.getOperand(1);    // fold (A+B)-A -> B    if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)      return N0.getOperand(1); @@ -1897,6 +1946,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {    SDValue N1 = N->getOperand(1);    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);    EVT VT = N->getValueType(0); +  DebugLoc DL = N->getDebugLoc();    // fold (mulhs x, 0) -> 0    if (N1C && N1C->isNullValue()) @@ -1910,6 +1960,22 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {    if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)      return DAG.getConstant(0, VT); +  // If the type twice as wide is legal, transform the mulhs to a wider multiply +  // plus a shift. +  if (VT.isSimple() && !VT.isVector()) { +    MVT Simple = VT.getSimpleVT(); +    unsigned SimpleSize = Simple.getSizeInBits(); +    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); +    if (TLI.isOperationLegal(ISD::MUL, NewVT)) { +      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); +      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); +      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); +      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, +                       DAG.getConstant(SimpleSize, getShiftAmountTy())); +      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); +    } +  } +      return SDValue();  } @@ -1918,6 +1984,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {    SDValue N1 = N->getOperand(1);    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);    EVT VT = N->getValueType(0); +  DebugLoc DL = N->getDebugLoc();    // fold (mulhu x, 0) -> 0    if (N1C && N1C->isNullValue()) @@ -1929,6 +1996,22 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {    if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)      return DAG.getConstant(0, VT); +  // If the type twice as wide is legal, transform the mulhu to a wider multiply +  // plus a shift. +  if (VT.isSimple() && !VT.isVector()) { +    MVT Simple = VT.getSimpleVT(); +    unsigned SimpleSize = Simple.getSizeInBits(); +    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); +    if (TLI.isOperationLegal(ISD::MUL, NewVT)) { +      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); +      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); +      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); +      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, +                       DAG.getConstant(SimpleSize, getShiftAmountTy())); +      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); +    } +  } +      return SDValue();  } @@ -1992,6 +2075,29 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {    SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);    if (Res.getNode()) return Res; +  EVT VT = N->getValueType(0); +  DebugLoc DL = N->getDebugLoc(); + +  // If the type twice as wide is legal, transform the mulhu to a wider multiply +  // plus a shift. +  if (VT.isSimple() && !VT.isVector()) { +    MVT Simple = VT.getSimpleVT(); +    unsigned SimpleSize = Simple.getSizeInBits(); +    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); +    if (TLI.isOperationLegal(ISD::MUL, NewVT)) { +      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); +      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); +      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); +      // Compute the high part as N1. +      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, +                       DAG.getConstant(SimpleSize, getShiftAmountTy())); +      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); +      // Compute the low part as N0. +      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); +      return CombineTo(N, Lo, Hi); +    } +  } +      return SDValue();  } @@ -1999,6 +2105,29 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {    SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);    if (Res.getNode()) return Res; +  EVT VT = N->getValueType(0); +  DebugLoc DL = N->getDebugLoc(); +   +  // If the type twice as wide is legal, transform the mulhu to a wider multiply +  // plus a shift. +  if (VT.isSimple() && !VT.isVector()) { +    MVT Simple = VT.getSimpleVT(); +    unsigned SimpleSize = Simple.getSizeInBits(); +    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); +    if (TLI.isOperationLegal(ISD::MUL, NewVT)) { +      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); +      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); +      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); +      // Compute the high part as N1. +      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, +                       DAG.getConstant(SimpleSize, getShiftAmountTy())); +      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); +      // Compute the low part as N0. +      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); +      return CombineTo(N, Lo, Hi); +    } +  } +      return SDValue();  } @@ -2116,7 +2245,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {    if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {      SDValue N0Op0 = N0.getOperand(0);      APInt Mask = ~N1C->getAPIntValue(); -    Mask.trunc(N0Op0.getValueSizeInBits()); +    Mask = Mask.trunc(N0Op0.getValueSizeInBits());      if (DAG.MaskedValueIsZero(N0Op0, Mask)) {        SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),                                   N0.getValueType(), N0Op0); @@ -2198,10 +2327,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {                             BitWidth - MemVT.getScalarType().getSizeInBits())) &&          ((!LegalOperations && !LN0->isVolatile()) ||           TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { -      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), +      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,                                         LN0->getChain(), LN0->getBasePtr(), -                                       LN0->getSrcValue(), -                                       LN0->getSrcValueOffset(), MemVT, +                                       LN0->getPointerInfo(), MemVT,                                         LN0->isVolatile(), LN0->isNonTemporal(),                                         LN0->getAlignment());        AddToWorkList(N); @@ -2221,10 +2349,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {                             BitWidth - MemVT.getScalarType().getSizeInBits())) &&          ((!LegalOperations && !LN0->isVolatile()) ||           TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { -      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), +      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,                                         LN0->getChain(), -                                       LN0->getBasePtr(), LN0->getSrcValue(), -                                       LN0->getSrcValueOffset(), MemVT, +                                       LN0->getBasePtr(), LN0->getPointerInfo(), +                                       MemVT,                                         LN0->isVolatile(), LN0->isNonTemporal(),                                         LN0->getAlignment());        AddToWorkList(N); @@ -2253,18 +2381,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {          if (ExtVT == LoadedVT &&              (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {            EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; -           -          SDValue NewLoad =  -            DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), + +          SDValue NewLoad = +            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,                             LN0->getChain(), LN0->getBasePtr(), -                           LN0->getSrcValue(), LN0->getSrcValueOffset(), +                           LN0->getPointerInfo(),                             ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),                             LN0->getAlignment());            AddToWorkList(N);            CombineTo(LN0, NewLoad, NewLoad.getValue(1));            return SDValue(N, 0);   // Return N so it doesn't get rechecked!          } -         +          // Do not change the width of a volatile load.          // Do not generate loads of non-round integer types since these can          // be expensive (and would be wrong if the type is not byte sized). @@ -2288,12 +2416,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {            }            AddToWorkList(NewPtr.getNode()); -           +            EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;            SDValue Load = -            DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), +            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,                             LN0->getChain(), NewPtr, -                           LN0->getSrcValue(), LN0->getSrcValueOffset(), +                           LN0->getPointerInfo(),                             ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),                             Alignment);            AddToWorkList(N); @@ -2722,17 +2850,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {                                           N01C->getAPIntValue(), VT));    }    // fold (xor x, x) -> 0 -  if (N0 == N1) { -    if (!VT.isVector()) { -      return DAG.getConstant(0, VT); -    } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){ -      // Produce a vector of zeros. -      SDValue El = DAG.getConstant(0, VT.getVectorElementType()); -      std::vector<SDValue> Ops(VT.getVectorNumElements(), El); -      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, -                         &Ops[0], Ops.size()); -    } -  } +  if (N0 == N1) +    return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);    // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))    if (N0.getOpcode() == N1.getOpcode()) { @@ -2810,7 +2929,8 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {                                 LHS->getOperand(1), N->getOperand(1));    // Create the new shift. -  SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(), +  SDValue NewShift = DAG.getNode(N->getOpcode(), +                                 LHS->getOperand(0).getDebugLoc(),                                   VT, LHS->getOperand(0), N->getOperand(1));    // Create the new binop. @@ -2850,7 +2970,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {        EVT TruncVT = N1.getValueType();        SDValue N100 = N1.getOperand(0).getOperand(0);        APInt TruncC = N101C->getAPIntValue(); -      TruncC.trunc(TruncVT.getSizeInBits()); +      TruncC = TruncC.trunc(TruncVT.getSizeInBits());        return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,                           DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,                                       DAG.getNode(ISD::TRUNCATE, @@ -2868,11 +2988,37 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {        N0.getOperand(1).getOpcode() == ISD::Constant) {      uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();      uint64_t c2 = N1C->getZExtValue(); -    if (c1 + c2 > OpSizeInBits) +    if (c1 + c2 >= OpSizeInBits)        return DAG.getConstant(0, VT);      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),                         DAG.getConstant(c1 + c2, N1.getValueType()));    } + +  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2))) +  // For this to be valid, the second form must not preserve any of the bits +  // that are shifted out by the inner shift in the first form.  This means +  // the outer shift size must be >= the number of bits added by the ext. +  // As a corollary, we don't care what kind of ext it is. +  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND || +              N0.getOpcode() == ISD::ANY_EXTEND || +              N0.getOpcode() == ISD::SIGN_EXTEND) && +      N0.getOperand(0).getOpcode() == ISD::SHL && +      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { +    uint64_t c1 =  +      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); +    uint64_t c2 = N1C->getZExtValue(); +    EVT InnerShiftVT = N0.getOperand(0).getValueType(); +    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); +    if (c2 >= OpSizeInBits - InnerShiftSize) { +      if (c1 + c2 >= OpSizeInBits) +        return DAG.getConstant(0, VT); +      return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT, +                         DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT, +                                     N0.getOperand(0)->getOperand(0)), +                         DAG.getConstant(c1 + c2, N1.getValueType())); +    } +  } +    // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or    //                               (srl (and x, (shl -1, c1)), (sub c1, c2))    if (N1C && N0.getOpcode() == ISD::SRL && @@ -2973,7 +3119,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {      if (N01C && N1C) {        // Determine what the truncate's result bitsize and type would be.        EVT TruncVT = -        EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue()); +        EVT::getIntegerVT(*DAG.getContext(), +                          OpSizeInBits - N1C->getZExtValue());        // Determine the residual right-shift amount.        signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); @@ -3006,7 +3153,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {        EVT TruncVT = N1.getValueType();        SDValue N100 = N1.getOperand(0).getOperand(0);        APInt TruncC = N101C->getAPIntValue(); -      TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); +      TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());        return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,                           DAG.getNode(ISD::AND, N->getDebugLoc(),                                       TruncVT, @@ -3017,6 +3164,29 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {      }    } +  // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2)) +  //      if c1 is equal to the number of bits the trunc removes +  if (N0.getOpcode() == ISD::TRUNCATE && +      (N0.getOperand(0).getOpcode() == ISD::SRL || +       N0.getOperand(0).getOpcode() == ISD::SRA) && +      N0.getOperand(0).hasOneUse() && +      N0.getOperand(0).getOperand(1).hasOneUse() && +      N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { +    EVT LargeVT = N0.getOperand(0).getValueType(); +    ConstantSDNode *LargeShiftAmt = +      cast<ConstantSDNode>(N0.getOperand(0).getOperand(1)); + +    if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits == +        LargeShiftAmt->getZExtValue()) { +      SDValue Amt = +        DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(), +                        getShiftAmountTy()); +      SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT, +                                N0.getOperand(0).getOperand(0), Amt); +      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA); +    } +  } +    // Simplify, based on bits shifted out of the LHS.    if (N1C && SimplifyDemandedBits(SDValue(N, 0)))      return SDValue(N, 0); @@ -3065,12 +3235,33 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {        N0.getOperand(1).getOpcode() == ISD::Constant) {      uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();      uint64_t c2 = N1C->getZExtValue(); -    if (c1 + c2 > OpSizeInBits) +    if (c1 + c2 >= OpSizeInBits)        return DAG.getConstant(0, VT);      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),                         DAG.getConstant(c1 + c2, N1.getValueType()));    } -   + +  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) +  if (N1C && N0.getOpcode() == ISD::TRUNCATE && +      N0.getOperand(0).getOpcode() == ISD::SRL && +      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { +    uint64_t c1 =  +      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); +    uint64_t c2 = N1C->getZExtValue(); +    EVT InnerShiftVT = N0.getOperand(0).getValueType(); +    EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); +    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); +    // This is only valid if the OpSizeInBits + c1 = size of inner shift. +    if (c1 + OpSizeInBits == InnerShiftSize) { +      if (c1 + c2 >= InnerShiftSize) +        return DAG.getConstant(0, VT); +      return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT, +                         DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT,  +                                     N0.getOperand(0)->getOperand(0), +                                     DAG.getConstant(c1 + c2, ShiftCountVT))); +    } +  } +    // fold (srl (shl x, c), c) -> (and x, cst2)    if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&        N0.getValueSizeInBits() <= 64) { @@ -3078,7 +3269,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),                         DAG.getConstant(~0ULL >> ShAmt, VT));    } -   +    // fold (srl (anyextend x), c) -> (anyextend (srl x, c))    if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { @@ -3147,7 +3338,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {        EVT TruncVT = N1.getValueType();        SDValue N100 = N1.getOperand(0).getOperand(0);        APInt TruncC = N101C->getAPIntValue(); -      TruncC.trunc(TruncVT.getSizeInBits()); +      TruncC = TruncC.trunc(TruncVT.getSizeInBits());        return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,                           DAG.getNode(ISD::AND, N->getDebugLoc(),                                       TruncVT, @@ -3182,7 +3373,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {    //   brcond i32 %c ...    //    // into -  //  +  //    //   %a = ...    //   %b = and %a, 2    //   %c = setcc eq %b, 0 @@ -3422,7 +3613,7 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,      }      if (BothLiveOut)        // Both unextended and extended values are live out. There had better be -      // good a reason for the transformation. +      // a good reason for the transformation.        return ExtendNodes.size();    }    return true; @@ -3503,10 +3694,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {        DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);      if (DoXform) {        LoadSDNode *LN0 = cast<LoadSDNode>(N0); -      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), +      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,                                         LN0->getChain(), -                                       LN0->getBasePtr(), LN0->getSrcValue(), -                                       LN0->getSrcValueOffset(), +                                       LN0->getBasePtr(), LN0->getPointerInfo(),                                         N0.getValueType(),                                         LN0->isVolatile(), LN0->isNonTemporal(),                                         LN0->getAlignment()); @@ -3547,10 +3737,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {      EVT MemVT = LN0->getMemoryVT();      if ((!LegalOperations && !LN0->isVolatile()) ||          TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { -      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), +      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,                                         LN0->getChain(), -                                       LN0->getBasePtr(), LN0->getSrcValue(), -                                       LN0->getSrcValueOffset(), MemVT, +                                       LN0->getBasePtr(), LN0->getPointerInfo(), +                                       MemVT,                                         LN0->isVolatile(), LN0->isNonTemporal(),                                         LN0->getAlignment());        CombineTo(N, ExtLoad); @@ -3611,7 +3801,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {                                        N0.getOperand(0), N0.getOperand(1),                                   cast<CondCodeSDNode>(N0.getOperand(2))->get()),                           NegOne, DAG.getConstant(0, VT)); -  }   +  }    // fold (sext x) -> (zext x) if the sign bit is known zero.    if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && @@ -3652,6 +3842,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {    // fold (zext (truncate x)) -> (and x, mask)    if (N0.getOpcode() == ISD::TRUNCATE &&        (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { + +    // fold (zext (truncate (load x))) -> (zext (smaller load x)) +    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) +    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); +    if (NarrowLoad.getNode()) { +      SDNode* oye = N0.getNode()->getOperand(0).getNode(); +      if (NarrowLoad.getNode() != N0.getNode()) { +        CombineTo(N0.getNode(), NarrowLoad); +        // CombineTo deleted the truncate, if needed, but not what's under it. +        AddToWorkList(oye); +      } +      return SDValue(N, 0);   // Return N so it doesn't get rechecked! +    } +      SDValue Op = N0.getOperand(0);      if (Op.getValueType().bitsLT(VT)) {        Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); @@ -3677,7 +3881,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {        X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);      }      APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); -    Mask.zext(VT.getSizeInBits()); +    Mask = Mask.zext(VT.getSizeInBits());      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,                         X, DAG.getConstant(Mask, VT));    } @@ -3692,10 +3896,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {        DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);      if (DoXform) {        LoadSDNode *LN0 = cast<LoadSDNode>(N0); -      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), +      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,                                         LN0->getChain(), -                                       LN0->getBasePtr(), LN0->getSrcValue(), -                                       LN0->getSrcValueOffset(), +                                       LN0->getBasePtr(), LN0->getPointerInfo(),                                         N0.getValueType(),                                         LN0->isVolatile(), LN0->isNonTemporal(),                                         LN0->getAlignment()); @@ -3736,10 +3939,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {      EVT MemVT = LN0->getMemoryVT();      if ((!LegalOperations && !LN0->isVolatile()) ||          TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { -      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), +      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,                                         LN0->getChain(), -                                       LN0->getBasePtr(), LN0->getSrcValue(), -                                       LN0->getSrcValueOffset(), MemVT, +                                       LN0->getBasePtr(), LN0->getPointerInfo(), +                                       MemVT,                                         LN0->isVolatile(), LN0->isNonTemporal(),                                         LN0->getAlignment());        CombineTo(N, ExtLoad); @@ -3805,21 +4008,27 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {        isa<ConstantSDNode>(N0.getOperand(1)) &&        N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&        N0.hasOneUse()) { +    SDValue ShAmt = N0.getOperand(1); +    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();      if (N0.getOpcode() == ISD::SHL) { +      SDValue InnerZExt = N0.getOperand(0);        // If the original shl may be shifting out bits, do not perform this        // transformation. -      unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); -      unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() - -        N0.getOperand(0).getOperand(0).getValueType().getSizeInBits(); -      if (ShAmt > KnownZeroBits) +      unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() - +        InnerZExt.getOperand(0).getValueType().getSizeInBits(); +      if (ShAmtVal > KnownZeroBits)          return SDValue();      } -    DebugLoc dl = N->getDebugLoc(); -    return DAG.getNode(N0.getOpcode(), dl, VT, -                       DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), -                       DAG.getNode(ISD::ZERO_EXTEND, dl, -                                   N0.getOperand(1).getValueType(), -                                   N0.getOperand(1))); + +    DebugLoc DL = N->getDebugLoc(); +     +    // Ensure that the shift amount is wide enough for the shifted value.  +    if (VT.getSizeInBits() >= 256) +      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); +     +    return DAG.getNode(N0.getOpcode(), DL, VT, +                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)), +                       ShAmt);    }    return SDValue(); @@ -3879,7 +4088,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {        X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);      }      APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); -    Mask.zext(VT.getSizeInBits()); +    Mask = Mask.zext(VT.getSizeInBits());      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,                         X, DAG.getConstant(Mask, VT));    } @@ -3894,10 +4103,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {        DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);      if (DoXform) {        LoadSDNode *LN0 = cast<LoadSDNode>(N0); -      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), +      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,                                         LN0->getChain(), -                                       LN0->getBasePtr(), LN0->getSrcValue(), -                                       LN0->getSrcValueOffset(), +                                       LN0->getBasePtr(), LN0->getPointerInfo(),                                         N0.getValueType(),                                         LN0->isVolatile(), LN0->isNonTemporal(),                                         LN0->getAlignment()); @@ -3938,11 +4146,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {        N0.hasOneUse()) {      LoadSDNode *LN0 = cast<LoadSDNode>(N0);      EVT MemVT = LN0->getMemoryVT(); -    SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT, -                                     N->getDebugLoc(), -                                     LN0->getChain(), LN0->getBasePtr(), -                                     LN0->getSrcValue(), -                                     LN0->getSrcValueOffset(), MemVT, +    SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), +                                     VT, LN0->getChain(), LN0->getBasePtr(), +                                     LN0->getPointerInfo(), MemVT,                                       LN0->isVolatile(), LN0->isNonTemporal(),                                       LN0->getAlignment());      CombineTo(N, ExtLoad); @@ -4053,11 +4259,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {    if (Opc == ISD::SIGN_EXTEND_INREG) {      ExtType = ISD::SEXTLOAD;      ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT(); -    if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT)) -      return SDValue();    } else if (Opc == ISD::SRL) { -    // Annother special-case: SRL is basically zero-extending a narrower -    // value. +    // Another special-case: SRL is basically zero-extending a narrower value.      ExtType = ISD::ZEXTLOAD;      N0 = SDValue(N, 0);      ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); @@ -4065,10 +4268,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {      ExtVT = EVT::getIntegerVT(*DAG.getContext(),                                VT.getSizeInBits() - N01->getZExtValue());    } +  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT)) +    return SDValue();    unsigned EVTBits = ExtVT.getSizeInBits(); +   +  // Do not generate loads of non-round integer types since these can +  // be expensive (and would be wrong if the type is not byte sized). +  if (!ExtVT.isRound()) +    return SDValue(); +      unsigned ShAmt = 0; -  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) { +  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {      if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {        ShAmt = N01->getZExtValue();        // Is the shift amount a multiple of size of VT? @@ -4078,52 +4289,88 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {          if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)            return SDValue();        } + +      // At this point, we must have a load or else we can't do the transform. +      if (!isa<LoadSDNode>(N0)) return SDValue(); +       +      // If the shift amount is larger than the input type then we're not +      // accessing any of the loaded bytes.  If the load was a zextload/extload +      // then the result of the shift+trunc is zero/undef (handled elsewhere). +      // If the load was a sextload then the result is a splat of the sign bit +      // of the extended byte.  This is not worth optimizing for. +      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits()) +        return SDValue();      }    } -  // Do not generate loads of non-round integer types since these can -  // be expensive (and would be wrong if the type is not byte sized). -  if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() && -      cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits && -      // Do not change the width of a volatile load. -      !cast<LoadSDNode>(N0)->isVolatile()) { -    LoadSDNode *LN0 = cast<LoadSDNode>(N0); -    EVT PtrType = N0.getOperand(1).getValueType(); - -    // For big endian targets, we need to adjust the offset to the pointer to -    // load the correct bytes. -    if (TLI.isBigEndian()) { -      unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); -      unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); -      ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; -    } - -    uint64_t PtrOff =  ShAmt / 8; -    unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); -    SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), -                                 PtrType, LN0->getBasePtr(), -                                 DAG.getConstant(PtrOff, PtrType)); -    AddToWorkList(NewPtr.getNode()); - -    SDValue Load = (ExtType == ISD::NON_EXTLOAD) -      ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, -                    LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, -                    LN0->isVolatile(), LN0->isNonTemporal(), NewAlign) -      : DAG.getExtLoad(ExtType, VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, -                       LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, -                       ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), -                       NewAlign); - -    // Replace the old load's chain with the new load's chain. -    WorkListRemover DeadNodes(*this); -    DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), -                                  &DeadNodes); +  // If the load is shifted left (and the result isn't shifted back right), +  // we can fold the truncate through the shift. +  unsigned ShLeftAmt = 0; +  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() && +      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) { +    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { +      ShLeftAmt = N01->getZExtValue(); +      N0 = N0.getOperand(0); +    } +  } +   +  // If we haven't found a load, we can't narrow it.  Don't transform one with +  // multiple uses, this would require adding a new load. +  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() || +      // Don't change the width of a volatile load. +      cast<LoadSDNode>(N0)->isVolatile()) +    return SDValue(); +   +  // Verify that we are actually reducing a load width here. +  if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits) +    return SDValue(); +   +  LoadSDNode *LN0 = cast<LoadSDNode>(N0); +  EVT PtrType = N0.getOperand(1).getValueType(); + +  // For big endian targets, we need to adjust the offset to the pointer to +  // load the correct bytes. +  if (TLI.isBigEndian()) { +    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); +    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); +    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; +  } + +  uint64_t PtrOff = ShAmt / 8; +  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); +  SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), +                               PtrType, LN0->getBasePtr(), +                               DAG.getConstant(PtrOff, PtrType)); +  AddToWorkList(NewPtr.getNode()); + +  SDValue Load; +  if (ExtType == ISD::NON_EXTLOAD) +    Load =  DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, +                        LN0->getPointerInfo().getWithOffset(PtrOff), +                        LN0->isVolatile(), LN0->isNonTemporal(), NewAlign); +  else +    Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr, +                          LN0->getPointerInfo().getWithOffset(PtrOff), +                          ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), +                          NewAlign); + +  // Replace the old load's chain with the new load's chain. +  WorkListRemover DeadNodes(*this); +  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), +                                &DeadNodes); -    // Return the new loaded value. -    return Load; +  // Shift the result left, if we've swallowed a left shift. +  SDValue Result = Load; +  if (ShLeftAmt != 0) { +    EVT ShImmTy = getShiftAmountTy(); +    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) +      ShImmTy = VT; +    Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, +                         Result, DAG.getConstant(ShLeftAmt, ShImmTy));    } -  return SDValue(); +  // Return the new loaded value. +  return Result;  }  SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { @@ -4196,10 +4443,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {        ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||         TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {      LoadSDNode *LN0 = cast<LoadSDNode>(N0); -    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), +    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,                                       LN0->getChain(), -                                     LN0->getBasePtr(), LN0->getSrcValue(), -                                     LN0->getSrcValueOffset(), EVT, +                                     LN0->getBasePtr(), LN0->getPointerInfo(), +                                     EVT,                                       LN0->isVolatile(), LN0->isNonTemporal(),                                       LN0->getAlignment());      CombineTo(N, ExtLoad); @@ -4213,10 +4460,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {        ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||         TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {      LoadSDNode *LN0 = cast<LoadSDNode>(N0); -    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), +    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,                                       LN0->getChain(), -                                     LN0->getBasePtr(), LN0->getSrcValue(), -                                     LN0->getSrcValueOffset(), EVT, +                                     LN0->getBasePtr(), LN0->getPointerInfo(), +                                     EVT,                                       LN0->isVolatile(), LN0->isNonTemporal(),                                       LN0->getAlignment());      CombineTo(N, ExtLoad); @@ -4295,7 +4542,9 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {    LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));    LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); -  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) +  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || +      LD1->getPointerInfo().getAddrSpace() != +         LD2->getPointerInfo().getAddrSpace())      return SDValue();    EVT LD1VT = LD1->getValueType(0); @@ -4313,14 +4562,14 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {      if (NewAlign <= Align &&          (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))        return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), -                         LD1->getBasePtr(), LD1->getSrcValue(), -                         LD1->getSrcValueOffset(), false, false, Align); +                         LD1->getBasePtr(), LD1->getPointerInfo(), +                         false, false, Align);    }    return SDValue();  } -SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { +SDValue DAGCombiner::visitBITCAST(SDNode *N) {    SDValue N0 = N->getOperand(0);    EVT VT = N->getValueType(0); @@ -4344,12 +4593,12 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {      assert(!DestEltVT.isVector() &&             "Element type of vector ValueType must not be vector!");      if (isSimple) -      return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT); +      return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);    }    // If the input is a constant, let getNode fold it.    if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { -    SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0); +    SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0);      if (Res.getNode() != N) {        if (!LegalOperations ||            TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) @@ -4365,8 +4614,8 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {    }    // (conv (conv x, t1), t2) -> (conv x, t2) -  if (N0.getOpcode() == ISD::BIT_CONVERT) -    return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, +  if (N0.getOpcode() == ISD::BITCAST) +    return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT,                         N0.getOperand(0));    // fold (conv (load x)) -> (load (conv*)x) @@ -4382,13 +4631,12 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {      if (Align <= OrigAlign) {        SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), -                                 LN0->getBasePtr(), -                                 LN0->getSrcValue(), LN0->getSrcValueOffset(), +                                 LN0->getBasePtr(), LN0->getPointerInfo(),                                   LN0->isVolatile(), LN0->isNonTemporal(),                                   OrigAlign);        AddToWorkList(N);        CombineTo(N0.getNode(), -                DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), +                DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),                              N0.getValueType(), Load),                  Load.getValue(1));        return Load; @@ -4400,7 +4648,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {    // This often reduces constant pool loads.    if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&        N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { -    SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT, +    SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,                                    N0.getOperand(0));      AddToWorkList(NewConv.getNode()); @@ -4423,7 +4671,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {      unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();      EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);      if (isTypeLegal(IntXVT)) { -      SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), +      SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),                                IntXVT, N0.getOperand(1));        AddToWorkList(X.getNode()); @@ -4448,7 +4696,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {                        X, DAG.getConstant(SignBit, VT));        AddToWorkList(X.getNode()); -      SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), +      SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),                                  VT, N0.getOperand(0));        Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,                          Cst, DAG.getConstant(~SignBit, VT)); @@ -4473,11 +4721,11 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {    return CombineConsecutiveLoads(N, VT);  } -/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector +/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector  /// node with Constant, ConstantFP or Undef operands.  DstEltVT indicates the  /// destination element value type.  SDValue DAGCombiner:: -ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { +ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {    EVT SrcEltVT = BV->getValueType(0).getVectorElementType();    // If this is already the right type, we're done. @@ -4495,10 +4743,10 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {      // Due to the FP element handling below calling this routine recursively,      // we can end up with a scalar-to-vector node here.      if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) -      return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,  -                         DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(), +      return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, +                         DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),                                       DstEltVT, BV->getOperand(0))); -       +      SmallVector<SDValue, 8> Ops;      for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {        SDValue Op = BV->getOperand(i); @@ -4506,7 +4754,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {        // are promoted and implicitly truncated.  Make that explicit here.        if (Op.getValueType() != SrcEltVT)          Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op); -      Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(), +      Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),                                  DstEltVT, Op));        AddToWorkList(Ops.back().getNode());      } @@ -4522,7 +4770,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {      // same sizes.      assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); -    BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode(); +    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();      SrcEltVT = IntVT;    } @@ -4531,10 +4779,10 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {    if (DstEltVT.isFloatingPoint()) {      assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");      EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); -    SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode(); +    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();      // Next, convert to FP elements of the same size. -    return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT); +    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);    }    // Okay, we know the src/dst types are both integers of differing types. @@ -4556,7 +4804,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {          if (Op.getOpcode() == ISD::UNDEF) continue;          EltIsUndef = false; -        NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()). +        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().                     zextOrTrunc(SrcBitSize).zext(DstBitSize);        } @@ -4586,13 +4834,13 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {        continue;      } -    APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))-> -                        getAPIntValue()).zextOrTrunc(SrcBitSize); +    APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))-> +                  getAPIntValue().zextOrTrunc(SrcBitSize);      for (unsigned j = 0; j != NumOutputsPerInput; ++j) { -      APInt ThisVal = APInt(OpVal).trunc(DstBitSize); +      APInt ThisVal = OpVal.trunc(DstBitSize);        Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); -      if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal) +      if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)          // Simply turn this into a SCALAR_TO_VECTOR of the new type.          return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,                             Ops[0]); @@ -4984,10 +5232,9 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {        ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||         TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {      LoadSDNode *LN0 = cast<LoadSDNode>(N0); -    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), +    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,                                       LN0->getChain(), -                                     LN0->getBasePtr(), LN0->getSrcValue(), -                                     LN0->getSrcValueOffset(), +                                     LN0->getBasePtr(), LN0->getPointerInfo(),                                       N0.getValueType(),                                       LN0->isVolatile(), LN0->isNonTemporal(),                                       LN0->getAlignment()); @@ -5011,7 +5258,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {    // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading    // constant pool values. -  if (N0.getOpcode() == ISD::BIT_CONVERT &&  +  if (N0.getOpcode() == ISD::BITCAST &&        !VT.isVector() &&        N0.getNode()->hasOneUse() &&        N0.getOperand(0).getValueType().isInteger()) { @@ -5021,7 +5268,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {        Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,                DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));        AddToWorkList(Int.getNode()); -      return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), +      return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),                           VT, Int);      }    } @@ -5047,7 +5294,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {    // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading    // constant pool values. -  if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() && +  if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&        N0.getOperand(0).getValueType().isInteger() &&        !N0.getOperand(0).getValueType().isVector()) {      SDValue Int = N0.getOperand(0); @@ -5056,7 +5303,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {        Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,               DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));        AddToWorkList(Int.getNode()); -      return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), +      return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),                           N->getValueType(0), Int);      }    } @@ -5084,14 +5331,17 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {                         N1.getOperand(0), N1.getOperand(1), N2);    } -  SDNode *Trunc = 0; -  if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) { -    // Look past truncate. -    Trunc = N1.getNode(); -    N1 = N1.getOperand(0); -  } +  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) || +      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) && +       (N1.getOperand(0).hasOneUse() && +        N1.getOperand(0).getOpcode() == ISD::SRL))) { +    SDNode *Trunc = 0; +    if (N1.getOpcode() == ISD::TRUNCATE) { +      // Look pass the truncate. +      Trunc = N1.getNode(); +      N1 = N1.getOperand(0); +    } -  if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {      // Match this pattern so that we can generate simpler code:      //      //   %a = ... @@ -5100,7 +5350,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {      //   brcond i32 %c ...      //      // into -    //  +    //      //   %a = ...      //   %b = and i32 %a, 2      //   %c = setcc eq %b, 0 @@ -5146,8 +5396,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {          }        }      } + +    if (Trunc) +      // Restore N1 if the above transformation doesn't match. +      N1 = N->getOperand(1);    } -   +    // Transform br(xor(x, y)) -> br(x != y)    // Transform br(xor(xor(x,y), 1)) -> br (x == y)    if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { @@ -5181,9 +5435,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {            Equal = true;          } -      SDValue NodeToReplace = Trunc ? SDValue(Trunc, 0) : N1; -       -      EVT SetCCVT = NodeToReplace.getValueType(); +      EVT SetCCVT = N1.getValueType();        if (LegalTypes)          SetCCVT = TLI.getSetCCResultType(SetCCVT);        SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(), @@ -5192,9 +5444,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {                                     Equal ? ISD::SETEQ : ISD::SETNE);        // Replace the uses of XOR with SETCC        WorkListRemover DeadNodes(*this); -      DAG.ReplaceAllUsesOfValueWith(NodeToReplace, SetCC, &DeadNodes); -      removeFromWorkList(NodeToReplace.getNode()); -      DAG.DeleteNode(NodeToReplace.getNode()); +      DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); +      removeFromWorkList(N1.getNode()); +      DAG.DeleteNode(N1.getNode());        return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),                           MVT::Other, Chain, SetCC, N2);      } @@ -5568,10 +5820,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {    if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {      if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {        if (Align > LD->getAlignment()) -        return DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), -                              N->getDebugLoc(), -                              Chain, Ptr, LD->getSrcValue(), -                              LD->getSrcValueOffset(), LD->getMemoryVT(), +        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), +                              LD->getValueType(0), +                              Chain, Ptr, LD->getPointerInfo(), +                              LD->getMemoryVT(),                                LD->isVolatile(), LD->isNonTemporal(), Align);      }    } @@ -5587,15 +5839,13 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {        // Replace the chain to void dependency.        if (LD->getExtensionType() == ISD::NON_EXTLOAD) {          ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), -                               BetterChain, Ptr, -                               LD->getSrcValue(), LD->getSrcValueOffset(), +                               BetterChain, Ptr, LD->getPointerInfo(),                                 LD->isVolatile(), LD->isNonTemporal(),                                 LD->getAlignment());        } else { -        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), -                                  LD->getDebugLoc(), -                                  BetterChain, Ptr, LD->getSrcValue(), -                                  LD->getSrcValueOffset(), +        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), +                                  LD->getValueType(0), +                                  BetterChain, Ptr, LD->getPointerInfo(),                                    LD->getMemoryVT(),                                    LD->isVolatile(),                                    LD->isNonTemporal(), @@ -5605,10 +5855,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {        // Create token factor to keep old chain connected.        SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),                                    MVT::Other, Chain, ReplLoad.getValue(1)); -       +        // Make sure the new and old chains are cleaned up.        AddToWorkList(Token.getNode()); -       +        // Replace uses with load result and token factor. Don't add users        // to work list.        return CombineTo(N, ReplLoad.getValue(0), Token, false); @@ -5628,17 +5878,17 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {  static std::pair<unsigned, unsigned>  CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {    std::pair<unsigned, unsigned> Result(0, 0); -   +    // Check for the structure we're looking for.    if (V->getOpcode() != ISD::AND ||        !isa<ConstantSDNode>(V->getOperand(1)) ||        !ISD::isNormalLoad(V->getOperand(0).getNode()))      return Result; -   +    // Check the chain and pointer.    LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));    if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer. -   +    // The store should be chained directly to the load or be an operand of a    // tokenfactor.    if (LD == Chain.getNode()) @@ -5654,7 +5904,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {        }      if (!isOk) return Result;    } -   +    // This only handles simple types.    if (V.getValueType() != MVT::i16 &&        V.getValueType() != MVT::i32 && @@ -5670,7 +5920,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {    unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);    if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.    if (NotMaskLZ == 64) return Result;  // All zero mask. -   +    // See if we have a continuous run of bits.  If so, we have 0*1+0*    if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)      return Result; @@ -5678,19 +5928,19 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {    // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.    if (V.getValueType() != MVT::i64 && NotMaskLZ)      NotMaskLZ -= 64-V.getValueSizeInBits(); -   +    unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;    switch (MaskedBytes) { -  case 1:  -  case 2:  +  case 1: +  case 2:    case 4: break;    default: return Result; // All one mask, or 5-byte mask.    } -   +    // Verify that the first bit starts at a multiple of mask so that the access    // is aligned the same as the access width.    if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; -   +    Result.first = MaskedBytes;    Result.second = NotMaskTZ/8;    return Result; @@ -5707,20 +5957,20 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,    unsigned NumBytes = MaskInfo.first;    unsigned ByteShift = MaskInfo.second;    SelectionDAG &DAG = DC->getDAG(); -   +    // Check to see if IVal is all zeros in the part being masked in by the 'or'    // that uses this.  If not, this is not a replacement.    APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),                                    ByteShift*8, (ByteShift+NumBytes)*8);    if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0; -   +    // Check that it is legal on the target to do this.  It is legal if the new    // VT we're shrinking to (i8/i16/i32) is legal or we're still before type    // legalization.    MVT VT = MVT::getIntegerVT(NumBytes*8);    if (!DC->isTypeLegal(VT))      return 0; -   +    // Okay, we can do this!  Replace the 'St' store with a store of IVal that is    // shifted by ByteShift and truncated down to NumBytes.    if (ByteShift) @@ -5735,20 +5985,20 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,      StOffset = ByteShift;    else      StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; -   +    SDValue Ptr = St->getBasePtr();    if (StOffset) {      Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),                        Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));      NewAlign = MinAlign(NewAlign, StOffset);    } -   +    // Truncate down to the new size.    IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal); -   +    ++OpsNarrowed; -  return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,  -                      St->getSrcValue(), St->getSrcValueOffset()+StOffset, +  return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, +                      St->getPointerInfo().getWithOffset(StOffset),                        false, false, NewAlign).getNode();  } @@ -5771,7 +6021,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {      return SDValue();    unsigned Opc = Value.getOpcode(); -   +    // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst    // is a byte mask indicating a consecutive number of bytes, check to see if    // Y is known to provide just those bytes.  If so, we try to replace the @@ -5784,7 +6034,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {        if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,                                                    Value.getOperand(1), ST,this))          return SDValue(NewST, 0); -                                            +      // Or is commutative, so try swapping X and Y.      MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);      if (MaskedLoad.first) @@ -5792,7 +6042,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {                                                    Value.getOperand(0), ST,this))          return SDValue(NewST, 0);    } -   +    if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||        Value.getOperand(1).getOpcode() != ISD::Constant)      return SDValue(); @@ -5801,7 +6051,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {    if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&        Chain == SDValue(N0.getNode(), 1)) {      LoadSDNode *LD = cast<LoadSDNode>(N0); -    if (LD->getBasePtr() != Ptr) +    if (LD->getBasePtr() != Ptr || +        LD->getPointerInfo().getAddrSpace() != +        ST->getPointerInfo().getAddrSpace())        return SDValue();      // Find the type to narrow it the load / op / store to. @@ -5850,14 +6102,14 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {                                     DAG.getConstant(PtrOff, Ptr.getValueType()));        SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),                                    LD->getChain(), NewPtr, -                                  LD->getSrcValue(), LD->getSrcValueOffset(), +                                  LD->getPointerInfo().getWithOffset(PtrOff),                                    LD->isVolatile(), LD->isNonTemporal(),                                    NewAlign);        SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,                                     DAG.getConstant(NewImm, NewVT));        SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),                                     NewVal, NewPtr, -                                   ST->getSrcValue(), ST->getSrcValueOffset(), +                                   ST->getPointerInfo().getWithOffset(PtrOff),                                     false, false, NewAlign);        AddToWorkList(NewPtr.getNode()); @@ -5874,6 +6126,63 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {    return SDValue();  } +/// TransformFPLoadStorePair - For a given floating point load / store pair, +/// if the load value isn't used by any other operations, then consider +/// transforming the pair to integer load / store operations if the target +/// deems the transformation profitable. +SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { +  StoreSDNode *ST  = cast<StoreSDNode>(N); +  SDValue Chain = ST->getChain(); +  SDValue Value = ST->getValue(); +  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && +      Value.hasOneUse() && +      Chain == SDValue(Value.getNode(), 1)) { +    LoadSDNode *LD = cast<LoadSDNode>(Value); +    EVT VT = LD->getMemoryVT(); +    if (!VT.isFloatingPoint() || +        VT != ST->getMemoryVT() || +        LD->isNonTemporal() || +        ST->isNonTemporal() || +        LD->getPointerInfo().getAddrSpace() != 0 || +        ST->getPointerInfo().getAddrSpace() != 0) +      return SDValue(); + +    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); +    if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || +        !TLI.isOperationLegal(ISD::STORE, IntVT) || +        !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || +        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) +      return SDValue(); + +    unsigned LDAlign = LD->getAlignment(); +    unsigned STAlign = ST->getAlignment(); +    const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); +    unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy); +    if (LDAlign < ABIAlign || STAlign < ABIAlign) +      return SDValue(); + +    SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(), +                                LD->getChain(), LD->getBasePtr(), +                                LD->getPointerInfo(), +                                false, false, LDAlign); + +    SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(), +                                 NewLD, ST->getBasePtr(), +                                 ST->getPointerInfo(), +                                 false, false, STAlign); + +    AddToWorkList(NewLD.getNode()); +    AddToWorkList(NewST.getNode()); +    WorkListRemover DeadNodes(*this); +    DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1), +                                  &DeadNodes); +    ++LdStFP2Int; +    return NewST; +  } + +  return SDValue(); +} +  SDValue DAGCombiner::visitSTORE(SDNode *N) {    StoreSDNode *ST  = cast<StoreSDNode>(N);    SDValue Chain = ST->getChain(); @@ -5882,7 +6191,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {    // If this is a store of a bit convert, store the input value if the    // resultant store does not need a higher alignment than the original. -  if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() && +  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&        ST->isUnindexed()) {      unsigned OrigAlign = ST->getAlignment();      EVT SVT = Value.getOperand(0).getValueType(); @@ -5892,8 +6201,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {          ((!LegalOperations && !ST->isVolatile()) ||           TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))        return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), -                          Ptr, ST->getSrcValue(), -                          ST->getSrcValueOffset(), ST->isVolatile(), +                          Ptr, ST->getPointerInfo(), ST->isVolatile(),                            ST->isNonTemporal(), OrigAlign);    } @@ -5917,8 +6225,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {            Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().                                bitcastToAPInt().getZExtValue(), MVT::i32);            return DAG.getStore(Chain, N->getDebugLoc(), Tmp, -                              Ptr, ST->getSrcValue(), -                              ST->getSrcValueOffset(), ST->isVolatile(), +                              Ptr, ST->getPointerInfo(), ST->isVolatile(),                                ST->isNonTemporal(), ST->getAlignment());          }          break; @@ -5929,8 +6236,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {            Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().                                  getZExtValue(), MVT::i64);            return DAG.getStore(Chain, N->getDebugLoc(), Tmp, -                              Ptr, ST->getSrcValue(), -                              ST->getSrcValueOffset(), ST->isVolatile(), +                              Ptr, ST->getPointerInfo(), ST->isVolatile(),                                ST->isNonTemporal(), ST->getAlignment());          } else if (!ST->isVolatile() &&                     TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { @@ -5942,23 +6248,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {            SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);            if (TLI.isBigEndian()) std::swap(Lo, Hi); -          int SVOffset = ST->getSrcValueOffset();            unsigned Alignment = ST->getAlignment();            bool isVolatile = ST->isVolatile();            bool isNonTemporal = ST->isNonTemporal();            SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, -                                     Ptr, ST->getSrcValue(), -                                     ST->getSrcValueOffset(), +                                     Ptr, ST->getPointerInfo(),                                       isVolatile, isNonTemporal,                                       ST->getAlignment());            Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,                              DAG.getConstant(4, Ptr.getValueType())); -          SVOffset += 4;            Alignment = MinAlign(Alignment, 4U);            SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, -                                     Ptr, ST->getSrcValue(), -                                     SVOffset, isVolatile, isNonTemporal, +                                     Ptr, ST->getPointerInfo().getWithOffset(4), +                                     isVolatile, isNonTemporal,                                       Alignment);            return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,                               St0, St1); @@ -5974,12 +6277,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {      if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {        if (Align > ST->getAlignment())          return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, -                                 Ptr, ST->getSrcValue(), -                                 ST->getSrcValueOffset(), ST->getMemoryVT(), +                                 Ptr, ST->getPointerInfo(), ST->getMemoryVT(),                                   ST->isVolatile(), ST->isNonTemporal(), Align);      }    } +  // Try transforming a pair floating point load / store ops to integer +  // load / store ops. +  SDValue NewST = TransformFPLoadStorePair(N); +  if (NewST.getNode()) +    return NewST; +    if (CombinerAA) {      // Walk up chain skipping non-aliasing memory nodes.      SDValue BetterChain = FindBetterChain(N, Chain); @@ -5991,12 +6299,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {        // Replace the chain to avoid dependency.        if (ST->isTruncatingStore()) {          ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, -                                      ST->getSrcValue(),ST->getSrcValueOffset(), +                                      ST->getPointerInfo(),                                        ST->getMemoryVT(), ST->isVolatile(),                                        ST->isNonTemporal(), ST->getAlignment());        } else {          ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, -                                 ST->getSrcValue(), ST->getSrcValueOffset(), +                                 ST->getPointerInfo(),                                   ST->isVolatile(), ST->isNonTemporal(),                                   ST->getAlignment());        } @@ -6030,17 +6338,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {      AddToWorkList(Value.getNode());      if (Shorter.getNode())        return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, -                               Ptr, ST->getSrcValue(), -                               ST->getSrcValueOffset(), ST->getMemoryVT(), +                               Ptr, ST->getPointerInfo(), ST->getMemoryVT(),                                 ST->isVolatile(), ST->isNonTemporal(),                                 ST->getAlignment());      // Otherwise, see if we can simplify the operation with      // SimplifyDemandedBits, which only works if the value has a single use.      if (SimplifyDemandedBits(Value, -                             APInt::getLowBitsSet( -                               Value.getValueType().getScalarType().getSizeInBits(), -                               ST->getMemoryVT().getScalarType().getSizeInBits()))) +                        APInt::getLowBitsSet( +                          Value.getValueType().getScalarType().getSizeInBits(), +                          ST->getMemoryVT().getScalarType().getSizeInBits())))        return SDValue(N, 0);    } @@ -6064,8 +6371,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {        TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),                              ST->getMemoryVT())) {      return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), -                             Ptr, ST->getSrcValue(), -                             ST->getSrcValueOffset(), ST->getMemoryVT(), +                             Ptr, ST->getPointerInfo(), ST->getMemoryVT(),                               ST->isVolatile(), ST->isNonTemporal(),                               ST->getAlignment());    } @@ -6082,6 +6388,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {    if (InVal.getOpcode() == ISD::UNDEF)      return InVec; +  EVT VT = InVec.getValueType(); + +  // If we can't generate a legal BUILD_VECTOR, exit  +  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) +    return SDValue(); +    // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new    // vector with the inserted element.    if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) { @@ -6091,13 +6403,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {      if (Elt < Ops.size())        Ops[Elt] = InVal;      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), -                       InVec.getValueType(), &Ops[0], Ops.size()); +                       VT, &Ops[0], Ops.size());    } -  // If the invec is an UNDEF and if EltNo is a constant, create a new  +  // If the invec is an UNDEF and if EltNo is a constant, create a new    // BUILD_VECTOR with undef elements and the inserted element. -  if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF &&  +  if (InVec.getOpcode() == ISD::UNDEF &&        isa<ConstantSDNode>(EltNo)) { -    EVT VT = InVec.getValueType();      EVT EltVT = VT.getVectorElementType();      unsigned NElts = VT.getVectorNumElements();      SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT)); @@ -6106,7 +6417,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {      if (Elt < Ops.size())        Ops[Elt] = InVal;      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), -                       InVec.getValueType(), &Ops[0], Ops.size()); +                       VT, &Ops[0], Ops.size());    }    return SDValue();  } @@ -6138,14 +6449,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {    SDValue EltNo = N->getOperand(1);    if (isa<ConstantSDNode>(EltNo)) { -    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); +    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();      bool NewLoad = false;      bool BCNumEltsChanged = false;      EVT VT = InVec.getValueType();      EVT ExtVT = VT.getVectorElementType();      EVT LVT = ExtVT; -    if (InVec.getOpcode() == ISD::BIT_CONVERT) { +    if (InVec.getOpcode() == ISD::BITCAST) {        EVT BCVT = InVec.getOperand(0).getValueType();        if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))          return SDValue(); @@ -6176,10 +6487,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {        // Select the input vector, guarding against out of range extract vector.        unsigned NumElems = VT.getVectorNumElements(); -      int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt); +      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);        InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); -      if (InVec.getOpcode() == ISD::BIT_CONVERT) +      if (InVec.getOpcode() == ISD::BITCAST)          InVec = InVec.getOperand(0);        if (ISD::isNormalLoad(InVec.getNode())) {          LN0 = cast<LoadSDNode>(InVec); @@ -6190,12 +6501,17 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {      if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())        return SDValue(); +    // If Idx was -1 above, Elt is going to be -1, so just return undef. +    if (Elt == -1) +      return DAG.getUNDEF(LN0->getBasePtr().getValueType()); +      unsigned Align = LN0->getAlignment();      if (NewLoad) {        // Check the resultant load doesn't need a higher alignment than the        // original load.        unsigned NewAlign = -        TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); +        TLI.getTargetData() +            ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));        if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))          return SDValue(); @@ -6204,8 +6520,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {      }      SDValue NewPtr = LN0->getBasePtr(); +    unsigned PtrOff = 0; +      if (Elt) { -      unsigned PtrOff = LVT.getSizeInBits() * Elt / 8; +      PtrOff = LVT.getSizeInBits() * Elt / 8;        EVT PtrType = NewPtr.getValueType();        if (TLI.isBigEndian())          PtrOff = VT.getSizeInBits() / 8 - PtrOff; @@ -6214,7 +6532,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {      }      return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, -                       LN0->getSrcValue(), LN0->getSrcValueOffset(), +                       LN0->getPointerInfo().getWithOffset(PtrOff),                         LN0->isVolatile(), LN0->isNonTemporal(), Align);    } @@ -6280,7 +6598,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {          unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();          if (ExtIndex > VT.getVectorNumElements())            return SDValue(); -         +          Mask.push_back(ExtIndex);          continue;        } @@ -6328,15 +6646,16 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {    // FIXME: implement canonicalizations from DAG.getVectorShuffle() -  // If it is a splat, check if the argument vector is a build_vector with -  // all scalar elements the same. -  if (cast<ShuffleVectorSDNode>(N)->isSplat()) { +  // If it is a splat, check if the argument vector is another splat or a +  // build_vector with all scalar elements the same. +  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); +  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {      SDNode *V = N0.getNode();      // If this is a bit convert that changes the element type of the vector but      // not the number of vector elements, look through it.  Be careful not to      // look though conversions that change things like v4f32 to v2f64. -    if (V->getOpcode() == ISD::BIT_CONVERT) { +    if (V->getOpcode() == ISD::BITCAST) {        SDValue ConvInput = V->getOperand(0);        if (ConvInput.getValueType().isVector() &&            ConvInput.getValueType().getVectorNumElements() == NumElts) @@ -6344,30 +6663,28 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {      }      if (V->getOpcode() == ISD::BUILD_VECTOR) { -      unsigned NumElems = V->getNumOperands(); -      unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex(); -      if (NumElems > BaseIdx) { -        SDValue Base; -        bool AllSame = true; -        for (unsigned i = 0; i != NumElems; ++i) { -          if (V->getOperand(i).getOpcode() != ISD::UNDEF) { -            Base = V->getOperand(i); -            break; -          } +      assert(V->getNumOperands() == NumElts && +             "BUILD_VECTOR has wrong number of operands"); +      SDValue Base; +      bool AllSame = true; +      for (unsigned i = 0; i != NumElts; ++i) { +        if (V->getOperand(i).getOpcode() != ISD::UNDEF) { +          Base = V->getOperand(i); +          break;          } -        // Splat of <u, u, u, u>, return <u, u, u, u> -        if (!Base.getNode()) -          return N0; -        for (unsigned i = 0; i != NumElems; ++i) { -          if (V->getOperand(i) != Base) { -            AllSame = false; -            break; -          } +      } +      // Splat of <u, u, u, u>, return <u, u, u, u> +      if (!Base.getNode()) +        return N0; +      for (unsigned i = 0; i != NumElts; ++i) { +        if (V->getOperand(i) != Base) { +          AllSame = false; +          break;          } -        // Splat of <x, x, x, x>, return <x, x, x, x> -        if (AllSame) -          return N0;        } +      // Splat of <x, x, x, x>, return <x, x, x, x> +      if (AllSame) +        return N0;      }    }    return SDValue(); @@ -6436,7 +6753,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {    SDValue LHS = N->getOperand(0);    SDValue RHS = N->getOperand(1);    if (N->getOpcode() == ISD::AND) { -    if (RHS.getOpcode() == ISD::BIT_CONVERT) +    if (RHS.getOpcode() == ISD::BITCAST)        RHS = RHS.getOperand(0);      if (RHS.getOpcode() == ISD::BUILD_VECTOR) {        SmallVector<int, 8> Indices; @@ -6464,9 +6781,9 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {                                       DAG.getConstant(0, EltVT));        SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),                                   RVT, &ZeroOps[0], ZeroOps.size()); -      LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS); +      LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);        SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); -      return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf); +      return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);      }    } @@ -6480,10 +6797,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {    // things. Simplifying them may result in a loss of legality.    if (LegalOperations) return SDValue(); -  EVT VT = N->getValueType(0); -  assert(VT.isVector() && "SimplifyVBinOp only works on vectors!"); +  assert(N->getValueType(0).isVector() && +         "SimplifyVBinOp only works on vectors!"); -  EVT EltType = VT.getVectorElementType();    SDValue LHS = N->getOperand(0);    SDValue RHS = N->getOperand(1);    SDValue Shuffle = XformToShuffleWithZero(N); @@ -6516,14 +6832,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {            break;        } -      // If the vector element type is not legal, the BUILD_VECTOR operands -      // are promoted and implicitly truncated.  Make that explicit here. -      if (LHSOp.getValueType() != EltType) -        LHSOp = DAG.getNode(ISD::TRUNCATE, LHS.getDebugLoc(), EltType, LHSOp); -      if (RHSOp.getValueType() != EltType) -        RHSOp = DAG.getNode(ISD::TRUNCATE, RHS.getDebugLoc(), EltType, RHSOp); - -      SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), EltType, +      EVT VT = LHSOp.getValueType(); +      assert(RHSOp.getValueType() == VT && +             "SimplifyVBinOp with different BUILD_VECTOR element types"); +      SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,                                     LHSOp, RHSOp);        if (FoldOp.getOpcode() != ISD::UNDEF &&            FoldOp.getOpcode() != ISD::Constant && @@ -6533,11 +6845,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {        AddToWorkList(FoldOp.getNode());      } -    if (Ops.size() == LHS.getNumOperands()) { -      EVT VT = LHS.getValueType(); -      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, -                         &Ops[0], Ops.size()); -    } +    if (Ops.size() == LHS.getNumOperands()) +      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), +                         LHS.getValueType(), &Ops[0], Ops.size());    }    return SDValue(); @@ -6580,103 +6890,101 @@ SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,  bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,                                      SDValue RHS) { +  // Cannot simplify select with vector condition +  if (TheSelect->getOperand(0).getValueType().isVector()) return false; +    // If this is a select from two identical things, try to pull the operation    // through the select. -  if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){ -    // If this is a load and the token chain is identical, replace the select -    // of two loads with a load through a select of the address to load from. -    // This triggers in things like "select bool X, 10.0, 123.0" after the FP -    // constants have been dropped into the constant pool. -    if (LHS.getOpcode() == ISD::LOAD && +  if (LHS.getOpcode() != RHS.getOpcode() || +      !LHS.hasOneUse() || !RHS.hasOneUse()) +    return false; + +  // If this is a load and the token chain is identical, replace the select +  // of two loads with a load through a select of the address to load from. +  // This triggers in things like "select bool X, 10.0, 123.0" after the FP +  // constants have been dropped into the constant pool. +  if (LHS.getOpcode() == ISD::LOAD) { +    LoadSDNode *LLD = cast<LoadSDNode>(LHS); +    LoadSDNode *RLD = cast<LoadSDNode>(RHS); + +    // Token chains must be identical. +    if (LHS.getOperand(0) != RHS.getOperand(0) ||          // Do not let this transformation reduce the number of volatile loads. -        !cast<LoadSDNode>(LHS)->isVolatile() && -        !cast<LoadSDNode>(RHS)->isVolatile() && -        // Token chains must be identical. -        LHS.getOperand(0) == RHS.getOperand(0)) { -      LoadSDNode *LLD = cast<LoadSDNode>(LHS); -      LoadSDNode *RLD = cast<LoadSDNode>(RHS); - -      // If this is an EXTLOAD, the VT's must match. -      if (LLD->getMemoryVT() == RLD->getMemoryVT()) { +        LLD->isVolatile() || RLD->isVolatile() || +        // If this is an EXTLOAD, the VT's must match. +        LLD->getMemoryVT() != RLD->getMemoryVT() || +        // If this is an EXTLOAD, the kind of extension must match. +        (LLD->getExtensionType() != RLD->getExtensionType() && +         // The only exception is if one of the extensions is anyext. +         LLD->getExtensionType() != ISD::EXTLOAD && +         RLD->getExtensionType() != ISD::EXTLOAD) ||          // FIXME: this discards src value information.  This is          // over-conservative. It would be beneficial to be able to remember          // both potential memory locations.  Since we are discarding          // src value info, don't do the transformation if the memory          // locations are not in the default address space. -        unsigned LLDAddrSpace = 0, RLDAddrSpace = 0; -        if (const Value *LLDVal = LLD->getMemOperand()->getValue()) { -          if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType())) -            LLDAddrSpace = PT->getAddressSpace(); -        } -        if (const Value *RLDVal = RLD->getMemOperand()->getValue()) { -          if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType())) -            RLDAddrSpace = PT->getAddressSpace(); -        } -        SDValue Addr; -        if (LLDAddrSpace == 0 && RLDAddrSpace == 0) { -          if (TheSelect->getOpcode() == ISD::SELECT) { -            // Check that the condition doesn't reach either load.  If so, folding -            // this will induce a cycle into the DAG. -            if ((!LLD->hasAnyUseOfValue(1) || -                 !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) && -                (!RLD->hasAnyUseOfValue(1) || -                 !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) { -              Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), -                                 LLD->getBasePtr().getValueType(), -                                 TheSelect->getOperand(0), LLD->getBasePtr(), -                                 RLD->getBasePtr()); -            } -          } else { -            // Check that the condition doesn't reach either load.  If so, folding -            // this will induce a cycle into the DAG. -            if ((!LLD->hasAnyUseOfValue(1) || -                 (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && -                  !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) && -                (!RLD->hasAnyUseOfValue(1) || -                 (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && -                  !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) { -              Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), -                                 LLD->getBasePtr().getValueType(), -                                 TheSelect->getOperand(0), -                                 TheSelect->getOperand(1), -                                 LLD->getBasePtr(), RLD->getBasePtr(), -                                 TheSelect->getOperand(4)); -            } -          } -        } - -        if (Addr.getNode()) { -          SDValue Load; -          if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { -            Load = DAG.getLoad(TheSelect->getValueType(0), -                               TheSelect->getDebugLoc(), -                               LLD->getChain(), -                               Addr, 0, 0, -                               LLD->isVolatile(), -                               LLD->isNonTemporal(), -                               LLD->getAlignment()); -          } else { -            Load = DAG.getExtLoad(LLD->getExtensionType(), -                                  TheSelect->getValueType(0), -                                  TheSelect->getDebugLoc(), -                                  LLD->getChain(), Addr, 0, 0, -                                  LLD->getMemoryVT(), -                                  LLD->isVolatile(), -                                  LLD->isNonTemporal(), -                                  LLD->getAlignment()); -          } +        LLD->getPointerInfo().getAddrSpace() != 0 || +        RLD->getPointerInfo().getAddrSpace() != 0) +      return false; -          // Users of the select now use the result of the load. -          CombineTo(TheSelect, Load); +    // Check that the select condition doesn't reach either load.  If so, +    // folding this will induce a cycle into the DAG.  If not, this is safe to +    // xform, so create a select of the addresses. +    SDValue Addr; +    if (TheSelect->getOpcode() == ISD::SELECT) { +      SDNode *CondNode = TheSelect->getOperand(0).getNode(); +      if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) || +          (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode))) +        return false; +      Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), +                         LLD->getBasePtr().getValueType(), +                         TheSelect->getOperand(0), LLD->getBasePtr(), +                         RLD->getBasePtr()); +    } else {  // Otherwise SELECT_CC +      SDNode *CondLHS = TheSelect->getOperand(0).getNode(); +      SDNode *CondRHS = TheSelect->getOperand(1).getNode(); + +      if ((LLD->hasAnyUseOfValue(1) && +           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) || +          (LLD->hasAnyUseOfValue(1) && +           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS)))) +        return false; -          // Users of the old loads now use the new load's chain.  We know the -          // old-load value is dead now. -          CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); -          CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); -          return true; -        } -      } -    } +      Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), +                         LLD->getBasePtr().getValueType(), +                         TheSelect->getOperand(0), +                         TheSelect->getOperand(1), +                         LLD->getBasePtr(), RLD->getBasePtr(), +                         TheSelect->getOperand(4)); +    } + +    SDValue Load; +    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { +      Load = DAG.getLoad(TheSelect->getValueType(0), +                         TheSelect->getDebugLoc(), +                         // FIXME: Discards pointer info. +                         LLD->getChain(), Addr, MachinePointerInfo(), +                         LLD->isVolatile(), LLD->isNonTemporal(), +                         LLD->getAlignment()); +    } else { +      Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? +                            RLD->getExtensionType() : LLD->getExtensionType(), +                            TheSelect->getDebugLoc(), +                            TheSelect->getValueType(0), +                            // FIXME: Discards pointer info. +                            LLD->getChain(), Addr, MachinePointerInfo(), +                            LLD->getMemoryVT(), LLD->isVolatile(), +                            LLD->isNonTemporal(), LLD->getAlignment()); +    } + +    // Users of the select now use the result of the load. +    CombineTo(TheSelect, Load); + +    // Users of the old loads now use the new load's chain.  We know the +    // old-load value is dead now. +    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); +    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); +    return true;    }    return false; @@ -6689,7 +6997,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,                                        ISD::CondCode CC, bool NotExtCompare) {    // (x ? y : y) -> y.    if (N2 == N3) return N2; -   +    EVT VT = N2.getValueType();    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());    ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); @@ -6725,7 +7033,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,          return DAG.getNode(ISD::FABS, DL, VT, N3);      }    } -   +    // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"    // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0    // in it.  This is a win when the constant is not otherwise available because @@ -6748,7 +7056,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,          };          const Type *FPTy = Elts[0]->getType();          const TargetData &TD = *TLI.getTargetData(); -         +          // Create a ConstantArray of the two constants.          Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);          SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(), @@ -6760,7 +7068,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,          SDValue Zero = DAG.getIntPtrConstant(0);          unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());          SDValue One = DAG.getIntPtrConstant(EltSize); -         +          SDValue Cond = DAG.getSetCC(DL,                                      TLI.getSetCCResultType(N0.getValueType()),                                      N0, N1, CC); @@ -6769,11 +7077,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,          CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,                              CstOffset);          return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, -                           PseudoSourceValue::getConstantPool(), 0, false, +                           MachinePointerInfo::getConstantPool(), false,                             false, Alignment);        } -    }   +    }    // Check to see if we can perform the "gzip trick", transforming    // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) @@ -6818,6 +7126,35 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,      }    } +  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) +  // where y is has a single bit set. +  // A plaintext description would be, we can turn the SELECT_CC into an AND +  // when the condition can be materialized as an all-ones register.  Any +  // single bit-test can be materialized as an all-ones register with +  // shift-left and shift-right-arith. +  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && +      N0->getValueType(0) == VT && +      N1C && N1C->isNullValue() && +      N2C && N2C->isNullValue()) { +    SDValue AndLHS = N0->getOperand(0); +    ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); +    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { +      // Shift the tested bit over the sign bit. +      APInt AndMask = ConstAndRHS->getAPIntValue(); +      SDValue ShlAmt = +        DAG.getConstant(AndMask.countLeadingZeros(), getShiftAmountTy()); +      SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt); + +      // Now arithmetic right shift it all the way over, so the result is either +      // all-ones, or zero. +      SDValue ShrAmt = +        DAG.getConstant(AndMask.getBitWidth()-1, getShiftAmountTy()); +      SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt); + +      return DAG.getNode(ISD::AND, DL, VT, Shr, N3); +    } +  } +    // fold select C, 16, 0 -> shl C, 4    if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&        TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) { @@ -6971,7 +7308,8 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {  }  /// FindBaseOffset - Return true if base is a frame index, which is known not -// to alias with anything but itself.  Provides base object and offset as results. +// to alias with anything but itself.  Provides base object and offset as +// results.  static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,                             const GlobalValue *&GV, void *&CV) {    // Assume it is a primitive operation. @@ -6984,7 +7322,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,        Offset += C->getZExtValue();      }    } -   +    // Return the underlying GlobalValue, and update the Offset.  Return false    // for GlobalAddressSDNode since the same GlobalAddress may be represented    // by multiple nodes with different offsets. @@ -7012,9 +7350,11 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,  bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,                            const Value *SrcValue1, int SrcValueOffset1,                            unsigned SrcValueAlign1, +                          const MDNode *TBAAInfo1,                            SDValue Ptr2, int64_t Size2,                            const Value *SrcValue2, int SrcValueOffset2, -                          unsigned SrcValueAlign2) const { +                          unsigned SrcValueAlign2, +                          const MDNode *TBAAInfo2) const {    // If they are the same then they must be aliases.    if (Ptr1 == Ptr2) return true; @@ -7030,8 +7370,19 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,    if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))      return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); -  // If we know what the bases are, and they aren't identical, then we know they -  // cannot alias. +  // It is possible for different frame indices to alias each other, mostly +  // when tail call optimization reuses return address slots for arguments. +  // To catch this case, look up the actual index of frame indices to compute +  // the real alias relationship. +  if (isFrameIndex1 && isFrameIndex2) { +    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); +    Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); +    Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex()); +    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); +  } + +  // Otherwise, if we know what the bases are, and they aren't identical, then +  // we know they cannot alias.    if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))      return false; @@ -7044,20 +7395,21 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,        (Size1 == Size2) && (SrcValueAlign1 > Size1)) {      int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;      int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1; -     +      // There is no overlap between these relatively aligned accesses of similar      // size, return no alias.      if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)        return false;    } -   +    if (CombinerGlobalAA) {      // Use alias analysis information.      int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);      int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;      int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;      AliasAnalysis::AliasResult AAResult = -                             AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2); +      AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1), +               AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));      if (AAResult == AliasAnalysis::NoAlias)        return false;    } @@ -7070,15 +7422,17 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,  /// node.  Returns true if the operand was a load.  bool DAGCombiner::FindAliasInfo(SDNode *N,                          SDValue &Ptr, int64_t &Size, -                        const Value *&SrcValue,  +                        const Value *&SrcValue,                          int &SrcValueOffset, -                        unsigned &SrcValueAlign) const { +                        unsigned &SrcValueAlign, +                        const MDNode *&TBAAInfo) const {    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {      Ptr = LD->getBasePtr();      Size = LD->getMemoryVT().getSizeInBits() >> 3;      SrcValue = LD->getSrcValue();      SrcValueOffset = LD->getSrcValueOffset();      SrcValueAlign = LD->getOriginalAlignment(); +    TBAAInfo = LD->getTBAAInfo();      return true;    } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {      Ptr = ST->getBasePtr(); @@ -7086,6 +7440,7 @@ bool DAGCombiner::FindAliasInfo(SDNode *N,      SrcValue = ST->getSrcValue();      SrcValueOffset = ST->getSrcValueOffset();      SrcValueAlign = ST->getOriginalAlignment(); +    TBAAInfo = ST->getTBAAInfo();    } else {      llvm_unreachable("FindAliasInfo expected a memory operand");    } @@ -7106,26 +7461,27 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,    const Value *SrcValue;    int SrcValueOffset;    unsigned SrcValueAlign; -  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,  -                              SrcValueAlign); +  const MDNode *SrcTBAAInfo; +  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, +                              SrcValueAlign, SrcTBAAInfo);    // Starting off.    Chains.push_back(OriginalChain);    unsigned Depth = 0; -   +    // Look at each chain and determine if it is an alias.  If so, add it to the    // aliases list.  If not, then continue up the chain looking for the next    // candidate.    while (!Chains.empty()) {      SDValue Chain = Chains.back();      Chains.pop_back(); -     -    // For TokenFactor nodes, look at each operand and only continue up the  -    // chain until we find two aliases.  If we've seen two aliases, assume we'll  + +    // For TokenFactor nodes, look at each operand and only continue up the +    // chain until we find two aliases.  If we've seen two aliases, assume we'll      // find more and revert to original chain since the xform is unlikely to be      // profitable. -    //  -    // FIXME: The depth check could be made to return the last non-aliasing  +    // +    // FIXME: The depth check could be made to return the last non-aliasing      // chain we found before we hit a tokenfactor rather than the original      // chain.      if (Depth > 6 || Aliases.size() == 2) { @@ -7151,15 +7507,18 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,        const Value *OpSrcValue;        int OpSrcValueOffset;        unsigned OpSrcValueAlign; +      const MDNode *OpSrcTBAAInfo;        bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,                                      OpSrcValue, OpSrcValueOffset, -                                    OpSrcValueAlign); +                                    OpSrcValueAlign, +                                    OpSrcTBAAInfo);        // If chain is alias then stop here.        if (!(IsLoad && IsOpLoad) &&            isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign, +                  SrcTBAAInfo,                    OpPtr, OpSize, OpSrcValue, OpSrcValueOffset, -                  OpSrcValueAlign)) { +                  OpSrcValueAlign, OpSrcTBAAInfo)) {          Aliases.push_back(Chain);        } else {          // Look further up the chain. @@ -7206,9 +7565,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {      // If a single operand then chain to it.  We don't need to revisit it.      return Aliases[0];    } -   +    // Construct a custom tailored token factor. -  return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,  +  return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,                       &Aliases[0], Aliases.size());  }  | 
