diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 | 
| commit | d8e91e46262bc44006913e6796843909f1ac7bcd (patch) | |
| tree | 7d0c143d9b38190e0fa0180805389da22cd834c5 /lib/Target/PowerPC/PPCISelDAGToDAG.cpp | |
| parent | b7eb8e35e481a74962664b63dfb09483b200209a (diff) | |
Notes
Diffstat (limited to 'lib/Target/PowerPC/PPCISelDAGToDAG.cpp')
| -rw-r--r-- | lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 491 | 
1 files changed, 338 insertions, 153 deletions
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 6cec664d1e665..31acd0ff870fd 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -81,6 +81,8 @@ STATISTIC(NumLogicOpsOnComparison,            "Number of logical ops on i1 values calculated in GPR.");  STATISTIC(OmittedForNonExtendUses,            "Number of compares not eliminated as they have non-extending uses."); +STATISTIC(NumP9Setb, +          "Number of compares lowered to setb.");  // FIXME: Remove this once the bug has been fixed!  cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug", @@ -327,7 +329,6 @@ private:      bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;      void transferMemOperands(SDNode *N, SDNode *Result); -    MachineSDNode *flipSignBit(const SDValue &N, SDNode **SignBit = nullptr);    };  } // end anonymous namespace @@ -490,7 +491,7 @@ static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,    if (!FuncInfo->BPI) return PPC::BR_NO_HINT;    const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); -  const TerminatorInst *BBTerm = BB->getTerminator(); +  const Instruction *BBTerm = BB->getTerminator();    if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT; @@ -687,9 +688,8 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {    SDValue Op1 = N->getOperand(1);    SDLoc dl(N); -  KnownBits LKnown, RKnown; -  CurDAG->computeKnownBits(Op0, LKnown); -  CurDAG->computeKnownBits(Op1, RKnown); +  KnownBits LKnown = CurDAG->computeKnownBits(Op0); +  KnownBits RKnown = CurDAG->computeKnownBits(Op1);    unsigned TargetMask = LKnown.Zero.getZExtValue();    unsigned InsertMask = RKnown.Zero.getZExtValue(); @@ -733,8 +733,7 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {         // The AND mask might not be a constant, and we need to make sure that         // if we're going to fold the masking with the insert, all bits not         // know to be zero in the mask are known to be one. -        KnownBits MKnown; -        CurDAG->computeKnownBits(Op1.getOperand(1), MKnown); +        KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));          bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();          unsigned SHOpc = Op1.getOperand(0).getOpcode(); @@ -1083,9 +1082,14 @@ class BitPermutationSelector {      // lowest-order bit.      unsigned Idx; +    // ConstZero means a bit we need to mask off. +    // Variable is a bit comes from an input variable. +    // VariableKnownToBeZero is also a bit comes from an input variable, +    // but it is known to be already zero. So we do not need to mask them.      enum Kind {        ConstZero, -      Variable +      Variable, +      VariableKnownToBeZero      } K;      ValueBit(SDValue V, unsigned I, Kind K = Variable) @@ -1094,11 +1098,11 @@ class BitPermutationSelector {        : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}      bool isZero() const { -      return K == ConstZero; +      return K == ConstZero || K == VariableKnownToBeZero;      }      bool hasValue() const { -      return K == Variable; +      return K == Variable || K == VariableKnownToBeZero;      }      SDValue getValue() const { @@ -1248,8 +1252,14 @@ class BitPermutationSelector {          for (unsigned i = 0; i < NumBits; ++i)            if (((Mask >> i) & 1) == 1)              Bits[i] = (*LHSBits)[i]; -          else -            Bits[i] = ValueBit(ValueBit::ConstZero); +          else { +            // AND instruction masks this bit. If the input is already zero, +            // we have nothing to do here. Otherwise, make the bit ConstZero. +            if ((*LHSBits)[i].isZero()) +              Bits[i] = (*LHSBits)[i]; +            else +              Bits[i] = ValueBit(ValueBit::ConstZero); +          }          return std::make_pair(Interesting, &Bits);        } @@ -1259,8 +1269,26 @@ class BitPermutationSelector {        const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;        bool AllDisjoint = true; -      for (unsigned i = 0; i < NumBits; ++i) -        if (LHSBits[i].isZero()) +      SDValue LastVal = SDValue(); +      unsigned LastIdx = 0; +      for (unsigned i = 0; i < NumBits; ++i) { +        if (LHSBits[i].isZero() && RHSBits[i].isZero()) { +          // If both inputs are known to be zero and one is ConstZero and +          // another is VariableKnownToBeZero, we can select whichever +          // we like. To minimize the number of bit groups, we select +          // VariableKnownToBeZero if this bit is the next bit of the same +          // input variable from the previous bit. Otherwise, we select +          // ConstZero. +          if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal && +              LHSBits[i].getValueBitIndex() == LastIdx + 1) +            Bits[i] = LHSBits[i]; +          else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal && +                   RHSBits[i].getValueBitIndex() == LastIdx + 1) +            Bits[i] = RHSBits[i]; +          else +            Bits[i] = ValueBit(ValueBit::ConstZero); +        } +        else if (LHSBits[i].isZero())            Bits[i] = RHSBits[i];          else if (RHSBits[i].isZero())            Bits[i] = LHSBits[i]; @@ -1268,6 +1296,16 @@ class BitPermutationSelector {            AllDisjoint = false;            break;          } +        // We remember the value and bit index of this bit. +        if (Bits[i].hasValue()) { +          LastVal = Bits[i].getValue(); +          LastIdx = Bits[i].getValueBitIndex(); +        } +        else { +          if (LastVal) LastVal = SDValue(); +          LastIdx = 0; +        } +      }        if (!AllDisjoint)          break; @@ -1293,6 +1331,72 @@ class BitPermutationSelector {        return std::make_pair(Interesting, &Bits);      } +    case ISD::TRUNCATE: { +      EVT FromType = V.getOperand(0).getValueType(); +      EVT ToType = V.getValueType(); +      // We support only the case with truncate from i64 to i32. +      if (FromType != MVT::i64 || ToType != MVT::i32) +        break; +      const unsigned NumAllBits = FromType.getSizeInBits(); +      SmallVector<ValueBit, 64> *InBits; +      std::tie(Interesting, InBits) = getValueBits(V.getOperand(0), +                                                    NumAllBits); +      const unsigned NumValidBits = ToType.getSizeInBits(); + +      // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value. +      // So, we cannot include this truncate. +      bool UseUpper32bit = false; +      for (unsigned i = 0; i < NumValidBits; ++i) +        if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) { +          UseUpper32bit = true; +          break; +        } +      if (UseUpper32bit) +        break; + +      for (unsigned i = 0; i < NumValidBits; ++i) +        Bits[i] = (*InBits)[i]; + +      return std::make_pair(Interesting, &Bits); +    } +    case ISD::AssertZext: { +      // For AssertZext, we look through the operand and +      // mark the bits known to be zero. +      const SmallVector<ValueBit, 64> *LHSBits; +      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), +                                                    NumBits); + +      EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT(); +      const unsigned NumValidBits = FromType.getSizeInBits(); +      for (unsigned i = 0; i < NumValidBits; ++i) +        Bits[i] = (*LHSBits)[i]; + +      // These bits are known to be zero. +      for (unsigned i = NumValidBits; i < NumBits; ++i) +        Bits[i] = ValueBit((*LHSBits)[i].getValue(), +                           (*LHSBits)[i].getValueBitIndex(), +                           ValueBit::VariableKnownToBeZero); + +      return std::make_pair(Interesting, &Bits); +    } +    case ISD::LOAD: +      LoadSDNode *LD = cast<LoadSDNode>(V); +      if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) { +        EVT VT = LD->getMemoryVT(); +        const unsigned NumValidBits = VT.getSizeInBits(); + +        for (unsigned i = 0; i < NumValidBits; ++i) +          Bits[i] = ValueBit(V, i); + +        // These bits are known to be zero. +        for (unsigned i = NumValidBits; i < NumBits; ++i) +          Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero); + +        // Zero-extending load itself cannot be optimized. So, it is not +        // interesting by itself though it gives useful information. +        return std::make_pair(Interesting = false, &Bits); +      } +      break;      }      for (unsigned i = 0; i < NumBits; ++i) @@ -1304,7 +1408,7 @@ class BitPermutationSelector {    // For each value (except the constant ones), compute the left-rotate amount    // to get it from its original to final position.    void computeRotationAmounts() { -    HasZeros = false; +    NeedMask = false;      RLAmt.resize(Bits.size());      for (unsigned i = 0; i < Bits.size(); ++i)        if (Bits[i].hasValue()) { @@ -1314,7 +1418,7 @@ class BitPermutationSelector {          else            RLAmt[i] = Bits.size() - (VBI - i);        } else if (Bits[i].isZero()) { -        HasZeros = true; +        NeedMask = true;          RLAmt[i] = UINT32_MAX;        } else {          llvm_unreachable("Unknown value bit type"); @@ -1330,6 +1434,7 @@ class BitPermutationSelector {      unsigned LastRLAmt = RLAmt[0];      SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();      unsigned LastGroupStartIdx = 0; +    bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();      for (unsigned i = 1; i < Bits.size(); ++i) {        unsigned ThisRLAmt = RLAmt[i];        SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue(); @@ -1342,10 +1447,20 @@ class BitPermutationSelector {            LastGroupStartIdx = 0;        } +      // If this bit is known to be zero and the current group is a bit group +      // of zeros, we do not need to terminate the current bit group even the +      // Value or RLAmt does not match here. Instead, we terminate this group +      // when the first non-zero bit appears later. +      if (IsGroupOfZeros && Bits[i].isZero()) +        continue; +        // If this bit has the same underlying value and the same rotate factor as        // the last one, then they're part of the same group.        if (ThisRLAmt == LastRLAmt && ThisValue == LastValue) -        continue; +        // We cannot continue the current group if this bits is not known to +        // be zero in a bit group of zeros. +        if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero())) +          continue;        if (LastValue.getNode())          BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, @@ -1353,6 +1468,7 @@ class BitPermutationSelector {        LastRLAmt = ThisRLAmt;        LastValue = ThisValue;        LastGroupStartIdx = i; +      IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();      }      if (LastValue.getNode())        BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, @@ -1401,7 +1517,7 @@ class BitPermutationSelector {      for (auto &I : ValueRots) {        ValueRotsVec.push_back(I.second);      } -    llvm::sort(ValueRotsVec.begin(), ValueRotsVec.end()); +    llvm::sort(ValueRotsVec);    }    // In 64-bit mode, rlwinm and friends have a rotation operator that @@ -1588,6 +1704,17 @@ class BitPermutationSelector {      return ExtVal;    } +  SDValue TruncateToInt32(SDValue V, const SDLoc &dl) { +    if (V.getValueSizeInBits() == 32) +      return V; + +    assert(V.getValueSizeInBits() == 64); +    SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); +    SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, +                                                    MVT::i32, V, SubRegIdx), 0); +    return SubVal; +  } +    // Depending on the number of groups for a particular value, it might be    // better to rotate, mask explicitly (using andi/andis), and then or the    // result. Select this part of the result first. @@ -1646,12 +1773,12 @@ class BitPermutationSelector {        SDValue VRot;        if (VRI.RLAmt) {          SDValue Ops[] = -          { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl), -            getI32Imm(31, dl) }; +          { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl), +            getI32Imm(0, dl), getI32Imm(31, dl) };          VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,                                                Ops), 0);        } else { -        VRot = VRI.V; +        VRot = TruncateToInt32(VRI.V, dl);        }        SDValue ANDIVal, ANDISVal; @@ -1698,17 +1825,17 @@ class BitPermutationSelector {      // If we've not yet selected a 'starting' instruction, and we have no zeros      // to fill in, select the (Value, RLAmt) with the highest priority (largest      // number of groups), and start with this rotated value. -    if ((!HasZeros || LateMask) && !Res) { +    if ((!NeedMask || LateMask) && !Res) {        ValueRotInfo &VRI = ValueRotsVec[0];        if (VRI.RLAmt) {          if (InstCnt) *InstCnt += 1;          SDValue Ops[] = -          { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl), -            getI32Imm(31, dl) }; +          { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl), +            getI32Imm(0, dl), getI32Imm(31, dl) };          Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),                        0);        } else { -        Res = VRI.V; +        Res = TruncateToInt32(VRI.V, dl);        }        // Now, remove all groups with this underlying value and rotation factor. @@ -1723,13 +1850,13 @@ class BitPermutationSelector {      for (auto &BG : BitGroups) {        if (!Res) {          SDValue Ops[] = -          { BG.V, getI32Imm(BG.RLAmt, dl), +          { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),              getI32Imm(Bits.size() - BG.EndIdx - 1, dl),              getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };          Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);        } else {          SDValue Ops[] = -          { Res, BG.V, getI32Imm(BG.RLAmt, dl), +          { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),                getI32Imm(Bits.size() - BG.EndIdx - 1, dl),              getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };          Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0); @@ -2077,7 +2204,7 @@ class BitPermutationSelector {      // If we've not yet selected a 'starting' instruction, and we have no zeros      // to fill in, select the (Value, RLAmt) with the highest priority (largest      // number of groups), and start with this rotated value. -    if ((!HasZeros || LateMask) && !Res) { +    if ((!NeedMask || LateMask) && !Res) {        // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32        // groups will come first, and so the VRI representing the largest number        // of groups might not be first (it might be the first Repl32 groups). @@ -2230,7 +2357,7 @@ class BitPermutationSelector {    SmallVector<ValueBit, 64> Bits; -  bool HasZeros; +  bool NeedMask;    SmallVector<unsigned, 64> RLAmt;    SmallVector<BitGroup, 16> BitGroups; @@ -2259,10 +2386,10 @@ public:                           " selection for:    ");      LLVM_DEBUG(N->dump(CurDAG)); -    // Fill it RLAmt and set HasZeros. +    // Fill it RLAmt and set NeedMask.      computeRotationAmounts(); -    if (!HasZeros) +    if (!NeedMask)        return Select(N, false);      // We currently have two techniques for handling results with zeros: early @@ -4045,54 +4172,148 @@ bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {  void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {    // Transfer memoperands. -  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); -  MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); -  cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); +  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); +  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});  } -/// This method returns a node after flipping the MSB of each element -/// of vector integer type. Additionally, if SignBitVec is non-null, -/// this method sets a node with one at MSB of all elements -/// and zero at other bits in SignBitVec. -MachineSDNode * -PPCDAGToDAGISel::flipSignBit(const SDValue &N, SDNode **SignBitVec) { -  SDLoc dl(N); -  EVT VecVT = N.getValueType(); -  if (VecVT == MVT::v4i32) { -    if (SignBitVec) { -      SDNode *ZV = CurDAG->getMachineNode(PPC::V_SET0, dl, MVT::v4i32); -      *SignBitVec = CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT, -                                        SDValue(ZV, 0)); -    } -    return CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT, N); -  } -  else if (VecVT == MVT::v8i16) { -    SDNode *Hi = CurDAG->getMachineNode(PPC::LIS, dl, MVT::i32, -                                     getI32Imm(0x8000, dl)); -    SDNode *ScaImm = CurDAG->getMachineNode(PPC::ORI, dl, MVT::i32, -                                         SDValue(Hi, 0), -                                         getI32Imm(0x8000, dl)); -    SDNode *VecImm = CurDAG->getMachineNode(PPC::MTVSRWS, dl, VecVT, -                                         SDValue(ScaImm, 0)); -    /* -    Alternatively, we can do this as follow to use VRF instead of GPR. -      vspltish 5, 1 -      vspltish 6, 15 -      vslh 5, 6, 5 -    */ -    if (SignBitVec) *SignBitVec = VecImm; -    return CurDAG->getMachineNode(PPC::VADDUHM, dl, VecVT, N, -                                  SDValue(VecImm, 0)); -  } -  else if (VecVT == MVT::v16i8) { -    SDNode *VecImm = CurDAG->getMachineNode(PPC::XXSPLTIB, dl, MVT::i32, -                                         getI32Imm(0x80, dl)); -    if (SignBitVec) *SignBitVec = VecImm; -    return CurDAG->getMachineNode(PPC::VADDUBM, dl, VecVT, N, -                                  SDValue(VecImm, 0)); +static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, +                         bool &NeedSwapOps, bool &IsUnCmp) { + +  assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here."); + +  SDValue LHS = N->getOperand(0); +  SDValue RHS = N->getOperand(1); +  SDValue TrueRes = N->getOperand(2); +  SDValue FalseRes = N->getOperand(3); +  ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes); +  if (!TrueConst) +    return false; + +  assert((N->getSimpleValueType(0) == MVT::i64 || +          N->getSimpleValueType(0) == MVT::i32) && +         "Expecting either i64 or i32 here."); + +  // We are looking for any of: +  // (select_cc lhs, rhs,  1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1) +  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1) +  // (select_cc lhs, rhs,  0, (select_cc [lr]hs, [lr]hs,  1, -1, cc2), seteq) +  // (select_cc lhs, rhs,  0, (select_cc [lr]hs, [lr]hs, -1,  1, cc2), seteq) +  int64_t TrueResVal = TrueConst->getSExtValue(); +  if ((TrueResVal < -1 || TrueResVal > 1) || +      (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) || +      (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) || +      (TrueResVal == 0 && +       (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ))) +    return false; + +  bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC; +  SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0); +  if (SetOrSelCC.getOpcode() != ISD::SETCC && +      SetOrSelCC.getOpcode() != ISD::SELECT_CC) +    return false; + +  // Without this setb optimization, the outer SELECT_CC will be manually +  // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass +  // transforms pseduo instruction to isel instruction. When there are more than +  // one use for result like zext/sext, with current optimization we only see +  // isel is replaced by setb but can't see any significant gain. Since +  // setb has longer latency than original isel, we should avoid this. Another +  // point is that setb requires comparison always kept, it can break the +  // oppotunity to get the comparison away if we have in future. +  if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse())) +    return false; + +  SDValue InnerLHS = SetOrSelCC.getOperand(0); +  SDValue InnerRHS = SetOrSelCC.getOperand(1); +  ISD::CondCode InnerCC = +      cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get(); +  // If the inner comparison is a select_cc, make sure the true/false values are +  // 1/-1 and canonicalize it if needed. +  if (InnerIsSel) { +    ConstantSDNode *SelCCTrueConst = +        dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2)); +    ConstantSDNode *SelCCFalseConst = +        dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3)); +    if (!SelCCTrueConst || !SelCCFalseConst) +      return false; +    int64_t SelCCTVal = SelCCTrueConst->getSExtValue(); +    int64_t SelCCFVal = SelCCFalseConst->getSExtValue(); +    // The values must be -1/1 (requiring a swap) or 1/-1. +    if (SelCCTVal == -1 && SelCCFVal == 1) { +      std::swap(InnerLHS, InnerRHS); +    } else if (SelCCTVal != 1 || SelCCFVal != -1) +      return false;    } -  else -    llvm_unreachable("Unsupported vector data type for flipSignBit"); + +  // Canonicalize unsigned case +  if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) { +    IsUnCmp = true; +    InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT; +  } + +  bool InnerSwapped = false; +  if (LHS == InnerRHS && RHS == InnerLHS) +    InnerSwapped = true; +  else if (LHS != InnerLHS || RHS != InnerRHS) +    return false; + +  switch (CC) { +  // (select_cc lhs, rhs,  0, \ +  //     (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq) +  case ISD::SETEQ: +    if (!InnerIsSel) +      return false; +    if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT) +      return false; +    NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped; +    break; + +  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt) +  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt) +  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt) +  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt) +  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt) +  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt) +  case ISD::SETULT: +    if (!IsUnCmp && InnerCC != ISD::SETNE) +      return false; +    IsUnCmp = true; +    LLVM_FALLTHROUGH; +  case ISD::SETLT: +    if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) || +        (InnerCC == ISD::SETLT && InnerSwapped)) +      NeedSwapOps = (TrueResVal == 1); +    else +      return false; +    break; + +  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt) +  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt) +  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt) +  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt) +  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt) +  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt) +  case ISD::SETUGT: +    if (!IsUnCmp && InnerCC != ISD::SETNE) +      return false; +    IsUnCmp = true; +    LLVM_FALLTHROUGH; +  case ISD::SETGT: +    if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) || +        (InnerCC == ISD::SETGT && InnerSwapped)) +      NeedSwapOps = (TrueResVal == -1); +    else +      return false; +    break; + +  default: +    return false; +  } + +  LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: "); +  LLVM_DEBUG(N->dump()); + +  return true;  }  // Select - Convert the specified operand from a target-independent to a @@ -4429,8 +4650,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {      int16_t Imm;      if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&          isIntS16Immediate(N->getOperand(1), Imm)) { -      KnownBits LHSKnown; -      CurDAG->computeKnownBits(N->getOperand(0), LHSKnown); +      KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));        // If this is equivalent to an add, then we can fold it with the        // FrameIndex calculation. @@ -4557,6 +4777,31 @@ void PPCDAGToDAGISel::Select(SDNode *N) {          N->getOperand(0).getValueType() == MVT::i1)        break; +    if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) { +      bool NeedSwapOps = false; +      bool IsUnCmp = false; +      if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) { +        SDValue LHS = N->getOperand(0); +        SDValue RHS = N->getOperand(1); +        if (NeedSwapOps) +          std::swap(LHS, RHS); + +        // Make use of SelectCC to generate the comparison to set CR bits, for +        // equality comparisons having one literal operand, SelectCC probably +        // doesn't need to materialize the whole literal and just use xoris to +        // check it first, it leads the following comparison result can't +        // exactly represent GT/LT relationship. So to avoid this we specify +        // SETGT/SETUGT here instead of SETEQ. +        SDValue GenCC = +            SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl); +        CurDAG->SelectNodeTo( +            N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB, +            N->getValueType(0), GenCC); +        NumP9Setb++; +        return; +      } +    } +      // Handle the setcc cases here.  select_cc lhs, 0, 1, 0, cc      if (!isPPC64)        if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1))) @@ -4648,14 +4893,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {      CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);      return;    } -  case ISD::VSELECT: -    if (PPCSubTarget->hasVSX()) { -      SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) }; -      CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops); -      return; -    } -    break; -    case ISD::VECTOR_SHUFFLE:      if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||                                    N->getValueType(0) == MVT::v2i64)) { @@ -4683,11 +4920,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) {              SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {            SDValue Chain = LD->getChain();            SDValue Ops[] = { Base, Offset, Chain }; -          MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); -          MemOp[0] = LD->getMemOperand(); +          MachineMemOperand *MemOp = LD->getMemOperand();            SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,                                                N->getValueType(0), Ops); -          cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1); +          CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});            return;          }        } @@ -4753,6 +4989,15 @@ void PPCDAGToDAGISel::Select(SDNode *N) {        case PPC::PRED_NE: Opc = PPC::CRXOR;  Swap = false; break;        } +      // A signed comparison of i1 values produces the opposite result to an +      // unsigned one if the condition code includes less-than or greater-than. +      // This is because 1 is the most negative signed i1 number and the most +      // positive unsigned i1 number. The CR-logical operations used for such +      // comparisons are non-commutative so for signed comparisons vs. unsigned +      // ones, the input operands just need to be swapped. +      if (ISD::isSignedIntSetCC(CC)) +        Swap = !Swap; +        SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,                                               N->getOperand(Swap ? 3 : 2),                                               N->getOperand(Swap ? 2 : 3)), 0); @@ -4809,9 +5054,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) {      SDValue TOCbase = N->getOperand(1);      SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,                                           TOCbase, GA); - -    if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) || -        CModel == CodeModel::Large) { +    if (PPCLowering->isAccessedAsGotIndirect(GA)) { +      // If it is access as got-indirect, we need an extra LD to load +      // the address.        SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,                                            SDValue(Tmp, 0));        transferMemOperands(N, MN); @@ -4819,18 +5064,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {        return;      } -    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { -      const GlobalValue *GV = G->getGlobal(); -      unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV); -      if (GVFlags & PPCII::MO_NLP_FLAG) { -        SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, -                                            SDValue(Tmp, 0)); -        transferMemOperands(N, MN); -        ReplaceNode(N, MN); -        return; -      } -    } - +    // Build the address relative to the TOC-pointer..      ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,                                            SDValue(Tmp, 0), GA));      return; @@ -4916,55 +5150,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {        return;      }    } -  case ISD::ABS: { -    assert(PPCSubTarget->hasP9Vector() && "ABS is supported with P9 Vector"); - -    // For vector absolute difference, we use VABSDUW instruction of POWER9. -    // Since VABSDU instructions are for unsigned integers, we need adjustment -    // for signed integers. -    // For abs(sub(a, b)), we generate VABSDUW(a+0x80000000, b+0x80000000). -    // Otherwise, abs(sub(-1, 0)) returns 0xFFFFFFFF(=-1) instead of 1. -    // For abs(a), we generate VABSDUW(a+0x80000000, 0x80000000). -    EVT VecVT = N->getOperand(0).getValueType(); -    SDNode *AbsOp = nullptr; -    unsigned AbsOpcode; - -    if (VecVT == MVT::v4i32) -      AbsOpcode = PPC::VABSDUW; -    else if (VecVT == MVT::v8i16) -      AbsOpcode = PPC::VABSDUH; -    else if (VecVT == MVT::v16i8) -      AbsOpcode = PPC::VABSDUB; -    else -      llvm_unreachable("Unsupported vector data type for ISD::ABS"); - -    // Even for signed integers, we can skip adjustment if all values are -    // known to be positive (as signed integer) due to zero-extended inputs. -    if (N->getOperand(0).getOpcode() == ISD::SUB && -        N->getOperand(0)->getOperand(0).getOpcode() == ISD::ZERO_EXTEND && -        N->getOperand(0)->getOperand(1).getOpcode() == ISD::ZERO_EXTEND) { -      AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, -                                     SDValue(N->getOperand(0)->getOperand(0)), -                                     SDValue(N->getOperand(0)->getOperand(1))); -      ReplaceNode(N, AbsOp); -      return; -    } -    if (N->getOperand(0).getOpcode() == ISD::SUB) { -      SDValue SubVal = N->getOperand(0); -      SDNode *Op0 = flipSignBit(SubVal->getOperand(0)); -      SDNode *Op1 = flipSignBit(SubVal->getOperand(1)); -      AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, -                                     SDValue(Op0, 0), SDValue(Op1, 0)); -    } -    else { -      SDNode *Op1 = nullptr; -      SDNode *Op0 = flipSignBit(N->getOperand(0), &Op1); -      AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, SDValue(Op0, 0), -                                     SDValue(Op1, 0)); -    } -    ReplaceNode(N, AbsOp); -    return; -  }    }    SelectCode(N);  | 
