Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp')
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 272
1 file changed, 198 insertions(+), 74 deletions(-)
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index ea730539f834..f330bd7ebcdd 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -172,8 +172,9 @@ private:
   MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
 
-  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
+  SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
   SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
+  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
 
   const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
   virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
@@ -186,10 +187,11 @@ private:
   bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                    SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                    SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
-                   SDValue &TFE, SDValue &DLC) const;
+                   SDValue &TFE, SDValue &DLC, SDValue &SWZ) const;
   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &Offset, SDValue &GLC,
-                         SDValue &SLC, SDValue &TFE, SDValue &DLC) const;
+                         SDValue &SLC, SDValue &TFE, SDValue &DLC,
+                         SDValue &SWZ) const;
   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &Offset,
                          SDValue &SLC) const;
@@ -202,21 +204,20 @@ private:
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                          SDValue &Offset, SDValue &GLC, SDValue &SLC,
-                         SDValue &TFE, SDValue &DLC) const;
+                         SDValue &TFE, SDValue &DLC, SDValue &SWZ) const;
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                          SDValue &Offset, SDValue &SLC) const;
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                          SDValue &Offset) const;
 
+  template <bool IsSigned>
+  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+                        SDValue &Offset, SDValue &SLC) const;
   bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
                         SDValue &Offset, SDValue &SLC) const;
   bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
                               SDValue &Offset, SDValue &SLC) const;
 
-  template <bool IsSigned>
-  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
-                        SDValue &Offset, SDValue &SLC) const;
-
   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                         bool &Imm) const;
   SDValue Expand32BitAddress(SDValue Addr) const;
@@ -262,6 +263,8 @@ private:
   SDValue getHi16Elt(SDValue In) const;
 
+  SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
+
   void SelectADD_SUB_I64(SDNode *N);
   void SelectAddcSubb(SDNode *N);
   void SelectUADDO_USUBO(SDNode *N);
@@ -282,6 +285,7 @@ private:
   void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
   void SelectDS_GWS(SDNode *N, unsigned IntrID);
   void SelectINTRINSIC_W_CHAIN(SDNode *N);
+  void SelectINTRINSIC_WO_CHAIN(SDNode *N);
   void SelectINTRINSIC_VOID(SDNode *N);
 
 protected:
@@ -543,7 +547,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
   if (!N->isMachineOpcode()) {
     if (N->getOpcode() == ISD::CopyToReg) {
       unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
-      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+      if (Register::isVirtualRegister(Reg)) {
         MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
         return MRI.getRegClass(Reg);
       }
@@ -582,19 +586,10 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
   }
 }
 
-SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
-  const SITargetLowering& Lowering =
-      *static_cast<const SITargetLowering*>(getTargetLowering());
-
-  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
-
-  SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N),
-                                 Val);
-
-  SDValue Glue = M0.getValue(1);
-
+SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
+                                         SDValue Glue) const {
   SmallVector <SDValue, 8> Ops;
-  Ops.push_back(M0); // Replace the chain.
+  Ops.push_back(NewChain); // Replace the chain.
   for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
     Ops.push_back(N->getOperand(i));
 
@@ -602,6 +597,16 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
   return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
 }
 
+SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
+  const SITargetLowering& Lowering =
+      *static_cast<const SITargetLowering*>(getTargetLowering());
+
+  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
+
+  SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
+  return glueCopyToOp(N, M0, M0.getValue(1));
+}
+
 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
   unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
   if (AS == AMDGPUAS::LOCAL_ADDRESS) {
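The hunks above split the old glueCopyToM0 into a generic morph step (glueCopyToOp) plus the m0-specific copy. A minimal sketch of what that shared step does, written as a hypothetical free function over the same SelectionDAG APIs rather than the class member:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Morph N in place: operand 0 (the incoming chain) is replaced by NewChain,
// every other operand is kept, and a trailing glue operand is appended so the
// copy that produced NewChain (e.g. a CopyToReg of m0) is scheduled
// immediately before N.
static SDNode *replaceChainAndGlue(SelectionDAG &DAG, SDNode *N,
                                   SDValue NewChain, SDValue Glue) {
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(NewChain);             // slot 0: the chain
  for (unsigned I = 1, E = N->getNumOperands(); I != E; ++I)
    Ops.push_back(N->getOperand(I));   // remaining operands, unchanged
  Ops.push_back(Glue);                 // glue pins the copy to N
  return DAG.MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}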
@@ -635,13 +640,13 @@ MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
   switch (NumVectorElts) {
   case 1:
-    return AMDGPU::SReg_32_XM0RegClassID;
+    return AMDGPU::SReg_32RegClassID;
   case 2:
     return AMDGPU::SReg_64RegClassID;
   case 3:
     return AMDGPU::SGPR_96RegClassID;
   case 4:
-    return AMDGPU::SReg_128RegClassID;
+    return AMDGPU::SGPR_128RegClassID;
   case 5:
     return AMDGPU::SGPR_160RegClassID;
   case 8:
@@ -713,12 +718,17 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
     return;   // Already selected.
   }
 
-  if (isa<AtomicSDNode>(N) ||
+  // isa<MemSDNode> almost works but is slightly too permissive for some DS
+  // intrinsics.
+  if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
       (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
        Opc == ISD::ATOMIC_LOAD_FADD ||
        Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
-       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
+       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
     N = glueCopyToM0LDSInit(N);
+    SelectCode(N);
+    return;
+  }
 
   switch (Opc) {
   default:
@@ -781,7 +791,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
     SDValue RC, SubReg0, SubReg1;
     SDLoc DL(N);
     if (N->getValueType(0) == MVT::i128) {
-      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
+      RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
     } else if (N->getValueType(0) == MVT::i64) {
@@ -815,14 +825,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
     ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
     return;
   }
-  case ISD::LOAD:
-  case ISD::STORE:
-  case ISD::ATOMIC_LOAD:
-  case ISD::ATOMIC_STORE: {
-    N = glueCopyToM0LDSInit(N);
-    break;
-  }
-
   case AMDGPUISD::BFE_I32:
   case AMDGPUISD::BFE_U32: {
     // There is a scalar version available, but unlike the vector version which
@@ -908,6 +910,10 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
     SelectINTRINSIC_W_CHAIN(N);
     return;
   }
+  case ISD::INTRINSIC_WO_CHAIN: {
+    SelectINTRINSIC_WO_CHAIN(N);
+    return;
+  }
   case ISD::INTRINSIC_VOID: {
     SelectINTRINSIC_VOID(N);
     return;
@@ -961,6 +967,14 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
   return true;
 }
 
+SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
+                                                       const SDLoc &DL) const {
+  SDNode *Mov = CurDAG->getMachineNode(
+    AMDGPU::S_MOV_B32, DL, MVT::i32,
+    CurDAG->getTargetConstant(Val, DL, MVT::i32));
+  return SDValue(Mov, 0);
+}
+
 // FIXME: Should only handle addcarry/subcarry
 void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
   SDLoc DL(N);
@@ -1308,7 +1322,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                      SDValue &VAddr, SDValue &SOffset,
                                      SDValue &Offset, SDValue &Offen,
                                      SDValue &Idxen, SDValue &Addr64,
                                      SDValue &GLC, SDValue &SLC,
-                                     SDValue &TFE, SDValue &DLC) const {
+                                     SDValue &TFE, SDValue &DLC,
+                                     SDValue &SWZ) const {
   // Subtarget prefers to use flat instruction
   if (Subtarget->useFlatForGlobal())
     return false;
@@ -1321,6 +1336,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
   SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
   TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
   DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+  SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1);
 
   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
   Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
@@ -1400,7 +1416,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &Offset, SDValue &GLC,
                                            SDValue &SLC, SDValue &TFE,
-                                           SDValue &DLC) const {
+                                           SDValue &DLC, SDValue &SWZ) const {
   SDValue Ptr, Offen, Idxen, Addr64;
 
   // addr64 bit was removed for volcanic islands.
@@ -1408,7 +1424,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
     return false;
 
   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
-                   GLC, SLC, TFE, DLC))
+                   GLC, SLC, TFE, DLC, SWZ))
     return false;
 
   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
@@ -1430,9 +1446,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                            SDValue &Offset,
                                            SDValue &SLC) const {
   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
-  SDValue GLC, TFE, DLC;
+  SDValue GLC, TFE, DLC, SWZ;
 
-  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC);
+  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC, SWZ);
 }
 
 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
@@ -1557,13 +1573,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                            SDValue &SOffset, SDValue &Offset,
                                            SDValue &GLC, SDValue &SLC,
-                                           SDValue &TFE, SDValue &DLC) const {
+                                           SDValue &TFE, SDValue &DLC,
+                                           SDValue &SWZ) const {
   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
   const SIInstrInfo *TII =
     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
 
   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
-                   GLC, SLC, TFE, DLC))
+                   GLC, SLC, TFE, DLC, SWZ))
     return false;
 
   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
@@ -1585,16 +1602,30 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                            SDValue &Soffset, SDValue &Offset
                                            ) const {
-  SDValue GLC, SLC, TFE, DLC;
+  SDValue GLC, SLC, TFE, DLC, SWZ;
 
-  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
+  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ);
 }
 
 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                            SDValue &Soffset, SDValue &Offset,
                                            SDValue &SLC) const {
-  SDValue GLC, TFE, DLC;
+  SDValue GLC, TFE, DLC, SWZ;
+
+  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ);
+}
 
-  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
+// Find a load or store from corresponding pattern root.
+// Roots may be build_vector, bitconvert or their combinations.
+static MemSDNode* findMemSDNode(SDNode *N) {
+  N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
+  if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
+    return MN;
+  assert(isa<BuildVectorSDNode>(N));
+  for (SDValue V : N->op_values())
+    if (MemSDNode *MN =
+           dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
+      return MN;
+  llvm_unreachable("cannot find MemSDNode in the pattern!");
 }
 
 template <bool IsSigned>
@@ -1603,8 +1634,95 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
                                           SDValue &VAddr,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
-  return static_cast<const SITargetLowering*>(getTargetLowering())->
-    SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC);
+  int64_t OffsetVal = 0;
+
+  if (Subtarget->hasFlatInstOffsets() &&
+      (!Subtarget->hasFlatSegmentOffsetBug() ||
+       findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) &&
+      CurDAG->isBaseWithConstantOffset(Addr)) {
+    SDValue N0 = Addr.getOperand(0);
+    SDValue N1 = Addr.getOperand(1);
+    uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
+
+    const SIInstrInfo *TII = Subtarget->getInstrInfo();
+    unsigned AS = findMemSDNode(N)->getAddressSpace();
+    if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) {
+      Addr = N0;
+      OffsetVal = COffsetVal;
+    } else {
+      // If the offset doesn't fit, put the low bits into the offset field and
+      // add the rest.
+
+      SDLoc DL(N);
+      uint64_t ImmField;
+      const unsigned NumBits = TII->getNumFlatOffsetBits(AS, IsSigned);
+      if (IsSigned) {
+        ImmField = SignExtend64(COffsetVal, NumBits);
+
+        // Don't use a negative offset field if the base offset is positive.
+        // Since the scheduler currently relies on the offset field, doing so
+        // could result in strange scheduling decisions.
+
+        // TODO: Should we not do this in the opposite direction as well?
+        if (static_cast<int64_t>(COffsetVal) > 0) {
+          if (static_cast<int64_t>(ImmField) < 0) {
+            const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits - 1);
+            ImmField = COffsetVal & OffsetMask;
+          }
+        }
+      } else {
+        // TODO: Should we do this for a negative offset?
+        const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits);
+        ImmField = COffsetVal & OffsetMask;
+      }
+
+      uint64_t RemainderOffset = COffsetVal - ImmField;
+
+      assert(TII->isLegalFLATOffset(ImmField, AS, IsSigned));
+      assert(RemainderOffset + ImmField == COffsetVal);
+
+      OffsetVal = ImmField;
+
+      // TODO: Should this try to use a scalar add pseudo if the base address is
+      // uniform and saddr is usable?
+      SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+      SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
+
+      SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+                                            DL, MVT::i32, N0, Sub0);
+      SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+                                            DL, MVT::i32, N0, Sub1);
+
+      SDValue AddOffsetLo
+        = getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
+      SDValue AddOffsetHi
+        = getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
+
+      SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
+      SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+
+      SDNode *Add = CurDAG->getMachineNode(
+        AMDGPU::V_ADD_I32_e64, DL, VTs,
+        {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
+
+      SDNode *Addc = CurDAG->getMachineNode(
+        AMDGPU::V_ADDC_U32_e64, DL, VTs,
+        {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
+
+      SDValue RegSequenceArgs[] = {
+        CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
+        SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1
+      };
+
+      Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
+                                            MVT::i64, RegSequenceArgs), 0);
+    }
+  }
+
+  VAddr = Addr;
+  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
+  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
+  return true;
 }
 
 bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
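The interesting case in the hunk above is a constant offset too large for the instruction's immediate field: the low bits stay in the offset field and the remainder is added to the base with the V_ADD_I32_e64/V_ADDC_U32_e64 pair. A standalone sketch of just that arithmetic — the 13-bit signed field width and the sample offset are illustrative (the real width comes from SIInstrInfo::getNumFlatOffsetBits), and the two helpers mirror llvm::SignExtend64 and llvm::maskTrailingOnes from Support/MathExtras.h:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Stand-ins for llvm::maskTrailingOnes / llvm::SignExtend64.
static uint64_t maskTrailingOnes(unsigned N) { return (1ULL << N) - 1; }
static int64_t signExtend64(uint64_t X, unsigned B) {
  return int64_t(X << (64 - B)) >> (64 - B);
}

int main() {
  const unsigned NumBits = 13;         // assumed signed offset-field width
  const uint64_t COffsetVal = 0x11fff; // constant offset folded from the DAG

  // Low NumBits would go into the offset field as a signed value...
  uint64_t ImmField = signExtend64(COffsetVal, NumBits);
  // ...but a negative field is avoided when the full offset is positive,
  // so only NumBits - 1 bits are kept in that case.
  if (int64_t(COffsetVal) > 0 && int64_t(ImmField) < 0)
    ImmField = COffsetVal & maskTrailingOnes(NumBits - 1);

  // Whatever is left over is added to the base address up front
  // (the V_ADD_I32_e64 / V_ADDC_U32_e64 pair in the patch).
  const uint64_t RemainderOffset = COffsetVal - ImmField;
  assert(ImmField + RemainderOffset == COffsetVal);

  // Prints: offset field = 0xfff, added to base = 0x11000
  printf("offset field = 0x%llx, added to base = 0x%llx\n",
         (unsigned long long)ImmField, (unsigned long long)RemainderOffset);
}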
@@ -1616,10 +1734,10 @@ bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
 }
 
 bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
-                                          SDValue Addr,
-                                          SDValue &VAddr,
-                                          SDValue &Offset,
-                                          SDValue &SLC) const {
+                                                SDValue Addr,
+                                                SDValue &VAddr,
+                                                SDValue &Offset,
+                                                SDValue &SLC) const {
   return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
 }
 
@@ -2158,10 +2276,12 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
   // offset field) % 64. Some versions of the programming guide omit the m0
   // part, or claim it's from offset 0.
   if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
-    // If we have a constant offset, try to use the default value for m0 as a
-    // base to possibly avoid setting it up.
-    glueCopyToM0(N, CurDAG->getTargetConstant(-1, SL, MVT::i32));
-    ImmOffset = ConstOffset->getZExtValue() + 1;
+    // If we have a constant offset, try to use the 0 in m0 as the base.
+    // TODO: Look into changing the default m0 initialization value. If the
+    // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
+    // the immediate offset.
+    glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
+    ImmOffset = ConstOffset->getZExtValue();
   } else {
     if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
       ImmOffset = BaseOffset.getConstantOperandVal(1);
@@ -2182,22 +2302,7 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
     glueCopyToM0(N, SDValue(M0Base, 0));
   }
 
-  SDValue V0;
   SDValue Chain = N->getOperand(0);
-  SDValue Glue;
-  if (HasVSrc) {
-    SDValue VSrc0 = N->getOperand(2);
-
-    // The manual doesn't mention this, but it seems only v0 works.
-    V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32);
-
-    SDValue CopyToV0 = CurDAG->getCopyToReg(
-        N->getOperand(0), SL, V0, VSrc0,
-        N->getOperand(N->getNumOperands() - 1));
-    Chain = CopyToV0;
-    Glue = CopyToV0.getValue(1);
-  }
-
   SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
 
   // TODO: Can this just be removed from the instruction?
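The m0 change above is value-preserving: the old scheme programmed m0 = -1 and biased the immediate by +1, the new one programs m0 = 0 and passes the offset through. Per the "(m0 base + offset field) % 64" formula quoted in the surrounding comment, both select the same resource. A quick self-contained check of that identity, taking the wrap-around 32-bit add at face value:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t C = 0; C < 64; ++C) {            // offsets from the intrinsic
    uint32_t Old = (uint32_t(-1) + (C + 1)) % 64; // old scheme: m0 = -1, imm = C + 1
    uint32_t New = (0u + C) % 64;                 // new scheme: m0 = 0,  imm = C
    assert(Old == New);
  }
  return 0;
}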
@@ -2206,14 +2311,11 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
   const unsigned Opc = gwsIntrinToOpcode(IntrID);
   SmallVector<SDValue, 5> Ops;
   if (HasVSrc)
-    Ops.push_back(V0);
+    Ops.push_back(N->getOperand(2));
   Ops.push_back(OffsetField);
   Ops.push_back(GDS);
   Ops.push_back(Chain);
 
-  if (HasVSrc)
-    Ops.push_back(Glue);
-
   SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
 }
@@ -2233,6 +2335,28 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
   SelectCode(N);
 }
 
+void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
+  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+  unsigned Opcode;
+  switch (IntrID) {
+  case Intrinsic::amdgcn_wqm:
+    Opcode = AMDGPU::WQM;
+    break;
+  case Intrinsic::amdgcn_softwqm:
+    Opcode = AMDGPU::SOFT_WQM;
+    break;
+  case Intrinsic::amdgcn_wwm:
+    Opcode = AMDGPU::WWM;
+    break;
+  default:
+    SelectCode(N);
+    return;
+  }
+
+  SDValue Src = N->getOperand(1);
+  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
+}
+
 void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
   unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
   switch (IntrID) {
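A detail that is easy to miss in the new SelectINTRINSIC_WO_CHAIN: with no chain, the intrinsic ID sits in operand 0, while the chained selectors (SelectINTRINSIC_W_CHAIN, SelectINTRINSIC_VOID) read it from operand 1, after the chain. A sketch of that layout difference as a hypothetical shared accessor:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Hypothetical helper: chained intrinsic nodes carry [chain, ID, args...],
// chainless ones carry [ID, args...], which is why the selectors above index
// different operands.
static unsigned getIntrinsicID(const SDNode *N) {
  bool HasChain = N->getOpcode() != ISD::INTRINSIC_WO_CHAIN;
  return cast<ConstantSDNode>(N->getOperand(HasChain ? 1 : 0))->getZExtValue();
}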