Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp')
-rw-r--r-- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 272
1 file changed, 198 insertions(+), 74 deletions(-)
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index ea730539f834..f330bd7ebcdd 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -172,8 +172,9 @@ private:
MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
- SDNode *glueCopyToM0LDSInit(SDNode *N) const;
+ SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
+ SDNode *glueCopyToM0LDSInit(SDNode *N) const;
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
@@ -186,10 +187,11 @@ private:
bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
- SDValue &TFE, SDValue &DLC) const;
+ SDValue &TFE, SDValue &DLC, SDValue &SWZ) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &GLC,
- SDValue &SLC, SDValue &TFE, SDValue &DLC) const;
+ SDValue &SLC, SDValue &TFE, SDValue &DLC,
+ SDValue &SWZ) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
SDValue &SLC) const;
@@ -202,21 +204,20 @@ private:
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
SDValue &Offset, SDValue &GLC, SDValue &SLC,
- SDValue &TFE, SDValue &DLC) const;
+ SDValue &TFE, SDValue &DLC, SDValue &SWZ) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset, SDValue &SLC) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
+ template <bool IsSigned>
+ bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+ SDValue &Offset, SDValue &SLC) const;
bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset, SDValue &SLC) const;
bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset, SDValue &SLC) const;
- template <bool IsSigned>
- bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset, SDValue &SLC) const;
-
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
bool &Imm) const;
SDValue Expand32BitAddress(SDValue Addr) const;
@@ -262,6 +263,8 @@ private:
SDValue getHi16Elt(SDValue In) const;
+ SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
+
void SelectADD_SUB_I64(SDNode *N);
void SelectAddcSubb(SDNode *N);
void SelectUADDO_USUBO(SDNode *N);
@@ -282,6 +285,7 @@ private:
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
void SelectINTRINSIC_W_CHAIN(SDNode *N);
+ void SelectINTRINSIC_WO_CHAIN(SDNode *N);
void SelectINTRINSIC_VOID(SDNode *N);
protected:
@@ -543,7 +547,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
if (!N->isMachineOpcode()) {
if (N->getOpcode() == ISD::CopyToReg) {
unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
return MRI.getRegClass(Reg);
}
@@ -582,19 +586,10 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
}
}
-SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
- const SITargetLowering& Lowering =
- *static_cast<const SITargetLowering*>(getTargetLowering());
-
- assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
-
- SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N),
- Val);
-
- SDValue Glue = M0.getValue(1);
-
+SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
+ SDValue Glue) const {
SmallVector <SDValue, 8> Ops;
- Ops.push_back(M0); // Replace the chain.
+ Ops.push_back(NewChain); // Replace the chain.
for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
Ops.push_back(N->getOperand(i));
@@ -602,6 +597,16 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}
+SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
+ const SITargetLowering& Lowering =
+ *static_cast<const SITargetLowering*>(getTargetLowering());
+
+ assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
+
+ SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
+ return glueCopyToOp(N, M0, M0.getValue(1));
+}
+
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
if (AS == AMDGPUAS::LOCAL_ADDRESS) {
@@ -635,13 +640,13 @@ MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
switch (NumVectorElts) {
case 1:
- return AMDGPU::SReg_32_XM0RegClassID;
+ return AMDGPU::SReg_32RegClassID;
case 2:
return AMDGPU::SReg_64RegClassID;
case 3:
return AMDGPU::SGPR_96RegClassID;
case 4:
- return AMDGPU::SReg_128RegClassID;
+ return AMDGPU::SGPR_128RegClassID;
case 5:
return AMDGPU::SGPR_160RegClassID;
case 8:
@@ -713,12 +718,17 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
return; // Already selected.
}
- if (isa<AtomicSDNode>(N) ||
+ // isa<MemSDNode> almost works but is slightly too permissive for some DS
+ // intrinsics.
+ if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
(Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
Opc == ISD::ATOMIC_LOAD_FADD ||
Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
- Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
+ Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
N = glueCopyToM0LDSInit(N);
+ SelectCode(N);
+ return;
+ }
switch (Opc) {
default:
@@ -781,7 +791,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SDValue RC, SubReg0, SubReg1;
SDLoc DL(N);
if (N->getValueType(0) == MVT::i128) {
- RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
+ RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
} else if (N->getValueType(0) == MVT::i64) {
@@ -815,14 +825,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
return;
}
- case ISD::LOAD:
- case ISD::STORE:
- case ISD::ATOMIC_LOAD:
- case ISD::ATOMIC_STORE: {
- N = glueCopyToM0LDSInit(N);
- break;
- }
-
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
// There is a scalar version available, but unlike the vector version which
@@ -908,6 +910,10 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectINTRINSIC_W_CHAIN(N);
return;
}
+ case ISD::INTRINSIC_WO_CHAIN: {
+ SelectINTRINSIC_WO_CHAIN(N);
+ return;
+ }
case ISD::INTRINSIC_VOID: {
SelectINTRINSIC_VOID(N);
return;
@@ -961,6 +967,14 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
return true;
}
+SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
+ const SDLoc &DL) const {
+ SDNode *Mov = CurDAG->getMachineNode(
+ AMDGPU::S_MOV_B32, DL, MVT::i32,
+ CurDAG->getTargetConstant(Val, DL, MVT::i32));
+ return SDValue(Mov, 0);
+}
+
// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
SDLoc DL(N);
@@ -1308,7 +1322,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64,
SDValue &GLC, SDValue &SLC,
- SDValue &TFE, SDValue &DLC) const {
+ SDValue &TFE, SDValue &DLC,
+ SDValue &SWZ) const {
// Subtarget prefers to use flat instruction
if (Subtarget->useFlatForGlobal())
return false;
@@ -1321,6 +1336,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1);
Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
@@ -1400,7 +1416,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset,
SDValue &Offset, SDValue &GLC,
SDValue &SLC, SDValue &TFE,
- SDValue &DLC) const {
+ SDValue &DLC, SDValue &SWZ) const {
SDValue Ptr, Offen, Idxen, Addr64;
// addr64 bit was removed for volcanic islands.
@@ -1408,7 +1424,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
return false;
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
- GLC, SLC, TFE, DLC))
+ GLC, SLC, TFE, DLC, SWZ))
return false;
ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
@@ -1430,9 +1446,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &Offset,
SDValue &SLC) const {
SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
- SDValue GLC, TFE, DLC;
+ SDValue GLC, TFE, DLC, SWZ;
- return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC);
+ return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC, SWZ);
}
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
@@ -1557,13 +1573,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &SOffset, SDValue &Offset,
SDValue &GLC, SDValue &SLC,
- SDValue &TFE, SDValue &DLC) const {
+ SDValue &TFE, SDValue &DLC,
+ SDValue &SWZ) const {
SDValue Ptr, VAddr, Offen, Idxen, Addr64;
const SIInstrInfo *TII =
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
- GLC, SLC, TFE, DLC))
+ GLC, SLC, TFE, DLC, SWZ))
return false;
if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
@@ -1585,16 +1602,30 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset
) const {
- SDValue GLC, SLC, TFE, DLC;
+ SDValue GLC, SLC, TFE, DLC, SWZ;
- return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
+ return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ);
}
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset,
SDValue &SLC) const {
- SDValue GLC, TFE, DLC;
+ SDValue GLC, TFE, DLC, SWZ;
+
+ return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ);
+}
- return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
+// Find a load or store from corresponding pattern root.
+// Roots may be build_vector, bitconvert or their combinations.
+static MemSDNode* findMemSDNode(SDNode *N) {
+ N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
+ if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
+ return MN;
+ assert(isa<BuildVectorSDNode>(N));
+ for (SDValue V : N->op_values())
+ if (MemSDNode *MN =
+ dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
+ return MN;
+ llvm_unreachable("cannot find MemSDNode in the pattern!");
}
template <bool IsSigned>
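As an aside, here is a minimal self-contained model of the traversal findMemSDNode performs above, using toy node kinds in place of SDNodes; the real code relies on AMDGPUTargetLowering::stripBitcast and MemSDNode, which are stubbed out here.

#include <cassert>
#include <vector>

enum Kind { Mem, Bitcast, BuildVector };
struct Node {
  Kind K;
  std::vector<Node *> Ops;
};

// Walk through any chain of bitcasts to the underlying node.
static Node *stripBitcast(Node *N) {
  while (N->K == Bitcast)
    N = N->Ops[0];
  return N;
}

// Mirror of findMemSDNode: strip bitcasts off the root; if the root is a
// build_vector, strip bitcasts off each operand until a memory node appears.
static Node *findMem(Node *N) {
  N = stripBitcast(N);
  if (N->K == Mem)
    return N;
  assert(N->K == BuildVector);
  for (Node *Op : N->Ops) {
    Node *M = stripBitcast(Op);
    if (M->K == Mem)
      return M;
  }
  return nullptr; // unreachable for well-formed patterns
}

int main() {
  // Root shaped like bitconvert (build_vector (bitconvert (load ...))).
  Node Load{Mem, {}};
  Node Cast{Bitcast, {&Load}};
  Node Vec{BuildVector, {&Cast}};
  Node Root{Bitcast, {&Vec}};
  assert(findMem(&Root) == &Load);
  return 0;
}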
@@ -1603,8 +1634,95 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
SDValue &VAddr,
SDValue &Offset,
SDValue &SLC) const {
- return static_cast<const SITargetLowering*>(getTargetLowering())->
- SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC);
+ int64_t OffsetVal = 0;
+
+ if (Subtarget->hasFlatInstOffsets() &&
+ (!Subtarget->hasFlatSegmentOffsetBug() ||
+ findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) &&
+ CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
+
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
+ unsigned AS = findMemSDNode(N)->getAddressSpace();
+ if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) {
+ Addr = N0;
+ OffsetVal = COffsetVal;
+ } else {
+ // If the offset doesn't fit, put the low bits into the offset field and
+ // add the rest.
+
+ SDLoc DL(N);
+ uint64_t ImmField;
+ const unsigned NumBits = TII->getNumFlatOffsetBits(AS, IsSigned);
+ if (IsSigned) {
+ ImmField = SignExtend64(COffsetVal, NumBits);
+
+ // Don't use a negative offset field if the base offset is positive.
+ // Since the scheduler currently relies on the offset field, doing so
+ // could result in strange scheduling decisions.
+
+ // TODO: Should we not do this in the opposite direction as well?
+ if (static_cast<int64_t>(COffsetVal) > 0) {
+ if (static_cast<int64_t>(ImmField) < 0) {
+ const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits - 1);
+ ImmField = COffsetVal & OffsetMask;
+ }
+ }
+ } else {
+ // TODO: Should we do this for a negative offset?
+ const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits);
+ ImmField = COffsetVal & OffsetMask;
+ }
+
+ uint64_t RemainderOffset = COffsetVal - ImmField;
+
+ assert(TII->isLegalFLATOffset(ImmField, AS, IsSigned));
+ assert(RemainderOffset + ImmField == COffsetVal);
+
+ OffsetVal = ImmField;
+
+ // TODO: Should this try to use a scalar add pseudo if the base address is
+ // uniform and saddr is usable?
+ SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+ SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
+
+ SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, N0, Sub0);
+ SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, N0, Sub1);
+
+ SDValue AddOffsetLo
+ = getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
+ SDValue AddOffsetHi
+ = getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
+
+ SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
+ SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+
+ SDNode *Add = CurDAG->getMachineNode(
+ AMDGPU::V_ADD_I32_e64, DL, VTs,
+ {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
+
+ SDNode *Addc = CurDAG->getMachineNode(
+ AMDGPU::V_ADDC_U32_e64, DL, VTs,
+ {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
+
+ SDValue RegSequenceArgs[] = {
+ CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
+ SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1
+ };
+
+ Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
+ MVT::i64, RegSequenceArgs), 0);
+ }
+ }
+
+ VAddr = Addr;
+ Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
+ SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
+ return true;
}
bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
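To make the offset split above concrete, here is a self-contained sketch of the same arithmetic under stated assumptions: a hypothetical 13-bit signed offset field (the real width comes from SIInstrInfo::getNumFlatOffsetBits) and local stand-ins for the llvm/Support/MathExtras.h helpers. The low bits land in the immediate field; the remainder is folded into the base pointer in 32-bit halves with a carry, which is what the V_ADD_I32_e64/V_ADDC_U32_e64 pair materializes.

#include <cassert>
#include <cstdint>

// Local stand-ins for SignExtend64 and maskTrailingOnes from MathExtras.h.
static uint64_t signExtend64(uint64_t X, unsigned B) {
  return uint64_t(int64_t(X << (64 - B)) >> (64 - B));
}
static uint64_t maskTrailingOnes(unsigned N) {
  return N == 0 ? 0 : (~0ULL >> (64 - N));
}

int main() {
  const unsigned NumBits = 13;        // assumed signed offset field width
  const uint64_t COffsetVal = 0x1fff; // 8191 does not fit in 13 signed bits

  // Sign-extending the low 13 bits yields -1, a negative field for a
  // positive base offset, so keep only NumBits - 1 low bits instead.
  uint64_t ImmField = signExtend64(COffsetVal, NumBits);
  if (int64_t(COffsetVal) > 0 && int64_t(ImmField) < 0)
    ImmField = COffsetVal & maskTrailingOnes(NumBits - 1); // 0xfff

  uint64_t RemainderOffset = COffsetVal - ImmField;        // 0x1000
  assert(ImmField + RemainderOffset == COffsetVal);

  // Fold the remainder into a (hypothetical) base pointer in 32-bit halves
  // with a carry, as the generated v_add/v_addc pair does.
  uint64_t Base = 0x12345678ffffffffULL;
  uint32_t Lo = uint32_t(Base) + uint32_t(RemainderOffset);
  uint32_t Carry = Lo < uint32_t(Base); // carry-out of the low add
  uint32_t Hi = uint32_t(Base >> 32) + uint32_t(RemainderOffset >> 32) + Carry;
  uint64_t NewBase = (uint64_t(Hi) << 32) | Lo; // REG_SEQUENCE sub0/sub1

  // Both forms reach the same final address.
  assert(NewBase + ImmField == Base + COffsetVal);
  return 0;
}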
@@ -1616,10 +1734,10 @@ bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
}
bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
- SDValue Addr,
- SDValue &VAddr,
- SDValue &Offset,
- SDValue &SLC) const {
+ SDValue Addr,
+ SDValue &VAddr,
+ SDValue &Offset,
+ SDValue &SLC) const {
return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
}
@@ -2158,10 +2276,12 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
// offset field) % 64. Some versions of the programming guide omit the m0
// part, or claim it's from offset 0.
if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
- // If we have a constant offset, try to use the default value for m0 as a
- // base to possibly avoid setting it up.
- glueCopyToM0(N, CurDAG->getTargetConstant(-1, SL, MVT::i32));
- ImmOffset = ConstOffset->getZExtValue() + 1;
+ // If we have a constant offset, try to use the 0 in m0 as the base.
+ // TODO: Look into changing the default m0 initialization value. If the
+ // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
+ // the immediate offset.
+ glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
+ ImmOffset = ConstOffset->getZExtValue();
} else {
if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
ImmOffset = BaseOffset.getConstantOperandVal(1);
@@ -2182,22 +2302,7 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
glueCopyToM0(N, SDValue(M0Base, 0));
}
- SDValue V0;
SDValue Chain = N->getOperand(0);
- SDValue Glue;
- if (HasVSrc) {
- SDValue VSrc0 = N->getOperand(2);
-
- // The manual doesn't mention this, but it seems only v0 works.
- V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32);
-
- SDValue CopyToV0 = CurDAG->getCopyToReg(
- N->getOperand(0), SL, V0, VSrc0,
- N->getOperand(N->getNumOperands() - 1));
- Chain = CopyToV0;
- Glue = CopyToV0.getValue(1);
- }
-
SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
// TODO: Can this just be removed from the instruction?
@@ -2206,14 +2311,11 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
const unsigned Opc = gwsIntrinToOpcode(IntrID);
SmallVector<SDValue, 5> Ops;
if (HasVSrc)
- Ops.push_back(V0);
+ Ops.push_back(N->getOperand(2));
Ops.push_back(OffsetField);
Ops.push_back(GDS);
Ops.push_back(Chain);
- if (HasVSrc)
- Ops.push_back(Glue);
-
SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}
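A quick sanity check on the m0 base change above, assuming the effective GWS slot really is (m0 base + offset field) % 64 as the earlier comment describes: the old scheme (m0 = -1, field = C + 1) and the new one (m0 = 0, field = C) address the same slot.

#include <cassert>

int main() {
  for (int C = 0; C < 64; ++C) {
    int OldSlot = (-1 + (C + 1)) % 64; // old: m0 = -1, offset field = C + 1
    int NewSlot = (0 + C) % 64;        // new: m0 = 0,  offset field = C
    assert(OldSlot == NewSlot);
  }
  return 0;
}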
@@ -2233,6 +2335,28 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
SelectCode(N);
}
+void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
+ unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned Opcode;
+ switch (IntrID) {
+ case Intrinsic::amdgcn_wqm:
+ Opcode = AMDGPU::WQM;
+ break;
+ case Intrinsic::amdgcn_softwqm:
+ Opcode = AMDGPU::SOFT_WQM;
+ break;
+ case Intrinsic::amdgcn_wwm:
+ Opcode = AMDGPU::WWM;
+ break;
+ default:
+ SelectCode(N);
+ return;
+ }
+
+ SDValue Src = N->getOperand(1);
+ CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
+}
+
void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntrID) {