Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 111
1 file changed, 92 insertions(+), 19 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index cf947dccafac..d6bf0d8cb2ef 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2072,11 +2072,45 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
const SIMachineFunctionInfo &MFI,
EVT VT,
AMDGPUFunctionArgInfo::PreloadedValue PVID) const {
- const ArgDescriptor *Reg;
+ const ArgDescriptor *Reg = nullptr;
const TargetRegisterClass *RC;
LLT Ty;
- std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID);
+ CallingConv::ID CC = DAG.getMachineFunction().getFunction().getCallingConv();
+ const ArgDescriptor WorkGroupIDX =
+ ArgDescriptor::createRegister(AMDGPU::TTMP9);
+ // If GridZ is not programmed in an entry function then the hardware will set
+ // it to all zeros, so there is no need to mask the GridY value in the low
+ // order bits.
+ const ArgDescriptor WorkGroupIDY = ArgDescriptor::createRegister(
+ AMDGPU::TTMP7,
+ AMDGPU::isEntryFunctionCC(CC) && !MFI.hasWorkGroupIDZ() ? ~0u : 0xFFFFu);
+ const ArgDescriptor WorkGroupIDZ =
+ ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u);
+ if (Subtarget->hasArchitectedSGPRs() && AMDGPU::isCompute(CC)) {
+ switch (PVID) {
+ case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
+ Reg = &WorkGroupIDX;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y:
+ Reg = &WorkGroupIDY;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
+ case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
+ Reg = &WorkGroupIDZ;
+ RC = &AMDGPU::SReg_32RegClass;
+ Ty = LLT::scalar(32);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!Reg)
+ std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID);
if (!Reg) {
if (PVID == AMDGPUFunctionArgInfo::PreloadedValue::KERNARG_SEGMENT_PTR) {
// It's possible for a kernarg intrinsic call to appear in a kernel with
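Note: with architected SGPRs, the hunk above reads the workgroup IDs straight from trap temporaries: TTMP9 holds the X ID, while TTMP7 packs Y in bits [15:0] and Z in bits [31:16], hence the 0xFFFF and 0xFFFF0000 masks on the Y and Z descriptors. A minimal standalone C++ sketch of that packing, assuming a mask-then-shift reader (illustrative only, not LLVM's ArgDescriptor consumer):

#include <cstdint>
#include <cstdio>

// Read a field described by a register value plus a contiguous bit mask,
// shifting the masked bits down to bit 0.
static uint32_t readMasked(uint32_t reg, uint32_t mask) {
  return (reg & mask) >> __builtin_ctz(mask); // GCC/Clang builtin
}

int main() {
  uint32_t ttmp7 = (7u << 16) | 42u; // Z = 7 in [31:16], Y = 42 in [15:0]
  printf("Y = %u\n", readMasked(ttmp7, 0xFFFFu));     // 42
  printf("Z = %u\n", readMasked(ttmp7, 0xFFFF0000u)); // 7
  // If GridZ is never programmed, hardware zeroes the high half of TTMP7,
  // so the Y descriptor can use mask ~0u and skip the masking entirely.
  printf("Y = %u\n", readMasked(42u, ~0u));           // 42
  return 0;
}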
@@ -2505,28 +2539,24 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo,
}
}
- if (Info.hasWorkGroupIDX()) {
- Register Reg = Info.addWorkGroupIDX(HasArchitectedSGPRs);
- if (!HasArchitectedSGPRs)
+ if (!HasArchitectedSGPRs) {
+ if (Info.hasWorkGroupIDX()) {
+ Register Reg = Info.addWorkGroupIDX();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
- CCInfo.AllocateReg(Reg);
- }
-
- if (Info.hasWorkGroupIDY()) {
- Register Reg = Info.addWorkGroupIDY(HasArchitectedSGPRs);
- if (!HasArchitectedSGPRs)
+ if (Info.hasWorkGroupIDY()) {
+ Register Reg = Info.addWorkGroupIDY();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
- CCInfo.AllocateReg(Reg);
- }
-
- if (Info.hasWorkGroupIDZ()) {
- Register Reg = Info.addWorkGroupIDZ(HasArchitectedSGPRs);
- if (!HasArchitectedSGPRs)
+ if (Info.hasWorkGroupIDZ()) {
+ Register Reg = Info.addWorkGroupIDZ();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
-
- CCInfo.AllocateReg(Reg);
+ CCInfo.AllocateReg(Reg);
+ }
}
if (Info.hasWorkGroupInfo()) {
@@ -7890,6 +7920,17 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
return Loads[0];
}
+SDValue SITargetLowering::lowerWaveID(SelectionDAG &DAG, SDValue Op) const {
+ // With architected SGPRs, waveIDinGroup is in TTMP8[29:25].
+ if (!Subtarget->hasArchitectedSGPRs())
+ return {};
+ SDLoc SL(Op);
+ MVT VT = MVT::i32;
+ SDValue TTMP8 = DAG.getCopyFromReg(DAG.getEntryNode(), SL, AMDGPU::TTMP8, VT);
+ return DAG.getNode(AMDGPUISD::BFE_U32, SL, VT, TTMP8,
+ DAG.getConstant(25, SL, VT), DAG.getConstant(5, SL, VT));
+}
+
SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op,
unsigned Dim,
const ArgDescriptor &Arg) const {
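Note: the new lowerWaveID above emits AMDGPUISD::BFE_U32 with offset 25 and width 5, an unsigned bitfield extract of TTMP8[29:25]. A scalar C++ sketch of that computation, assuming plain shift-and-mask semantics for the extract:

#include <cstdint>
#include <cassert>

// Unsigned bitfield extract: 'width' bits of 'src' starting at 'offset'.
static uint32_t bfe_u32(uint32_t src, uint32_t offset, uint32_t width) {
  return (src >> offset) & ((1u << width) - 1u);
}

int main() {
  // Wave ID of 27 in bits [29:25]; other TTMP8 bits hold unrelated fields.
  uint32_t ttmp8 = (27u << 25) | 0x80000123u;
  assert(bfe_u32(ttmp8, /*offset=*/25, /*width=*/5) == 27u);
  return 0;
}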
@@ -8060,6 +8101,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_workgroup_id_z:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_wave_id:
+ return lowerWaveID(DAG, Op);
case Intrinsic::amdgcn_lds_kernel_id: {
if (MFI->isEntryFunction())
return getLDSKernelId(DAG, DL);
@@ -8242,6 +8285,36 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SIInstrInfo::MO_ABS32_LO);
return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0};
}
+ case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
+ case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8:
+ case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8: {
+ if (Op.getOperand(4).getValueType() == MVT::i32)
+ return SDValue();
+
+ SDLoc SL(Op);
+ auto IndexKeyi32 = DAG.getAnyExtOrTrunc(Op.getOperand(4), SL, MVT::i32);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, Op.getValueType(),
+ Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
+ Op.getOperand(3), IndexKeyi32);
+ }
+ case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4:
+ case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8:
+ case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4: {
+ if (Op.getOperand(6).getValueType() == MVT::i32)
+ return SDValue();
+
+ SDLoc SL(Op);
+ auto IndexKeyi32 = DAG.getAnyExtOrTrunc(Op.getOperand(6), SL, MVT::i32);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, Op.getValueType(),
+ {Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
+ Op.getOperand(3), Op.getOperand(4), Op.getOperand(5),
+ IndexKeyi32, Op.getOperand(7)});
+ }
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
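Note: the swmmac cases above legalize the index-key operand (operand 4 for the floating-point variants, operand 6 for the iu4/iu8 variants): a key that is already i32 is left alone, and anything narrower is rebuilt with DAG.getAnyExtOrTrunc so only the i32 form needs selection patterns. An any-extend leaves the upper bits unspecified, which is presumably harmless because the consumer reads only the low bits of the key; a standalone C++ sketch of that invariant (the 2-bit key width and all names are illustrative assumptions):

#include <cstdint>

// Model of anyext(i16 -> i32): the upper bits are unspecified, so
// represent them with caller-chosen garbage.
static uint32_t anyExt16To32(uint16_t key, uint32_t garbage) {
  return (garbage << 16) | key;
}

// Hypothetical consumer: a 2-bit index key, so only the low two bits of
// the widened value are ever read.
static uint32_t selectIndex(uint32_t key) { return key & 0x3u; }

int main() {
  uint16_t key = 2;
  // Any value in the unspecified upper half yields the same selection.
  return selectIndex(anyExt16To32(key, 0u)) ==
                 selectIndex(anyExt16To32(key, 0xDEADu))
             ? 0
             : 1;
}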