summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp106
1 files changed, 99 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 3ac7c45b3275..f5018e3a19ac 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -672,15 +672,15 @@ void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
Kern[".kind"] = Kern.getDocument()->getNode("fini");
}
-void MetadataStreamerV3::emitKernelArgs(const Function &Func,
- const GCNSubtarget &ST,
+void MetadataStreamerV3::emitKernelArgs(const MachineFunction &MF,
msgpack::MapDocNode Kern) {
+ auto &Func = MF.getFunction();
unsigned Offset = 0;
auto Args = HSAMetadataDoc->getArrayNode();
for (auto &Arg : Func.args())
emitKernelArg(Arg, Offset, Args);
- emitHiddenKernelArgs(Func, ST, Offset, Args);
+ emitHiddenKernelArgs(MF, Offset, Args);
Kern[".args"] = Args;
}
@@ -789,10 +789,12 @@ void MetadataStreamerV3::emitKernelArg(
Args.push_back(Arg);
}
-void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
- const GCNSubtarget &ST,
+void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF,
unsigned &Offset,
msgpack::ArrayDocNode Args) {
+ auto &Func = MF.getFunction();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
if (!HiddenArgNumBytes)
return;
@@ -910,7 +912,6 @@ void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) {
auto &Func = MF.getFunction();
auto Kern = getHSAKernelProps(MF, ProgramInfo);
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
Func.getCallingConv() == CallingConv::SPIR_KERNEL);
@@ -924,7 +925,7 @@ void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
(Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
emitKernelLanguage(Func, Kern);
emitKernelAttrs(Func, Kern);
- emitKernelArgs(Func, ST, Kern);
+ emitKernelArgs(MF, Kern);
}
Kernels.push_back(Kern);
@@ -954,6 +955,97 @@ void MetadataStreamerV4::begin(const Module &Mod,
getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
}
+//===----------------------------------------------------------------------===//
+// HSAMetadataStreamerV5
+//===----------------------------------------------------------------------===//
+
+void MetadataStreamerV5::emitVersion() {
+ auto Version = HSAMetadataDoc->getArrayNode();
+ Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
+ Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
+ getRootMetadata("amdhsa.version") = Version;
+}
+
+void MetadataStreamerV5::emitHiddenKernelArgs(const MachineFunction &MF,
+ unsigned &Offset,
+ msgpack::ArrayDocNode Args) {
+ auto &Func = MF.getFunction();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const Module *M = Func.getParent();
+ auto &DL = M->getDataLayout();
+
+ auto Int64Ty = Type::getInt64Ty(Func.getContext());
+ auto Int32Ty = Type::getInt32Ty(Func.getContext());
+ auto Int16Ty = Type::getInt16Ty(Func.getContext());
+
+ emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
+ emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
+ emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
+
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
+
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
+
+ // Reserved for hidden_tool_correlation_id.
+ Offset += 8;
+
+ Offset += 8; // Reserved.
+
+ emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
+ emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
+ emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);
+
+ emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
+
+ Offset += 6; // Reserved.
+ auto Int8PtrTy =
+ Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
+
+ if (M->getNamedMetadata("llvm.printf.fmts")) {
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
+ Args);
+ } else
+ Offset += 8; // Skipped.
+
+ if (M->getModuleFlag("amdgpu_hostcall")) {
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
+ Args);
+ } else
+ Offset += 8; // Skipped.
+
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
+ Args);
+
+ // Ignore temporarily until it is implemented.
+ // emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
+ Offset += 8;
+
+ if (Func.hasFnAttribute("calls-enqueue-kernel")) {
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
+ Args);
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
+ Args);
+ } else
+ Offset += 16; // Skipped.
+
+ Offset += 72; // Reserved.
+
+ // hidden_private_base and hidden_shared_base are only used by GFX8.
+ if (ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
+ emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
+ } else
+ Offset += 8; // Skipped.
+
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ if (MFI.hasQueuePtr())
+ emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
+}
+
} // end namespace HSAMD
} // end namespace AMDGPU
} // end namespace llvm