diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 956 |
1 files changed, 956 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp new file mode 100644 index 000000000000..9f5bcd8ff5f0 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -0,0 +1,956 @@ +//===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// AMDGPU HSA Metadata Streamer. +/// +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUHSAMetadataStreamer.h" +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUTargetStreamer.h" +#include "SIMachineFunctionInfo.h" +#include "SIProgramInfo.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +static cl::opt<bool> DumpHSAMetadata( + "amdgpu-dump-hsa-metadata", + cl::desc("Dump AMDGPU HSA Metadata")); +static cl::opt<bool> VerifyHSAMetadata( + "amdgpu-verify-hsa-metadata", + cl::desc("Verify AMDGPU HSA Metadata")); + +namespace AMDGPU { +namespace HSAMD { + +//===----------------------------------------------------------------------===// +// HSAMetadataStreamerV2 +//===----------------------------------------------------------------------===// +void MetadataStreamerV2::dump(StringRef HSAMetadataString) const { + errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; +} + +void MetadataStreamerV2::verify(StringRef HSAMetadataString) const { + errs() << "AMDGPU HSA Metadata Parser Test: "; + + HSAMD::Metadata FromHSAMetadataString; + if (fromString(HSAMetadataString, FromHSAMetadataString)) { + errs() << "FAIL\n"; + return; + } + + std::string ToHSAMetadataString; + if (toString(FromHSAMetadataString, ToHSAMetadataString)) { + errs() << "FAIL\n"; + return; + } + + errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL") + << '\n'; + if (HSAMetadataString != ToHSAMetadataString) { + errs() << "Original input: " << HSAMetadataString << '\n' + << "Produced output: " << ToHSAMetadataString << '\n'; + } +} + +AccessQualifier +MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const { + if (AccQual.empty()) + return AccessQualifier::Unknown; + + return StringSwitch<AccessQualifier>(AccQual) + .Case("read_only", AccessQualifier::ReadOnly) + .Case("write_only", AccessQualifier::WriteOnly) + .Case("read_write", AccessQualifier::ReadWrite) + .Default(AccessQualifier::Default); +} + +AddressSpaceQualifier +MetadataStreamerV2::getAddressSpaceQualifier( + unsigned AddressSpace) const { + switch (AddressSpace) { + case AMDGPUAS::PRIVATE_ADDRESS: + return AddressSpaceQualifier::Private; + case AMDGPUAS::GLOBAL_ADDRESS: + return AddressSpaceQualifier::Global; + case AMDGPUAS::CONSTANT_ADDRESS: + return AddressSpaceQualifier::Constant; + case AMDGPUAS::LOCAL_ADDRESS: + return AddressSpaceQualifier::Local; + case AMDGPUAS::FLAT_ADDRESS: + return AddressSpaceQualifier::Generic; + case AMDGPUAS::REGION_ADDRESS: + return AddressSpaceQualifier::Region; + default: + return AddressSpaceQualifier::Unknown; + } +} + +ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual, + StringRef BaseTypeName) const { + if (TypeQual.find("pipe") != StringRef::npos) + return ValueKind::Pipe; + + return StringSwitch<ValueKind>(BaseTypeName) + .Case("image1d_t", ValueKind::Image) + .Case("image1d_array_t", ValueKind::Image) + .Case("image1d_buffer_t", ValueKind::Image) + .Case("image2d_t", ValueKind::Image) + .Case("image2d_array_t", ValueKind::Image) + .Case("image2d_array_depth_t", ValueKind::Image) + .Case("image2d_array_msaa_t", ValueKind::Image) + .Case("image2d_array_msaa_depth_t", ValueKind::Image) + .Case("image2d_depth_t", ValueKind::Image) + .Case("image2d_msaa_t", ValueKind::Image) + .Case("image2d_msaa_depth_t", ValueKind::Image) + .Case("image3d_t", ValueKind::Image) + .Case("sampler_t", ValueKind::Sampler) + .Case("queue_t", ValueKind::Queue) + .Default(isa<PointerType>(Ty) ? + (Ty->getPointerAddressSpace() == + AMDGPUAS::LOCAL_ADDRESS ? + ValueKind::DynamicSharedPointer : + ValueKind::GlobalBuffer) : + ValueKind::ByValue); +} + +ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const { + switch (Ty->getTypeID()) { + case Type::IntegerTyID: { + auto Signed = !TypeName.startswith("u"); + switch (Ty->getIntegerBitWidth()) { + case 8: + return Signed ? ValueType::I8 : ValueType::U8; + case 16: + return Signed ? ValueType::I16 : ValueType::U16; + case 32: + return Signed ? ValueType::I32 : ValueType::U32; + case 64: + return Signed ? ValueType::I64 : ValueType::U64; + default: + return ValueType::Struct; + } + } + case Type::HalfTyID: + return ValueType::F16; + case Type::FloatTyID: + return ValueType::F32; + case Type::DoubleTyID: + return ValueType::F64; + case Type::PointerTyID: + return getValueType(Ty->getPointerElementType(), TypeName); + case Type::VectorTyID: + return getValueType(Ty->getVectorElementType(), TypeName); + default: + return ValueType::Struct; + } +} + +std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const { + switch (Ty->getTypeID()) { + case Type::IntegerTyID: { + if (!Signed) + return (Twine('u') + getTypeName(Ty, true)).str(); + + auto BitWidth = Ty->getIntegerBitWidth(); + switch (BitWidth) { + case 8: + return "char"; + case 16: + return "short"; + case 32: + return "int"; + case 64: + return "long"; + default: + return (Twine('i') + Twine(BitWidth)).str(); + } + } + case Type::HalfTyID: + return "half"; + case Type::FloatTyID: + return "float"; + case Type::DoubleTyID: + return "double"; + case Type::VectorTyID: { + auto VecTy = cast<VectorType>(Ty); + auto ElTy = VecTy->getElementType(); + auto NumElements = VecTy->getVectorNumElements(); + return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); + } + default: + return "unknown"; + } +} + +std::vector<uint32_t> +MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const { + std::vector<uint32_t> Dims; + if (Node->getNumOperands() != 3) + return Dims; + + for (auto &Op : Node->operands()) + Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue()); + return Dims; +} + +Kernel::CodeProps::Metadata +MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) const { + const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); + const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); + HSAMD::Kernel::CodeProps::Metadata HSACodeProps; + const Function &F = MF.getFunction(); + + assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || + F.getCallingConv() == CallingConv::SPIR_KERNEL); + + Align MaxKernArgAlign; + HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, + MaxKernArgAlign); + HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; + HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; + HSACodeProps.mKernargSegmentAlign = + std::max(MaxKernArgAlign, Align(4)).value(); + HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); + HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; + HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; + HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); + HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; + HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); + HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); + HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); + + return HSACodeProps; +} + +Kernel::DebugProps::Metadata +MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) const { + return HSAMD::Kernel::DebugProps::Metadata(); +} + +void MetadataStreamerV2::emitVersion() { + auto &Version = HSAMetadata.mVersion; + + Version.push_back(VersionMajor); + Version.push_back(VersionMinor); +} + +void MetadataStreamerV2::emitPrintf(const Module &Mod) { + auto &Printf = HSAMetadata.mPrintf; + + auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); + if (!Node) + return; + + for (auto Op : Node->operands()) + if (Op->getNumOperands()) + Printf.push_back(cast<MDString>(Op->getOperand(0))->getString()); +} + +void MetadataStreamerV2::emitKernelLanguage(const Function &Func) { + auto &Kernel = HSAMetadata.mKernels.back(); + + // TODO: What about other languages? + auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); + if (!Node || !Node->getNumOperands()) + return; + auto Op0 = Node->getOperand(0); + if (Op0->getNumOperands() <= 1) + return; + + Kernel.mLanguage = "OpenCL C"; + Kernel.mLanguageVersion.push_back( + mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()); + Kernel.mLanguageVersion.push_back( + mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); +} + +void MetadataStreamerV2::emitKernelAttrs(const Function &Func) { + auto &Attrs = HSAMetadata.mKernels.back().mAttrs; + + if (auto Node = Func.getMetadata("reqd_work_group_size")) + Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); + if (auto Node = Func.getMetadata("work_group_size_hint")) + Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); + if (auto Node = Func.getMetadata("vec_type_hint")) { + Attrs.mVecTypeHint = getTypeName( + cast<ValueAsMetadata>(Node->getOperand(0))->getType(), + mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()); + } + if (Func.hasFnAttribute("runtime-handle")) { + Attrs.mRuntimeHandle = + Func.getFnAttribute("runtime-handle").getValueAsString().str(); + } +} + +void MetadataStreamerV2::emitKernelArgs(const Function &Func) { + for (auto &Arg : Func.args()) + emitKernelArg(Arg); + + emitHiddenKernelArgs(Func); +} + +void MetadataStreamerV2::emitKernelArg(const Argument &Arg) { + auto Func = Arg.getParent(); + auto ArgNo = Arg.getArgNo(); + const MDNode *Node; + + StringRef Name; + Node = Func->getMetadata("kernel_arg_name"); + if (Node && ArgNo < Node->getNumOperands()) + Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); + else if (Arg.hasName()) + Name = Arg.getName(); + + StringRef TypeName; + Node = Func->getMetadata("kernel_arg_type"); + if (Node && ArgNo < Node->getNumOperands()) + TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); + + StringRef BaseTypeName; + Node = Func->getMetadata("kernel_arg_base_type"); + if (Node && ArgNo < Node->getNumOperands()) + BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); + + StringRef AccQual; + if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && + Arg.hasNoAliasAttr()) { + AccQual = "read_only"; + } else { + Node = Func->getMetadata("kernel_arg_access_qual"); + if (Node && ArgNo < Node->getNumOperands()) + AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); + } + + StringRef TypeQual; + Node = Func->getMetadata("kernel_arg_type_qual"); + if (Node && ArgNo < Node->getNumOperands()) + TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); + + Type *Ty = Arg.getType(); + const DataLayout &DL = Func->getParent()->getDataLayout(); + + unsigned PointeeAlign = 0; + if (auto PtrTy = dyn_cast<PointerType>(Ty)) { + if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { + PointeeAlign = Arg.getParamAlignment(); + if (PointeeAlign == 0) + PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); + } + } + + emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName), + PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); +} + +void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty, + ValueKind ValueKind, + unsigned PointeeAlign, StringRef Name, + StringRef TypeName, + StringRef BaseTypeName, + StringRef AccQual, StringRef TypeQual) { + HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); + auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); + + Arg.mName = Name; + Arg.mTypeName = TypeName; + Arg.mSize = DL.getTypeAllocSize(Ty); + Arg.mAlign = DL.getABITypeAlignment(Ty); + Arg.mValueKind = ValueKind; + Arg.mValueType = getValueType(Ty, BaseTypeName); + Arg.mPointeeAlign = PointeeAlign; + + if (auto PtrTy = dyn_cast<PointerType>(Ty)) + Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace()); + + Arg.mAccQual = getAccessQualifier(AccQual); + + // TODO: Emit Arg.mActualAccQual. + + SmallVector<StringRef, 1> SplitTypeQuals; + TypeQual.split(SplitTypeQuals, " ", -1, false); + for (StringRef Key : SplitTypeQuals) { + auto P = StringSwitch<bool*>(Key) + .Case("const", &Arg.mIsConst) + .Case("restrict", &Arg.mIsRestrict) + .Case("volatile", &Arg.mIsVolatile) + .Case("pipe", &Arg.mIsPipe) + .Default(nullptr); + if (P) + *P = true; + } +} + +void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) { + int HiddenArgNumBytes = + getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); + + if (!HiddenArgNumBytes) + return; + + auto &DL = Func.getParent()->getDataLayout(); + auto Int64Ty = Type::getInt64Ty(Func.getContext()); + + if (HiddenArgNumBytes >= 8) + emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX); + if (HiddenArgNumBytes >= 16) + emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY); + if (HiddenArgNumBytes >= 24) + emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ); + + auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), + AMDGPUAS::GLOBAL_ADDRESS); + + // Emit "printf buffer" argument if printf is used, otherwise emit dummy + // "none" argument. + if (HiddenArgNumBytes >= 32) { + if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) + emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer); + else + emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); + } + + // Emit "default queue" and "completion action" arguments if enqueue kernel is + // used, otherwise emit dummy "none" arguments. + if (HiddenArgNumBytes >= 48) { + if (Func.hasFnAttribute("calls-enqueue-kernel")) { + emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue); + emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction); + } else { + emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); + emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); + } + } + + // Emit the pointer argument for multi-grid object. + if (HiddenArgNumBytes >= 56) + emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenMultiGridSyncArg); +} + +bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { + return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); +} + +void MetadataStreamerV2::begin(const Module &Mod) { + emitVersion(); + emitPrintf(Mod); +} + +void MetadataStreamerV2::end() { + std::string HSAMetadataString; + if (toString(HSAMetadata, HSAMetadataString)) + return; + + if (DumpHSAMetadata) + dump(HSAMetadataString); + if (VerifyHSAMetadata) + verify(HSAMetadataString); +} + +void MetadataStreamerV2::emitKernel(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) { + auto &Func = MF.getFunction(); + if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) + return; + + auto CodeProps = getHSACodeProps(MF, ProgramInfo); + auto DebugProps = getHSADebugProps(MF, ProgramInfo); + + HSAMetadata.mKernels.push_back(Kernel::Metadata()); + auto &Kernel = HSAMetadata.mKernels.back(); + + Kernel.mName = Func.getName(); + Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); + emitKernelLanguage(Func); + emitKernelAttrs(Func); + emitKernelArgs(Func); + HSAMetadata.mKernels.back().mCodeProps = CodeProps; + HSAMetadata.mKernels.back().mDebugProps = DebugProps; +} + +//===----------------------------------------------------------------------===// +// HSAMetadataStreamerV3 +//===----------------------------------------------------------------------===// + +void MetadataStreamerV3::dump(StringRef HSAMetadataString) const { + errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; +} + +void MetadataStreamerV3::verify(StringRef HSAMetadataString) const { + errs() << "AMDGPU HSA Metadata Parser Test: "; + + msgpack::Document FromHSAMetadataString; + + if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) { + errs() << "FAIL\n"; + return; + } + + std::string ToHSAMetadataString; + raw_string_ostream StrOS(ToHSAMetadataString); + FromHSAMetadataString.toYAML(StrOS); + + errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n'; + if (HSAMetadataString != ToHSAMetadataString) { + errs() << "Original input: " << HSAMetadataString << '\n' + << "Produced output: " << StrOS.str() << '\n'; + } +} + +Optional<StringRef> +MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const { + return StringSwitch<Optional<StringRef>>(AccQual) + .Case("read_only", StringRef("read_only")) + .Case("write_only", StringRef("write_only")) + .Case("read_write", StringRef("read_write")) + .Default(None); +} + +Optional<StringRef> +MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const { + switch (AddressSpace) { + case AMDGPUAS::PRIVATE_ADDRESS: + return StringRef("private"); + case AMDGPUAS::GLOBAL_ADDRESS: + return StringRef("global"); + case AMDGPUAS::CONSTANT_ADDRESS: + return StringRef("constant"); + case AMDGPUAS::LOCAL_ADDRESS: + return StringRef("local"); + case AMDGPUAS::FLAT_ADDRESS: + return StringRef("generic"); + case AMDGPUAS::REGION_ADDRESS: + return StringRef("region"); + default: + return None; + } +} + +StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual, + StringRef BaseTypeName) const { + if (TypeQual.find("pipe") != StringRef::npos) + return "pipe"; + + return StringSwitch<StringRef>(BaseTypeName) + .Case("image1d_t", "image") + .Case("image1d_array_t", "image") + .Case("image1d_buffer_t", "image") + .Case("image2d_t", "image") + .Case("image2d_array_t", "image") + .Case("image2d_array_depth_t", "image") + .Case("image2d_array_msaa_t", "image") + .Case("image2d_array_msaa_depth_t", "image") + .Case("image2d_depth_t", "image") + .Case("image2d_msaa_t", "image") + .Case("image2d_msaa_depth_t", "image") + .Case("image3d_t", "image") + .Case("sampler_t", "sampler") + .Case("queue_t", "queue") + .Default(isa<PointerType>(Ty) + ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS + ? "dynamic_shared_pointer" + : "global_buffer") + : "by_value"); +} + +StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const { + switch (Ty->getTypeID()) { + case Type::IntegerTyID: { + auto Signed = !TypeName.startswith("u"); + switch (Ty->getIntegerBitWidth()) { + case 8: + return Signed ? "i8" : "u8"; + case 16: + return Signed ? "i16" : "u16"; + case 32: + return Signed ? "i32" : "u32"; + case 64: + return Signed ? "i64" : "u64"; + default: + return "struct"; + } + } + case Type::HalfTyID: + return "f16"; + case Type::FloatTyID: + return "f32"; + case Type::DoubleTyID: + return "f64"; + case Type::PointerTyID: + return getValueType(Ty->getPointerElementType(), TypeName); + case Type::VectorTyID: + return getValueType(Ty->getVectorElementType(), TypeName); + default: + return "struct"; + } +} + +std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const { + switch (Ty->getTypeID()) { + case Type::IntegerTyID: { + if (!Signed) + return (Twine('u') + getTypeName(Ty, true)).str(); + + auto BitWidth = Ty->getIntegerBitWidth(); + switch (BitWidth) { + case 8: + return "char"; + case 16: + return "short"; + case 32: + return "int"; + case 64: + return "long"; + default: + return (Twine('i') + Twine(BitWidth)).str(); + } + } + case Type::HalfTyID: + return "half"; + case Type::FloatTyID: + return "float"; + case Type::DoubleTyID: + return "double"; + case Type::VectorTyID: { + auto VecTy = cast<VectorType>(Ty); + auto ElTy = VecTy->getElementType(); + auto NumElements = VecTy->getVectorNumElements(); + return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); + } + default: + return "unknown"; + } +} + +msgpack::ArrayDocNode +MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const { + auto Dims = HSAMetadataDoc->getArrayNode(); + if (Node->getNumOperands() != 3) + return Dims; + + for (auto &Op : Node->operands()) + Dims.push_back(Dims.getDocument()->getNode( + uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue()))); + return Dims; +} + +void MetadataStreamerV3::emitVersion() { + auto Version = HSAMetadataDoc->getArrayNode(); + Version.push_back(Version.getDocument()->getNode(VersionMajor)); + Version.push_back(Version.getDocument()->getNode(VersionMinor)); + getRootMetadata("amdhsa.version") = Version; +} + +void MetadataStreamerV3::emitPrintf(const Module &Mod) { + auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); + if (!Node) + return; + + auto Printf = HSAMetadataDoc->getArrayNode(); + for (auto Op : Node->operands()) + if (Op->getNumOperands()) + Printf.push_back(Printf.getDocument()->getNode( + cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true)); + getRootMetadata("amdhsa.printf") = Printf; +} + +void MetadataStreamerV3::emitKernelLanguage(const Function &Func, + msgpack::MapDocNode Kern) { + // TODO: What about other languages? + auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); + if (!Node || !Node->getNumOperands()) + return; + auto Op0 = Node->getOperand(0); + if (Op0->getNumOperands() <= 1) + return; + + Kern[".language"] = Kern.getDocument()->getNode("OpenCL C"); + auto LanguageVersion = Kern.getDocument()->getArrayNode(); + LanguageVersion.push_back(Kern.getDocument()->getNode( + mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue())); + LanguageVersion.push_back(Kern.getDocument()->getNode( + mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue())); + Kern[".language_version"] = LanguageVersion; +} + +void MetadataStreamerV3::emitKernelAttrs(const Function &Func, + msgpack::MapDocNode Kern) { + + if (auto Node = Func.getMetadata("reqd_work_group_size")) + Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); + if (auto Node = Func.getMetadata("work_group_size_hint")) + Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node); + if (auto Node = Func.getMetadata("vec_type_hint")) { + Kern[".vec_type_hint"] = Kern.getDocument()->getNode( + getTypeName( + cast<ValueAsMetadata>(Node->getOperand(0))->getType(), + mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()), + /*Copy=*/true); + } + if (Func.hasFnAttribute("runtime-handle")) { + Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode( + Func.getFnAttribute("runtime-handle").getValueAsString().str(), + /*Copy=*/true); + } +} + +void MetadataStreamerV3::emitKernelArgs(const Function &Func, + msgpack::MapDocNode Kern) { + unsigned Offset = 0; + auto Args = HSAMetadataDoc->getArrayNode(); + for (auto &Arg : Func.args()) + emitKernelArg(Arg, Offset, Args); + + emitHiddenKernelArgs(Func, Offset, Args); + + Kern[".args"] = Args; +} + +void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset, + msgpack::ArrayDocNode Args) { + auto Func = Arg.getParent(); + auto ArgNo = Arg.getArgNo(); + const MDNode *Node; + + StringRef Name; + Node = Func->getMetadata("kernel_arg_name"); + if (Node && ArgNo < Node->getNumOperands()) + Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); + else if (Arg.hasName()) + Name = Arg.getName(); + + StringRef TypeName; + Node = Func->getMetadata("kernel_arg_type"); + if (Node && ArgNo < Node->getNumOperands()) + TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); + + StringRef BaseTypeName; + Node = Func->getMetadata("kernel_arg_base_type"); + if (Node && ArgNo < Node->getNumOperands()) + BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); + + StringRef AccQual; + if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && + Arg.hasNoAliasAttr()) { + AccQual = "read_only"; + } else { + Node = Func->getMetadata("kernel_arg_access_qual"); + if (Node && ArgNo < Node->getNumOperands()) + AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); + } + + StringRef TypeQual; + Node = Func->getMetadata("kernel_arg_type_qual"); + if (Node && ArgNo < Node->getNumOperands()) + TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); + + Type *Ty = Arg.getType(); + const DataLayout &DL = Func->getParent()->getDataLayout(); + + unsigned PointeeAlign = 0; + if (auto PtrTy = dyn_cast<PointerType>(Ty)) { + if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { + PointeeAlign = Arg.getParamAlignment(); + if (PointeeAlign == 0) + PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); + } + } + + emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(), + getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset, + Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual, + TypeQual); +} + +void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty, + StringRef ValueKind, unsigned &Offset, + msgpack::ArrayDocNode Args, + unsigned PointeeAlign, StringRef Name, + StringRef TypeName, + StringRef BaseTypeName, + StringRef AccQual, StringRef TypeQual) { + auto Arg = Args.getDocument()->getMapNode(); + + if (!Name.empty()) + Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true); + if (!TypeName.empty()) + Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true); + auto Size = DL.getTypeAllocSize(Ty); + auto Align = DL.getABITypeAlignment(Ty); + Arg[".size"] = Arg.getDocument()->getNode(Size); + Offset = alignTo(Offset, Align); + Arg[".offset"] = Arg.getDocument()->getNode(Offset); + Offset += Size; + Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true); + Arg[".value_type"] = + Arg.getDocument()->getNode(getValueType(Ty, BaseTypeName), /*Copy=*/true); + if (PointeeAlign) + Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign); + + if (auto PtrTy = dyn_cast<PointerType>(Ty)) + if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace())) + Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true); + + if (auto AQ = getAccessQualifier(AccQual)) + Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true); + + // TODO: Emit Arg[".actual_access"]. + + SmallVector<StringRef, 1> SplitTypeQuals; + TypeQual.split(SplitTypeQuals, " ", -1, false); + for (StringRef Key : SplitTypeQuals) { + if (Key == "const") + Arg[".is_const"] = Arg.getDocument()->getNode(true); + else if (Key == "restrict") + Arg[".is_restrict"] = Arg.getDocument()->getNode(true); + else if (Key == "volatile") + Arg[".is_volatile"] = Arg.getDocument()->getNode(true); + else if (Key == "pipe") + Arg[".is_pipe"] = Arg.getDocument()->getNode(true); + } + + Args.push_back(Arg); +} + +void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func, + unsigned &Offset, + msgpack::ArrayDocNode Args) { + int HiddenArgNumBytes = + getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); + + if (!HiddenArgNumBytes) + return; + + auto &DL = Func.getParent()->getDataLayout(); + auto Int64Ty = Type::getInt64Ty(Func.getContext()); + + if (HiddenArgNumBytes >= 8) + emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args); + if (HiddenArgNumBytes >= 16) + emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args); + if (HiddenArgNumBytes >= 24) + emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args); + + auto Int8PtrTy = + Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); + + // Emit "printf buffer" argument if printf is used, otherwise emit dummy + // "none" argument. + if (HiddenArgNumBytes >= 32) { + if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) + emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args); + else + emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); + } + + // Emit "default queue" and "completion action" arguments if enqueue kernel is + // used, otherwise emit dummy "none" arguments. + if (HiddenArgNumBytes >= 48) { + if (Func.hasFnAttribute("calls-enqueue-kernel")) { + emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args); + emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args); + } else { + emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); + emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); + } + } + + // Emit the pointer argument for multi-grid object. + if (HiddenArgNumBytes >= 56) + emitKernelArg(DL, Int8PtrTy, "hidden_multigrid_sync_arg", Offset, Args); +} + +msgpack::MapDocNode +MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) const { + const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); + const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); + const Function &F = MF.getFunction(); + + auto Kern = HSAMetadataDoc->getMapNode(); + + Align MaxKernArgAlign; + Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode( + STM.getKernArgSegmentSize(F, MaxKernArgAlign)); + Kern[".group_segment_fixed_size"] = + Kern.getDocument()->getNode(ProgramInfo.LDSSize); + Kern[".private_segment_fixed_size"] = + Kern.getDocument()->getNode(ProgramInfo.ScratchSize); + Kern[".kernarg_segment_align"] = + Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value()); + Kern[".wavefront_size"] = + Kern.getDocument()->getNode(STM.getWavefrontSize()); + Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR); + Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR); + Kern[".max_flat_workgroup_size"] = + Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize()); + Kern[".sgpr_spill_count"] = + Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs()); + Kern[".vgpr_spill_count"] = + Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs()); + + return Kern; +} + +bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { + return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true); +} + +void MetadataStreamerV3::begin(const Module &Mod) { + emitVersion(); + emitPrintf(Mod); + getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); +} + +void MetadataStreamerV3::end() { + std::string HSAMetadataString; + raw_string_ostream StrOS(HSAMetadataString); + HSAMetadataDoc->toYAML(StrOS); + + if (DumpHSAMetadata) + dump(StrOS.str()); + if (VerifyHSAMetadata) + verify(StrOS.str()); +} + +void MetadataStreamerV3::emitKernel(const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) { + auto &Func = MF.getFunction(); + auto Kern = getHSAKernelProps(MF, ProgramInfo); + + assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL || + Func.getCallingConv() == CallingConv::SPIR_KERNEL); + + auto Kernels = + getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true); + + { + Kern[".name"] = Kern.getDocument()->getNode(Func.getName()); + Kern[".symbol"] = Kern.getDocument()->getNode( + (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true); + emitKernelLanguage(Func, Kern); + emitKernelAttrs(Func, Kern); + emitKernelArgs(Func, Kern); + } + + Kernels.push_back(Kern); +} + +} // end namespace HSAMD +} // end namespace AMDGPU +} // end namespace llvm |