diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 956 | 
1 files changed, 956 insertions, 0 deletions
| diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp new file mode 100644 index 000000000000..9f5bcd8ff5f0 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -0,0 +1,956 @@ +//===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// AMDGPU HSA Metadata Streamer. +/// +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUHSAMetadataStreamer.h" +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUTargetStreamer.h" +#include "SIMachineFunctionInfo.h" +#include "SIProgramInfo.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +static cl::opt<bool> DumpHSAMetadata( +    "amdgpu-dump-hsa-metadata", +    cl::desc("Dump AMDGPU HSA Metadata")); +static cl::opt<bool> VerifyHSAMetadata( +    "amdgpu-verify-hsa-metadata", +    cl::desc("Verify AMDGPU HSA Metadata")); + +namespace AMDGPU { +namespace HSAMD { + +//===----------------------------------------------------------------------===// +// HSAMetadataStreamerV2 +//===----------------------------------------------------------------------===// +void MetadataStreamerV2::dump(StringRef HSAMetadataString) const { +  errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; +} + +void MetadataStreamerV2::verify(StringRef HSAMetadataString) const { +  errs() << "AMDGPU HSA Metadata Parser Test: "; + +  HSAMD::Metadata FromHSAMetadataString; +  if (fromString(HSAMetadataString, FromHSAMetadataString)) { +    errs() << "FAIL\n"; +    return; +  } + +  std::string ToHSAMetadataString; +  if (toString(FromHSAMetadataString, ToHSAMetadataString)) { +    errs() << "FAIL\n"; +    return; +  } + +  errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL") +         << '\n'; +  if (HSAMetadataString != ToHSAMetadataString) { +    errs() << "Original input: " << HSAMetadataString << '\n' +           << "Produced output: " << ToHSAMetadataString << '\n'; +  } +} + +AccessQualifier +MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const { +  if (AccQual.empty()) +    return AccessQualifier::Unknown; + +  return StringSwitch<AccessQualifier>(AccQual) +             .Case("read_only",  AccessQualifier::ReadOnly) +             .Case("write_only", AccessQualifier::WriteOnly) +             .Case("read_write", AccessQualifier::ReadWrite) +             .Default(AccessQualifier::Default); +} + +AddressSpaceQualifier +MetadataStreamerV2::getAddressSpaceQualifier( +    unsigned AddressSpace) const { +  switch (AddressSpace) { +  case AMDGPUAS::PRIVATE_ADDRESS: +    return AddressSpaceQualifier::Private; +  case AMDGPUAS::GLOBAL_ADDRESS: +    return AddressSpaceQualifier::Global; +  case AMDGPUAS::CONSTANT_ADDRESS: +    return AddressSpaceQualifier::Constant; +  case AMDGPUAS::LOCAL_ADDRESS: +    return AddressSpaceQualifier::Local; +  case AMDGPUAS::FLAT_ADDRESS: +    return AddressSpaceQualifier::Generic; +  case AMDGPUAS::REGION_ADDRESS: +    return AddressSpaceQualifier::Region; +  default: +    return AddressSpaceQualifier::Unknown; +  } +} + +ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual, +                                           StringRef BaseTypeName) const { +  if (TypeQual.find("pipe") != StringRef::npos) +    return ValueKind::Pipe; + +  return StringSwitch<ValueKind>(BaseTypeName) +             .Case("image1d_t", ValueKind::Image) +             .Case("image1d_array_t", ValueKind::Image) +             .Case("image1d_buffer_t", ValueKind::Image) +             .Case("image2d_t", ValueKind::Image) +             .Case("image2d_array_t", ValueKind::Image) +             .Case("image2d_array_depth_t", ValueKind::Image) +             .Case("image2d_array_msaa_t", ValueKind::Image) +             .Case("image2d_array_msaa_depth_t", ValueKind::Image) +             .Case("image2d_depth_t", ValueKind::Image) +             .Case("image2d_msaa_t", ValueKind::Image) +             .Case("image2d_msaa_depth_t", ValueKind::Image) +             .Case("image3d_t", ValueKind::Image) +             .Case("sampler_t", ValueKind::Sampler) +             .Case("queue_t", ValueKind::Queue) +             .Default(isa<PointerType>(Ty) ? +                          (Ty->getPointerAddressSpace() == +                           AMDGPUAS::LOCAL_ADDRESS ? +                           ValueKind::DynamicSharedPointer : +                           ValueKind::GlobalBuffer) : +                      ValueKind::ByValue); +} + +ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const { +  switch (Ty->getTypeID()) { +  case Type::IntegerTyID: { +    auto Signed = !TypeName.startswith("u"); +    switch (Ty->getIntegerBitWidth()) { +    case 8: +      return Signed ? ValueType::I8 : ValueType::U8; +    case 16: +      return Signed ? ValueType::I16 : ValueType::U16; +    case 32: +      return Signed ? ValueType::I32 : ValueType::U32; +    case 64: +      return Signed ? ValueType::I64 : ValueType::U64; +    default: +      return ValueType::Struct; +    } +  } +  case Type::HalfTyID: +    return ValueType::F16; +  case Type::FloatTyID: +    return ValueType::F32; +  case Type::DoubleTyID: +    return ValueType::F64; +  case Type::PointerTyID: +    return getValueType(Ty->getPointerElementType(), TypeName); +  case Type::VectorTyID: +    return getValueType(Ty->getVectorElementType(), TypeName); +  default: +    return ValueType::Struct; +  } +} + +std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const { +  switch (Ty->getTypeID()) { +  case Type::IntegerTyID: { +    if (!Signed) +      return (Twine('u') + getTypeName(Ty, true)).str(); + +    auto BitWidth = Ty->getIntegerBitWidth(); +    switch (BitWidth) { +    case 8: +      return "char"; +    case 16: +      return "short"; +    case 32: +      return "int"; +    case 64: +      return "long"; +    default: +      return (Twine('i') + Twine(BitWidth)).str(); +    } +  } +  case Type::HalfTyID: +    return "half"; +  case Type::FloatTyID: +    return "float"; +  case Type::DoubleTyID: +    return "double"; +  case Type::VectorTyID: { +    auto VecTy = cast<VectorType>(Ty); +    auto ElTy = VecTy->getElementType(); +    auto NumElements = VecTy->getVectorNumElements(); +    return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); +  } +  default: +    return "unknown"; +  } +} + +std::vector<uint32_t> +MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const { +  std::vector<uint32_t> Dims; +  if (Node->getNumOperands() != 3) +    return Dims; + +  for (auto &Op : Node->operands()) +    Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue()); +  return Dims; +} + +Kernel::CodeProps::Metadata +MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF, +                                    const SIProgramInfo &ProgramInfo) const { +  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); +  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); +  HSAMD::Kernel::CodeProps::Metadata HSACodeProps; +  const Function &F = MF.getFunction(); + +  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL || +         F.getCallingConv() == CallingConv::SPIR_KERNEL); + +  Align MaxKernArgAlign; +  HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, +                                                               MaxKernArgAlign); +  HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; +  HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; +  HSACodeProps.mKernargSegmentAlign = +      std::max(MaxKernArgAlign, Align(4)).value(); +  HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); +  HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; +  HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; +  HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); +  HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; +  HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); +  HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); +  HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); + +  return HSACodeProps; +} + +Kernel::DebugProps::Metadata +MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF, +                                     const SIProgramInfo &ProgramInfo) const { +  return HSAMD::Kernel::DebugProps::Metadata(); +} + +void MetadataStreamerV2::emitVersion() { +  auto &Version = HSAMetadata.mVersion; + +  Version.push_back(VersionMajor); +  Version.push_back(VersionMinor); +} + +void MetadataStreamerV2::emitPrintf(const Module &Mod) { +  auto &Printf = HSAMetadata.mPrintf; + +  auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); +  if (!Node) +    return; + +  for (auto Op : Node->operands()) +    if (Op->getNumOperands()) +      Printf.push_back(cast<MDString>(Op->getOperand(0))->getString()); +} + +void MetadataStreamerV2::emitKernelLanguage(const Function &Func) { +  auto &Kernel = HSAMetadata.mKernels.back(); + +  // TODO: What about other languages? +  auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); +  if (!Node || !Node->getNumOperands()) +    return; +  auto Op0 = Node->getOperand(0); +  if (Op0->getNumOperands() <= 1) +    return; + +  Kernel.mLanguage = "OpenCL C"; +  Kernel.mLanguageVersion.push_back( +      mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()); +  Kernel.mLanguageVersion.push_back( +      mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); +} + +void MetadataStreamerV2::emitKernelAttrs(const Function &Func) { +  auto &Attrs = HSAMetadata.mKernels.back().mAttrs; + +  if (auto Node = Func.getMetadata("reqd_work_group_size")) +    Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); +  if (auto Node = Func.getMetadata("work_group_size_hint")) +    Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); +  if (auto Node = Func.getMetadata("vec_type_hint")) { +    Attrs.mVecTypeHint = getTypeName( +        cast<ValueAsMetadata>(Node->getOperand(0))->getType(), +        mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()); +  } +  if (Func.hasFnAttribute("runtime-handle")) { +    Attrs.mRuntimeHandle = +        Func.getFnAttribute("runtime-handle").getValueAsString().str(); +  } +} + +void MetadataStreamerV2::emitKernelArgs(const Function &Func) { +  for (auto &Arg : Func.args()) +    emitKernelArg(Arg); + +  emitHiddenKernelArgs(Func); +} + +void MetadataStreamerV2::emitKernelArg(const Argument &Arg) { +  auto Func = Arg.getParent(); +  auto ArgNo = Arg.getArgNo(); +  const MDNode *Node; + +  StringRef Name; +  Node = Func->getMetadata("kernel_arg_name"); +  if (Node && ArgNo < Node->getNumOperands()) +    Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); +  else if (Arg.hasName()) +    Name = Arg.getName(); + +  StringRef TypeName; +  Node = Func->getMetadata("kernel_arg_type"); +  if (Node && ArgNo < Node->getNumOperands()) +    TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); + +  StringRef BaseTypeName; +  Node = Func->getMetadata("kernel_arg_base_type"); +  if (Node && ArgNo < Node->getNumOperands()) +    BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); + +  StringRef AccQual; +  if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && +      Arg.hasNoAliasAttr()) { +    AccQual = "read_only"; +  } else { +    Node = Func->getMetadata("kernel_arg_access_qual"); +    if (Node && ArgNo < Node->getNumOperands()) +      AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); +  } + +  StringRef TypeQual; +  Node = Func->getMetadata("kernel_arg_type_qual"); +  if (Node && ArgNo < Node->getNumOperands()) +    TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); + +  Type *Ty = Arg.getType(); +  const DataLayout &DL = Func->getParent()->getDataLayout(); + +  unsigned PointeeAlign = 0; +  if (auto PtrTy = dyn_cast<PointerType>(Ty)) { +    if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { +      PointeeAlign = Arg.getParamAlignment(); +      if (PointeeAlign == 0) +        PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); +    } +  } + +  emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName), +                PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); +} + +void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty, +                                       ValueKind ValueKind, +                                       unsigned PointeeAlign, StringRef Name, +                                       StringRef TypeName, +                                       StringRef BaseTypeName, +                                       StringRef AccQual, StringRef TypeQual) { +  HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); +  auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); + +  Arg.mName = Name; +  Arg.mTypeName = TypeName; +  Arg.mSize = DL.getTypeAllocSize(Ty); +  Arg.mAlign = DL.getABITypeAlignment(Ty); +  Arg.mValueKind = ValueKind; +  Arg.mValueType = getValueType(Ty, BaseTypeName); +  Arg.mPointeeAlign = PointeeAlign; + +  if (auto PtrTy = dyn_cast<PointerType>(Ty)) +    Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace()); + +  Arg.mAccQual = getAccessQualifier(AccQual); + +  // TODO: Emit Arg.mActualAccQual. + +  SmallVector<StringRef, 1> SplitTypeQuals; +  TypeQual.split(SplitTypeQuals, " ", -1, false); +  for (StringRef Key : SplitTypeQuals) { +    auto P = StringSwitch<bool*>(Key) +                 .Case("const",    &Arg.mIsConst) +                 .Case("restrict", &Arg.mIsRestrict) +                 .Case("volatile", &Arg.mIsVolatile) +                 .Case("pipe",     &Arg.mIsPipe) +                 .Default(nullptr); +    if (P) +      *P = true; +  } +} + +void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) { +  int HiddenArgNumBytes = +      getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); + +  if (!HiddenArgNumBytes) +    return; + +  auto &DL = Func.getParent()->getDataLayout(); +  auto Int64Ty = Type::getInt64Ty(Func.getContext()); + +  if (HiddenArgNumBytes >= 8) +    emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX); +  if (HiddenArgNumBytes >= 16) +    emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY); +  if (HiddenArgNumBytes >= 24) +    emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ); + +  auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), +                                      AMDGPUAS::GLOBAL_ADDRESS); + +  // Emit "printf buffer" argument if printf is used, otherwise emit dummy +  // "none" argument. +  if (HiddenArgNumBytes >= 32) { +    if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) +      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer); +    else +      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); +  } + +  // Emit "default queue" and "completion action" arguments if enqueue kernel is +  // used, otherwise emit dummy "none" arguments. +  if (HiddenArgNumBytes >= 48) { +    if (Func.hasFnAttribute("calls-enqueue-kernel")) { +      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue); +      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction); +    } else { +      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); +      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); +    } +  } + +  // Emit the pointer argument for multi-grid object. +  if (HiddenArgNumBytes >= 56) +    emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenMultiGridSyncArg); +} + +bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { +  return TargetStreamer.EmitHSAMetadata(getHSAMetadata()); +} + +void MetadataStreamerV2::begin(const Module &Mod) { +  emitVersion(); +  emitPrintf(Mod); +} + +void MetadataStreamerV2::end() { +  std::string HSAMetadataString; +  if (toString(HSAMetadata, HSAMetadataString)) +    return; + +  if (DumpHSAMetadata) +    dump(HSAMetadataString); +  if (VerifyHSAMetadata) +    verify(HSAMetadataString); +} + +void MetadataStreamerV2::emitKernel(const MachineFunction &MF, +                                    const SIProgramInfo &ProgramInfo) { +  auto &Func = MF.getFunction(); +  if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) +    return; + +  auto CodeProps = getHSACodeProps(MF, ProgramInfo); +  auto DebugProps = getHSADebugProps(MF, ProgramInfo); + +  HSAMetadata.mKernels.push_back(Kernel::Metadata()); +  auto &Kernel = HSAMetadata.mKernels.back(); + +  Kernel.mName = Func.getName(); +  Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); +  emitKernelLanguage(Func); +  emitKernelAttrs(Func); +  emitKernelArgs(Func); +  HSAMetadata.mKernels.back().mCodeProps = CodeProps; +  HSAMetadata.mKernels.back().mDebugProps = DebugProps; +} + +//===----------------------------------------------------------------------===// +// HSAMetadataStreamerV3 +//===----------------------------------------------------------------------===// + +void MetadataStreamerV3::dump(StringRef HSAMetadataString) const { +  errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; +} + +void MetadataStreamerV3::verify(StringRef HSAMetadataString) const { +  errs() << "AMDGPU HSA Metadata Parser Test: "; + +  msgpack::Document FromHSAMetadataString; + +  if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) { +    errs() << "FAIL\n"; +    return; +  } + +  std::string ToHSAMetadataString; +  raw_string_ostream StrOS(ToHSAMetadataString); +  FromHSAMetadataString.toYAML(StrOS); + +  errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n'; +  if (HSAMetadataString != ToHSAMetadataString) { +    errs() << "Original input: " << HSAMetadataString << '\n' +           << "Produced output: " << StrOS.str() << '\n'; +  } +} + +Optional<StringRef> +MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const { +  return StringSwitch<Optional<StringRef>>(AccQual) +      .Case("read_only", StringRef("read_only")) +      .Case("write_only", StringRef("write_only")) +      .Case("read_write", StringRef("read_write")) +      .Default(None); +} + +Optional<StringRef> +MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const { +  switch (AddressSpace) { +  case AMDGPUAS::PRIVATE_ADDRESS: +    return StringRef("private"); +  case AMDGPUAS::GLOBAL_ADDRESS: +    return StringRef("global"); +  case AMDGPUAS::CONSTANT_ADDRESS: +    return StringRef("constant"); +  case AMDGPUAS::LOCAL_ADDRESS: +    return StringRef("local"); +  case AMDGPUAS::FLAT_ADDRESS: +    return StringRef("generic"); +  case AMDGPUAS::REGION_ADDRESS: +    return StringRef("region"); +  default: +    return None; +  } +} + +StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual, +                                           StringRef BaseTypeName) const { +  if (TypeQual.find("pipe") != StringRef::npos) +    return "pipe"; + +  return StringSwitch<StringRef>(BaseTypeName) +      .Case("image1d_t", "image") +      .Case("image1d_array_t", "image") +      .Case("image1d_buffer_t", "image") +      .Case("image2d_t", "image") +      .Case("image2d_array_t", "image") +      .Case("image2d_array_depth_t", "image") +      .Case("image2d_array_msaa_t", "image") +      .Case("image2d_array_msaa_depth_t", "image") +      .Case("image2d_depth_t", "image") +      .Case("image2d_msaa_t", "image") +      .Case("image2d_msaa_depth_t", "image") +      .Case("image3d_t", "image") +      .Case("sampler_t", "sampler") +      .Case("queue_t", "queue") +      .Default(isa<PointerType>(Ty) +                   ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS +                          ? "dynamic_shared_pointer" +                          : "global_buffer") +                   : "by_value"); +} + +StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const { +  switch (Ty->getTypeID()) { +  case Type::IntegerTyID: { +    auto Signed = !TypeName.startswith("u"); +    switch (Ty->getIntegerBitWidth()) { +    case 8: +      return Signed ? "i8" : "u8"; +    case 16: +      return Signed ? "i16" : "u16"; +    case 32: +      return Signed ? "i32" : "u32"; +    case 64: +      return Signed ? "i64" : "u64"; +    default: +      return "struct"; +    } +  } +  case Type::HalfTyID: +    return "f16"; +  case Type::FloatTyID: +    return "f32"; +  case Type::DoubleTyID: +    return "f64"; +  case Type::PointerTyID: +    return getValueType(Ty->getPointerElementType(), TypeName); +  case Type::VectorTyID: +    return getValueType(Ty->getVectorElementType(), TypeName); +  default: +    return "struct"; +  } +} + +std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const { +  switch (Ty->getTypeID()) { +  case Type::IntegerTyID: { +    if (!Signed) +      return (Twine('u') + getTypeName(Ty, true)).str(); + +    auto BitWidth = Ty->getIntegerBitWidth(); +    switch (BitWidth) { +    case 8: +      return "char"; +    case 16: +      return "short"; +    case 32: +      return "int"; +    case 64: +      return "long"; +    default: +      return (Twine('i') + Twine(BitWidth)).str(); +    } +  } +  case Type::HalfTyID: +    return "half"; +  case Type::FloatTyID: +    return "float"; +  case Type::DoubleTyID: +    return "double"; +  case Type::VectorTyID: { +    auto VecTy = cast<VectorType>(Ty); +    auto ElTy = VecTy->getElementType(); +    auto NumElements = VecTy->getVectorNumElements(); +    return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); +  } +  default: +    return "unknown"; +  } +} + +msgpack::ArrayDocNode +MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const { +  auto Dims = HSAMetadataDoc->getArrayNode(); +  if (Node->getNumOperands() != 3) +    return Dims; + +  for (auto &Op : Node->operands()) +    Dims.push_back(Dims.getDocument()->getNode( +        uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue()))); +  return Dims; +} + +void MetadataStreamerV3::emitVersion() { +  auto Version = HSAMetadataDoc->getArrayNode(); +  Version.push_back(Version.getDocument()->getNode(VersionMajor)); +  Version.push_back(Version.getDocument()->getNode(VersionMinor)); +  getRootMetadata("amdhsa.version") = Version; +} + +void MetadataStreamerV3::emitPrintf(const Module &Mod) { +  auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); +  if (!Node) +    return; + +  auto Printf = HSAMetadataDoc->getArrayNode(); +  for (auto Op : Node->operands()) +    if (Op->getNumOperands()) +      Printf.push_back(Printf.getDocument()->getNode( +          cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true)); +  getRootMetadata("amdhsa.printf") = Printf; +} + +void MetadataStreamerV3::emitKernelLanguage(const Function &Func, +                                            msgpack::MapDocNode Kern) { +  // TODO: What about other languages? +  auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); +  if (!Node || !Node->getNumOperands()) +    return; +  auto Op0 = Node->getOperand(0); +  if (Op0->getNumOperands() <= 1) +    return; + +  Kern[".language"] = Kern.getDocument()->getNode("OpenCL C"); +  auto LanguageVersion = Kern.getDocument()->getArrayNode(); +  LanguageVersion.push_back(Kern.getDocument()->getNode( +      mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue())); +  LanguageVersion.push_back(Kern.getDocument()->getNode( +      mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue())); +  Kern[".language_version"] = LanguageVersion; +} + +void MetadataStreamerV3::emitKernelAttrs(const Function &Func, +                                         msgpack::MapDocNode Kern) { + +  if (auto Node = Func.getMetadata("reqd_work_group_size")) +    Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); +  if (auto Node = Func.getMetadata("work_group_size_hint")) +    Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node); +  if (auto Node = Func.getMetadata("vec_type_hint")) { +    Kern[".vec_type_hint"] = Kern.getDocument()->getNode( +        getTypeName( +            cast<ValueAsMetadata>(Node->getOperand(0))->getType(), +            mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()), +        /*Copy=*/true); +  } +  if (Func.hasFnAttribute("runtime-handle")) { +    Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode( +        Func.getFnAttribute("runtime-handle").getValueAsString().str(), +        /*Copy=*/true); +  } +} + +void MetadataStreamerV3::emitKernelArgs(const Function &Func, +                                        msgpack::MapDocNode Kern) { +  unsigned Offset = 0; +  auto Args = HSAMetadataDoc->getArrayNode(); +  for (auto &Arg : Func.args()) +    emitKernelArg(Arg, Offset, Args); + +  emitHiddenKernelArgs(Func, Offset, Args); + +  Kern[".args"] = Args; +} + +void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset, +                                       msgpack::ArrayDocNode Args) { +  auto Func = Arg.getParent(); +  auto ArgNo = Arg.getArgNo(); +  const MDNode *Node; + +  StringRef Name; +  Node = Func->getMetadata("kernel_arg_name"); +  if (Node && ArgNo < Node->getNumOperands()) +    Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); +  else if (Arg.hasName()) +    Name = Arg.getName(); + +  StringRef TypeName; +  Node = Func->getMetadata("kernel_arg_type"); +  if (Node && ArgNo < Node->getNumOperands()) +    TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); + +  StringRef BaseTypeName; +  Node = Func->getMetadata("kernel_arg_base_type"); +  if (Node && ArgNo < Node->getNumOperands()) +    BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); + +  StringRef AccQual; +  if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && +      Arg.hasNoAliasAttr()) { +    AccQual = "read_only"; +  } else { +    Node = Func->getMetadata("kernel_arg_access_qual"); +    if (Node && ArgNo < Node->getNumOperands()) +      AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); +  } + +  StringRef TypeQual; +  Node = Func->getMetadata("kernel_arg_type_qual"); +  if (Node && ArgNo < Node->getNumOperands()) +    TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); + +  Type *Ty = Arg.getType(); +  const DataLayout &DL = Func->getParent()->getDataLayout(); + +  unsigned PointeeAlign = 0; +  if (auto PtrTy = dyn_cast<PointerType>(Ty)) { +    if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { +      PointeeAlign = Arg.getParamAlignment(); +      if (PointeeAlign == 0) +        PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); +    } +  } + +  emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(), +                getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset, +                Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual, +                TypeQual); +} + +void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty, +                                       StringRef ValueKind, unsigned &Offset, +                                       msgpack::ArrayDocNode Args, +                                       unsigned PointeeAlign, StringRef Name, +                                       StringRef TypeName, +                                       StringRef BaseTypeName, +                                       StringRef AccQual, StringRef TypeQual) { +  auto Arg = Args.getDocument()->getMapNode(); + +  if (!Name.empty()) +    Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true); +  if (!TypeName.empty()) +    Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true); +  auto Size = DL.getTypeAllocSize(Ty); +  auto Align = DL.getABITypeAlignment(Ty); +  Arg[".size"] = Arg.getDocument()->getNode(Size); +  Offset = alignTo(Offset, Align); +  Arg[".offset"] = Arg.getDocument()->getNode(Offset); +  Offset += Size; +  Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true); +  Arg[".value_type"] = +      Arg.getDocument()->getNode(getValueType(Ty, BaseTypeName), /*Copy=*/true); +  if (PointeeAlign) +    Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign); + +  if (auto PtrTy = dyn_cast<PointerType>(Ty)) +    if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace())) +      Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true); + +  if (auto AQ = getAccessQualifier(AccQual)) +    Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true); + +  // TODO: Emit Arg[".actual_access"]. + +  SmallVector<StringRef, 1> SplitTypeQuals; +  TypeQual.split(SplitTypeQuals, " ", -1, false); +  for (StringRef Key : SplitTypeQuals) { +    if (Key == "const") +      Arg[".is_const"] = Arg.getDocument()->getNode(true); +    else if (Key == "restrict") +      Arg[".is_restrict"] = Arg.getDocument()->getNode(true); +    else if (Key == "volatile") +      Arg[".is_volatile"] = Arg.getDocument()->getNode(true); +    else if (Key == "pipe") +      Arg[".is_pipe"] = Arg.getDocument()->getNode(true); +  } + +  Args.push_back(Arg); +} + +void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func, +                                              unsigned &Offset, +                                              msgpack::ArrayDocNode Args) { +  int HiddenArgNumBytes = +      getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0); + +  if (!HiddenArgNumBytes) +    return; + +  auto &DL = Func.getParent()->getDataLayout(); +  auto Int64Ty = Type::getInt64Ty(Func.getContext()); + +  if (HiddenArgNumBytes >= 8) +    emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args); +  if (HiddenArgNumBytes >= 16) +    emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args); +  if (HiddenArgNumBytes >= 24) +    emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args); + +  auto Int8PtrTy = +      Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); + +  // Emit "printf buffer" argument if printf is used, otherwise emit dummy +  // "none" argument. +  if (HiddenArgNumBytes >= 32) { +    if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) +      emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args); +    else +      emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); +  } + +  // Emit "default queue" and "completion action" arguments if enqueue kernel is +  // used, otherwise emit dummy "none" arguments. +  if (HiddenArgNumBytes >= 48) { +    if (Func.hasFnAttribute("calls-enqueue-kernel")) { +      emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args); +      emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args); +    } else { +      emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); +      emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); +    } +  } + +  // Emit the pointer argument for multi-grid object. +  if (HiddenArgNumBytes >= 56) +    emitKernelArg(DL, Int8PtrTy, "hidden_multigrid_sync_arg", Offset, Args); +} + +msgpack::MapDocNode +MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, +                                      const SIProgramInfo &ProgramInfo) const { +  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); +  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); +  const Function &F = MF.getFunction(); + +  auto Kern = HSAMetadataDoc->getMapNode(); + +  Align MaxKernArgAlign; +  Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode( +      STM.getKernArgSegmentSize(F, MaxKernArgAlign)); +  Kern[".group_segment_fixed_size"] = +      Kern.getDocument()->getNode(ProgramInfo.LDSSize); +  Kern[".private_segment_fixed_size"] = +      Kern.getDocument()->getNode(ProgramInfo.ScratchSize); +  Kern[".kernarg_segment_align"] = +      Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value()); +  Kern[".wavefront_size"] = +      Kern.getDocument()->getNode(STM.getWavefrontSize()); +  Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR); +  Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR); +  Kern[".max_flat_workgroup_size"] = +      Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize()); +  Kern[".sgpr_spill_count"] = +      Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs()); +  Kern[".vgpr_spill_count"] = +      Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs()); + +  return Kern; +} + +bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) { +  return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true); +} + +void MetadataStreamerV3::begin(const Module &Mod) { +  emitVersion(); +  emitPrintf(Mod); +  getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); +} + +void MetadataStreamerV3::end() { +  std::string HSAMetadataString; +  raw_string_ostream StrOS(HSAMetadataString); +  HSAMetadataDoc->toYAML(StrOS); + +  if (DumpHSAMetadata) +    dump(StrOS.str()); +  if (VerifyHSAMetadata) +    verify(StrOS.str()); +} + +void MetadataStreamerV3::emitKernel(const MachineFunction &MF, +                                    const SIProgramInfo &ProgramInfo) { +  auto &Func = MF.getFunction(); +  auto Kern = getHSAKernelProps(MF, ProgramInfo); + +  assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL || +         Func.getCallingConv() == CallingConv::SPIR_KERNEL); + +  auto Kernels = +      getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true); + +  { +    Kern[".name"] = Kern.getDocument()->getNode(Func.getName()); +    Kern[".symbol"] = Kern.getDocument()->getNode( +        (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true); +    emitKernelLanguage(Func, Kern); +    emitKernelAttrs(Func, Kern); +    emitKernelArgs(Func, Kern); +  } + +  Kernels.push_back(Kern); +} + +} // end namespace HSAMD +} // end namespace AMDGPU +} // end namespace llvm | 
