diff options
Diffstat (limited to 'lib/Target/AMDGPU/MCTargetDesc')
12 files changed, 1270 insertions, 490 deletions
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index ffb92aae599e..f3266fe82955 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -37,7 +37,7 @@ public: bool &IsResolved) override; void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override { @@ -131,7 +131,7 @@ void AMDGPUAsmBackend::processFixupValue(const MCAssembler &Asm, void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, - bool IsPCRel) const { + bool IsPCRel, MCContext &Ctx) const { if (!Value) return; // Doesn't change encoding. @@ -164,7 +164,20 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo( } bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { - OW->WriteZeros(Count); + // If the count is not 4-byte aligned, we must be writing data into the text + // section (otherwise we have unaligned instructions, and thus have far + // bigger problems), so just write zeros instead. + OW->WriteZeros(Count % 4); + + // We are properly aligned, so write NOPs as requested. + Count /= 4; + + // FIXME: R600 support. + // s_nop 0 + const uint32_t Encoded_S_NOP_0 = 0xbf800000; + + for (uint64_t I = 0; I != Count; ++I) + OW->write32(Encoded_S_NOP_0); return true; } diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h new file mode 100644 index 000000000000..816e8c744b27 --- /dev/null +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h @@ -0,0 +1,422 @@ +//===--- AMDGPUCodeObjectMetadata.h -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief AMDGPU Code Object Metadata definitions and in-memory +/// representations. +/// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H +#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H + +#include <cstdint> +#include <string> +#include <system_error> +#include <vector> + +namespace llvm { +namespace AMDGPU { + +//===----------------------------------------------------------------------===// +// Code Object Metadata. +//===----------------------------------------------------------------------===// +namespace CodeObject { + +/// \brief Code object metadata major version. +constexpr uint32_t MetadataVersionMajor = 1; +/// \brief Code object metadata minor version. +constexpr uint32_t MetadataVersionMinor = 0; + +/// \brief Code object metadata beginning assembler directive. +constexpr char MetadataAssemblerDirectiveBegin[] = + ".amdgpu_code_object_metadata"; +/// \brief Code object metadata ending assembler directive. +constexpr char MetadataAssemblerDirectiveEnd[] = + ".end_amdgpu_code_object_metadata"; + +/// \brief Access qualifiers. +enum class AccessQualifier : uint8_t { + Default = 0, + ReadOnly = 1, + WriteOnly = 2, + ReadWrite = 3, + Unknown = 0xff +}; + +/// \brief Address space qualifiers. +enum class AddressSpaceQualifier : uint8_t { + Private = 0, + Global = 1, + Constant = 2, + Local = 3, + Generic = 4, + Region = 5, + Unknown = 0xff +}; + +/// \brief Value kinds. +enum class ValueKind : uint8_t { + ByValue = 0, + GlobalBuffer = 1, + DynamicSharedPointer = 2, + Sampler = 3, + Image = 4, + Pipe = 5, + Queue = 6, + HiddenGlobalOffsetX = 7, + HiddenGlobalOffsetY = 8, + HiddenGlobalOffsetZ = 9, + HiddenNone = 10, + HiddenPrintfBuffer = 11, + HiddenDefaultQueue = 12, + HiddenCompletionAction = 13, + Unknown = 0xff +}; + +/// \brief Value types. +enum class ValueType : uint8_t { + Struct = 0, + I8 = 1, + U8 = 2, + I16 = 3, + U16 = 4, + F16 = 5, + I32 = 6, + U32 = 7, + F32 = 8, + I64 = 9, + U64 = 10, + F64 = 11, + Unknown = 0xff +}; + +//===----------------------------------------------------------------------===// +// Kernel Metadata. +//===----------------------------------------------------------------------===// +namespace Kernel { + +//===----------------------------------------------------------------------===// +// Kernel Attributes Metadata. +//===----------------------------------------------------------------------===// +namespace Attrs { + +namespace Key { +/// \brief Key for Kernel::Attr::Metadata::mReqdWorkGroupSize. +constexpr char ReqdWorkGroupSize[] = "ReqdWorkGroupSize"; +/// \brief Key for Kernel::Attr::Metadata::mWorkGroupSizeHint. +constexpr char WorkGroupSizeHint[] = "WorkGroupSizeHint"; +/// \brief Key for Kernel::Attr::Metadata::mVecTypeHint. +constexpr char VecTypeHint[] = "VecTypeHint"; +} // end namespace Key + +/// \brief In-memory representation of kernel attributes metadata. +struct Metadata final { + /// \brief 'reqd_work_group_size' attribute. Optional. + std::vector<uint32_t> mReqdWorkGroupSize = std::vector<uint32_t>(); + /// \brief 'work_group_size_hint' attribute. Optional. + std::vector<uint32_t> mWorkGroupSizeHint = std::vector<uint32_t>(); + /// \brief 'vec_type_hint' attribute. Optional. + std::string mVecTypeHint = std::string(); + + /// \brief Default constructor. + Metadata() = default; + + /// \returns True if kernel attributes metadata is empty, false otherwise. + bool empty() const { + return mReqdWorkGroupSize.empty() && + mWorkGroupSizeHint.empty() && + mVecTypeHint.empty(); + } + + /// \returns True if kernel attributes metadata is not empty, false otherwise. + bool notEmpty() const { + return !empty(); + } +}; + +} // end namespace Attrs + +//===----------------------------------------------------------------------===// +// Kernel Argument Metadata. +//===----------------------------------------------------------------------===// +namespace Arg { + +namespace Key { +/// \brief Key for Kernel::Arg::Metadata::mSize. +constexpr char Size[] = "Size"; +/// \brief Key for Kernel::Arg::Metadata::mAlign. +constexpr char Align[] = "Align"; +/// \brief Key for Kernel::Arg::Metadata::mValueKind. +constexpr char ValueKind[] = "ValueKind"; +/// \brief Key for Kernel::Arg::Metadata::mValueType. +constexpr char ValueType[] = "ValueType"; +/// \brief Key for Kernel::Arg::Metadata::mPointeeAlign. +constexpr char PointeeAlign[] = "PointeeAlign"; +/// \brief Key for Kernel::Arg::Metadata::mAccQual. +constexpr char AccQual[] = "AccQual"; +/// \brief Key for Kernel::Arg::Metadata::mAddrSpaceQual. +constexpr char AddrSpaceQual[] = "AddrSpaceQual"; +/// \brief Key for Kernel::Arg::Metadata::mIsConst. +constexpr char IsConst[] = "IsConst"; +/// \brief Key for Kernel::Arg::Metadata::mIsPipe. +constexpr char IsPipe[] = "IsPipe"; +/// \brief Key for Kernel::Arg::Metadata::mIsRestrict. +constexpr char IsRestrict[] = "IsRestrict"; +/// \brief Key for Kernel::Arg::Metadata::mIsVolatile. +constexpr char IsVolatile[] = "IsVolatile"; +/// \brief Key for Kernel::Arg::Metadata::mName. +constexpr char Name[] = "Name"; +/// \brief Key for Kernel::Arg::Metadata::mTypeName. +constexpr char TypeName[] = "TypeName"; +} // end namespace Key + +/// \brief In-memory representation of kernel argument metadata. +struct Metadata final { + /// \brief Size in bytes. Required. + uint32_t mSize = 0; + /// \brief Alignment in bytes. Required. + uint32_t mAlign = 0; + /// \brief Value kind. Required. + ValueKind mValueKind = ValueKind::Unknown; + /// \brief Value type. Required. + ValueType mValueType = ValueType::Unknown; + /// \brief Pointee alignment in bytes. Optional. + uint32_t mPointeeAlign = 0; + /// \brief Access qualifier. Optional. + AccessQualifier mAccQual = AccessQualifier::Unknown; + /// \brief Address space qualifier. Optional. + AddressSpaceQualifier mAddrSpaceQual = AddressSpaceQualifier::Unknown; + /// \brief True if 'const' qualifier is specified. Optional. + bool mIsConst = false; + /// \brief True if 'pipe' qualifier is specified. Optional. + bool mIsPipe = false; + /// \brief True if 'restrict' qualifier is specified. Optional. + bool mIsRestrict = false; + /// \brief True if 'volatile' qualifier is specified. Optional. + bool mIsVolatile = false; + /// \brief Name. Optional. + std::string mName = std::string(); + /// \brief Type name. Optional. + std::string mTypeName = std::string(); + + /// \brief Default constructor. + Metadata() = default; +}; + +} // end namespace Arg + +//===----------------------------------------------------------------------===// +// Kernel Code Properties Metadata. +//===----------------------------------------------------------------------===// +namespace CodeProps { + +namespace Key { +/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentSize. +constexpr char KernargSegmentSize[] = "KernargSegmentSize"; +/// \brief Key for Kernel::CodeProps::Metadata::mWorkgroupGroupSegmentSize. +constexpr char WorkgroupGroupSegmentSize[] = "WorkgroupGroupSegmentSize"; +/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemPrivateSegmentSize. +constexpr char WorkitemPrivateSegmentSize[] = "WorkitemPrivateSegmentSize"; +/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontNumSGPRs. +constexpr char WavefrontNumSGPRs[] = "WavefrontNumSGPRs"; +/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemNumVGPRs. +constexpr char WorkitemNumVGPRs[] = "WorkitemNumVGPRs"; +/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentAlign. +constexpr char KernargSegmentAlign[] = "KernargSegmentAlign"; +/// \brief Key for Kernel::CodeProps::Metadata::mGroupSegmentAlign. +constexpr char GroupSegmentAlign[] = "GroupSegmentAlign"; +/// \brief Key for Kernel::CodeProps::Metadata::mPrivateSegmentAlign. +constexpr char PrivateSegmentAlign[] = "PrivateSegmentAlign"; +/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontSize. +constexpr char WavefrontSize[] = "WavefrontSize"; +} // end namespace Key + +/// \brief In-memory representation of kernel code properties metadata. +struct Metadata final { + /// \brief Size in bytes of the kernarg segment memory. Kernarg segment memory + /// holds the values of the arguments to the kernel. Optional. + uint64_t mKernargSegmentSize = 0; + /// \brief Size in bytes of the group segment memory required by a workgroup. + /// This value does not include any dynamically allocated group segment memory + /// that may be added when the kernel is dispatched. Optional. + uint32_t mWorkgroupGroupSegmentSize = 0; + /// \brief Size in bytes of the private segment memory required by a workitem. + /// Private segment memory includes arg, spill and private segments. Optional. + uint32_t mWorkitemPrivateSegmentSize = 0; + /// \brief Total number of SGPRs used by a wavefront. Optional. + uint16_t mWavefrontNumSGPRs = 0; + /// \brief Total number of VGPRs used by a workitem. Optional. + uint16_t mWorkitemNumVGPRs = 0; + /// \brief Maximum byte alignment of variables used by the kernel in the + /// kernarg memory segment. Expressed as a power of two. Optional. + uint8_t mKernargSegmentAlign = 0; + /// \brief Maximum byte alignment of variables used by the kernel in the + /// group memory segment. Expressed as a power of two. Optional. + uint8_t mGroupSegmentAlign = 0; + /// \brief Maximum byte alignment of variables used by the kernel in the + /// private memory segment. Expressed as a power of two. Optional. + uint8_t mPrivateSegmentAlign = 0; + /// \brief Wavefront size. Expressed as a power of two. Optional. + uint8_t mWavefrontSize = 0; + + /// \brief Default constructor. + Metadata() = default; + + /// \returns True if kernel code properties metadata is empty, false + /// otherwise. + bool empty() const { + return !notEmpty(); + } + + /// \returns True if kernel code properties metadata is not empty, false + /// otherwise. + bool notEmpty() const { + return mKernargSegmentSize || mWorkgroupGroupSegmentSize || + mWorkitemPrivateSegmentSize || mWavefrontNumSGPRs || + mWorkitemNumVGPRs || mKernargSegmentAlign || mGroupSegmentAlign || + mPrivateSegmentAlign || mWavefrontSize; + } +}; + +} // end namespace CodeProps + +//===----------------------------------------------------------------------===// +// Kernel Debug Properties Metadata. +//===----------------------------------------------------------------------===// +namespace DebugProps { + +namespace Key { +/// \brief Key for Kernel::DebugProps::Metadata::mDebuggerABIVersion. +constexpr char DebuggerABIVersion[] = "DebuggerABIVersion"; +/// \brief Key for Kernel::DebugProps::Metadata::mReservedNumVGPRs. +constexpr char ReservedNumVGPRs[] = "ReservedNumVGPRs"; +/// \brief Key for Kernel::DebugProps::Metadata::mReservedFirstVGPR. +constexpr char ReservedFirstVGPR[] = "ReservedFirstVGPR"; +/// \brief Key for Kernel::DebugProps::Metadata::mPrivateSegmentBufferSGPR. +constexpr char PrivateSegmentBufferSGPR[] = "PrivateSegmentBufferSGPR"; +/// \brief Key for +/// Kernel::DebugProps::Metadata::mWavefrontPrivateSegmentOffsetSGPR. +constexpr char WavefrontPrivateSegmentOffsetSGPR[] = + "WavefrontPrivateSegmentOffsetSGPR"; +} // end namespace Key + +/// \brief In-memory representation of kernel debug properties metadata. +struct Metadata final { + /// \brief Debugger ABI version. Optional. + std::vector<uint32_t> mDebuggerABIVersion = std::vector<uint32_t>(); + /// \brief Consecutive number of VGPRs reserved for debugger use. Must be 0 if + /// mDebuggerABIVersion is not set. Optional. + uint16_t mReservedNumVGPRs = 0; + /// \brief First fixed VGPR reserved. Must be uint16_t(-1) if + /// mDebuggerABIVersion is not set or mReservedFirstVGPR is 0. Optional. + uint16_t mReservedFirstVGPR = uint16_t(-1); + /// \brief Fixed SGPR of the first of 4 SGPRs used to hold the scratch V# used + /// for the entire kernel execution. Must be uint16_t(-1) if + /// mDebuggerABIVersion is not set or SGPR not used or not known. Optional. + uint16_t mPrivateSegmentBufferSGPR = uint16_t(-1); + /// \brief Fixed SGPR used to hold the wave scratch offset for the entire + /// kernel execution. Must be uint16_t(-1) if mDebuggerABIVersion is not set + /// or SGPR is not used or not known. Optional. + uint16_t mWavefrontPrivateSegmentOffsetSGPR = uint16_t(-1); + + /// \brief Default constructor. + Metadata() = default; + + /// \returns True if kernel debug properties metadata is empty, false + /// otherwise. + bool empty() const { + return !notEmpty(); + } + + /// \returns True if kernel debug properties metadata is not empty, false + /// otherwise. + bool notEmpty() const { + return !mDebuggerABIVersion.empty(); + } +}; + +} // end namespace DebugProps + +namespace Key { +/// \brief Key for Kernel::Metadata::mName. +constexpr char Name[] = "Name"; +/// \brief Key for Kernel::Metadata::mLanguage. +constexpr char Language[] = "Language"; +/// \brief Key for Kernel::Metadata::mLanguageVersion. +constexpr char LanguageVersion[] = "LanguageVersion"; +/// \brief Key for Kernel::Metadata::mAttrs. +constexpr char Attrs[] = "Attrs"; +/// \brief Key for Kernel::Metadata::mArgs. +constexpr char Args[] = "Args"; +/// \brief Key for Kernel::Metadata::mCodeProps. +constexpr char CodeProps[] = "CodeProps"; +/// \brief Key for Kernel::Metadata::mDebugProps. +constexpr char DebugProps[] = "DebugProps"; +} // end namespace Key + +/// \brief In-memory representation of kernel metadata. +struct Metadata final { + /// \brief Name. Required. + std::string mName = std::string(); + /// \brief Language. Optional. + std::string mLanguage = std::string(); + /// \brief Language version. Optional. + std::vector<uint32_t> mLanguageVersion = std::vector<uint32_t>(); + /// \brief Attributes metadata. Optional. + Attrs::Metadata mAttrs = Attrs::Metadata(); + /// \brief Arguments metadata. Optional. + std::vector<Arg::Metadata> mArgs = std::vector<Arg::Metadata>(); + /// \brief Code properties metadata. Optional. + CodeProps::Metadata mCodeProps = CodeProps::Metadata(); + /// \brief Debug properties metadata. Optional. + DebugProps::Metadata mDebugProps = DebugProps::Metadata(); + + /// \brief Default constructor. + Metadata() = default; +}; + +} // end namespace Kernel + +namespace Key { +/// \brief Key for CodeObject::Metadata::mVersion. +constexpr char Version[] = "Version"; +/// \brief Key for CodeObject::Metadata::mPrintf. +constexpr char Printf[] = "Printf"; +/// \brief Key for CodeObject::Metadata::mKernels. +constexpr char Kernels[] = "Kernels"; +} // end namespace Key + +/// \brief In-memory representation of code object metadata. +struct Metadata final { + /// \brief Code object metadata version. Required. + std::vector<uint32_t> mVersion = std::vector<uint32_t>(); + /// \brief Printf metadata. Optional. + std::vector<std::string> mPrintf = std::vector<std::string>(); + /// \brief Kernels metadata. Optional. + std::vector<Kernel::Metadata> mKernels = std::vector<Kernel::Metadata>(); + + /// \brief Default constructor. + Metadata() = default; + + /// \brief Converts \p YamlString to \p CodeObjectMetadata. + static std::error_code fromYamlString(std::string YamlString, + Metadata &CodeObjectMetadata); + + /// \brief Converts \p CodeObjectMetadata to \p YamlString. + static std::error_code toYamlString(Metadata CodeObjectMetadata, + std::string &YamlString); +}; + +} // end namespace CodeObject +} // end namespace AMDGPU +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp new file mode 100644 index 000000000000..29a6ab9fbe93 --- /dev/null +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp @@ -0,0 +1,625 @@ +//===--- AMDGPUCodeObjectMetadataStreamer.cpp -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief AMDGPU Code Object Metadata Streamer. +/// +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUCodeObjectMetadataStreamer.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/YAMLTraits.h" + +using namespace llvm::AMDGPU; +using namespace llvm::AMDGPU::CodeObject; + +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) +LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata) +LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata) + +namespace llvm { + +static cl::opt<bool> DumpCodeObjectMetadata( + "amdgpu-dump-comd", + cl::desc("Dump AMDGPU Code Object Metadata")); +static cl::opt<bool> VerifyCodeObjectMetadata( + "amdgpu-verify-comd", + cl::desc("Verify AMDGPU Code Object Metadata")); + +namespace yaml { + +template <> +struct ScalarEnumerationTraits<AccessQualifier> { + static void enumeration(IO &YIO, AccessQualifier &EN) { + YIO.enumCase(EN, "Default", AccessQualifier::Default); + YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly); + YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly); + YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite); + } +}; + +template <> +struct ScalarEnumerationTraits<AddressSpaceQualifier> { + static void enumeration(IO &YIO, AddressSpaceQualifier &EN) { + YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private); + YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global); + YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant); + YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local); + YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic); + YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region); + } +}; + +template <> +struct ScalarEnumerationTraits<ValueKind> { + static void enumeration(IO &YIO, ValueKind &EN) { + YIO.enumCase(EN, "ByValue", ValueKind::ByValue); + YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer); + YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer); + YIO.enumCase(EN, "Sampler", ValueKind::Sampler); + YIO.enumCase(EN, "Image", ValueKind::Image); + YIO.enumCase(EN, "Pipe", ValueKind::Pipe); + YIO.enumCase(EN, "Queue", ValueKind::Queue); + YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX); + YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY); + YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ); + YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone); + YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer); + YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue); + YIO.enumCase(EN, "HiddenCompletionAction", + ValueKind::HiddenCompletionAction); + } +}; + +template <> +struct ScalarEnumerationTraits<ValueType> { + static void enumeration(IO &YIO, ValueType &EN) { + YIO.enumCase(EN, "Struct", ValueType::Struct); + YIO.enumCase(EN, "I8", ValueType::I8); + YIO.enumCase(EN, "U8", ValueType::U8); + YIO.enumCase(EN, "I16", ValueType::I16); + YIO.enumCase(EN, "U16", ValueType::U16); + YIO.enumCase(EN, "F16", ValueType::F16); + YIO.enumCase(EN, "I32", ValueType::I32); + YIO.enumCase(EN, "U32", ValueType::U32); + YIO.enumCase(EN, "F32", ValueType::F32); + YIO.enumCase(EN, "I64", ValueType::I64); + YIO.enumCase(EN, "U64", ValueType::U64); + YIO.enumCase(EN, "F64", ValueType::F64); + } +}; + +template <> +struct MappingTraits<Kernel::Attrs::Metadata> { + static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) { + YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize, + MD.mReqdWorkGroupSize, std::vector<uint32_t>()); + YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint, + MD.mWorkGroupSizeHint, std::vector<uint32_t>()); + YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint, + MD.mVecTypeHint, std::string()); + } +}; + +template <> +struct MappingTraits<Kernel::Arg::Metadata> { + static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) { + YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize); + YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign); + YIO.mapRequired(Kernel::Arg::Key::ValueKind, MD.mValueKind); + YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType); + YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign, + uint32_t(0)); + YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual, + AccessQualifier::Unknown); + YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual, + AddressSpaceQualifier::Unknown); + YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false); + YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false); + YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false); + YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false); + YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string()); + YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string()); + } +}; + +template <> +struct MappingTraits<Kernel::CodeProps::Metadata> { + static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) { + YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize, + MD.mKernargSegmentSize, uint64_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize, + MD.mWorkgroupGroupSegmentSize, uint32_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize, + MD.mWorkitemPrivateSegmentSize, uint32_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs, + MD.mWavefrontNumSGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs, + MD.mWorkitemNumVGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign, + MD.mKernargSegmentAlign, uint8_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign, + MD.mGroupSegmentAlign, uint8_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign, + MD.mPrivateSegmentAlign, uint8_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize, + MD.mWavefrontSize, uint8_t(0)); + } +}; + +template <> +struct MappingTraits<Kernel::DebugProps::Metadata> { + static void mapping(IO &YIO, Kernel::DebugProps::Metadata &MD) { + YIO.mapOptional(Kernel::DebugProps::Key::DebuggerABIVersion, + MD.mDebuggerABIVersion, std::vector<uint32_t>()); + YIO.mapOptional(Kernel::DebugProps::Key::ReservedNumVGPRs, + MD.mReservedNumVGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::DebugProps::Key::ReservedFirstVGPR, + MD.mReservedFirstVGPR, uint16_t(-1)); + YIO.mapOptional(Kernel::DebugProps::Key::PrivateSegmentBufferSGPR, + MD.mPrivateSegmentBufferSGPR, uint16_t(-1)); + YIO.mapOptional(Kernel::DebugProps::Key::WavefrontPrivateSegmentOffsetSGPR, + MD.mWavefrontPrivateSegmentOffsetSGPR, uint16_t(-1)); + } +}; + +template <> +struct MappingTraits<Kernel::Metadata> { + static void mapping(IO &YIO, Kernel::Metadata &MD) { + YIO.mapRequired(Kernel::Key::Name, MD.mName); + YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string()); + YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion, + std::vector<uint32_t>()); + if (!MD.mAttrs.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs); + if (!MD.mArgs.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::Args, MD.mArgs); + if (!MD.mCodeProps.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps); + if (!MD.mDebugProps.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::DebugProps, MD.mDebugProps); + } +}; + +template <> +struct MappingTraits<CodeObject::Metadata> { + static void mapping(IO &YIO, CodeObject::Metadata &MD) { + YIO.mapRequired(Key::Version, MD.mVersion); + YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector<std::string>()); + if (!MD.mKernels.empty() || !YIO.outputting()) + YIO.mapOptional(Key::Kernels, MD.mKernels); + } +}; + +} // end namespace yaml + +namespace AMDGPU { + +/* static */ +std::error_code CodeObject::Metadata::fromYamlString( + std::string YamlString, CodeObject::Metadata &CodeObjectMetadata) { + yaml::Input YamlInput(YamlString); + YamlInput >> CodeObjectMetadata; + return YamlInput.error(); +} + +/* static */ +std::error_code CodeObject::Metadata::toYamlString( + CodeObject::Metadata CodeObjectMetadata, std::string &YamlString) { + raw_string_ostream YamlStream(YamlString); + yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits<int>::max()); + YamlOutput << CodeObjectMetadata; + return std::error_code(); +} + +namespace CodeObject { + +void MetadataStreamer::dump(StringRef YamlString) const { + errs() << "AMDGPU Code Object Metadata:\n" << YamlString << '\n'; +} + +void MetadataStreamer::verify(StringRef YamlString) const { + errs() << "AMDGPU Code Object Metadata Parser Test: "; + + CodeObject::Metadata FromYamlString; + if (Metadata::fromYamlString(YamlString, FromYamlString)) { + errs() << "FAIL\n"; + return; + } + + std::string ToYamlString; + if (Metadata::toYamlString(FromYamlString, ToYamlString)) { + errs() << "FAIL\n"; + return; + } + + errs() << (YamlString == ToYamlString ? "PASS" : "FAIL") << '\n'; + if (YamlString != ToYamlString) { + errs() << "Original input: " << YamlString << '\n' + << "Produced output: " << ToYamlString << '\n'; + } +} + +AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const { + if (AccQual.empty()) + return AccessQualifier::Unknown; + + return StringSwitch<AccessQualifier>(AccQual) + .Case("read_only", AccessQualifier::ReadOnly) + .Case("write_only", AccessQualifier::WriteOnly) + .Case("read_write", AccessQualifier::ReadWrite) + .Default(AccessQualifier::Default); +} + +AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer( + unsigned AddressSpace) const { + if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS) + return AddressSpaceQualifier::Private; + if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS) + return AddressSpaceQualifier::Global; + if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS) + return AddressSpaceQualifier::Constant; + if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS) + return AddressSpaceQualifier::Local; + if (AddressSpace == AMDGPUASI.FLAT_ADDRESS) + return AddressSpaceQualifier::Generic; + if (AddressSpace == AMDGPUASI.REGION_ADDRESS) + return AddressSpaceQualifier::Region; + + llvm_unreachable("Unknown address space qualifier"); +} + +ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual, + StringRef BaseTypeName) const { + if (TypeQual.find("pipe") != StringRef::npos) + return ValueKind::Pipe; + + return StringSwitch<ValueKind>(BaseTypeName) + .Case("sampler_t", ValueKind::Sampler) + .Case("queue_t", ValueKind::Queue) + .Cases("image1d_t", + "image1d_array_t", + "image1d_buffer_t", + "image2d_t" , + "image2d_array_t", + "image2d_array_depth_t", + "image2d_array_msaa_t" + "image2d_array_msaa_depth_t" + "image2d_depth_t", + "image2d_msaa_t", + "image2d_msaa_depth_t", + "image3d_t", ValueKind::Image) + .Default(isa<PointerType>(Ty) ? + (Ty->getPointerAddressSpace() == + AMDGPUASI.LOCAL_ADDRESS ? + ValueKind::DynamicSharedPointer : + ValueKind::GlobalBuffer) : + ValueKind::ByValue); +} + +ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const { + switch (Ty->getTypeID()) { + case Type::IntegerTyID: { + auto Signed = !TypeName.startswith("u"); + switch (Ty->getIntegerBitWidth()) { + case 8: + return Signed ? ValueType::I8 : ValueType::U8; + case 16: + return Signed ? ValueType::I16 : ValueType::U16; + case 32: + return Signed ? ValueType::I32 : ValueType::U32; + case 64: + return Signed ? ValueType::I64 : ValueType::U64; + default: + return ValueType::Struct; + } + } + case Type::HalfTyID: + return ValueType::F16; + case Type::FloatTyID: + return ValueType::F32; + case Type::DoubleTyID: + return ValueType::F64; + case Type::PointerTyID: + return getValueType(Ty->getPointerElementType(), TypeName); + case Type::VectorTyID: + return getValueType(Ty->getVectorElementType(), TypeName); + default: + return ValueType::Struct; + } +} + +std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const { + switch (Ty->getTypeID()) { + case Type::IntegerTyID: { + if (!Signed) + return (Twine('u') + getTypeName(Ty, true)).str(); + + auto BitWidth = Ty->getIntegerBitWidth(); + switch (BitWidth) { + case 8: + return "char"; + case 16: + return "short"; + case 32: + return "int"; + case 64: + return "long"; + default: + return (Twine('i') + Twine(BitWidth)).str(); + } + } + case Type::HalfTyID: + return "half"; + case Type::FloatTyID: + return "float"; + case Type::DoubleTyID: + return "double"; + case Type::VectorTyID: { + auto VecTy = cast<VectorType>(Ty); + auto ElTy = VecTy->getElementType(); + auto NumElements = VecTy->getVectorNumElements(); + return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); + } + default: + return "unknown"; + } +} + +std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions( + MDNode *Node) const { + std::vector<uint32_t> Dims; + if (Node->getNumOperands() != 3) + return Dims; + + for (auto &Op : Node->operands()) + Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue()); + return Dims; +} + +void MetadataStreamer::emitVersion() { + auto &Version = CodeObjectMetadata.mVersion; + + Version.push_back(MetadataVersionMajor); + Version.push_back(MetadataVersionMinor); +} + +void MetadataStreamer::emitPrintf(const Module &Mod) { + auto &Printf = CodeObjectMetadata.mPrintf; + + auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); + if (!Node) + return; + + for (auto Op : Node->operands()) + if (Op->getNumOperands()) + Printf.push_back(cast<MDString>(Op->getOperand(0))->getString()); +} + +void MetadataStreamer::emitKernelLanguage(const Function &Func) { + auto &Kernel = CodeObjectMetadata.mKernels.back(); + + // TODO: What about other languages? + auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); + if (!Node || !Node->getNumOperands()) + return; + auto Op0 = Node->getOperand(0); + if (Op0->getNumOperands() <= 1) + return; + + Kernel.mLanguage = "OpenCL C"; + Kernel.mLanguageVersion.push_back( + mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()); + Kernel.mLanguageVersion.push_back( + mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()); +} + +void MetadataStreamer::emitKernelAttrs(const Function &Func) { + auto &Attrs = CodeObjectMetadata.mKernels.back().mAttrs; + + if (auto Node = Func.getMetadata("reqd_work_group_size")) + Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); + if (auto Node = Func.getMetadata("work_group_size_hint")) + Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node); + if (auto Node = Func.getMetadata("vec_type_hint")) { + Attrs.mVecTypeHint = getTypeName( + cast<ValueAsMetadata>(Node->getOperand(0))->getType(), + mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()); + } +} + +void MetadataStreamer::emitKernelArgs(const Function &Func) { + for (auto &Arg : Func.args()) + emitKernelArg(Arg); + + // TODO: What about other languages? + if (!Func.getParent()->getNamedMetadata("opencl.ocl.version")) + return; + + auto &DL = Func.getParent()->getDataLayout(); + auto Int64Ty = Type::getInt64Ty(Func.getContext()); + + emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX); + emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY); + emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ); + + if (!Func.getParent()->getNamedMetadata("llvm.printf.fmts")) + return; + + auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), + AMDGPUASI.GLOBAL_ADDRESS); + emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer); +} + +void MetadataStreamer::emitKernelArg(const Argument &Arg) { + auto Func = Arg.getParent(); + auto ArgNo = Arg.getArgNo(); + const MDNode *Node; + + StringRef TypeQual; + Node = Func->getMetadata("kernel_arg_type_qual"); + if (Node && ArgNo < Node->getNumOperands()) + TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); + + StringRef BaseTypeName; + Node = Func->getMetadata("kernel_arg_base_type"); + if (Node && ArgNo < Node->getNumOperands()) + BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); + + StringRef AccQual; + if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && + Arg.hasNoAliasAttr()) { + AccQual = "read_only"; + } else { + Node = Func->getMetadata("kernel_arg_access_qual"); + if (Node && ArgNo < Node->getNumOperands()) + AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); + } + + StringRef Name; + Node = Func->getMetadata("kernel_arg_name"); + if (Node && ArgNo < Node->getNumOperands()) + Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); + + StringRef TypeName; + Node = Func->getMetadata("kernel_arg_type"); + if (Node && ArgNo < Node->getNumOperands()) + TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); + + emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(), + getValueKind(Arg.getType(), TypeQual, BaseTypeName), TypeQual, + BaseTypeName, AccQual, Name, TypeName); +} + +void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty, + ValueKind ValueKind, StringRef TypeQual, + StringRef BaseTypeName, StringRef AccQual, + StringRef Name, StringRef TypeName) { + CodeObjectMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); + auto &Arg = CodeObjectMetadata.mKernels.back().mArgs.back(); + + Arg.mSize = DL.getTypeAllocSize(Ty); + Arg.mAlign = DL.getABITypeAlignment(Ty); + Arg.mValueKind = ValueKind; + Arg.mValueType = getValueType(Ty, BaseTypeName); + + if (auto PtrTy = dyn_cast<PointerType>(Ty)) { + auto ElTy = PtrTy->getElementType(); + if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS && ElTy->isSized()) + Arg.mPointeeAlign = DL.getABITypeAlignment(ElTy); + } + + Arg.mAccQual = getAccessQualifier(AccQual); + + if (auto PtrTy = dyn_cast<PointerType>(Ty)) + Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace()); + + SmallVector<StringRef, 1> SplitTypeQuals; + TypeQual.split(SplitTypeQuals, " ", -1, false); + for (StringRef Key : SplitTypeQuals) { + auto P = StringSwitch<bool*>(Key) + .Case("const", &Arg.mIsConst) + .Case("pipe", &Arg.mIsPipe) + .Case("restrict", &Arg.mIsRestrict) + .Case("volatile", &Arg.mIsVolatile) + .Default(nullptr); + if (P) + *P = true; + } + + Arg.mName = Name; + Arg.mTypeName = TypeName; +} + +void MetadataStreamer::emitKernelCodeProps( + const amd_kernel_code_t &KernelCode) { + auto &CodeProps = CodeObjectMetadata.mKernels.back().mCodeProps; + + CodeProps.mKernargSegmentSize = KernelCode.kernarg_segment_byte_size; + CodeProps.mWorkgroupGroupSegmentSize = + KernelCode.workgroup_group_segment_byte_size; + CodeProps.mWorkitemPrivateSegmentSize = + KernelCode.workitem_private_segment_byte_size; + CodeProps.mWavefrontNumSGPRs = KernelCode.wavefront_sgpr_count; + CodeProps.mWorkitemNumVGPRs = KernelCode.workitem_vgpr_count; + CodeProps.mKernargSegmentAlign = KernelCode.kernarg_segment_alignment; + CodeProps.mGroupSegmentAlign = KernelCode.group_segment_alignment; + CodeProps.mPrivateSegmentAlign = KernelCode.private_segment_alignment; + CodeProps.mWavefrontSize = KernelCode.wavefront_size; +} + +void MetadataStreamer::emitKernelDebugProps( + const amd_kernel_code_t &KernelCode) { + if (!(KernelCode.code_properties & AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED)) + return; + + auto &DebugProps = CodeObjectMetadata.mKernels.back().mDebugProps; + + // FIXME: Need to pass down debugger ABI version through features. This is ok + // for now because we only have one version. + DebugProps.mDebuggerABIVersion.push_back(1); + DebugProps.mDebuggerABIVersion.push_back(0); + DebugProps.mReservedNumVGPRs = KernelCode.reserved_vgpr_count; + DebugProps.mReservedFirstVGPR = KernelCode.reserved_vgpr_first; + DebugProps.mPrivateSegmentBufferSGPR = + KernelCode.debug_private_segment_buffer_sgpr; + DebugProps.mWavefrontPrivateSegmentOffsetSGPR = + KernelCode.debug_wavefront_private_segment_offset_sgpr; +} + +void MetadataStreamer::begin(const Module &Mod) { + AMDGPUASI = getAMDGPUAS(Mod); + emitVersion(); + emitPrintf(Mod); +} + +void MetadataStreamer::emitKernel(const Function &Func, + const amd_kernel_code_t &KernelCode) { + if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) + return; + + CodeObjectMetadata.mKernels.push_back(Kernel::Metadata()); + auto &Kernel = CodeObjectMetadata.mKernels.back(); + + Kernel.mName = Func.getName(); + emitKernelLanguage(Func); + emitKernelAttrs(Func); + emitKernelArgs(Func); + emitKernelCodeProps(KernelCode); + emitKernelDebugProps(KernelCode); +} + +ErrorOr<std::string> MetadataStreamer::toYamlString() { + std::string YamlString; + if (auto Error = Metadata::toYamlString(CodeObjectMetadata, YamlString)) + return Error; + + if (DumpCodeObjectMetadata) + dump(YamlString); + if (VerifyCodeObjectMetadata) + verify(YamlString); + + return YamlString; +} + +ErrorOr<std::string> MetadataStreamer::toYamlString(StringRef YamlString) { + if (auto Error = Metadata::fromYamlString(YamlString, CodeObjectMetadata)) + return Error; + + return toYamlString(); +} + +} // end namespace CodeObject +} // end namespace AMDGPU +} // end namespace llvm diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h new file mode 100644 index 000000000000..8d4c51763f63 --- /dev/null +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h @@ -0,0 +1,99 @@ +//===--- AMDGPUCodeObjectMetadataStreamer.h ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief AMDGPU Code Object Metadata Streamer. +/// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H +#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H + +#include "AMDGPU.h" +#include "AMDGPUCodeObjectMetadata.h" +#include "AMDKernelCodeT.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ErrorOr.h" + +namespace llvm { + +class Argument; +class DataLayout; +class Function; +class MDNode; +class Module; +class Type; + +namespace AMDGPU { +namespace CodeObject { + +class MetadataStreamer final { +private: + Metadata CodeObjectMetadata; + AMDGPUAS AMDGPUASI; + + void dump(StringRef YamlString) const; + + void verify(StringRef YamlString) const; + + AccessQualifier getAccessQualifier(StringRef AccQual) const; + + AddressSpaceQualifier getAddressSpaceQualifer(unsigned AddressSpace) const; + + ValueKind getValueKind(Type *Ty, StringRef TypeQual, + StringRef BaseTypeName) const; + + ValueType getValueType(Type *Ty, StringRef TypeName) const; + + std::string getTypeName(Type *Ty, bool Signed) const; + + std::vector<uint32_t> getWorkGroupDimensions(MDNode *Node) const; + + void emitVersion(); + + void emitPrintf(const Module &Mod); + + void emitKernelLanguage(const Function &Func); + + void emitKernelAttrs(const Function &Func); + + void emitKernelArgs(const Function &Func); + + void emitKernelArg(const Argument &Arg); + + void emitKernelArg(const DataLayout &DL, Type *Ty, ValueKind ValueKind, + StringRef TypeQual = "", StringRef BaseTypeName = "", + StringRef AccQual = "", StringRef Name = "", + StringRef TypeName = ""); + + void emitKernelCodeProps(const amd_kernel_code_t &KernelCode); + + void emitKernelDebugProps(const amd_kernel_code_t &KernelCode); + +public: + MetadataStreamer() = default; + ~MetadataStreamer() = default; + + void begin(const Module &Mod); + + void end() {} + + void emitKernel(const Function &Func, const amd_kernel_code_t &KernelCode); + + ErrorOr<std::string> toYamlString(); + + ErrorOr<std::string> toYamlString(StringRef YamlString); +}; + +} // end namespace CodeObject +} // end namespace AMDGPU +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 1847d7a67328..073d19422e86 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -1,16 +1,20 @@ -//===-- AMDGPUELFObjectWriter.cpp - AMDGPU ELF Writer ----------------------==// +//===- AMDGPUELFObjectWriter.cpp - AMDGPU ELF Writer ----------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -/// \file //===----------------------------------------------------------------------===// #include "AMDGPUMCTargetDesc.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -19,20 +23,21 @@ namespace { class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter { public: AMDGPUELFObjectWriter(bool Is64Bit, bool HasRelocationAddend); + protected: unsigned getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; }; -} // End anonymous namespace +} // end anonymous namespace AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit, bool HasRelocationAddend) : MCELFObjectTargetWriter(Is64Bit, ELF::ELFOSABI_AMDGPU_HSA, ELF::EM_AMDGPU, - HasRelocationAddend) { } + HasRelocationAddend) {} unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, @@ -77,7 +82,6 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx, llvm_unreachable("unhandled relocation type"); } - MCObjectWriter *llvm::createAMDGPUELFObjectWriter(bool Is64Bit, bool HasRelocationAddend, raw_pwrite_stream &OS) { diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h index 548bad56e174..f80b5f3a6dba 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -54,11 +54,17 @@ MCObjectWriter *createAMDGPUELFObjectWriter(bool Is64Bit, #define GET_REGINFO_ENUM #include "AMDGPUGenRegisterInfo.inc" +#undef GET_REGINFO_ENUM #define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_OPERAND_ENUM #include "AMDGPUGenInstrInfo.inc" +#undef GET_INSTRINFO_OPERAND_ENUM +#undef GET_INSTRINFO_ENUM + #define GET_SUBTARGETINFO_ENUM #include "AMDGPUGenSubtargetInfo.inc" +#undef GET_SUBTARGETINFO_ENUM #endif diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp deleted file mode 100644 index 95387ad1627c..000000000000 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp +++ /dev/null @@ -1,408 +0,0 @@ -//===-- AMDGPURuntimeMD.cpp - Generates runtime metadata ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// -/// Generates AMDGPU runtime metadata for YAML mapping. -// -//===----------------------------------------------------------------------===// -// - -#include "AMDGPU.h" -#include "AMDGPURuntimeMetadata.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/YAMLTraits.h" -#include <vector> -#include "AMDGPURuntimeMD.h" - -using namespace llvm; -using namespace ::AMDGPU::RuntimeMD; - -static cl::opt<bool> -DumpRuntimeMD("amdgpu-dump-rtmd", - cl::desc("Dump AMDGPU runtime metadata")); - -static cl::opt<bool> -CheckRuntimeMDParser("amdgpu-check-rtmd-parser", cl::Hidden, - cl::desc("Check AMDGPU runtime metadata YAML parser")); - -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint8_t) -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) -LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata) -LLVM_YAML_IS_SEQUENCE_VECTOR(KernelArg::Metadata) - -namespace llvm { -namespace yaml { - -template <> struct MappingTraits<KernelArg::Metadata> { - static void mapping(IO &YamlIO, KernelArg::Metadata &A) { - YamlIO.mapRequired(KeyName::ArgSize, A.Size); - YamlIO.mapRequired(KeyName::ArgAlign, A.Align); - YamlIO.mapOptional(KeyName::ArgPointeeAlign, A.PointeeAlign, 0U); - YamlIO.mapRequired(KeyName::ArgKind, A.Kind); - YamlIO.mapRequired(KeyName::ArgValueType, A.ValueType); - YamlIO.mapOptional(KeyName::ArgTypeName, A.TypeName, std::string()); - YamlIO.mapOptional(KeyName::ArgName, A.Name, std::string()); - YamlIO.mapOptional(KeyName::ArgAddrQual, A.AddrQual, INVALID_ADDR_QUAL); - YamlIO.mapOptional(KeyName::ArgAccQual, A.AccQual, INVALID_ACC_QUAL); - YamlIO.mapOptional(KeyName::ArgIsVolatile, A.IsVolatile, uint8_t(0)); - YamlIO.mapOptional(KeyName::ArgIsConst, A.IsConst, uint8_t(0)); - YamlIO.mapOptional(KeyName::ArgIsRestrict, A.IsRestrict, uint8_t(0)); - YamlIO.mapOptional(KeyName::ArgIsPipe, A.IsPipe, uint8_t(0)); - } - static const bool flow = true; -}; - -template <> struct MappingTraits<Kernel::Metadata> { - static void mapping(IO &YamlIO, Kernel::Metadata &K) { - YamlIO.mapRequired(KeyName::KernelName, K.Name); - YamlIO.mapOptional(KeyName::Language, K.Language, std::string()); - YamlIO.mapOptional(KeyName::LanguageVersion, K.LanguageVersion); - YamlIO.mapOptional(KeyName::ReqdWorkGroupSize, K.ReqdWorkGroupSize); - YamlIO.mapOptional(KeyName::WorkGroupSizeHint, K.WorkGroupSizeHint); - YamlIO.mapOptional(KeyName::VecTypeHint, K.VecTypeHint, std::string()); - YamlIO.mapOptional(KeyName::KernelIndex, K.KernelIndex, - INVALID_KERNEL_INDEX); - YamlIO.mapOptional(KeyName::NoPartialWorkGroups, K.NoPartialWorkGroups, - uint8_t(0)); - YamlIO.mapRequired(KeyName::Args, K.Args); - } - static const bool flow = true; -}; - -template <> struct MappingTraits<Program::Metadata> { - static void mapping(IO &YamlIO, Program::Metadata &Prog) { - YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq); - YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo); - YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels); - } - static const bool flow = true; -}; - -} // end namespace yaml -} // end namespace llvm - -// Get a vector of three integer values from MDNode \p Node; -static std::vector<uint32_t> getThreeInt32(MDNode *Node) { - assert(Node->getNumOperands() == 3); - std::vector<uint32_t> V; - for (const MDOperand &Op : Node->operands()) { - const ConstantInt *CI = mdconst::extract<ConstantInt>(Op); - V.push_back(CI->getZExtValue()); - } - return V; -} - -static std::string getOCLTypeName(Type *Ty, bool Signed) { - switch (Ty->getTypeID()) { - case Type::HalfTyID: - return "half"; - case Type::FloatTyID: - return "float"; - case Type::DoubleTyID: - return "double"; - case Type::IntegerTyID: { - if (!Signed) - return (Twine('u') + getOCLTypeName(Ty, true)).str(); - unsigned BW = Ty->getIntegerBitWidth(); - switch (BW) { - case 8: - return "char"; - case 16: - return "short"; - case 32: - return "int"; - case 64: - return "long"; - default: - return (Twine('i') + Twine(BW)).str(); - } - } - case Type::VectorTyID: { - VectorType *VecTy = cast<VectorType>(Ty); - Type *EleTy = VecTy->getElementType(); - unsigned Size = VecTy->getVectorNumElements(); - return (Twine(getOCLTypeName(EleTy, Signed)) + Twine(Size)).str(); - } - default: - return "unknown"; - } -} - -static KernelArg::ValueType getRuntimeMDValueType( - Type *Ty, StringRef TypeName) { - switch (Ty->getTypeID()) { - case Type::HalfTyID: - return KernelArg::F16; - case Type::FloatTyID: - return KernelArg::F32; - case Type::DoubleTyID: - return KernelArg::F64; - case Type::IntegerTyID: { - bool Signed = !TypeName.startswith("u"); - switch (Ty->getIntegerBitWidth()) { - case 8: - return Signed ? KernelArg::I8 : KernelArg::U8; - case 16: - return Signed ? KernelArg::I16 : KernelArg::U16; - case 32: - return Signed ? KernelArg::I32 : KernelArg::U32; - case 64: - return Signed ? KernelArg::I64 : KernelArg::U64; - default: - // Runtime does not recognize other integer types. Report as struct type. - return KernelArg::Struct; - } - } - case Type::VectorTyID: - return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName); - case Type::PointerTyID: - return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName); - default: - return KernelArg::Struct; - } -} - -static KernelArg::AddressSpaceQualifer getRuntimeAddrSpace( - AMDGPUAS::AddressSpaces A) { - switch (A) { - case AMDGPUAS::GLOBAL_ADDRESS: - return KernelArg::Global; - case AMDGPUAS::CONSTANT_ADDRESS: - return KernelArg::Constant; - case AMDGPUAS::LOCAL_ADDRESS: - return KernelArg::Local; - case AMDGPUAS::FLAT_ADDRESS: - return KernelArg::Generic; - case AMDGPUAS::REGION_ADDRESS: - return KernelArg::Region; - default: - return KernelArg::Private; - } -} - -static KernelArg::Metadata getRuntimeMDForKernelArg(const DataLayout &DL, - Type *T, KernelArg::Kind Kind, StringRef BaseTypeName = "", - StringRef TypeName = "", StringRef ArgName = "", StringRef TypeQual = "", - StringRef AccQual = "") { - - KernelArg::Metadata Arg; - - // Set ArgSize and ArgAlign. - Arg.Size = DL.getTypeAllocSize(T); - Arg.Align = DL.getABITypeAlignment(T); - if (auto PT = dyn_cast<PointerType>(T)) { - auto ET = PT->getElementType(); - if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized()) - Arg.PointeeAlign = DL.getABITypeAlignment(ET); - } - - // Set ArgTypeName. - Arg.TypeName = TypeName; - - // Set ArgName. - Arg.Name = ArgName; - - // Set ArgIsVolatile, ArgIsRestrict, ArgIsConst and ArgIsPipe. - SmallVector<StringRef, 1> SplitQ; - TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */); - - for (StringRef KeyName : SplitQ) { - auto *P = StringSwitch<uint8_t *>(KeyName) - .Case("volatile", &Arg.IsVolatile) - .Case("restrict", &Arg.IsRestrict) - .Case("const", &Arg.IsConst) - .Case("pipe", &Arg.IsPipe) - .Default(nullptr); - if (P) - *P = 1; - } - - // Set ArgKind. - Arg.Kind = Kind; - - // Set ArgValueType. - Arg.ValueType = getRuntimeMDValueType(T, BaseTypeName); - - // Set ArgAccQual. - if (!AccQual.empty()) { - Arg.AccQual = StringSwitch<KernelArg::AccessQualifer>(AccQual) - .Case("read_only", KernelArg::ReadOnly) - .Case("write_only", KernelArg::WriteOnly) - .Case("read_write", KernelArg::ReadWrite) - .Default(KernelArg::AccNone); - } - - // Set ArgAddrQual. - if (auto *PT = dyn_cast<PointerType>(T)) { - Arg.AddrQual = getRuntimeAddrSpace(static_cast<AMDGPUAS::AddressSpaces>( - PT->getAddressSpace())); - } - - return Arg; -} - -static Kernel::Metadata getRuntimeMDForKernel(const Function &F) { - Kernel::Metadata Kernel; - Kernel.Name = F.getName(); - auto &M = *F.getParent(); - - // Set Language and LanguageVersion. - if (auto MD = M.getNamedMetadata("opencl.ocl.version")) { - if (MD->getNumOperands() != 0) { - auto Node = MD->getOperand(0); - if (Node->getNumOperands() > 1) { - Kernel.Language = "OpenCL C"; - uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0)) - ->getZExtValue(); - uint16_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1)) - ->getZExtValue(); - Kernel.LanguageVersion.push_back(Major); - Kernel.LanguageVersion.push_back(Minor); - } - } - } - - const DataLayout &DL = F.getParent()->getDataLayout(); - for (auto &Arg : F.args()) { - unsigned I = Arg.getArgNo(); - Type *T = Arg.getType(); - auto TypeName = dyn_cast<MDString>(F.getMetadata( - "kernel_arg_type")->getOperand(I))->getString(); - auto BaseTypeName = cast<MDString>(F.getMetadata( - "kernel_arg_base_type")->getOperand(I))->getString(); - StringRef ArgName; - if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) - ArgName = cast<MDString>(ArgNameMD->getOperand(I))->getString(); - auto TypeQual = cast<MDString>(F.getMetadata( - "kernel_arg_type_qual")->getOperand(I))->getString(); - auto AccQual = cast<MDString>(F.getMetadata( - "kernel_arg_access_qual")->getOperand(I))->getString(); - KernelArg::Kind Kind; - if (TypeQual.find("pipe") != StringRef::npos) - Kind = KernelArg::Pipe; - else Kind = StringSwitch<KernelArg::Kind>(BaseTypeName) - .Case("sampler_t", KernelArg::Sampler) - .Case("queue_t", KernelArg::Queue) - .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t", - "image2d_t" , "image2d_array_t", KernelArg::Image) - .Cases("image2d_depth_t", "image2d_array_depth_t", - "image2d_msaa_t", "image2d_array_msaa_t", - "image2d_msaa_depth_t", KernelArg::Image) - .Cases("image2d_array_msaa_depth_t", "image3d_t", - KernelArg::Image) - .Default(isa<PointerType>(T) ? - (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ? - KernelArg::DynamicSharedPointer : - KernelArg::GlobalBuffer) : - KernelArg::ByValue); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, T, Kind, - BaseTypeName, TypeName, ArgName, TypeQual, AccQual)); - } - - // Emit hidden kernel arguments for OpenCL kernels. - if (F.getParent()->getNamedMetadata("opencl.ocl.version")) { - auto Int64T = Type::getInt64Ty(F.getContext()); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T, - KernelArg::HiddenGlobalOffsetX)); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T, - KernelArg::HiddenGlobalOffsetY)); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T, - KernelArg::HiddenGlobalOffsetZ)); - if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) { - auto Int8PtrT = Type::getInt8PtrTy(F.getContext(), - KernelArg::Global); - Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int8PtrT, - KernelArg::HiddenPrintfBuffer)); - } - } - - // Set ReqdWorkGroupSize, WorkGroupSizeHint, and VecTypeHint. - if (auto RWGS = F.getMetadata("reqd_work_group_size")) - Kernel.ReqdWorkGroupSize = getThreeInt32(RWGS); - - if (auto WGSH = F.getMetadata("work_group_size_hint")) - Kernel.WorkGroupSizeHint = getThreeInt32(WGSH); - - if (auto VTH = F.getMetadata("vec_type_hint")) - Kernel.VecTypeHint = getOCLTypeName(cast<ValueAsMetadata>( - VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>( - VTH->getOperand(1))->getZExtValue()); - - return Kernel; -} - -Program::Metadata::Metadata(const std::string &YAML) { - yaml::Input Input(YAML); - Input >> *this; -} - -std::string Program::Metadata::toYAML(void) { - std::string Text; - raw_string_ostream Stream(Text); - yaml::Output Output(Stream, nullptr, INT_MAX /* do not wrap line */); - Output << *this; - return Stream.str(); -} - -Program::Metadata Program::Metadata::fromYAML(const std::string &S) { - return Program::Metadata(S); -} - -// Check if the YAML string can be parsed. -static void checkRuntimeMDYAMLString(const std::string &YAML) { - auto P = Program::Metadata::fromYAML(YAML); - auto S = P.toYAML(); - llvm::errs() << "AMDGPU runtime metadata parser test " - << (YAML == S ? "passes" : "fails") << ".\n"; - if (YAML != S) { - llvm::errs() << "First output: " << YAML << '\n' - << "Second output: " << S << '\n'; - } -} - -std::string llvm::getRuntimeMDYAMLString(Module &M) { - Program::Metadata Prog; - Prog.MDVersionSeq.push_back(MDVersion); - Prog.MDVersionSeq.push_back(MDRevision); - - // Set PrintfInfo. - if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) { - for (unsigned I = 0; I < MD->getNumOperands(); ++I) { - auto Node = MD->getOperand(I); - if (Node->getNumOperands() > 0) - Prog.PrintfInfo.push_back(cast<MDString>(Node->getOperand(0)) - ->getString()); - } - } - - // Set Kernels. - for (auto &F: M.functions()) { - if (!F.getMetadata("kernel_arg_type")) - continue; - Prog.Kernels.emplace_back(getRuntimeMDForKernel(F)); - } - - auto YAML = Prog.toYAML(); - - if (DumpRuntimeMD) - llvm::errs() << "AMDGPU runtime metadata:\n" << YAML << '\n'; - - if (CheckRuntimeMDParser) - checkRuntimeMDYAMLString(YAML); - - return YAML; -} diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h deleted file mode 100644 index a92fdd4bebc2..000000000000 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h +++ /dev/null @@ -1,26 +0,0 @@ -//===- AMDGPURuntimeMD.h - Generate runtime metadata ---------------*- C++ -*-// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares functions for generating runtime metadata. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H -#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H - -#include <string> - -namespace llvm { -class Module; - -// Get runtime metadata as YAML string. -std::string getRuntimeMDYAMLString(Module &M); - -} -#endif diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 3392183d33c3..8dc863f723e2 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -27,7 +27,6 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" -#include "AMDGPURuntimeMD.h" namespace llvm { #include "AMDGPUPTNote.h" @@ -36,9 +35,27 @@ namespace llvm { using namespace llvm; using namespace llvm::AMDGPU; +//===----------------------------------------------------------------------===// +// AMDGPUTargetStreamer +//===----------------------------------------------------------------------===// + AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} +void AMDGPUTargetStreamer::EmitStartOfCodeObjectMetadata(const Module &Mod) { + CodeObjectMetadataStreamer.begin(Mod); +} + +void AMDGPUTargetStreamer::EmitKernelCodeObjectMetadata( + const Function &Func, const amd_kernel_code_t &KernelCode) { + CodeObjectMetadataStreamer.emitKernel(Func, KernelCode); +} + +void AMDGPUTargetStreamer::EmitEndOfCodeObjectMetadata() { + CodeObjectMetadataStreamer.end(); + EmitCodeObjectMetadata(CodeObjectMetadataStreamer.toYamlString().get()); +} + //===----------------------------------------------------------------------===// // AMDGPUTargetAsmStreamer //===----------------------------------------------------------------------===// @@ -93,16 +110,16 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal( OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n'; } -void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(Module &M) { - OS << "\t.amdgpu_runtime_metadata\n"; - OS << getRuntimeMDYAMLString(M); - OS << "\n\t.end_amdgpu_runtime_metadata\n"; -} +bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata(StringRef YamlString) { + auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString); + if (!VerifiedYamlString) + return false; -void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(StringRef Metadata) { - OS << "\t.amdgpu_runtime_metadata"; - OS << Metadata; - OS << "\t.end_amdgpu_runtime_metadata\n"; + OS << '\t' << AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin << '\n'; + OS << VerifiedYamlString.get(); + OS << '\t' << AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd << '\n'; + + return true; } //===----------------------------------------------------------------------===// @@ -116,22 +133,21 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() { return static_cast<MCELFStreamer &>(Streamer); } -void -AMDGPUTargetELFStreamer::EmitAMDGPUNote(const MCExpr* DescSZ, - PT_NOTE::NoteType Type, - std::function<void(MCELFStreamer &)> EmitDesc) { +void AMDGPUTargetELFStreamer::EmitAMDGPUNote( + const MCExpr *DescSZ, ElfNote::NoteType Type, + function_ref<void(MCELFStreamer &)> EmitDesc) { auto &S = getStreamer(); auto &Context = S.getContext(); - auto NameSZ = sizeof(PT_NOTE::NoteName); + auto NameSZ = sizeof(ElfNote::NoteName); S.PushSection(); S.SwitchSection(Context.getELFSection( - PT_NOTE::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC)); + ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC)); S.EmitIntValue(NameSZ, 4); // namesz S.EmitValue(DescSZ, 4); // descz - S.EmitIntValue(Type, 4); // type - S.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name + S.EmitIntValue(Type, 4); // type + S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ)); // name S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 EmitDesc(S); // desc S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 @@ -144,7 +160,7 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major, EmitAMDGPUNote( MCConstantExpr::create(8, getContext()), - PT_NOTE::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, + ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS){ OS.EmitIntValue(Major, 4); OS.EmitIntValue(Minor, 4); @@ -160,14 +176,14 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major, StringRef ArchName) { uint16_t VendorNameSize = VendorName.size() + 1; uint16_t ArchNameSize = ArchName.size() + 1; - + unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) + sizeof(Major) + sizeof(Minor) + sizeof(Stepping) + VendorNameSize + ArchNameSize; EmitAMDGPUNote( MCConstantExpr::create(DescSZ, getContext()), - PT_NOTE::NT_AMDGPU_HSA_ISA, + ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) { OS.EmitIntValue(VendorNameSize, 2); OS.EmitIntValue(ArchNameSize, 2); @@ -216,7 +232,11 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal( Symbol->setBinding(ELF::STB_GLOBAL); } -void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(StringRef Metadata) { +bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(StringRef YamlString) { + auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString); + if (!VerifiedYamlString) + return false; + // Create two labels to mark the beginning and end of the desc field // and a MCExpr to calculate the size of the desc field. auto &Context = getContext(); @@ -228,15 +248,13 @@ void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(StringRef Metadata) { EmitAMDGPUNote( DescSZ, - PT_NOTE::NT_AMDGPU_HSA_RUNTIME_METADATA, + ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_METADATA, [&](MCELFStreamer &OS) { OS.EmitLabel(DescBegin); - OS.EmitBytes(Metadata); + OS.EmitBytes(VerifiedYamlString.get()); OS.EmitLabel(DescEnd); } ); -} -void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(Module &M) { - EmitRuntimeMetadata(getRuntimeMDYAMLString(M)); + return true; } diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index e2f20586903d..5c588bbded9c 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -10,6 +10,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H +#include "AMDGPUCodeObjectMetadataStreamer.h" #include "AMDKernelCodeT.h" #include "llvm/MC/MCStreamer.h" @@ -26,6 +27,7 @@ class Type; class AMDGPUTargetStreamer : public MCTargetStreamer { protected: + AMDGPU::CodeObject::MetadataStreamer CodeObjectMetadataStreamer; MCContext &getContext() const { return Streamer.getContext(); } public: @@ -46,12 +48,18 @@ public: virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0; - virtual void EmitRuntimeMetadata(Module &M) = 0; + virtual void EmitStartOfCodeObjectMetadata(const Module &Mod); - virtual void EmitRuntimeMetadata(StringRef Metadata) = 0; + virtual void EmitKernelCodeObjectMetadata( + const Function &Func, const amd_kernel_code_t &KernelCode); + + virtual void EmitEndOfCodeObjectMetadata(); + + /// \returns True on success, false on failure. + virtual bool EmitCodeObjectMetadata(StringRef YamlString) = 0; }; -class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer { +class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer { formatted_raw_ostream &OS; public: AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); @@ -70,17 +78,16 @@ public: void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - void EmitRuntimeMetadata(Module &M) override; - - void EmitRuntimeMetadata(StringRef Metadata) override; + /// \returns True on success, false on failure. + bool EmitCodeObjectMetadata(StringRef YamlString) override; }; -class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer { +class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { MCStreamer &Streamer; - void EmitAMDGPUNote(const MCExpr* DescSize, - AMDGPU::PT_NOTE::NoteType Type, - std::function<void(MCELFStreamer &)> EmitDesc); + void EmitAMDGPUNote(const MCExpr *DescSize, + AMDGPU::ElfNote::NoteType Type, + function_ref<void(MCELFStreamer &)> EmitDesc); public: AMDGPUTargetELFStreamer(MCStreamer &S); @@ -102,9 +109,8 @@ public: void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - void EmitRuntimeMetadata(Module &M) override; - - void EmitRuntimeMetadata(StringRef Metadata) override; + /// \returns True on success, false on failure. + bool EmitCodeObjectMetadata(StringRef YamlString) override; }; } diff --git a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt index 8a6d00ce69ed..09e3efad10af 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt @@ -1,13 +1,12 @@ - add_llvm_library(LLVMAMDGPUDesc AMDGPUAsmBackend.cpp + AMDGPUCodeObjectMetadataStreamer.cpp AMDGPUELFObjectWriter.cpp AMDGPUELFStreamer.cpp + AMDGPUMCAsmInfo.cpp AMDGPUMCCodeEmitter.cpp AMDGPUMCTargetDesc.cpp - AMDGPUMCAsmInfo.cpp - AMDGPURuntimeMD.cpp AMDGPUTargetStreamer.cpp R600MCCodeEmitter.cpp SIMCCodeEmitter.cpp - ) +) diff --git a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index 0c5bb0648a16..bda0928036fd 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -220,13 +220,35 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO, Imm = MO.getImm(); } - switch (AMDGPU::getOperandSize(OpInfo)) { - case 4: + switch (OpInfo.OperandType) { + case AMDGPU::OPERAND_REG_IMM_INT32: + case AMDGPU::OPERAND_REG_IMM_FP32: + case AMDGPU::OPERAND_REG_INLINE_C_INT32: + case AMDGPU::OPERAND_REG_INLINE_C_FP32: return getLit32Encoding(static_cast<uint32_t>(Imm), STI); - case 8: + + case AMDGPU::OPERAND_REG_IMM_INT64: + case AMDGPU::OPERAND_REG_IMM_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_INT64: + case AMDGPU::OPERAND_REG_INLINE_C_FP64: return getLit64Encoding(static_cast<uint64_t>(Imm), STI); - case 2: + + case AMDGPU::OPERAND_REG_IMM_INT16: + case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_INT16: + case AMDGPU::OPERAND_REG_INLINE_C_FP16: + // FIXME Is this correct? What do inline immediates do on SI for f16 src + // which does not have f16 support? return getLit16Encoding(static_cast<uint16_t>(Imm), STI); + + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { + uint16_t Lo16 = static_cast<uint16_t>(Imm); + assert(Lo16 == static_cast<uint16_t>(Imm >> 16)); + uint32_t Encoding = getLit16Encoding(Lo16, STI); + assert(Encoding != 255 && "packed constants can only be inline immediates"); + return Encoding; + } default: llvm_unreachable("invalid operand size"); } |