12 files changed, 1270 insertions, 490 deletions
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index ffb92aae599e..f3266fe82955 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -37,7 +37,7 @@ public:
                          bool &IsResolved) override;
 
   void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
-                  uint64_t Value, bool IsPCRel) const override;
+                  uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
   bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                             const MCRelaxableFragment *DF,
                             const MCAsmLayout &Layout) const override {
@@ -131,7 +131,7 @@ void AMDGPUAsmBackend::processFixupValue(const MCAssembler &Asm,
 
 void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
                                   unsigned DataSize, uint64_t Value,
-                                  bool IsPCRel) const {
+                                  bool IsPCRel, MCContext &Ctx) const {
   if (!Value)
     return; // Doesn't change encoding.
 
@@ -164,7 +164,20 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
 }
 
 bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
-  OW->WriteZeros(Count);
+  // If Count is not a multiple of 4, we must be padding data placed in the
+  // text section (otherwise instructions are unaligned, a far bigger problem),
+  // so emit the Count % 4 leftover bytes as zeros ahead of the NOP words.
+  OW->WriteZeros(Count % 4);
+
+  // What remains is a whole number of 4-byte words; emit one NOP per word.
+  Count /= 4;
+
+  // FIXME: R600 support.
+  // Encoding of the 's_nop 0' instruction.
+  const uint32_t Encoded_S_NOP_0 = 0xbf800000;
+
+  for (uint64_t I = 0; I != Count; ++I)
+    OW->write32(Encoded_S_NOP_0);
 
   return true;
 }
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h
new file mode 100644
index 000000000000..816e8c744b27
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h
@@ -0,0 +1,422 @@
+//===--- AMDGPUCodeObjectMetadata.h -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Code Object Metadata definitions and in-memory
+/// representations.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
+
+#include <cstdint>
+#include <string>
+#include <system_error>
+#include <vector>
+
+namespace llvm {
+namespace AMDGPU {
+
+//===----------------------------------------------------------------------===//
+// Code Object Metadata.
+//===----------------------------------------------------------------------===//
+namespace CodeObject {
+
+/// \brief Metadata major version; emitted as the first mVersion entry.
+constexpr uint32_t MetadataVersionMajor = 1;
+/// \brief Metadata minor version; emitted as the second mVersion entry.
+constexpr uint32_t MetadataVersionMinor = 0;
+
+/// \brief Assembler directive that begins code object metadata.
+constexpr char MetadataAssemblerDirectiveBegin[] =
+    ".amdgpu_code_object_metadata";
+/// \brief Assembler directive that ends code object metadata.
+constexpr char MetadataAssemblerDirectiveEnd[] =
+    ".end_amdgpu_code_object_metadata";
+
+/// \brief Access qualifiers. Unknown is the in-memory "not set" default.
+enum class AccessQualifier : uint8_t {
+  Default   = 0,
+  ReadOnly  = 1,
+  WriteOnly = 2,
+  ReadWrite = 3,
+  Unknown   = 0xff
+};
+
+/// \brief Address space qualifiers. Unknown is the "not set" default.
+enum class AddressSpaceQualifier : uint8_t {
+  Private  = 0,
+  Global   = 1,
+  Constant = 2,
+  Local    = 3,
+  Generic  = 4,
+  Region   = 5,
+  Unknown  = 0xff
+};
+
+/// \brief Value kinds of a kernel argument. Unknown marks an unset value.
+enum class ValueKind : uint8_t {
+  ByValue                = 0,
+  GlobalBuffer           = 1,
+  DynamicSharedPointer   = 2,
+  Sampler                = 3,
+  Image                  = 4,
+  Pipe                   = 5,
+  Queue                  = 6,
+  HiddenGlobalOffsetX    = 7,
+  HiddenGlobalOffsetY    = 8,
+  HiddenGlobalOffsetZ    = 9,
+  HiddenNone             = 10,
+  HiddenPrintfBuffer     = 11,
+  HiddenDefaultQueue     = 12,
+  HiddenCompletionAction = 13,
+  Unknown                = 0xff
+};
+
+/// \brief Value types of a kernel argument. Unknown marks an unset value.
+enum class ValueType : uint8_t {
+  Struct  = 0,
+  I8      = 1,
+  U8      = 2,
+  I16     = 3,
+  U16     = 4,
+  F16     = 5,
+  I32     = 6,
+  U32     = 7,
+  F32     = 8,
+  I64     = 9,
+  U64     = 10,
+  F64     = 11,
+  Unknown = 0xff
+};
+
+//===----------------------------------------------------------------------===//
+// Kernel Metadata.
+//===----------------------------------------------------------------------===//
+namespace Kernel {
+
+//===----------------------------------------------------------------------===//
+// Kernel Attributes Metadata.
+//===----------------------------------------------------------------------===//
+namespace Attrs {
+
+namespace Key {
+/// \brief Key for Kernel::Attrs::Metadata::mReqdWorkGroupSize.
+constexpr char ReqdWorkGroupSize[] = "ReqdWorkGroupSize";
+/// \brief Key for Kernel::Attrs::Metadata::mWorkGroupSizeHint.
+constexpr char WorkGroupSizeHint[] = "WorkGroupSizeHint";
+/// \brief Key for Kernel::Attrs::Metadata::mVecTypeHint.
+constexpr char VecTypeHint[] = "VecTypeHint";
+} // end namespace Key
+
+/// \brief Kernel attributes metadata as it is held in memory.
+struct Metadata final {
+  /// \brief Values of the 'reqd_work_group_size' attribute. Optional.
+  std::vector<uint32_t> mReqdWorkGroupSize{};
+  /// \brief Values of the 'work_group_size_hint' attribute. Optional.
+  std::vector<uint32_t> mWorkGroupSizeHint{};
+  /// \brief Text of the 'vec_type_hint' attribute. Optional.
+  std::string mVecTypeHint{};
+
+  /// \brief Default constructor; every attribute starts out empty.
+  Metadata() = default;
+
+  /// \returns True when none of the attributes holds a value.
+  bool empty() const {
+    return !notEmpty();
+  }
+
+  /// \returns True when at least one attribute holds a value.
+  bool notEmpty() const {
+    if (!mReqdWorkGroupSize.empty())
+      return true;
+    return !mWorkGroupSizeHint.empty() || !mVecTypeHint.empty();
+  }
+};
+
+} // end namespace Attrs
+
+//===----------------------------------------------------------------------===//
+// Kernel Argument Metadata.
+//===----------------------------------------------------------------------===//
+namespace Arg {
+
+namespace Key { // YAML mapping keys of the Kernel::Arg::Metadata fields.
+/// \brief Key for Kernel::Arg::Metadata::mSize.
+constexpr char Size[] = "Size";
+/// \brief Key for Kernel::Arg::Metadata::mAlign.
+constexpr char Align[] = "Align";
+/// \brief Key for Kernel::Arg::Metadata::mValueKind.
+constexpr char ValueKind[] = "ValueKind";
+/// \brief Key for Kernel::Arg::Metadata::mValueType.
+constexpr char ValueType[] = "ValueType";
+/// \brief Key for Kernel::Arg::Metadata::mPointeeAlign.
+constexpr char PointeeAlign[] = "PointeeAlign";
+/// \brief Key for Kernel::Arg::Metadata::mAccQual.
+constexpr char AccQual[] = "AccQual";
+/// \brief Key for Kernel::Arg::Metadata::mAddrSpaceQual.
+constexpr char AddrSpaceQual[] = "AddrSpaceQual";
+/// \brief Key for Kernel::Arg::Metadata::mIsConst.
+constexpr char IsConst[] = "IsConst";
+/// \brief Key for Kernel::Arg::Metadata::mIsPipe.
+constexpr char IsPipe[] = "IsPipe";
+/// \brief Key for Kernel::Arg::Metadata::mIsRestrict.
+constexpr char IsRestrict[] = "IsRestrict";
+/// \brief Key for Kernel::Arg::Metadata::mIsVolatile.
+constexpr char IsVolatile[] = "IsVolatile";
+/// \brief Key for Kernel::Arg::Metadata::mName.
+constexpr char Name[] = "Name";
+/// \brief Key for Kernel::Arg::Metadata::mTypeName.
+constexpr char TypeName[] = "TypeName";
+} // end namespace Key
+
+/// \brief In-memory representation of one kernel argument's metadata.
+struct Metadata final {
+  /// \brief Size in bytes. Required.
+  uint32_t mSize = 0;
+  /// \brief Alignment in bytes. Required.
+  uint32_t mAlign = 0;
+  /// \brief Value kind. Required; Unknown until it is set.
+  ValueKind mValueKind = ValueKind::Unknown;
+  /// \brief Value type. Required; Unknown until it is set.
+  ValueType mValueType = ValueType::Unknown;
+  /// \brief Pointee alignment in bytes. Optional; 0 when not set.
+  uint32_t mPointeeAlign = 0;
+  /// \brief Access qualifier. Optional; Unknown when not set.
+  AccessQualifier mAccQual = AccessQualifier::Unknown;
+  /// \brief Address space qualifier. Optional; Unknown when not set.
+  AddressSpaceQualifier mAddrSpaceQual = AddressSpaceQualifier::Unknown;
+  /// \brief True if 'const' qualifier is specified. Optional.
+  bool mIsConst = false;
+  /// \brief True if 'pipe' qualifier is specified. Optional.
+  bool mIsPipe = false;
+  /// \brief True if 'restrict' qualifier is specified. Optional.
+  bool mIsRestrict = false;
+  /// \brief True if 'volatile' qualifier is specified. Optional.
+  bool mIsVolatile = false;
+  /// \brief Name. Optional; empty when not set.
+  std::string mName = std::string();
+  /// \brief Type name. Optional; empty when not set.
+  std::string mTypeName = std::string();
+
+  /// \brief Default constructor.
+  Metadata() = default;
+};
+
+} // end namespace Arg
+
+//===----------------------------------------------------------------------===//
+// Kernel Code Properties Metadata.
+//===----------------------------------------------------------------------===//
+namespace CodeProps {
+
+namespace Key { // YAML mapping keys of the Kernel::CodeProps::Metadata fields.
+/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentSize.
+constexpr char KernargSegmentSize[] = "KernargSegmentSize";
+/// \brief Key for Kernel::CodeProps::Metadata::mWorkgroupGroupSegmentSize.
+constexpr char WorkgroupGroupSegmentSize[] = "WorkgroupGroupSegmentSize";
+/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemPrivateSegmentSize.
+constexpr char WorkitemPrivateSegmentSize[] = "WorkitemPrivateSegmentSize";
+/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontNumSGPRs.
+constexpr char WavefrontNumSGPRs[] = "WavefrontNumSGPRs";
+/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemNumVGPRs.
+constexpr char WorkitemNumVGPRs[] = "WorkitemNumVGPRs";
+/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentAlign.
+constexpr char KernargSegmentAlign[] = "KernargSegmentAlign";
+/// \brief Key for Kernel::CodeProps::Metadata::mGroupSegmentAlign.
+constexpr char GroupSegmentAlign[] = "GroupSegmentAlign";
+/// \brief Key for Kernel::CodeProps::Metadata::mPrivateSegmentAlign.
+constexpr char PrivateSegmentAlign[] = "PrivateSegmentAlign";
+/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontSize.
+constexpr char WavefrontSize[] = "WavefrontSize";
+} // end namespace Key
+
+/// \brief Kernel code properties as held in memory; all default to zero.
+struct Metadata final {
+  /// \brief Kernarg segment size in bytes. The kernarg segment holds the
+  /// values of the arguments passed to the kernel. Optional.
+  uint64_t mKernargSegmentSize = 0;
+  /// \brief Group segment size in bytes required by one workgroup. Group
+  /// segment memory that may be added dynamically when the kernel is
+  /// dispatched is not included. Optional.
+  uint32_t mWorkgroupGroupSegmentSize = 0;
+  /// \brief Private segment size in bytes required by one workitem; it
+  /// includes the arg, spill and private segments. Optional.
+  uint32_t mWorkitemPrivateSegmentSize = 0;
+  /// \brief Total number of SGPRs a wavefront uses. Optional.
+  uint16_t mWavefrontNumSGPRs = 0;
+  /// \brief Total number of VGPRs a workitem uses. Optional.
+  uint16_t mWorkitemNumVGPRs = 0;
+  /// \brief Maximum byte alignment of the kernel's variables in the kernarg
+  /// memory segment. Expressed as a power of two. Optional.
+  uint8_t mKernargSegmentAlign = 0;
+  /// \brief Maximum byte alignment of the kernel's variables in the group
+  /// memory segment. Expressed as a power of two. Optional.
+  uint8_t mGroupSegmentAlign = 0;
+  /// \brief Maximum byte alignment of the kernel's variables in the private
+  /// memory segment. Expressed as a power of two. Optional.
+  uint8_t mPrivateSegmentAlign = 0;
+  /// \brief Wavefront size. Expressed as a power of two. Optional.
+  uint8_t mWavefrontSize = 0;
+
+  /// \brief Default constructor.
+  Metadata() = default;
+
+  /// \returns True if every code property still holds zero, false
+  /// otherwise.
+  bool empty() const {
+    return mKernargSegmentSize == 0 && mWorkgroupGroupSegmentSize == 0 &&
+           mWorkitemPrivateSegmentSize == 0 && mWavefrontNumSGPRs == 0 &&
+           mWorkitemNumVGPRs == 0 && mKernargSegmentAlign == 0 &&
+           mGroupSegmentAlign == 0 && mPrivateSegmentAlign == 0 &&
+           mWavefrontSize == 0;
+  }
+
+  /// \returns True if any code property is nonzero, false otherwise.
+  bool notEmpty() const {
+    return !empty();
+  }
+};
+
+} // end namespace CodeProps
+
+//===----------------------------------------------------------------------===//
+// Kernel Debug Properties Metadata.
+//===----------------------------------------------------------------------===//
+namespace DebugProps {
+
+namespace Key { // YAML mapping keys of the Kernel::DebugProps::Metadata fields.
+/// \brief Key for Kernel::DebugProps::Metadata::mDebuggerABIVersion.
+constexpr char DebuggerABIVersion[] = "DebuggerABIVersion";
+/// \brief Key for Kernel::DebugProps::Metadata::mReservedNumVGPRs.
+constexpr char ReservedNumVGPRs[] = "ReservedNumVGPRs";
+/// \brief Key for Kernel::DebugProps::Metadata::mReservedFirstVGPR.
+constexpr char ReservedFirstVGPR[] = "ReservedFirstVGPR";
+/// \brief Key for Kernel::DebugProps::Metadata::mPrivateSegmentBufferSGPR.
+constexpr char PrivateSegmentBufferSGPR[] = "PrivateSegmentBufferSGPR";
+/// \brief Key for
+///     Kernel::DebugProps::Metadata::mWavefrontPrivateSegmentOffsetSGPR.
+constexpr char WavefrontPrivateSegmentOffsetSGPR[] =
+    "WavefrontPrivateSegmentOffsetSGPR";
+} // end namespace Key
+
+/// \brief Kernel debug properties metadata as it is held in memory.
+struct Metadata final {
+  /// \brief Debugger ABI version. Optional.
+  std::vector<uint32_t> mDebuggerABIVersion{};
+  /// \brief Number of consecutive VGPRs reserved for debugger use. Must be 0
+  /// if mDebuggerABIVersion is not set. Optional.
+  uint16_t mReservedNumVGPRs = 0;
+  /// \brief First fixed VGPR reserved. Must be uint16_t(-1) if
+  /// mDebuggerABIVersion is not set or mReservedFirstVGPR is 0. Optional.
+  uint16_t mReservedFirstVGPR = uint16_t(-1);
+  /// \brief Fixed SGPR of the first of 4 SGPRs used to hold the scratch V# used
+  /// for the entire kernel execution. Must be uint16_t(-1) if
+  /// mDebuggerABIVersion is not set or SGPR not used or not known. Optional.
+  uint16_t mPrivateSegmentBufferSGPR = uint16_t(-1);
+  /// \brief Fixed SGPR used to hold the wave scratch offset for the entire
+  /// kernel execution. Must be uint16_t(-1) if mDebuggerABIVersion is not set
+  /// or SGPR is not used or not known. Optional.
+  uint16_t mWavefrontPrivateSegmentOffsetSGPR = uint16_t(-1);
+
+  /// \brief Default constructor.
+  Metadata() = default;
+
+  /// \returns True if no debugger ABI version has been recorded, false
+  /// otherwise. The remaining fields are not consulted.
+  bool empty() const {
+    return mDebuggerABIVersion.empty();
+  }
+
+  /// \returns True if a debugger ABI version has been recorded, false
+  /// otherwise. The remaining fields are not consulted.
+  bool notEmpty() const {
+    return !empty();
+  }
+};
+
+} // end namespace DebugProps
+
+namespace Key { // YAML mapping keys of the Kernel::Metadata fields.
+/// \brief Key for Kernel::Metadata::mName.
+constexpr char Name[] = "Name";
+/// \brief Key for Kernel::Metadata::mLanguage.
+constexpr char Language[] = "Language";
+/// \brief Key for Kernel::Metadata::mLanguageVersion.
+constexpr char LanguageVersion[] = "LanguageVersion";
+/// \brief Key for Kernel::Metadata::mAttrs.
+constexpr char Attrs[] = "Attrs";
+/// \brief Key for Kernel::Metadata::mArgs.
+constexpr char Args[] = "Args";
+/// \brief Key for Kernel::Metadata::mCodeProps.
+constexpr char CodeProps[] = "CodeProps";
+/// \brief Key for Kernel::Metadata::mDebugProps.
+constexpr char DebugProps[] = "DebugProps";
+} // end namespace Key
+
+/// \brief In-memory representation of one kernel's metadata.
+struct Metadata final {
+  /// \brief Name. Required.
+  std::string mName = std::string();
+  /// \brief Language. Optional.
+  std::string mLanguage = std::string();
+  /// \brief Language version. Optional.
+  std::vector<uint32_t> mLanguageVersion = std::vector<uint32_t>();
+  /// \brief Attributes metadata. Optional; omitted from YAML if empty.
+  Attrs::Metadata mAttrs = Attrs::Metadata();
+  /// \brief Arguments metadata. Optional; omitted from YAML if empty.
+  std::vector<Arg::Metadata> mArgs = std::vector<Arg::Metadata>();
+  /// \brief Code properties metadata. Optional; omitted from YAML if empty.
+  CodeProps::Metadata mCodeProps = CodeProps::Metadata();
+  /// \brief Debug properties metadata. Optional; omitted from YAML if empty.
+  DebugProps::Metadata mDebugProps = DebugProps::Metadata();
+
+  /// \brief Default constructor.
+  Metadata() = default;
+};
+
+} // end namespace Kernel
+
+namespace Key { // YAML mapping keys of the CodeObject::Metadata fields.
+/// \brief Key for CodeObject::Metadata::mVersion.
+constexpr char Version[] = "Version";
+/// \brief Key for CodeObject::Metadata::mPrintf.
+constexpr char Printf[] = "Printf";
+/// \brief Key for CodeObject::Metadata::mKernels.
+constexpr char Kernels[] = "Kernels";
+} // end namespace Key
+
+/// \brief In-memory representation of code object metadata.
+struct Metadata final {
+  /// \brief Code object metadata version. Required.
+  std::vector<uint32_t> mVersion = std::vector<uint32_t>();
+  /// \brief Printf metadata. Optional.
+  std::vector<std::string> mPrintf = std::vector<std::string>();
+  /// \brief Kernels metadata. Optional; omitted from YAML if empty.
+  std::vector<Kernel::Metadata> mKernels = std::vector<Kernel::Metadata>();
+
+  /// \brief Default constructor.
+  Metadata() = default;
+
+  /// \brief Parses \p YamlString into \p CodeObjectMetadata.
+  static std::error_code fromYamlString(std::string YamlString,
+                                        Metadata &CodeObjectMetadata);
+
+  /// \brief Serializes \p CodeObjectMetadata as YAML into \p YamlString.
+  static std::error_code toYamlString(Metadata CodeObjectMetadata,
+                                      std::string &YamlString);
+};
+
+} // end namespace CodeObject
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
new file mode 100644
index 000000000000..29a6ab9fbe93
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
@@ -0,0 +1,625 @@
+//===--- AMDGPUCodeObjectMetadataStreamer.cpp -------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Code Object Metadata Streamer.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUCodeObjectMetadataStreamer.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/YAMLTraits.h"
+
+using namespace llvm::AMDGPU;
+using namespace llvm::AMDGPU::CodeObject;
+
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) // e.g. mVersion, mLanguageVersion
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) // mPrintf
+LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata) // Kernel::Metadata::mArgs
+LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata) // mKernels
+
+namespace llvm {
+
+// Debugging switches; presumably they gate MetadataStreamer::dump() and
+// MetadataStreamer::verify() respectively (their use is not in this hunk).
+static cl::opt<bool> DumpCodeObjectMetadata(
+    "amdgpu-dump-comd", cl::desc("Dump AMDGPU Code Object Metadata"));
+static cl::opt<bool> VerifyCodeObjectMetadata(
+    "amdgpu-verify-comd", cl::desc("Verify AMDGPU Code Object Metadata"));
+
+namespace yaml {
+
+// YAML spellings of AccessQualifier; Unknown has no spelling here.
+template <> struct ScalarEnumerationTraits<AccessQualifier> {
+  static void enumeration(IO &Yaml, AccessQualifier &Val) {
+    Yaml.enumCase(Val, "Default", AccessQualifier::Default);
+    Yaml.enumCase(Val, "ReadOnly", AccessQualifier::ReadOnly);
+    Yaml.enumCase(Val, "WriteOnly", AccessQualifier::WriteOnly);
+    Yaml.enumCase(Val, "ReadWrite", AccessQualifier::ReadWrite);
+  }
+};
+
+// YAML spellings of AddressSpaceQualifier; Unknown has no spelling here.
+template <> struct ScalarEnumerationTraits<AddressSpaceQualifier> {
+  static void enumeration(IO &Yaml, AddressSpaceQualifier &Val) {
+    Yaml.enumCase(Val, "Private", AddressSpaceQualifier::Private);
+    Yaml.enumCase(Val, "Global", AddressSpaceQualifier::Global);
+    Yaml.enumCase(Val, "Constant", AddressSpaceQualifier::Constant);
+    Yaml.enumCase(Val, "Local", AddressSpaceQualifier::Local);
+    Yaml.enumCase(Val, "Generic", AddressSpaceQualifier::Generic);
+    Yaml.enumCase(Val, "Region", AddressSpaceQualifier::Region);
+  }
+};
+
+// YAML spellings of ValueKind; Unknown has no spelling here.
+template <> struct ScalarEnumerationTraits<ValueKind> {
+  static void enumeration(IO &Yaml, ValueKind &Val) {
+    Yaml.enumCase(Val, "ByValue", ValueKind::ByValue);
+    Yaml.enumCase(Val, "GlobalBuffer", ValueKind::GlobalBuffer);
+    Yaml.enumCase(Val, "DynamicSharedPointer", ValueKind::DynamicSharedPointer);
+    Yaml.enumCase(Val, "Sampler", ValueKind::Sampler);
+    Yaml.enumCase(Val, "Image", ValueKind::Image);
+    Yaml.enumCase(Val, "Pipe", ValueKind::Pipe);
+    Yaml.enumCase(Val, "Queue", ValueKind::Queue);
+    Yaml.enumCase(Val, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX);
+    Yaml.enumCase(Val, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY);
+    Yaml.enumCase(Val, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ);
+    Yaml.enumCase(Val, "HiddenNone", ValueKind::HiddenNone);
+    Yaml.enumCase(Val, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer);
+    Yaml.enumCase(Val, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue);
+    Yaml.enumCase(Val, "HiddenCompletionAction",
+                  ValueKind::HiddenCompletionAction);
+  }
+};
+
+// YAML spellings of ValueType; Unknown has no spelling here.
+template <> struct ScalarEnumerationTraits<ValueType> {
+  static void enumeration(IO &Yaml, ValueType &Val) {
+    Yaml.enumCase(Val, "Struct", ValueType::Struct);
+    Yaml.enumCase(Val, "I8", ValueType::I8);
+    Yaml.enumCase(Val, "U8", ValueType::U8);
+    Yaml.enumCase(Val, "I16", ValueType::I16);
+    Yaml.enumCase(Val, "U16", ValueType::U16);
+    Yaml.enumCase(Val, "F16", ValueType::F16);
+    Yaml.enumCase(Val, "I32", ValueType::I32);
+    Yaml.enumCase(Val, "U32", ValueType::U32);
+    Yaml.enumCase(Val, "F32", ValueType::F32);
+    Yaml.enumCase(Val, "I64", ValueType::I64);
+    Yaml.enumCase(Val, "U64", ValueType::U64);
+    Yaml.enumCase(Val, "F64", ValueType::F64);
+  }
+};
+
+// YAML mapping of kernel attributes; every key is optional, default empty.
+template <> struct MappingTraits<Kernel::Attrs::Metadata> {
+  static void mapping(IO &Yaml, Kernel::Attrs::Metadata &Meta) {
+    Yaml.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize,
+                     Meta.mReqdWorkGroupSize, std::vector<uint32_t>());
+    Yaml.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint,
+                     Meta.mWorkGroupSizeHint, std::vector<uint32_t>());
+    Yaml.mapOptional(Kernel::Attrs::Key::VecTypeHint,
+                     Meta.mVecTypeHint, std::string());
+  }
+};
+
+// YAML mapping of one kernel argument: four required keys, the rest optional.
+template <> struct MappingTraits<Kernel::Arg::Metadata> {
+  static void mapping(IO &Yaml, Kernel::Arg::Metadata &Meta) {
+    Yaml.mapRequired(Kernel::Arg::Key::Size, Meta.mSize);
+    Yaml.mapRequired(Kernel::Arg::Key::Align, Meta.mAlign);
+    Yaml.mapRequired(Kernel::Arg::Key::ValueKind, Meta.mValueKind);
+    Yaml.mapRequired(Kernel::Arg::Key::ValueType, Meta.mValueType);
+    Yaml.mapOptional(Kernel::Arg::Key::PointeeAlign, Meta.mPointeeAlign,
+                     uint32_t(0));
+    Yaml.mapOptional(Kernel::Arg::Key::AccQual, Meta.mAccQual,
+                     AccessQualifier::Unknown);
+    Yaml.mapOptional(Kernel::Arg::Key::AddrSpaceQual, Meta.mAddrSpaceQual,
+                     AddressSpaceQualifier::Unknown);
+    Yaml.mapOptional(Kernel::Arg::Key::IsConst, Meta.mIsConst, false);
+    Yaml.mapOptional(Kernel::Arg::Key::IsPipe, Meta.mIsPipe, false);
+    Yaml.mapOptional(Kernel::Arg::Key::IsRestrict, Meta.mIsRestrict, false);
+    Yaml.mapOptional(Kernel::Arg::Key::IsVolatile, Meta.mIsVolatile, false);
+    Yaml.mapOptional(Kernel::Arg::Key::Name, Meta.mName, std::string());
+    Yaml.mapOptional(Kernel::Arg::Key::TypeName, Meta.mTypeName, std::string());
+  }
+};
+
+// YAML mapping of kernel code properties; every key is optional, default 0.
+template <> struct MappingTraits<Kernel::CodeProps::Metadata> {
+  static void mapping(IO &Yaml, Kernel::CodeProps::Metadata &Meta) {
+    Yaml.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize,
+                     Meta.mKernargSegmentSize, uint64_t(0));
+    Yaml.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize,
+                     Meta.mWorkgroupGroupSegmentSize, uint32_t(0));
+    Yaml.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize,
+                     Meta.mWorkitemPrivateSegmentSize, uint32_t(0));
+    Yaml.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs,
+                     Meta.mWavefrontNumSGPRs, uint16_t(0));
+    Yaml.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs,
+                     Meta.mWorkitemNumVGPRs, uint16_t(0));
+    Yaml.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign,
+                     Meta.mKernargSegmentAlign, uint8_t(0));
+    Yaml.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign,
+                     Meta.mGroupSegmentAlign, uint8_t(0));
+    Yaml.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign,
+                     Meta.mPrivateSegmentAlign, uint8_t(0));
+    Yaml.mapOptional(Kernel::CodeProps::Key::WavefrontSize,
+                     Meta.mWavefrontSize, uint8_t(0));
+  }
+};
+
+// YAML mapping of kernel debug properties; every key is optional.
+template <> struct MappingTraits<Kernel::DebugProps::Metadata> {
+  static void mapping(IO &Yaml, Kernel::DebugProps::Metadata &Meta) {
+    Yaml.mapOptional(Kernel::DebugProps::Key::DebuggerABIVersion,
+                     Meta.mDebuggerABIVersion, std::vector<uint32_t>());
+    Yaml.mapOptional(Kernel::DebugProps::Key::ReservedNumVGPRs,
+                     Meta.mReservedNumVGPRs, uint16_t(0));
+    Yaml.mapOptional(Kernel::DebugProps::Key::ReservedFirstVGPR,
+                     Meta.mReservedFirstVGPR, uint16_t(-1));
+    Yaml.mapOptional(Kernel::DebugProps::Key::PrivateSegmentBufferSGPR,
+                     Meta.mPrivateSegmentBufferSGPR, uint16_t(-1));
+    Yaml.mapOptional(Kernel::DebugProps::Key::WavefrontPrivateSegmentOffsetSGPR,
+                     Meta.mWavefrontPrivateSegmentOffsetSGPR, uint16_t(-1));
+  }
+};
+
+// YAML mapping of one kernel; empty sub-records are skipped when outputting.
+template <> struct MappingTraits<Kernel::Metadata> {
+  static void mapping(IO &Yaml, Kernel::Metadata &Meta) {
+    Yaml.mapRequired(Kernel::Key::Name, Meta.mName);
+    Yaml.mapOptional(Kernel::Key::Language, Meta.mLanguage, std::string());
+    Yaml.mapOptional(Kernel::Key::LanguageVersion, Meta.mLanguageVersion,
+                     std::vector<uint32_t>());
+    if (!Yaml.outputting() || Meta.mAttrs.notEmpty())
+      Yaml.mapOptional(Kernel::Key::Attrs, Meta.mAttrs);
+    if (!Yaml.outputting() || !Meta.mArgs.empty())
+      Yaml.mapOptional(Kernel::Key::Args, Meta.mArgs);
+    if (!Yaml.outputting() || Meta.mCodeProps.notEmpty())
+      Yaml.mapOptional(Kernel::Key::CodeProps, Meta.mCodeProps);
+    if (!Yaml.outputting() || Meta.mDebugProps.notEmpty())
+      Yaml.mapOptional(Kernel::Key::DebugProps, Meta.mDebugProps);
+  }
+};
+
+// YAML mapping of the code object; an empty kernel list is not output.
+template <> struct MappingTraits<CodeObject::Metadata> {
+  static void mapping(IO &Yaml, CodeObject::Metadata &Meta) {
+    Yaml.mapRequired(Key::Version, Meta.mVersion);
+    Yaml.mapOptional(Key::Printf, Meta.mPrintf, std::vector<std::string>());
+    if (!Yaml.outputting() || !Meta.mKernels.empty())
+      Yaml.mapOptional(Key::Kernels, Meta.mKernels);
+  }
+};
+
+} // end namespace yaml
+
+namespace AMDGPU {
+
+/* static */
+std::error_code CodeObject::Metadata::fromYamlString(
+    std::string YamlString, CodeObject::Metadata &CodeObjectMetadata) {
+  yaml::Input YamlInput(YamlString);
+  YamlInput >> CodeObjectMetadata;
+  return YamlInput.error();
+}
+
+// Static. Writes the YAML text for CodeObjectMetadata into YamlString.
+std::error_code CodeObject::Metadata::toYamlString(
+    CodeObject::Metadata CodeObjectMetadata, std::string &YamlString) {
+  raw_string_ostream Stream(YamlString);
+  yaml::Output Emitter(Stream, nullptr, std::numeric_limits<int>::max());
+  Emitter << CodeObjectMetadata;
+  return std::error_code();
+}
+
+namespace CodeObject {
+
+void MetadataStreamer::dump(StringRef YamlString) const { // Prints to errs().
+  errs() << "AMDGPU Code Object Metadata:\n" << YamlString << '\n';
+}
+
+// Round-trips YamlString through the parser and the emitter and reports on
+// errs() whether the regenerated text is identical to the input.
+void MetadataStreamer::verify(StringRef YamlString) const {
+  errs() << "AMDGPU Code Object Metadata Parser Test: ";
+
+  CodeObject::Metadata Parsed;
+  std::string Regenerated;
+  if (Metadata::fromYamlString(YamlString, Parsed) ||
+      Metadata::toYamlString(Parsed, Regenerated)) {
+    errs() << "FAIL\n";
+    return;
+  }
+
+  if (YamlString == Regenerated) {
+    errs() << "PASS" << '\n';
+    return;
+  }
+  errs() << "FAIL" << '\n'
+         << "Original input: " << YamlString << '\n'
+         << "Produced output: " << Regenerated << '\n';
+}
+
+// Translates an access qualifier spelling: an empty string gives Unknown,
+// the three known spellings map directly, anything else gives Default.
+AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
+  return AccQual.empty() ? AccessQualifier::Unknown :
+         StringSwitch<AccessQualifier>(AccQual)
+             .Case("read_only",  AccessQualifier::ReadOnly)
+             .Case("write_only", AccessQualifier::WriteOnly)
+             .Case("read_write", AccessQualifier::ReadWrite)
+             .Default(AccessQualifier::Default);
+}
+
+// Translates an address space number into its metadata qualifier by scanning
+// the AMDGPUASI numbering in a fixed order; reaching the end is a bug.
+AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
+    unsigned AddressSpace) const {
+  const struct { unsigned AS; AddressSpaceQualifier Qual; } Known[] = {
+      {AMDGPUASI.PRIVATE_ADDRESS, AddressSpaceQualifier::Private},
+      {AMDGPUASI.GLOBAL_ADDRESS, AddressSpaceQualifier::Global},
+      {AMDGPUASI.CONSTANT_ADDRESS, AddressSpaceQualifier::Constant},
+      {AMDGPUASI.LOCAL_ADDRESS, AddressSpaceQualifier::Local},
+      {AMDGPUASI.FLAT_ADDRESS, AddressSpaceQualifier::Generic},
+      {AMDGPUASI.REGION_ADDRESS, AddressSpaceQualifier::Region},
+  };
+  for (const auto &Entry : Known)
+    if (Entry.AS == AddressSpace)
+      return Entry.Qual;
+  llvm_unreachable("Unknown address space qualifier");
+}
+
+// Classifies a kernel argument: the 'pipe' type qualifier wins, then the
+// base type name, then the pointer address space; otherwise it is by value.
+ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
+                                         StringRef BaseTypeName) const {
+  if (TypeQual.find("pipe") != StringRef::npos)
+    return ValueKind::Pipe;
+  // One Case per name: adjacent literals without a comma silently merge.
+  return StringSwitch<ValueKind>(BaseTypeName)
+             .Case("sampler_t", ValueKind::Sampler)
+             .Case("queue_t", ValueKind::Queue)
+             .Case("image1d_t", ValueKind::Image)
+             .Case("image1d_array_t", ValueKind::Image)
+             .Case("image1d_buffer_t", ValueKind::Image)
+             .Case("image2d_t", ValueKind::Image)
+             .Case("image2d_array_t", ValueKind::Image)
+             .Case("image2d_array_depth_t", ValueKind::Image)
+             .Case("image2d_array_msaa_t", ValueKind::Image)
+             .Case("image2d_array_msaa_depth_t", ValueKind::Image)
+             .Case("image2d_depth_t", ValueKind::Image)
+             .Case("image2d_msaa_t", ValueKind::Image)
+             .Case("image2d_msaa_depth_t", ValueKind::Image)
+             .Case("image3d_t", ValueKind::Image)
+             .Default(!isa<PointerType>(Ty) ? ValueKind::ByValue :
+                      Ty->getPointerAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ?
+                          ValueKind::DynamicSharedPointer :
+                          ValueKind::GlobalBuffer);
+}
+
+// Maps an IR type to its metadata value type. Pointers and vectors are
+// looked through; a TypeName starting with 'u' selects the unsigned variants.
+ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
+  // Peel pointer and vector layers until the innermost element type is left.
+  for (;;) {
+    const auto Kind = Ty->getTypeID();
+    if (Kind == Type::PointerTyID)
+      Ty = Ty->getPointerElementType();
+    else if (Kind == Type::VectorTyID)
+      Ty = Ty->getVectorElementType();
+    else
+      break;
+  }
+
+  switch (Ty->getTypeID()) {
+  case Type::HalfTyID:    return ValueType::F16;
+  case Type::FloatTyID:   return ValueType::F32;
+  case Type::DoubleTyID:  return ValueType::F64;
+  case Type::IntegerTyID: break;
+  default:                return ValueType::Struct;
+  }
+
+  const bool IsSigned = !TypeName.startswith("u");
+  switch (Ty->getIntegerBitWidth()) {
+  case 8:  return IsSigned ? ValueType::I8 : ValueType::U8;
+  case 16: return IsSigned ? ValueType::I16 : ValueType::U16;
+  case 32: return IsSigned ? ValueType::I32 : ValueType::U32;
+  case 64: return IsSigned ? ValueType::I64 : ValueType::U64;
+  default: return ValueType::Struct;
+  }
+}
+
+/// Spells \p Ty as a type name. Integers get a "u" prefix when \p Signed is
+/// false; vectors append their element count to the element's name.
+std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
+  if (Ty->isHalfTy())
+    return "half";
+  if (Ty->isFloatTy())
+    return "float";
+  if (Ty->isDoubleTy())
+    return "double";
+
+  if (auto VecTy = dyn_cast<VectorType>(Ty)) {
+    auto ElementName = getTypeName(VecTy->getElementType(), Signed);
+    return (Twine(ElementName) + Twine(VecTy->getVectorNumElements())).str();
+  }
+
+  if (!Ty->isIntegerTy())
+    return "unknown";
+
+  // The unsigned spelling is the signed one with a leading 'u'.
+  if (!Signed)
+    return (Twine('u') + getTypeName(Ty, true)).str();
+
+  auto Bits = Ty->getIntegerBitWidth();
+  switch (Bits) {
+  case 8:
+    return "char";
+  case 16:
+    return "short";
+  case 32:
+    return "int";
+  case 64:
+    return "long";
+  default:
+    return (Twine('i') + Twine(Bits)).str();
+  }
+}
+
+/// Reads the operands of \p Node as 32-bit dimensions. The result is empty
+/// unless \p Node has exactly three operands.
+std::vector<uint32_t>
+MetadataStreamer::getWorkGroupDimensions(MDNode *Node) const {
+  std::vector<uint32_t> Result;
+  if (Node->getNumOperands() == 3)
+    for (auto &Operand : Node->operands())
+      Result.push_back(mdconst::extract<ConstantInt>(Operand)->getZExtValue());
+  return Result;
+}
+
+/// Records the metadata format version in mVersion.
+void MetadataStreamer::emitVersion() {
+  // Order matters: the major number is appended before the minor one.
+  CodeObjectMetadata.mVersion.push_back(MetadataVersionMajor);
+  CodeObjectMetadata.mVersion.push_back(MetadataVersionMinor);
+}
+
+/// Copies the first operand of each non-empty "llvm.printf.fmts" entry of
+/// \p Mod into mPrintf; does nothing when that named metadata is absent.
+void MetadataStreamer::emitPrintf(const Module &Mod) {
+  if (auto Formats = Mod.getNamedMetadata("llvm.printf.fmts"))
+    for (auto Entry : Formats->operands()) {
+      if (Entry->getNumOperands() == 0)
+        continue;
+      auto Format = cast<MDString>(Entry->getOperand(0))->getString();
+      CodeObjectMetadata.mPrintf.push_back(Format);
+    }
+}
+
+/// Tags the last kernel in mKernels as "OpenCL C" and records the two version
+/// numbers held by the first "opencl.ocl.version" entry of \p Func's module.
+void MetadataStreamer::emitKernelLanguage(const Function &Func) {
+  // TODO: What about other languages?
+  auto Versions = Func.getParent()->getNamedMetadata("opencl.ocl.version");
+  if (!Versions || Versions->getNumOperands() == 0)
+    return;
+  // Only the first entry is read, and it needs at least two operands.
+  auto First = Versions->getOperand(0);
+  if (First->getNumOperands() < 2)
+    return;
+  auto &CurKernel = CodeObjectMetadata.mKernels.back();
+  CurKernel.mLanguage = "OpenCL C";
+  for (unsigned Idx = 0; Idx != 2; ++Idx)
+    CurKernel.mLanguageVersion.push_back(
+        mdconst::extract<ConstantInt>(First->getOperand(Idx))->getZExtValue());
+}
+
+/// Copies the optional work-group-size and vec_type_hint metadata of \p Func
+/// into the attributes of the last kernel in mKernels.
+void MetadataStreamer::emitKernelAttrs(const Function &Func) {
+  auto &KernelAttrs = CodeObjectMetadata.mKernels.back().mAttrs;
+  if (auto Reqd = Func.getMetadata("reqd_work_group_size"))
+    KernelAttrs.mReqdWorkGroupSize = getWorkGroupDimensions(Reqd);
+  if (auto Hint = Func.getMetadata("work_group_size_hint"))
+    KernelAttrs.mWorkGroupSizeHint = getWorkGroupDimensions(Hint);
+  if (auto VecHint = Func.getMetadata("vec_type_hint"))
+    KernelAttrs.mVecTypeHint = getTypeName(
+        cast<ValueAsMetadata>(VecHint->getOperand(0))->getType(),
+        mdconst::extract<ConstantInt>(VecHint->getOperand(1))->getZExtValue());
+}
+
+/// Emits one metadata entry per IR argument of \p Func, then the hidden
+/// arguments appended for OpenCL modules: three i64 global offsets and, when
+/// the module has "llvm.printf.fmts", an i8 global pointer for printf.
+void MetadataStreamer::emitKernelArgs(const Function &Func) {
+  for (auto &Arg : Func.args())
+    emitKernelArg(Arg);
+
+  // TODO: What about other languages?
+  auto Mod = Func.getParent();
+  if (!Mod->getNamedMetadata("opencl.ocl.version"))
+    return;
+
+  auto &Layout = Mod->getDataLayout();
+  auto OffsetTy = Type::getInt64Ty(Func.getContext());
+  emitKernelArg(Layout, OffsetTy, ValueKind::HiddenGlobalOffsetX);
+  emitKernelArg(Layout, OffsetTy, ValueKind::HiddenGlobalOffsetY);
+  emitKernelArg(Layout, OffsetTy, ValueKind::HiddenGlobalOffsetZ);
+  if (Mod->getNamedMetadata("llvm.printf.fmts"))
+    emitKernelArg(Layout, Type::getInt8PtrTy(Func.getContext(),
+                                             AMDGPUASI.GLOBAL_ADDRESS),
+                  ValueKind::HiddenPrintfBuffer);
+}
+
+/// Emits metadata for the IR argument \p Arg.
+///
+/// Looks up the per-argument strings in the parent function's
+/// "kernel_arg_*" metadata (an absent node, or one with too few operands,
+/// yields an empty string) and hands them to the DataLayout-based overload.
+void MetadataStreamer::emitKernelArg(const Argument &Arg) {
+  auto Func = Arg.getParent();
+  auto ArgNo = Arg.getArgNo();
+  auto ArgTy = Arg.getType();
+
+  // Fetches this argument's string from the function metadata named Kind, or
+  // an empty StringRef when that metadata cannot supply one.
+  auto Lookup = [&](StringRef Kind) -> StringRef {
+    auto Node = Func->getMetadata(Kind);
+    if (!Node || ArgNo >= Node->getNumOperands())
+      return StringRef();
+    return cast<MDString>(Node->getOperand(ArgNo))->getString();
+  };
+
+  // Space-separated qualifier words such as "const" or "volatile".
+  auto TypeQual = Lookup("kernel_arg_type_qual");
+  // Drives the value kind and the signedness of the value type.
+  auto BaseTypeName = Lookup("kernel_arg_base_type");
+  // Argument name and type name, recorded as-is.
+  auto Name = Lookup("kernel_arg_name");
+  auto TypeName = Lookup("kernel_arg_type");
+
+  // A noalias pointer that is only read is always reported as read_only;
+  // the declared access qualifier is consulted for everything else.
+  StringRef AccQual;
+  if (ArgTy->isPointerTy() && Arg.onlyReadsMemory() && Arg.hasNoAliasAttr())
+    AccQual = "read_only";
+  else
+    AccQual = Lookup("kernel_arg_access_qual");
+
+  auto Kind = getValueKind(ArgTy, TypeQual, BaseTypeName);
+  emitKernelArg(Func->getParent()->getDataLayout(), ArgTy, Kind, TypeQual,
+                BaseTypeName, AccQual, Name, TypeName);
+}
+
+/// Appends an argument record to the last kernel in mKernels. Every word of
+/// \p TypeQual naming a known qualifier sets the matching mIs* flag.
+void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
+                                     ValueKind ValueKind, StringRef TypeQual,
+                                     StringRef BaseTypeName, StringRef AccQual,
+                                     StringRef Name, StringRef TypeName) {
+  auto &Args = CodeObjectMetadata.mKernels.back().mArgs;
+  Args.push_back(Kernel::Arg::Metadata());
+  auto &Arg = Args.back();
+  Arg.mName = Name;
+  Arg.mTypeName = TypeName;
+  Arg.mSize = DL.getTypeAllocSize(Ty);
+  Arg.mAlign = DL.getABITypeAlignment(Ty);
+  Arg.mValueKind = ValueKind;
+  Arg.mValueType = getValueType(Ty, BaseTypeName);
+  Arg.mAccQual = getAccessQualifier(AccQual);
+
+  if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
+    auto AddrSpace = PtrTy->getAddressSpace();
+    auto Pointee = PtrTy->getElementType();
+    // Pointee alignment is only recorded for sized LOCAL pointees.
+    if (AddrSpace == AMDGPUASI.LOCAL_ADDRESS && Pointee->isSized())
+      Arg.mPointeeAlign = DL.getABITypeAlignment(Pointee);
+    Arg.mAddrSpaceQual = getAddressSpaceQualifer(AddrSpace);
+  }
+
+  SmallVector<StringRef, 1> Words;
+  TypeQual.split(Words, " ", -1, false);
+  for (StringRef Word : Words) {
+    if (Word == "const")
+      Arg.mIsConst = true;
+    else if (Word == "pipe")
+      Arg.mIsPipe = true;
+    else if (Word == "restrict")
+      Arg.mIsRestrict = true;
+    else if (Word == "volatile")
+      Arg.mIsVolatile = true;
+  }
+}
+
+/// Copies the fields of \p KernelCode that mCodeProps mirrors, one for one.
+void MetadataStreamer::emitKernelCodeProps(
+    const amd_kernel_code_t &KernelCode) {
+  auto &Props = CodeObjectMetadata.mKernels.back().mCodeProps;
+  Props.mKernargSegmentSize = KernelCode.kernarg_segment_byte_size;
+  Props.mKernargSegmentAlign = KernelCode.kernarg_segment_alignment;
+  Props.mWorkgroupGroupSegmentSize =
+      KernelCode.workgroup_group_segment_byte_size;
+  Props.mGroupSegmentAlign = KernelCode.group_segment_alignment;
+  Props.mWorkitemPrivateSegmentSize =
+      KernelCode.workitem_private_segment_byte_size;
+  Props.mPrivateSegmentAlign = KernelCode.private_segment_alignment;
+  Props.mWavefrontNumSGPRs = KernelCode.wavefront_sgpr_count;
+  Props.mWorkitemNumVGPRs = KernelCode.workitem_vgpr_count;
+  Props.mWavefrontSize = KernelCode.wavefront_size;
+}
+
+/// Fills mDebugProps of the last kernel in mKernels from \p KernelCode, but
+/// only if AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED is set in code_properties.
+void MetadataStreamer::emitKernelDebugProps(
+    const amd_kernel_code_t &KernelCode) {
+  if (!(KernelCode.code_properties & AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED))
+    return;
+  auto &Debug = CodeObjectMetadata.mKernels.back().mDebugProps;
+  Debug.mReservedNumVGPRs = KernelCode.reserved_vgpr_count;
+  Debug.mReservedFirstVGPR = KernelCode.reserved_vgpr_first;
+  Debug.mPrivateSegmentBufferSGPR =
+      KernelCode.debug_private_segment_buffer_sgpr;
+  Debug.mWavefrontPrivateSegmentOffsetSGPR =
+      KernelCode.debug_wavefront_private_segment_offset_sgpr;
+  // FIXME: Need to pass down debugger ABI version through features. This is ok
+  // for now because we only have one version.
+  Debug.mDebuggerABIVersion.push_back(1);
+  Debug.mDebuggerABIVersion.push_back(0);
+}
+
+void MetadataStreamer::begin(const Module &Mod) {
+  AMDGPUASI = getAMDGPUAS(Mod); // Address spaces consulted by the helpers.
+  emitVersion();                // Module-level entries: version, then printf.
+  emitPrintf(Mod);
+}
+
+/// Emits a metadata record for \p Func if it is an AMDGPU_KERNEL function.
+void MetadataStreamer::emitKernel(const Function &Func,
+                                  const amd_kernel_code_t &KernelCode) {
+  if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
+    return;
+  // The emit* helpers below all write to mKernels.back(), so the new record
+  // must be in place before any of them runs.
+  CodeObjectMetadata.mKernels.push_back(Kernel::Metadata());
+  CodeObjectMetadata.mKernels.back().mName = Func.getName();
+  emitKernelLanguage(Func);
+  emitKernelAttrs(Func);
+  emitKernelArgs(Func);
+  emitKernelCodeProps(KernelCode);
+  emitKernelDebugProps(KernelCode);
+}
+
+/// Serializes the collected metadata to YAML. On success the string is also
+/// handed to dump() / verify() when the corresponding flag is set.
+ErrorOr<std::string> MetadataStreamer::toYamlString() {
+  std::string Yaml;
+  if (auto Status = Metadata::toYamlString(CodeObjectMetadata, Yaml))
+    return Status;
+  if (DumpCodeObjectMetadata)
+    dump(Yaml);
+  if (VerifyCodeObjectMetadata)
+    verify(Yaml);
+  return Yaml;
+}
+
+/// Parses \p YamlString into CodeObjectMetadata, then serializes it again.
+ErrorOr<std::string> MetadataStreamer::toYamlString(StringRef YamlString) {
+  if (auto EC = Metadata::fromYamlString(YamlString, CodeObjectMetadata))
+    return EC;
+  return toYamlString();
+}
+
+} // end namespace CodeObject
+} // end namespace AMDGPU
+} // end namespace llvm
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
new file mode 100644
index 000000000000..8d4c51763f63
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
@@ -0,0 +1,99 @@
+//===--- AMDGPUCodeObjectMetadataStreamer.h ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Code Object Metadata Streamer.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
+
+#include "AMDGPU.h"
+#include "AMDGPUCodeObjectMetadata.h"
+#include "AMDKernelCodeT.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorOr.h"
+
+namespace llvm {
+
+class Argument;
+class DataLayout;
+class Function;
+class MDNode;
+class Module;
+class Type;
+
+namespace AMDGPU {
+namespace CodeObject {
+
+class MetadataStreamer final {
+private:
+  Metadata CodeObjectMetadata; // Accumulated by the emit* members.
+  AMDGPUAS AMDGPUASI;          // Assigned in begin() from the module.
+  /// Invoked by toYamlString() when DumpCodeObjectMetadata is set.
+  void dump(StringRef YamlString) const;
+  /// Invoked by toYamlString() when VerifyCodeObjectMetadata is set.
+  void verify(StringRef YamlString) const;
+  /// Converts the access-qualifier string \p AccQual to an AccessQualifier.
+  AccessQualifier getAccessQualifier(StringRef AccQual) const;
+  /// Maps \p AddressSpace to its qualifier by comparing against AMDGPUASI.
+  AddressSpaceQualifier getAddressSpaceQualifer(unsigned AddressSpace) const;
+  /// Classifies an argument: pipe, sampler, queue, image, pointer or by-value.
+  ValueKind getValueKind(Type *Ty, StringRef TypeQual,
+                         StringRef BaseTypeName) const;
+  /// Maps \p Ty to a ValueType; a \p TypeName starting with "u" is unsigned.
+  ValueType getValueType(Type *Ty, StringRef TypeName) const;
+  /// Spells \p Ty as a type name, e.g. "uint4"; unhandled types are "unknown".
+  std::string getTypeName(Type *Ty, bool Signed) const;
+  /// Returns the three operands of \p Node, or an empty vector otherwise.
+  std::vector<uint32_t> getWorkGroupDimensions(MDNode *Node) const;
+  /// Appends the metadata major and minor version numbers to mVersion.
+  void emitVersion();
+  /// Copies the "llvm.printf.fmts" strings of \p Mod into mPrintf.
+  void emitPrintf(const Module &Mod);
+  /// Sets language and version of the last kernel from "opencl.ocl.version".
+  void emitKernelLanguage(const Function &Func);
+  /// Sets work-group-size and vec_type_hint attributes of the last kernel.
+  void emitKernelAttrs(const Function &Func);
+  /// Emits every argument of \p Func, then the hidden OpenCL arguments.
+  void emitKernelArgs(const Function &Func);
+  /// Emits \p Arg using the parent function's "kernel_arg_*" metadata.
+  void emitKernelArg(const Argument &Arg);
+  /// Appends one argument record; the defaulted strings may be left empty.
+  void emitKernelArg(const DataLayout &DL, Type *Ty, ValueKind ValueKind,
+                     StringRef TypeQual = "", StringRef BaseTypeName = "",
+                     StringRef AccQual = "", StringRef Name = "",
+                     StringRef TypeName = "");
+  /// Copies segment, register and wavefront fields into mCodeProps.
+  void emitKernelCodeProps(const amd_kernel_code_t &KernelCode);
+  /// Fills mDebugProps when \p KernelCode has debug support flagged.
+  void emitKernelDebugProps(const amd_kernel_code_t &KernelCode);
+  // Call order: begin(), emitKernel() per function, end(), toYamlString().
+public:
+  MetadataStreamer() = default;
+  ~MetadataStreamer() = default;
+  /// Caches the address spaces of \p Mod and emits version and printf info.
+  void begin(const Module &Mod);
+  /// Currently does nothing.
+  void end() {}
+  /// Appends a kernel record for \p Func if it is an AMDGPU_KERNEL function.
+  void emitKernel(const Function &Func, const amd_kernel_code_t &KernelCode);
+  /// Serializes the collected metadata to a YAML string.
+  ErrorOr<std::string> toYamlString();
+  /// Parses \p YamlString into the metadata, then serializes it again.
+  ErrorOr<std::string> toYamlString(StringRef YamlString);
+};
+
+} // end namespace CodeObject
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 1847d7a67328..073d19422e86 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -1,16 +1,20 @@
-//===-- AMDGPUELFObjectWriter.cpp - AMDGPU ELF Writer ----------------------==//
+//===- AMDGPUELFObjectWriter.cpp - AMDGPU ELF Writer ----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
-/// \file
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUMCTargetDesc.h"
 #include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
 
@@ -19,20 +23,21 @@ namespace {
 class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter {
 public:
   AMDGPUELFObjectWriter(bool Is64Bit, bool HasRelocationAddend);
+
 protected:
   unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
                         const MCFixup &Fixup, bool IsPCRel) const override;
 };
 
 
-} // End anonymous namespace
+} // end anonymous namespace
 
 AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit,
                                              bool HasRelocationAddend)
   : MCELFObjectTargetWriter(Is64Bit,
                             ELF::ELFOSABI_AMDGPU_HSA,
                             ELF::EM_AMDGPU,
-                            HasRelocationAddend) { }
+                            HasRelocationAddend) {}
 
 unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
                                              const MCValue &Target,
@@ -77,7 +82,6 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
   llvm_unreachable("unhandled relocation type");
 }
 
-
 MCObjectWriter *llvm::createAMDGPUELFObjectWriter(bool Is64Bit,
                                                   bool HasRelocationAddend,
                                                   raw_pwrite_stream &OS) {
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index 548bad56e174..f80b5f3a6dba 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -54,11 +54,17 @@ MCObjectWriter *createAMDGPUELFObjectWriter(bool Is64Bit,
 
 #define GET_REGINFO_ENUM
 #include "AMDGPUGenRegisterInfo.inc"
+#undef GET_REGINFO_ENUM
 
 #define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_OPERAND_ENUM
 #include "AMDGPUGenInstrInfo.inc"
+#undef GET_INSTRINFO_OPERAND_ENUM
+#undef GET_INSTRINFO_ENUM
+
 
 #define GET_SUBTARGETINFO_ENUM
 #include "AMDGPUGenSubtargetInfo.inc"
+#undef GET_SUBTARGETINFO_ENUM
 
 #endif
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
deleted file mode 100644
index 95387ad1627c..000000000000
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
+++ /dev/null
@@ -1,408 +0,0 @@
-//===-- AMDGPURuntimeMD.cpp - Generates runtime metadata ------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-///
-/// Generates AMDGPU runtime metadata for YAML mapping.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#include "AMDGPU.h"
-#include "AMDGPURuntimeMetadata.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/YAMLTraits.h"
-#include <vector>
-#include "AMDGPURuntimeMD.h"
-
-using namespace llvm;
-using namespace ::AMDGPU::RuntimeMD;
-
-static cl::opt<bool>
-DumpRuntimeMD("amdgpu-dump-rtmd",
-              cl::desc("Dump AMDGPU runtime metadata"));
-
-static cl::opt<bool>
-CheckRuntimeMDParser("amdgpu-check-rtmd-parser", cl::Hidden,
-                     cl::desc("Check AMDGPU runtime metadata YAML parser"));
-
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint8_t)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
-LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
-LLVM_YAML_IS_SEQUENCE_VECTOR(KernelArg::Metadata)
-
-namespace llvm {
-namespace yaml {
-
-template <> struct MappingTraits<KernelArg::Metadata> {
-  static void mapping(IO &YamlIO, KernelArg::Metadata &A) {
-    YamlIO.mapRequired(KeyName::ArgSize, A.Size);
-    YamlIO.mapRequired(KeyName::ArgAlign, A.Align);
-    YamlIO.mapOptional(KeyName::ArgPointeeAlign, A.PointeeAlign, 0U);
-    YamlIO.mapRequired(KeyName::ArgKind, A.Kind);
-    YamlIO.mapRequired(KeyName::ArgValueType, A.ValueType);
-    YamlIO.mapOptional(KeyName::ArgTypeName, A.TypeName, std::string());
-    YamlIO.mapOptional(KeyName::ArgName, A.Name, std::string());
-    YamlIO.mapOptional(KeyName::ArgAddrQual, A.AddrQual, INVALID_ADDR_QUAL);
-    YamlIO.mapOptional(KeyName::ArgAccQual, A.AccQual, INVALID_ACC_QUAL);
-    YamlIO.mapOptional(KeyName::ArgIsVolatile, A.IsVolatile, uint8_t(0));
-    YamlIO.mapOptional(KeyName::ArgIsConst, A.IsConst, uint8_t(0));
-    YamlIO.mapOptional(KeyName::ArgIsRestrict, A.IsRestrict, uint8_t(0));
-    YamlIO.mapOptional(KeyName::ArgIsPipe, A.IsPipe, uint8_t(0));
-  }
-  static const bool flow = true;
-};
-
-template <> struct MappingTraits<Kernel::Metadata> {
-  static void mapping(IO &YamlIO, Kernel::Metadata &K) {
-    YamlIO.mapRequired(KeyName::KernelName, K.Name);
-    YamlIO.mapOptional(KeyName::Language, K.Language, std::string());
-    YamlIO.mapOptional(KeyName::LanguageVersion, K.LanguageVersion);
-    YamlIO.mapOptional(KeyName::ReqdWorkGroupSize, K.ReqdWorkGroupSize);
-    YamlIO.mapOptional(KeyName::WorkGroupSizeHint, K.WorkGroupSizeHint);
-    YamlIO.mapOptional(KeyName::VecTypeHint, K.VecTypeHint, std::string());
-    YamlIO.mapOptional(KeyName::KernelIndex, K.KernelIndex,
-        INVALID_KERNEL_INDEX);
-    YamlIO.mapOptional(KeyName::NoPartialWorkGroups, K.NoPartialWorkGroups,
-        uint8_t(0));
-    YamlIO.mapRequired(KeyName::Args, K.Args);
-  }
-  static const bool flow = true;
-};
-
-template <> struct MappingTraits<Program::Metadata> {
-  static void mapping(IO &YamlIO, Program::Metadata &Prog) {
-    YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq);
-    YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo);
-    YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels);
-  }
-  static const bool flow = true;
-};
-
-} // end namespace yaml
-} // end namespace llvm
-
-// Get a vector of three integer values from MDNode \p Node;
-static std::vector<uint32_t> getThreeInt32(MDNode *Node) {
-  assert(Node->getNumOperands() == 3);
-  std::vector<uint32_t> V;
-  for (const MDOperand &Op : Node->operands()) {
-    const ConstantInt *CI = mdconst::extract<ConstantInt>(Op);
-    V.push_back(CI->getZExtValue());
-  }
-  return V;
-}
-
-static std::string getOCLTypeName(Type *Ty, bool Signed) {
-  switch (Ty->getTypeID()) {
-  case Type::HalfTyID:
-    return "half";
-  case Type::FloatTyID:
-    return "float";
-  case Type::DoubleTyID:
-    return "double";
-  case Type::IntegerTyID: {
-    if (!Signed)
-      return (Twine('u') + getOCLTypeName(Ty, true)).str();
-    unsigned BW = Ty->getIntegerBitWidth();
-    switch (BW) {
-    case 8:
-      return "char";
-    case 16:
-      return "short";
-    case 32:
-      return "int";
-    case 64:
-      return "long";
-    default:
-      return (Twine('i') + Twine(BW)).str();
-    }
-  }
-  case Type::VectorTyID: {
-    VectorType *VecTy = cast<VectorType>(Ty);
-    Type *EleTy = VecTy->getElementType();
-    unsigned Size = VecTy->getVectorNumElements();
-    return (Twine(getOCLTypeName(EleTy, Signed)) + Twine(Size)).str();
-  }
-  default:
-    return "unknown";
-  }
-}
-
-static KernelArg::ValueType getRuntimeMDValueType(
-  Type *Ty, StringRef TypeName) {
-  switch (Ty->getTypeID()) {
-  case Type::HalfTyID:
-    return KernelArg::F16;
-  case Type::FloatTyID:
-    return KernelArg::F32;
-  case Type::DoubleTyID:
-    return KernelArg::F64;
-  case Type::IntegerTyID: {
-    bool Signed = !TypeName.startswith("u");
-    switch (Ty->getIntegerBitWidth()) {
-    case 8:
-      return Signed ? KernelArg::I8 : KernelArg::U8;
-    case 16:
-      return Signed ? KernelArg::I16 : KernelArg::U16;
-    case 32:
-      return Signed ? KernelArg::I32 : KernelArg::U32;
-    case 64:
-      return Signed ? KernelArg::I64 : KernelArg::U64;
-    default:
-      // Runtime does not recognize other integer types. Report as struct type.
-      return KernelArg::Struct;
-    }
-  }
-  case Type::VectorTyID:
-    return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName);
-  case Type::PointerTyID:
-    return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName);
-  default:
-    return KernelArg::Struct;
-  }
-}
-
-static KernelArg::AddressSpaceQualifer getRuntimeAddrSpace(
-    AMDGPUAS::AddressSpaces A) {
-  switch (A) {
-  case AMDGPUAS::GLOBAL_ADDRESS:
-    return KernelArg::Global;
-  case AMDGPUAS::CONSTANT_ADDRESS:
-    return KernelArg::Constant;
-  case AMDGPUAS::LOCAL_ADDRESS:
-    return KernelArg::Local;
-  case AMDGPUAS::FLAT_ADDRESS:
-    return KernelArg::Generic;
-  case AMDGPUAS::REGION_ADDRESS:
-    return KernelArg::Region;
-  default:
-    return KernelArg::Private;
-  }
-}
-
-static KernelArg::Metadata getRuntimeMDForKernelArg(const DataLayout &DL,
-    Type *T, KernelArg::Kind Kind, StringRef BaseTypeName = "",
-    StringRef TypeName = "", StringRef ArgName = "", StringRef TypeQual = "",
-    StringRef AccQual = "") {
-
-  KernelArg::Metadata Arg;
-
-  // Set ArgSize and ArgAlign.
-  Arg.Size = DL.getTypeAllocSize(T);
-  Arg.Align = DL.getABITypeAlignment(T);
-  if (auto PT = dyn_cast<PointerType>(T)) {
-    auto ET = PT->getElementType();
-    if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized())
-      Arg.PointeeAlign = DL.getABITypeAlignment(ET);
-  }
-
-  // Set ArgTypeName.
-  Arg.TypeName = TypeName;
-
-  // Set ArgName.
-  Arg.Name = ArgName;
-
-  // Set ArgIsVolatile, ArgIsRestrict, ArgIsConst and ArgIsPipe.
-  SmallVector<StringRef, 1> SplitQ;
-  TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */);
-
-  for (StringRef KeyName : SplitQ) {
-    auto *P = StringSwitch<uint8_t *>(KeyName)
-      .Case("volatile", &Arg.IsVolatile)
-      .Case("restrict", &Arg.IsRestrict)
-      .Case("const",    &Arg.IsConst)
-      .Case("pipe",     &Arg.IsPipe)
-      .Default(nullptr);
-    if (P)
-      *P = 1;
-  }
-
-  // Set ArgKind.
-  Arg.Kind = Kind;
-
-  // Set ArgValueType.
-  Arg.ValueType = getRuntimeMDValueType(T, BaseTypeName);
-
-  // Set ArgAccQual.
-  if (!AccQual.empty()) {
-    Arg.AccQual = StringSwitch<KernelArg::AccessQualifer>(AccQual)
-      .Case("read_only",  KernelArg::ReadOnly)
-      .Case("write_only", KernelArg::WriteOnly)
-      .Case("read_write", KernelArg::ReadWrite)
-      .Default(KernelArg::AccNone);
-  }
-
-  // Set ArgAddrQual.
-  if (auto *PT = dyn_cast<PointerType>(T)) {
-    Arg.AddrQual = getRuntimeAddrSpace(static_cast<AMDGPUAS::AddressSpaces>(
-        PT->getAddressSpace()));
-  }
-
-  return Arg;
-}
-
-static Kernel::Metadata getRuntimeMDForKernel(const Function &F) {
-  Kernel::Metadata Kernel;
-  Kernel.Name = F.getName();
-  auto &M = *F.getParent();
-
-  // Set Language and LanguageVersion.
-  if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
-    if (MD->getNumOperands() != 0) {
-      auto Node = MD->getOperand(0);
-      if (Node->getNumOperands() > 1) {
-        Kernel.Language = "OpenCL C";
-        uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
-                         ->getZExtValue();
-        uint16_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
-                         ->getZExtValue();
-        Kernel.LanguageVersion.push_back(Major);
-        Kernel.LanguageVersion.push_back(Minor);
-      }
-    }
-  }
-
-  const DataLayout &DL = F.getParent()->getDataLayout();
-  for (auto &Arg : F.args()) {
-    unsigned I = Arg.getArgNo();
-    Type *T = Arg.getType();
-    auto TypeName = dyn_cast<MDString>(F.getMetadata(
-        "kernel_arg_type")->getOperand(I))->getString();
-    auto BaseTypeName = cast<MDString>(F.getMetadata(
-        "kernel_arg_base_type")->getOperand(I))->getString();
-    StringRef ArgName;
-    if (auto ArgNameMD = F.getMetadata("kernel_arg_name"))
-      ArgName = cast<MDString>(ArgNameMD->getOperand(I))->getString();
-    auto TypeQual = cast<MDString>(F.getMetadata(
-        "kernel_arg_type_qual")->getOperand(I))->getString();
-    auto AccQual = cast<MDString>(F.getMetadata(
-        "kernel_arg_access_qual")->getOperand(I))->getString();
-    KernelArg::Kind Kind;
-    if (TypeQual.find("pipe") != StringRef::npos)
-      Kind = KernelArg::Pipe;
-    else Kind = StringSwitch<KernelArg::Kind>(BaseTypeName)
-      .Case("sampler_t", KernelArg::Sampler)
-      .Case("queue_t",   KernelArg::Queue)
-      .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
-             "image2d_t" , "image2d_array_t",  KernelArg::Image)
-      .Cases("image2d_depth_t", "image2d_array_depth_t",
-             "image2d_msaa_t", "image2d_array_msaa_t",
-             "image2d_msaa_depth_t",  KernelArg::Image)
-      .Cases("image2d_array_msaa_depth_t", "image3d_t",
-             KernelArg::Image)
-      .Default(isa<PointerType>(T) ?
-                   (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ?
-                   KernelArg::DynamicSharedPointer :
-                   KernelArg::GlobalBuffer) :
-                   KernelArg::ByValue);
-    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, T, Kind,
-        BaseTypeName, TypeName, ArgName, TypeQual, AccQual));
-  }
-
-  // Emit hidden kernel arguments for OpenCL kernels.
-  if (F.getParent()->getNamedMetadata("opencl.ocl.version")) {
-    auto Int64T = Type::getInt64Ty(F.getContext());
-    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
-        KernelArg::HiddenGlobalOffsetX));
-    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
-        KernelArg::HiddenGlobalOffsetY));
-    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
-        KernelArg::HiddenGlobalOffsetZ));
-    if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) {
-      auto Int8PtrT = Type::getInt8PtrTy(F.getContext(),
-          KernelArg::Global);
-      Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int8PtrT,
-          KernelArg::HiddenPrintfBuffer));
-    }
-  }
-
-  // Set ReqdWorkGroupSize, WorkGroupSizeHint, and VecTypeHint.
-  if (auto RWGS = F.getMetadata("reqd_work_group_size"))
-    Kernel.ReqdWorkGroupSize = getThreeInt32(RWGS);
-
-  if (auto WGSH = F.getMetadata("work_group_size_hint"))
-    Kernel.WorkGroupSizeHint = getThreeInt32(WGSH);
-
-  if (auto VTH = F.getMetadata("vec_type_hint"))
-    Kernel.VecTypeHint = getOCLTypeName(cast<ValueAsMetadata>(
-      VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
-      VTH->getOperand(1))->getZExtValue());
-
-  return Kernel;
-}
-
-Program::Metadata::Metadata(const std::string &YAML) {
-  yaml::Input Input(YAML);
-  Input >> *this;
-}
-
-std::string Program::Metadata::toYAML(void) {
-  std::string Text;
-  raw_string_ostream Stream(Text);
-  yaml::Output Output(Stream, nullptr, INT_MAX /* do not wrap line */);
-  Output << *this;
-  return Stream.str();
-}
-
-Program::Metadata Program::Metadata::fromYAML(const std::string &S) {
-  return Program::Metadata(S);
-}
-
-// Check if the YAML string can be parsed.
-static void checkRuntimeMDYAMLString(const std::string &YAML) {
-  auto P = Program::Metadata::fromYAML(YAML);
-  auto S = P.toYAML();
-  llvm::errs() << "AMDGPU runtime metadata parser test "
-               << (YAML == S ? "passes" : "fails") << ".\n";
-  if (YAML != S) {
-    llvm::errs() << "First output: " << YAML << '\n'
-                 << "Second output: " << S << '\n';
-  }
-}
-
-std::string llvm::getRuntimeMDYAMLString(Module &M) {
-  Program::Metadata Prog;
-  Prog.MDVersionSeq.push_back(MDVersion);
-  Prog.MDVersionSeq.push_back(MDRevision);
-
-  // Set PrintfInfo.
-  if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) {
-    for (unsigned I = 0; I < MD->getNumOperands(); ++I) {
-      auto Node = MD->getOperand(I);
-      if (Node->getNumOperands() > 0)
-        Prog.PrintfInfo.push_back(cast<MDString>(Node->getOperand(0))
-            ->getString());
-    }
-  }
-
-  // Set Kernels.
-  for (auto &F: M.functions()) {
-    if (!F.getMetadata("kernel_arg_type"))
-      continue;
-    Prog.Kernels.emplace_back(getRuntimeMDForKernel(F));
-  }
-
-  auto YAML = Prog.toYAML();
-
-  if (DumpRuntimeMD)
-    llvm::errs() << "AMDGPU runtime metadata:\n" << YAML << '\n';
-
-  if (CheckRuntimeMDParser)
-    checkRuntimeMDYAMLString(YAML);
-
-  return YAML;
-}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h
deleted file mode 100644
index a92fdd4bebc2..000000000000
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h
+++ /dev/null
@@ -1,26 +0,0 @@
-//===- AMDGPURuntimeMD.h - Generate runtime metadata ---------------*- C++ -*-//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares functions for generating runtime metadata.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H
-#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H
-
-#include <string>
-
-namespace llvm {
-class Module;
-
-// Get runtime metadata as YAML string.
-std::string getRuntimeMDYAMLString(Module &M);
-
-}
-#endif
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 3392183d33c3..8dc863f723e2 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -27,7 +27,6 @@
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/FormattedStream.h"
-#include "AMDGPURuntimeMD.h"
 
 namespace llvm {
 #include "AMDGPUPTNote.h"
@@ -36,9 +35,27 @@ namespace llvm {
 using namespace llvm;
 using namespace llvm::AMDGPU;
 
+//===----------------------------------------------------------------------===//
+// AMDGPUTargetStreamer
+//===----------------------------------------------------------------------===//
+
 AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S)
     : MCTargetStreamer(S) {}
 
+void AMDGPUTargetStreamer::EmitStartOfCodeObjectMetadata(const Module &Mod) {
+  CodeObjectMetadataStreamer.begin(Mod);
+}
+
+void AMDGPUTargetStreamer::EmitKernelCodeObjectMetadata(
+    const Function &Func, const amd_kernel_code_t &KernelCode) {
+  CodeObjectMetadataStreamer.emitKernel(Func, KernelCode);
+}
+
+void AMDGPUTargetStreamer::EmitEndOfCodeObjectMetadata() {
+  CodeObjectMetadataStreamer.end();
+  EmitCodeObjectMetadata(CodeObjectMetadataStreamer.toYamlString().get());
+}
+
 //===----------------------------------------------------------------------===//
 // AMDGPUTargetAsmStreamer
 //===----------------------------------------------------------------------===//
@@ -93,16 +110,16 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal(
   OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
 }
 
-void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(Module &M) {
-  OS << "\t.amdgpu_runtime_metadata\n";
-  OS << getRuntimeMDYAMLString(M);
-  OS << "\n\t.end_amdgpu_runtime_metadata\n";
-}
+bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata(StringRef YamlString) {
+  auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString);
+  if (!VerifiedYamlString)
+    return false;
 
-void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(StringRef Metadata) {
-  OS << "\t.amdgpu_runtime_metadata";
-  OS << Metadata;
-  OS << "\t.end_amdgpu_runtime_metadata\n";
+  OS << '\t' << AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin << '\n';
+  OS << VerifiedYamlString.get();
+  OS << '\t' << AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd << '\n';
+
+  return true;
 }
 
 //===----------------------------------------------------------------------===//
@@ -116,22 +133,21 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
   return static_cast<MCELFStreamer &>(Streamer);
 }
 
-void
-AMDGPUTargetELFStreamer::EmitAMDGPUNote(const MCExpr* DescSZ,
-                                        PT_NOTE::NoteType Type,
-                              std::function<void(MCELFStreamer &)> EmitDesc) {
+void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
+    const MCExpr *DescSZ, ElfNote::NoteType Type,
+    function_ref<void(MCELFStreamer &)> EmitDesc) {
   auto &S = getStreamer();
   auto &Context = S.getContext();
 
-  auto NameSZ = sizeof(PT_NOTE::NoteName);
+  auto NameSZ = sizeof(ElfNote::NoteName);
 
   S.PushSection();
   S.SwitchSection(Context.getELFSection(
-    PT_NOTE::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
+    ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
   S.EmitIntValue(NameSZ, 4);                                  // namesz
   S.EmitValue(DescSZ, 4);                                     // descz
-  S.EmitIntValue(Type, 4); // type
-  S.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ));          // name
+  S.EmitIntValue(Type, 4);                                    // type
+  S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ));          // name
   S.EmitValueToAlignment(4, 0, 1, 0);                         // padding 0
   EmitDesc(S);                                                // desc
   S.EmitValueToAlignment(4, 0, 1, 0);                         // padding 0
@@ -144,7 +160,7 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
 
   EmitAMDGPUNote(
     MCConstantExpr::create(8, getContext()),
-    PT_NOTE::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
+    ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
     [&](MCELFStreamer &OS){
       OS.EmitIntValue(Major, 4);
       OS.EmitIntValue(Minor, 4);
@@ -160,14 +176,14 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
                                                        StringRef ArchName) {
   uint16_t VendorNameSize = VendorName.size() + 1;
   uint16_t ArchNameSize = ArchName.size() + 1;
-  
+
   unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
     sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
     VendorNameSize + ArchNameSize;
 
   EmitAMDGPUNote(
     MCConstantExpr::create(DescSZ, getContext()),
-    PT_NOTE::NT_AMDGPU_HSA_ISA,
+    ElfNote::NT_AMDGPU_HSA_ISA,
     [&](MCELFStreamer &OS) {
       OS.EmitIntValue(VendorNameSize, 2);
       OS.EmitIntValue(ArchNameSize, 2);
@@ -216,7 +232,11 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
   Symbol->setBinding(ELF::STB_GLOBAL);
 }
 
-void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(StringRef Metadata) {
+bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(StringRef YamlString) {
+  auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString);
+  if (!VerifiedYamlString)
+    return false;
+
   // Create two labels to mark the beginning and end of the desc field
   // and a MCExpr to calculate the size of the desc field.
   auto &Context = getContext();
@@ -228,15 +248,13 @@ void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(StringRef Metadata) {
 
   EmitAMDGPUNote(
     DescSZ,
-    PT_NOTE::NT_AMDGPU_HSA_RUNTIME_METADATA,
+    ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_METADATA,
     [&](MCELFStreamer &OS) {
       OS.EmitLabel(DescBegin);
-      OS.EmitBytes(Metadata);
+      OS.EmitBytes(VerifiedYamlString.get());
       OS.EmitLabel(DescEnd);
     }
   );
-}
 
-void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(Module &M) {
-  EmitRuntimeMetadata(getRuntimeMDYAMLString(M));
+  return true;
 }
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index e2f20586903d..5c588bbded9c 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -10,6 +10,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
 #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
 
+#include "AMDGPUCodeObjectMetadataStreamer.h"
 #include "AMDKernelCodeT.h"
 #include "llvm/MC/MCStreamer.h"
 
@@ -26,6 +27,7 @@ class Type;
 
 class AMDGPUTargetStreamer : public MCTargetStreamer {
 protected:
+  AMDGPU::CodeObject::MetadataStreamer CodeObjectMetadataStreamer;
   MCContext &getContext() const { return Streamer.getContext(); }
 
 public:
@@ -46,12 +48,18 @@ public:
 
   virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
 
-  virtual void EmitRuntimeMetadata(Module &M) = 0;
+  virtual void EmitStartOfCodeObjectMetadata(const Module &Mod);
 
-  virtual void EmitRuntimeMetadata(StringRef Metadata) = 0;
+  virtual void EmitKernelCodeObjectMetadata(
+      const Function &Func, const amd_kernel_code_t &KernelCode);
+
+  virtual void EmitEndOfCodeObjectMetadata();
+
+  /// \returns True on success, false on failure.
+  virtual bool EmitCodeObjectMetadata(StringRef YamlString) = 0;
 };
 
-class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
+class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
   formatted_raw_ostream &OS;
 public:
   AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
@@ -70,17 +78,16 @@ public:
 
   void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
 
-  void EmitRuntimeMetadata(Module &M) override;
-
-  void EmitRuntimeMetadata(StringRef Metadata) override;
+  /// \returns True on success, false on failure.
+  bool EmitCodeObjectMetadata(StringRef YamlString) override;
 };
 
-class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
+class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
   MCStreamer &Streamer;
 
-  void EmitAMDGPUNote(const MCExpr* DescSize,
-                      AMDGPU::PT_NOTE::NoteType Type,
-                      std::function<void(MCELFStreamer &)> EmitDesc);
+  void EmitAMDGPUNote(const MCExpr *DescSize,
+                      AMDGPU::ElfNote::NoteType Type,
+                      function_ref<void(MCELFStreamer &)> EmitDesc);
 
 public:
   AMDGPUTargetELFStreamer(MCStreamer &S);
@@ -102,9 +109,8 @@ public:
 
   void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
 
-  void EmitRuntimeMetadata(Module &M) override;
-
-  void EmitRuntimeMetadata(StringRef Metadata) override;
+  /// \returns True on success, false on failure.
+  bool EmitCodeObjectMetadata(StringRef YamlString) override;
 };
 
 }
diff --git a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 8a6d00ce69ed..09e3efad10af 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -1,13 +1,12 @@
-
 add_llvm_library(LLVMAMDGPUDesc
   AMDGPUAsmBackend.cpp
+  AMDGPUCodeObjectMetadataStreamer.cpp
   AMDGPUELFObjectWriter.cpp
   AMDGPUELFStreamer.cpp
+  AMDGPUMCAsmInfo.cpp
   AMDGPUMCCodeEmitter.cpp
   AMDGPUMCTargetDesc.cpp
-  AMDGPUMCAsmInfo.cpp
-  AMDGPURuntimeMD.cpp
   AMDGPUTargetStreamer.cpp
   R600MCCodeEmitter.cpp
   SIMCCodeEmitter.cpp
-  )
+)
diff --git a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index 0c5bb0648a16..bda0928036fd 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -220,13 +220,35 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
     Imm = MO.getImm();
   }
 
-  switch (AMDGPU::getOperandSize(OpInfo)) {
-  case 4:
+  switch (OpInfo.OperandType) {
+  case AMDGPU::OPERAND_REG_IMM_INT32:
+  case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
     return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
-  case 8:
+
+  case AMDGPU::OPERAND_REG_IMM_INT64:
+  case AMDGPU::OPERAND_REG_IMM_FP64:
+  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
     return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
-  case 2:
+
+  case AMDGPU::OPERAND_REG_IMM_INT16:
+  case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+    // FIXME: Is this correct? What do inline immediates do for an f16 src on
+    // SI, which does not have f16 support?
     return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
+
+  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
+    uint16_t Lo16 = static_cast<uint16_t>(Imm);
+    assert(Lo16 == static_cast<uint16_t>(Imm >> 16));
+    uint32_t Encoding = getLit16Encoding(Lo16, STI);
+    assert(Encoding != 255 && "packed constants can only be inline immediates");
+    return Encoding;
+  }
   default:
     llvm_unreachable("invalid operand size");
   }