Diffstat (limited to 'llvm/lib/Target/AMDGPU/Utils')
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp      |  112
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h        |   43
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp      | 1334
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h        |  691
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp   |  725
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h     |  135
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h    |  155
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp |  179
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h   |   35
9 files changed, 3409 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
new file mode 100644
index 000000000000..075e08986c0c
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -0,0 +1,112 @@
+//===-- AMDGPUAsmUtils.cpp - AsmParser/InstPrinter common -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUAsmUtils.h"
+
+namespace llvm {
+namespace AMDGPU {
+namespace SendMsg {
+
+// This must be in sync with llvm::AMDGPU::SendMsg::Id enum members, see SIDefines.h.
+const char* const IdSymbolic[] = {
+ nullptr,
+ "MSG_INTERRUPT",
+ "MSG_GS",
+ "MSG_GS_DONE",
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ "MSG_GS_ALLOC_REQ",
+ "MSG_GET_DOORBELL",
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ "MSG_SYSMSG"
+};
+
+// These two must be in sync with llvm::AMDGPU::SendMsg::Op enum members, see SIDefines.h.
+const char* const OpSysSymbolic[] = {
+ nullptr,
+ "SYSMSG_OP_ECC_ERR_INTERRUPT",
+ "SYSMSG_OP_REG_RD",
+ "SYSMSG_OP_HOST_TRAP_ACK",
+ "SYSMSG_OP_TTRACE_PC"
+};
+
+const char* const OpGsSymbolic[] = {
+ "GS_OP_NOP",
+ "GS_OP_CUT",
+ "GS_OP_EMIT",
+ "GS_OP_EMIT_CUT"
+};
+
+} // namespace SendMsg
+
+namespace Hwreg {
+
+// This must be in sync with llvm::AMDGPU::Hwreg::ID_SYMBOLIC_FIRST_/LAST_, see SIDefines.h.
+const char* const IdSymbolic[] = {
+ nullptr,
+ "HW_REG_MODE",
+ "HW_REG_STATUS",
+ "HW_REG_TRAPSTS",
+ "HW_REG_HW_ID",
+ "HW_REG_GPR_ALLOC",
+ "HW_REG_LDS_ALLOC",
+ "HW_REG_IB_STS",
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ "HW_REG_SH_MEM_BASES",
+ "HW_REG_TBA_LO",
+ "HW_REG_TBA_HI",
+ "HW_REG_TMA_LO",
+ "HW_REG_TMA_HI",
+ "HW_REG_FLAT_SCR_LO",
+ "HW_REG_FLAT_SCR_HI",
+ "HW_REG_XNACK_MASK",
+ nullptr, // HW_ID1, no predictable values
+ nullptr, // HW_ID2, no predictable values
+ "HW_REG_POPS_PACKER"
+};
+
+} // namespace Hwreg
+
+namespace Swizzle {
+
+// This must be in sync with llvm::AMDGPU::Swizzle::Id enum members, see SIDefines.h.
+const char* const IdSymbolic[] = {
+ "QUAD_PERM",
+ "BITMASK_PERM",
+ "SWAP",
+ "REVERSE",
+ "BROADCAST",
+};
+
+} // namespace Swizzle
+
+namespace VGPRIndexMode {
+
+// This must be in sync with llvm::AMDGPU::VGPRIndexMode::Id enum members, see SIDefines.h.
+const char* const IdSymbolic[] = {
+ "SRC0",
+ "SRC1",
+ "SRC2",
+ "DST",
+};
+
+} // namespace VGPRIndexMode
+
+} // namespace AMDGPU
+} // namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
new file mode 100644
index 000000000000..cd91c5f6edd5
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
@@ -0,0 +1,43 @@
+//===-- AMDGPUAsmUtils.h - AsmParser/InstPrinter common ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUASMUTILS_H
+#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUASMUTILS_H
+
+namespace llvm {
+namespace AMDGPU {
+namespace SendMsg { // Symbolic names for the sendmsg(...) syntax.
+
+extern const char* const IdSymbolic[];
+extern const char* const OpSysSymbolic[];
+extern const char* const OpGsSymbolic[];
+
+} // namespace SendMsg
+
+namespace Hwreg { // Symbolic names for the hwreg(...) syntax.
+
+extern const char* const IdSymbolic[];
+
+} // namespace Hwreg
+
+namespace Swizzle { // Symbolic names for the swizzle(...) syntax.
+
+extern const char* const IdSymbolic[];
+
+} // namespace Swizzle
+
+namespace VGPRIndexMode { // Symbolic names for the gpr_idx(...) syntax.
+
+extern const char* const IdSymbolic[];
+
+} // namespace VGPRIndexMode
+
+} // namespace AMDGPU
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
new file mode 100644
index 000000000000..afb2fd987afd
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -0,0 +1,1334 @@
+//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUBaseInfo.h"
+#include "AMDGPUTargetTransformInfo.h"
+#include "AMDGPU.h"
+#include "SIDefines.h"
+#include "AMDGPUAsmUtils.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <utility>
+
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+
+#define GET_INSTRINFO_NAMED_OPS
+#define GET_INSTRMAP_INFO
+#include "AMDGPUGenInstrInfo.inc"
+#undef GET_INSTRMAP_INFO
+#undef GET_INSTRINFO_NAMED_OPS
+
+namespace {
+
+/// \returns Bit mask for given bit \p Shift and bit \p Width.
+unsigned getBitMask(unsigned Shift, unsigned Width) {
+ return ((1 << Width) - 1) << Shift;
+}
+
+/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
+///
+/// \returns Packed \p Dst.
+unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
+ Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
+ Dst |= (Src << Shift) & getBitMask(Shift, Width);
+ return Dst;
+}
+
+/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
+///
+/// \returns Unpacked bits.
+unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
+ return (Src & getBitMask(Shift, Width)) >> Shift;
+}
+
+/// \returns Vmcnt bit shift (lower bits).
+unsigned getVmcntBitShiftLo() { return 0; }
+
+/// \returns Vmcnt bit width (lower bits).
+unsigned getVmcntBitWidthLo() { return 4; }
+
+/// \returns Expcnt bit shift.
+unsigned getExpcntBitShift() { return 4; }
+
+/// \returns Expcnt bit width.
+unsigned getExpcntBitWidth() { return 3; }
+
+/// \returns Lgkmcnt bit shift.
+unsigned getLgkmcntBitShift() { return 8; }
+
+/// \returns Lgkmcnt bit width.
+unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
+ return (VersionMajor >= 10) ? 6 : 4;
+}
+
+/// \returns Vmcnt bit shift (higher bits).
+unsigned getVmcntBitShiftHi() { return 14; }
+
+/// \returns Vmcnt bit width (higher bits).
+unsigned getVmcntBitWidthHi() { return 2; }
+
+} // end namespace anonymous
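These three helpers carry all of the waitcnt field packing further down in this file. A minimal standalone sketch of what they compute (it restates the helpers rather than calling them, since they sit in an anonymous namespace):

// Sketch only: restates getBitMask/packBits/unpackBits to show the arithmetic.
#include <cassert>

static unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

static unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

static unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

int main() {
  // Expcnt lives in bits [6:4] (shift 4, width 3).
  unsigned Waitcnt = 0xffff;
  Waitcnt = packBits(/*Src=*/3, Waitcnt, /*Shift=*/4, /*Width=*/3);
  assert(Waitcnt == 0xffbf);                            // bits [6:4] now hold 3
  assert(unpackBits(Waitcnt, /*Shift=*/4, /*Width=*/3) == 3);
  return 0;
}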
+
+namespace llvm {
+
+namespace AMDGPU {
+
+#define GET_MIMGBaseOpcodesTable_IMPL
+#define GET_MIMGDimInfoTable_IMPL
+#define GET_MIMGInfoTable_IMPL
+#define GET_MIMGLZMappingTable_IMPL
+#define GET_MIMGMIPMappingTable_IMPL
+#include "AMDGPUGenSearchableTables.inc"
+
+int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
+ unsigned VDataDwords, unsigned VAddrDwords) {
+ const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
+ VDataDwords, VAddrDwords);
+ return Info ? Info->Opcode : -1;
+}
+
+const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
+ const MIMGInfo *Info = getMIMGInfo(Opc);
+ return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
+}
+
+int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
+ const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
+ const MIMGInfo *NewInfo =
+ getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
+ NewChannels, OrigInfo->VAddrDwords);
+ return NewInfo ? NewInfo->Opcode : -1;
+}
+
+struct MUBUFInfo {
+ uint16_t Opcode;
+ uint16_t BaseOpcode;
+ uint8_t elements;
+ bool has_vaddr;
+ bool has_srsrc;
+ bool has_soffset;
+};
+
+struct MTBUFInfo {
+ uint16_t Opcode;
+ uint16_t BaseOpcode;
+ uint8_t elements;
+ bool has_vaddr;
+ bool has_srsrc;
+ bool has_soffset;
+};
+
+#define GET_MTBUFInfoTable_DECL
+#define GET_MTBUFInfoTable_IMPL
+#define GET_MUBUFInfoTable_DECL
+#define GET_MUBUFInfoTable_IMPL
+#include "AMDGPUGenSearchableTables.inc"
+
+int getMTBUFBaseOpcode(unsigned Opc) {
+ const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
+ return Info ? Info->BaseOpcode : -1;
+}
+
+int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
+ const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
+ return Info ? Info->Opcode : -1;
+}
+
+int getMTBUFElements(unsigned Opc) {
+ const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
+ return Info ? Info->elements : 0;
+}
+
+bool getMTBUFHasVAddr(unsigned Opc) {
+ const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
+ return Info ? Info->has_vaddr : false;
+}
+
+bool getMTBUFHasSrsrc(unsigned Opc) {
+ const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
+ return Info ? Info->has_srsrc : false;
+}
+
+bool getMTBUFHasSoffset(unsigned Opc) {
+ const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
+ return Info ? Info->has_soffset : false;
+}
+
+int getMUBUFBaseOpcode(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
+ return Info ? Info->BaseOpcode : -1;
+}
+
+int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
+ const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
+ return Info ? Info->Opcode : -1;
+}
+
+int getMUBUFElements(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->elements : 0;
+}
+
+bool getMUBUFHasVAddr(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->has_vaddr : false;
+}
+
+bool getMUBUFHasSrsrc(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->has_srsrc : false;
+}
+
+bool getMUBUFHasSoffset(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->has_soffset : false;
+}
+
+// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
+// header files, so we need to wrap it in a function that takes unsigned
+// instead.
+int getMCOpcode(uint16_t Opcode, unsigned Gen) {
+ return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
+}
+
+namespace IsaInfo {
+
+void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
+ auto TargetTriple = STI->getTargetTriple();
+ auto Version = getIsaVersion(STI->getCPU());
+
+ Stream << TargetTriple.getArchName() << '-'
+ << TargetTriple.getVendorName() << '-'
+ << TargetTriple.getOSName() << '-'
+ << TargetTriple.getEnvironmentName() << '-'
+ << "gfx"
+ << Version.Major
+ << Version.Minor
+ << Version.Stepping;
+
+ if (hasXNACK(*STI))
+ Stream << "+xnack";
+ if (hasSRAMECC(*STI))
+ Stream << "+sram-ecc";
+
+ Stream.flush();
+}
+
+bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
+ return STI->getTargetTriple().getOS() == Triple::AMDHSA &&
+ STI->getFeatureBits().test(FeatureCodeObjectV3);
+}
+
+unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureWavefrontSize16))
+ return 16;
+ if (STI->getFeatureBits().test(FeatureWavefrontSize32))
+ return 32;
+
+ return 64;
+}
+
+unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
+ return 32768;
+ if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
+ return 65536;
+
+ return 0;
+}
+
+unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
+ return 4;
+}
+
+unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
+ unsigned FlatWorkGroupSize) {
+ assert(FlatWorkGroupSize != 0);
+ if (STI->getTargetTriple().getArch() != Triple::amdgcn)
+ return 8;
+ unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
+ if (N == 1)
+ return 40;
+ N = 40 / N;
+ return std::min(N, 16u);
+}
+
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
+ return getMaxWavesPerEU(STI) * getEUsPerCU(STI);
+}
+
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
+ unsigned FlatWorkGroupSize) {
+ return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
+}
+
+unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
+ return 1;
+}
+
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
+ // FIXME: Need to take scratch memory into account.
+ if (!isGFX10(*STI))
+ return 10;
+ return 20;
+}
+
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
+ unsigned FlatWorkGroupSize) {
+ return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
+ getEUsPerCU(STI)) / getEUsPerCU(STI);
+}
+
+unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
+ return 1;
+}
+
+unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
+ return 2048;
+}
+
+unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
+ unsigned FlatWorkGroupSize) {
+ return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
+ getWavefrontSize(STI);
+}
+
+unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+ if (Version.Major >= 10)
+ return getAddressableNumSGPRs(STI);
+ if (Version.Major >= 8)
+ return 16;
+ return 8;
+}
+
+unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
+ return 8;
+}
+
+unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+ if (Version.Major >= 8)
+ return 800;
+ return 512;
+}
+
+unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureSGPRInitBug))
+ return FIXED_NUM_SGPRS_FOR_INIT_BUG;
+
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+ if (Version.Major >= 10)
+ return 106;
+ if (Version.Major >= 8)
+ return 102;
+ return 104;
+}
+
+unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
+ assert(WavesPerEU != 0);
+
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+ if (Version.Major >= 10)
+ return 0;
+
+ if (WavesPerEU >= getMaxWavesPerEU(STI))
+ return 0;
+
+ unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
+ if (STI->getFeatureBits().test(FeatureTrapHandler))
+ MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
+ MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
+ return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
+}
+
+unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
+ bool Addressable) {
+ assert(WavesPerEU != 0);
+
+ unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+ if (Version.Major >= 10)
+ return Addressable ? AddressableNumSGPRs : 108;
+ if (Version.Major >= 8 && !Addressable)
+ AddressableNumSGPRs = 112;
+ unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
+ if (STI->getFeatureBits().test(FeatureTrapHandler))
+ MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
+ MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
+ return std::min(MaxNumSGPRs, AddressableNumSGPRs);
+}
+
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
+ bool FlatScrUsed, bool XNACKUsed) {
+ unsigned ExtraSGPRs = 0;
+ if (VCCUsed)
+ ExtraSGPRs = 2;
+
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+ if (Version.Major >= 10)
+ return ExtraSGPRs;
+
+ if (Version.Major < 8) {
+ if (FlatScrUsed)
+ ExtraSGPRs = 4;
+ } else {
+ if (XNACKUsed)
+ ExtraSGPRs = 4;
+
+ if (FlatScrUsed)
+ ExtraSGPRs = 6;
+ }
+
+ return ExtraSGPRs;
+}
+
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
+ bool FlatScrUsed) {
+ return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
+ STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
+}
+
+unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
+ NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
+ // SGPRBlocks is actual number of SGPR blocks minus 1.
+ return NumSGPRs / getSGPREncodingGranule(STI) - 1;
+}
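As a worked example of the minus-one encoding: with the 8-register encoding granule, a kernel using 10 SGPRs rounds up to 16 and is encoded as 16 / 8 - 1 = 1.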
+
+unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
+ Optional<bool> EnableWavefrontSize32) {
+ bool IsWave32 = EnableWavefrontSize32 ?
+ *EnableWavefrontSize32 :
+ STI->getFeatureBits().test(FeatureWavefrontSize32);
+ return IsWave32 ? 8 : 4;
+}
+
+unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
+ Optional<bool> EnableWavefrontSize32) {
+ return getVGPRAllocGranule(STI, EnableWavefrontSize32);
+}
+
+unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
+ if (!isGFX10(*STI))
+ return 256;
+ return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512;
+}
+
+unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
+ return 256;
+}
+
+unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
+ assert(WavesPerEU != 0);
+
+ if (WavesPerEU >= getMaxWavesPerEU(STI))
+ return 0;
+ unsigned MinNumVGPRs =
+ alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
+ getVGPRAllocGranule(STI)) + 1;
+ return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
+}
+
+unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
+ assert(WavesPerEU != 0);
+
+ unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
+ getVGPRAllocGranule(STI));
+ unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
+ return std::min(MaxNumVGPRs, AddressableNumVGPRs);
+}
+
+unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
+ Optional<bool> EnableWavefrontSize32) {
+ NumVGPRs = alignTo(std::max(1u, NumVGPRs),
+ getVGPREncodingGranule(STI, EnableWavefrontSize32));
+ // VGPRBlocks is actual number of VGPR blocks minus 1.
+ return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
+}
+
+} // end namespace IsaInfo
+
+void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
+ const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+
+ memset(&Header, 0, sizeof(Header));
+
+ Header.amd_kernel_code_version_major = 1;
+ Header.amd_kernel_code_version_minor = 2;
+ Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
+ Header.amd_machine_version_major = Version.Major;
+ Header.amd_machine_version_minor = Version.Minor;
+ Header.amd_machine_version_stepping = Version.Stepping;
+ Header.kernel_code_entry_byte_offset = sizeof(Header);
+ Header.wavefront_size = 6;
+
+ // If the code object does not support indirect functions, then the value must
+ // be 0xffffffff.
+ Header.call_convention = -1;
+
+ // These alignment values are specified in powers of two, so alignment =
+ // 2^n. The minimum alignment is 2^4 = 16.
+ Header.kernarg_segment_alignment = 4;
+ Header.group_segment_alignment = 4;
+ Header.private_segment_alignment = 4;
+
+ if (Version.Major >= 10) {
+ if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
+ Header.wavefront_size = 5;
+ Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
+ }
+ Header.compute_pgm_resource_registers |=
+ S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
+ S_00B848_MEM_ORDERED(1);
+ }
+}
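A note on the two wavefront_size assignments above: amd_kernel_code_t stores the wavefront size as a power-of-two exponent, so the default of 6 means 1 << 6 = 64 lanes, and the gfx10 wave32 path stores 5 for 1 << 5 = 32 lanes.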
+
+amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
+ const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+
+ amdhsa::kernel_descriptor_t KD;
+ memset(&KD, 0, sizeof(KD));
+
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
+ amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
+ if (Version.Major >= 10) {
+ AMDHSA_BITS_SET(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
+ STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
+ STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
+ }
+ return KD;
+}
+
+bool isGroupSegment(const GlobalValue *GV) {
+ return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}
+
+bool isGlobalSegment(const GlobalValue *GV) {
+ return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+}
+
+bool isReadOnlySegment(const GlobalValue *GV) {
+ return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
+}
+
+bool shouldEmitConstantsToTextSection(const Triple &TT) {
+ return TT.getOS() == Triple::AMDPAL;
+}
+
+int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
+ Attribute A = F.getFnAttribute(Name);
+ int Result = Default;
+
+ if (A.isStringAttribute()) {
+ StringRef Str = A.getValueAsString();
+ if (Str.getAsInteger(0, Result)) {
+ LLVMContext &Ctx = F.getContext();
+ Ctx.emitError("can't parse integer attribute " + Name);
+ }
+ }
+
+ return Result;
+}
+
+std::pair<int, int> getIntegerPairAttribute(const Function &F,
+ StringRef Name,
+ std::pair<int, int> Default,
+ bool OnlyFirstRequired) {
+ Attribute A = F.getFnAttribute(Name);
+ if (!A.isStringAttribute())
+ return Default;
+
+ LLVMContext &Ctx = F.getContext();
+ std::pair<int, int> Ints = Default;
+ std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
+ if (Strs.first.trim().getAsInteger(0, Ints.first)) {
+ Ctx.emitError("can't parse first integer attribute " + Name);
+ return Default;
+ }
+ if (Strs.second.trim().getAsInteger(0, Ints.second)) {
+ if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
+ Ctx.emitError("can't parse second integer attribute " + Name);
+ return Default;
+ }
+ }
+
+ return Ints;
+}
+
+unsigned getVmcntBitMask(const IsaVersion &Version) {
+ unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
+ if (Version.Major < 9)
+ return VmcntLo;
+
+ unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
+ return VmcntLo | VmcntHi;
+}
+
+unsigned getExpcntBitMask(const IsaVersion &Version) {
+ return (1 << getExpcntBitWidth()) - 1;
+}
+
+unsigned getLgkmcntBitMask(const IsaVersion &Version) {
+ return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
+}
+
+unsigned getWaitcntBitMask(const IsaVersion &Version) {
+ unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
+ unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
+ unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
+ getLgkmcntBitWidth(Version.Major));
+ unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
+ if (Version.Major < 9)
+ return Waitcnt;
+
+ unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
+ return Waitcnt | VmcntHi;
+}
+
+unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
+ unsigned VmcntLo =
+ unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
+ if (Version.Major < 9)
+ return VmcntLo;
+
+ unsigned VmcntHi =
+ unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
+ VmcntHi <<= getVmcntBitWidthLo();
+ return VmcntLo | VmcntHi;
+}
+
+unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
+ return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
+}
+
+unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
+ return unpackBits(Waitcnt, getLgkmcntBitShift(),
+ getLgkmcntBitWidth(Version.Major));
+}
+
+void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
+ unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
+ Vmcnt = decodeVmcnt(Version, Waitcnt);
+ Expcnt = decodeExpcnt(Version, Waitcnt);
+ Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
+}
+
+Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
+ Waitcnt Decoded;
+ Decoded.VmCnt = decodeVmcnt(Version, Encoded);
+ Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
+ Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
+ return Decoded;
+}
+
+unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
+ unsigned Vmcnt) {
+ Waitcnt =
+ packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
+ if (Version.Major < 9)
+ return Waitcnt;
+
+ Vmcnt >>= getVmcntBitWidthLo();
+ return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
+}
+
+unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
+ unsigned Expcnt) {
+ return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
+}
+
+unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
+ unsigned Lgkmcnt) {
+ return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
+ getLgkmcntBitWidth(Version.Major));
+}
+
+unsigned encodeWaitcnt(const IsaVersion &Version,
+ unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
+ unsigned Waitcnt = getWaitcntBitMask(Version);
+ Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
+ Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
+ Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
+ return Waitcnt;
+}
+
+unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
+ return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
+}
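A minimal usage sketch of the encode/decode pair above, assuming a gfx9 target and the field layout documented in AMDGPUBaseInfo.h (vmcnt in bits [3:0] and [15:14], expcnt in [6:4], lgkmcnt in [11:8]); it only builds inside LLVM because it relies on the declarations added by this patch:

#include "AMDGPUBaseInfo.h"
using namespace llvm::AMDGPU;

void waitcntRoundTrip() {
  IsaVersion V = getIsaVersion("gfx900");        // 9.0.0
  unsigned Enc = encodeWaitcnt(V, /*Vmcnt=*/5, /*Expcnt=*/2, /*Lgkmcnt=*/7);
  // Expected: Enc == 0x0725, i.e. 5 | (2 << 4) | (7 << 8).
  unsigned Vm, Exp, Lgkm;
  decodeWaitcnt(V, Enc, Vm, Exp, Lgkm);
  // Vm == 5, Exp == 2, Lgkm == 7 again.
}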
+
+//===----------------------------------------------------------------------===//
+// hwreg
+//===----------------------------------------------------------------------===//
+
+namespace Hwreg {
+
+int64_t getHwregId(const StringRef Name) {
+ for (int Id = ID_SYMBOLIC_FIRST_; Id < ID_SYMBOLIC_LAST_; ++Id) {
+ if (IdSymbolic[Id] && Name == IdSymbolic[Id])
+ return Id;
+ }
+ return ID_UNKNOWN_;
+}
+
+static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
+ if (isSI(STI) || isCI(STI) || isVI(STI))
+ return ID_SYMBOLIC_FIRST_GFX9_;
+ else if (isGFX9(STI))
+ return ID_SYMBOLIC_FIRST_GFX10_;
+ else
+ return ID_SYMBOLIC_LAST_;
+}
+
+bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
+ return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
+ IdSymbolic[Id];
+}
+
+bool isValidHwreg(int64_t Id) {
+ return 0 <= Id && isUInt<ID_WIDTH_>(Id);
+}
+
+bool isValidHwregOffset(int64_t Offset) {
+ return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
+}
+
+bool isValidHwregWidth(int64_t Width) {
+ return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
+}
+
+uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
+ return (Id << ID_SHIFT_) |
+ (Offset << OFFSET_SHIFT_) |
+ ((Width - 1) << WIDTH_M1_SHIFT_);
+}
+
+StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
+ return isValidHwreg(Id, STI) ? IdSymbolic[Id] : "";
+}
+
+void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
+ Id = (Val & ID_MASK_) >> ID_SHIFT_;
+ Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
+ Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
+}
+
+} // namespace Hwreg
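A short sketch of the hwreg helpers, assuming the ID_TRAPSTS enumerator and field layout from SIDefines.h (id in bits [5:0], offset in [10:6], width-minus-one in [15:11]):

#include "AMDGPUBaseInfo.h"
using namespace llvm::AMDGPU;

void hwregExample() {
  // Operand for hwreg(HW_REG_TRAPSTS, 0, 32), as used by s_getreg_b32.
  uint64_t Enc = Hwreg::encodeHwreg(/*Id=*/Hwreg::ID_TRAPSTS, /*Offset=*/0, /*Width=*/32);
  // Under the assumed layout: Enc == 3 | ((32 - 1) << 11) == 0xF803.
  unsigned Id, Offset, Width;
  Hwreg::decodeHwreg(Enc, Id, Offset, Width);
  // Id == 3, Offset == 0, Width == 32.
}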
+
+//===----------------------------------------------------------------------===//
+// SendMsg
+//===----------------------------------------------------------------------===//
+
+namespace SendMsg {
+
+int64_t getMsgId(const StringRef Name) {
+ for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
+ if (IdSymbolic[i] && Name == IdSymbolic[i])
+ return i;
+ }
+ return ID_UNKNOWN_;
+}
+
+static bool isValidMsgId(int64_t MsgId) {
+ return (ID_GAPS_FIRST_ <= MsgId && MsgId < ID_GAPS_LAST_) && IdSymbolic[MsgId];
+}
+
+bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
+ if (Strict) {
+ if (MsgId == ID_GS_ALLOC_REQ || MsgId == ID_GET_DOORBELL)
+ return isGFX9(STI) || isGFX10(STI);
+ else
+ return isValidMsgId(MsgId);
+ } else {
+ return 0 <= MsgId && isUInt<ID_WIDTH_>(MsgId);
+ }
+}
+
+StringRef getMsgName(int64_t MsgId) {
+ return isValidMsgId(MsgId)? IdSymbolic[MsgId] : "";
+}
+
+int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
+ const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
+ const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
+ const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
+ for (int i = F; i < L; ++i) {
+ if (Name == S[i]) {
+ return i;
+ }
+ }
+ return OP_UNKNOWN_;
+}
+
+bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict) {
+
+ if (!Strict)
+ return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
+
+ switch(MsgId)
+ {
+ case ID_GS:
+ return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
+ case ID_GS_DONE:
+ return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
+ case ID_SYSMSG:
+ return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
+ default:
+ return OpId == OP_NONE_;
+ }
+}
+
+StringRef getMsgOpName(int64_t MsgId, int64_t OpId) {
+ assert(msgRequiresOp(MsgId));
+ return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
+}
+
+bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict) {
+
+ if (!Strict)
+ return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
+
+ switch(MsgId)
+ {
+ case ID_GS:
+ return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
+ case ID_GS_DONE:
+ return (OpId == OP_GS_NOP)?
+ (StreamId == STREAM_ID_NONE_) :
+ (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
+ default:
+ return StreamId == STREAM_ID_NONE_;
+ }
+}
+
+bool msgRequiresOp(int64_t MsgId) {
+ return MsgId == ID_GS || MsgId == ID_GS_DONE || MsgId == ID_SYSMSG;
+}
+
+bool msgSupportsStream(int64_t MsgId, int64_t OpId) {
+ return (MsgId == ID_GS || MsgId == ID_GS_DONE) && OpId != OP_GS_NOP;
+}
+
+void decodeMsg(unsigned Val,
+ uint16_t &MsgId,
+ uint16_t &OpId,
+ uint16_t &StreamId) {
+ MsgId = Val & ID_MASK_;
+ OpId = (Val & OP_MASK_) >> OP_SHIFT_;
+ StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
+}
+
+uint64_t encodeMsg(uint64_t MsgId,
+ uint64_t OpId,
+ uint64_t StreamId) {
+ return (MsgId << ID_SHIFT_) |
+ (OpId << OP_SHIFT_) |
+ (StreamId << STREAM_ID_SHIFT_);
+}
+
+} // namespace SendMsg
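And a matching sketch for the sendmsg helpers, assuming the SIDefines.h layout (message id in bits [3:0], operation in [6:4], stream id in [9:8]) and the ID_GS/OP_GS_EMIT enumerators:

#include "AMDGPUBaseInfo.h"
using namespace llvm::AMDGPU;

void sendMsgExample() {
  // Operand for s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 1).
  uint64_t Enc = SendMsg::encodeMsg(/*MsgId=*/SendMsg::ID_GS,
                                    /*OpId=*/SendMsg::OP_GS_EMIT,
                                    /*StreamId=*/1);
  // Under the assumed layout: Enc == 2 | (2 << 4) | (1 << 8) == 0x122.
  uint16_t MsgId, OpId, StreamId;
  SendMsg::decodeMsg(Enc, MsgId, OpId, StreamId);
  // MsgId == ID_GS, OpId == OP_GS_EMIT, StreamId == 1.
}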
+
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+unsigned getInitialPSInputAddr(const Function &F) {
+ return getIntegerAttribute(F, "InitialPSInputAddr", 0);
+}
+
+bool isShader(CallingConv::ID cc) {
+ switch(cc) {
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_LS:
+ case CallingConv::AMDGPU_HS:
+ case CallingConv::AMDGPU_ES:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool isCompute(CallingConv::ID cc) {
+ return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
+}
+
+bool isEntryFunctionCC(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_ES:
+ case CallingConv::AMDGPU_HS:
+ case CallingConv::AMDGPU_LS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool hasXNACK(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
+}
+
+bool hasSRAMECC(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
+}
+
+bool hasMIMG_R128(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
+}
+
+bool hasPackedD16(const MCSubtargetInfo &STI) {
+ return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
+}
+
+bool isSI(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
+}
+
+bool isCI(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
+}
+
+bool isVI(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
+}
+
+bool isGFX9(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
+}
+
+bool isGFX10(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
+}
+
+bool isGCN3Encoding(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
+}
+
+bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
+ const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
+ const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
+ return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
+ Reg == AMDGPU::SCC;
+}
+
+bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
+ for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
+ if (*R == Reg1) return true;
+ }
+ return false;
+}
+
+#define MAP_REG2REG \
+ using namespace AMDGPU; \
+ switch(Reg) { \
+ default: return Reg; \
+ CASE_CI_VI(FLAT_SCR) \
+ CASE_CI_VI(FLAT_SCR_LO) \
+ CASE_CI_VI(FLAT_SCR_HI) \
+ CASE_VI_GFX9_GFX10(TTMP0) \
+ CASE_VI_GFX9_GFX10(TTMP1) \
+ CASE_VI_GFX9_GFX10(TTMP2) \
+ CASE_VI_GFX9_GFX10(TTMP3) \
+ CASE_VI_GFX9_GFX10(TTMP4) \
+ CASE_VI_GFX9_GFX10(TTMP5) \
+ CASE_VI_GFX9_GFX10(TTMP6) \
+ CASE_VI_GFX9_GFX10(TTMP7) \
+ CASE_VI_GFX9_GFX10(TTMP8) \
+ CASE_VI_GFX9_GFX10(TTMP9) \
+ CASE_VI_GFX9_GFX10(TTMP10) \
+ CASE_VI_GFX9_GFX10(TTMP11) \
+ CASE_VI_GFX9_GFX10(TTMP12) \
+ CASE_VI_GFX9_GFX10(TTMP13) \
+ CASE_VI_GFX9_GFX10(TTMP14) \
+ CASE_VI_GFX9_GFX10(TTMP15) \
+ CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \
+ CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \
+ CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \
+ CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \
+ CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \
+ CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \
+ CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \
+ CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \
+ CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \
+ CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \
+ CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \
+ CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \
+ CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
+ CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
+ CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
+ CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
+ }
+
+#define CASE_CI_VI(node) \
+ assert(!isSI(STI)); \
+ case node: return isCI(STI) ? node##_ci : node##_vi;
+
+#define CASE_VI_GFX9_GFX10(node) \
+ case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi;
+
+unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
+ if (STI.getTargetTriple().getArch() == Triple::r600)
+ return Reg;
+ MAP_REG2REG
+}
+
+#undef CASE_CI_VI
+#undef CASE_VI_GFX9_GFX10
+
+#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
+#define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node;
+
+unsigned mc2PseudoReg(unsigned Reg) {
+ MAP_REG2REG
+}
+
+#undef CASE_CI_VI
+#undef CASE_VI_GFX9_GFX10
+#undef MAP_REG2REG
+
+bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+ assert(OpNo < Desc.NumOperands);
+ unsigned OpType = Desc.OpInfo[OpNo].OperandType;
+ return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
+ OpType <= AMDGPU::OPERAND_SRC_LAST;
+}
+
+bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+ assert(OpNo < Desc.NumOperands);
+ unsigned OpType = Desc.OpInfo[OpNo].OperandType;
+ switch (OpType) {
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+ assert(OpNo < Desc.NumOperands);
+ unsigned OpType = Desc.OpInfo[OpNo].OperandType;
+ return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
+ OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
+}
+
+// Avoid using MCRegisterClass::getSize, since that function will go away
+// (move from MC* level to Target* level). Return size in bits.
+unsigned getRegBitWidth(unsigned RCID) {
+ switch (RCID) {
+ case AMDGPU::SGPR_32RegClassID:
+ case AMDGPU::VGPR_32RegClassID:
+ case AMDGPU::VRegOrLds_32RegClassID:
+ case AMDGPU::AGPR_32RegClassID:
+ case AMDGPU::VS_32RegClassID:
+ case AMDGPU::AV_32RegClassID:
+ case AMDGPU::SReg_32RegClassID:
+ case AMDGPU::SReg_32_XM0RegClassID:
+ case AMDGPU::SRegOrLds_32RegClassID:
+ return 32;
+ case AMDGPU::SGPR_64RegClassID:
+ case AMDGPU::VS_64RegClassID:
+ case AMDGPU::AV_64RegClassID:
+ case AMDGPU::SReg_64RegClassID:
+ case AMDGPU::VReg_64RegClassID:
+ case AMDGPU::AReg_64RegClassID:
+ case AMDGPU::SReg_64_XEXECRegClassID:
+ return 64;
+ case AMDGPU::SGPR_96RegClassID:
+ case AMDGPU::SReg_96RegClassID:
+ case AMDGPU::VReg_96RegClassID:
+ return 96;
+ case AMDGPU::SGPR_128RegClassID:
+ case AMDGPU::SReg_128RegClassID:
+ case AMDGPU::VReg_128RegClassID:
+ case AMDGPU::AReg_128RegClassID:
+ return 128;
+ case AMDGPU::SGPR_160RegClassID:
+ case AMDGPU::SReg_160RegClassID:
+ case AMDGPU::VReg_160RegClassID:
+ return 160;
+ case AMDGPU::SReg_256RegClassID:
+ case AMDGPU::VReg_256RegClassID:
+ return 256;
+ case AMDGPU::SReg_512RegClassID:
+ case AMDGPU::VReg_512RegClassID:
+ case AMDGPU::AReg_512RegClassID:
+ return 512;
+ case AMDGPU::SReg_1024RegClassID:
+ case AMDGPU::VReg_1024RegClassID:
+ case AMDGPU::AReg_1024RegClassID:
+ return 1024;
+ default:
+ llvm_unreachable("Unexpected register class");
+ }
+}
+
+unsigned getRegBitWidth(const MCRegisterClass &RC) {
+ return getRegBitWidth(RC.getID());
+}
+
+unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
+ unsigned OpNo) {
+ assert(OpNo < Desc.NumOperands);
+ unsigned RCID = Desc.OpInfo[OpNo].RegClass;
+ return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
+}
+
+bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
+ if (Literal >= -16 && Literal <= 64)
+ return true;
+
+ uint64_t Val = static_cast<uint64_t>(Literal);
+ return (Val == DoubleToBits(0.0)) ||
+ (Val == DoubleToBits(1.0)) ||
+ (Val == DoubleToBits(-1.0)) ||
+ (Val == DoubleToBits(0.5)) ||
+ (Val == DoubleToBits(-0.5)) ||
+ (Val == DoubleToBits(2.0)) ||
+ (Val == DoubleToBits(-2.0)) ||
+ (Val == DoubleToBits(4.0)) ||
+ (Val == DoubleToBits(-4.0)) ||
+ (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
+}
+
+bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
+ if (Literal >= -16 && Literal <= 64)
+ return true;
+
+ // The actual type of the operand does not seem to matter as long
+ // as the bits match one of the inline immediate values. For example:
+ //
+ // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
+ // so it is a legal inline immediate.
+ //
+ // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
+ // floating-point, so it is a legal inline immediate.
+
+ uint32_t Val = static_cast<uint32_t>(Literal);
+ return (Val == FloatToBits(0.0f)) ||
+ (Val == FloatToBits(1.0f)) ||
+ (Val == FloatToBits(-1.0f)) ||
+ (Val == FloatToBits(0.5f)) ||
+ (Val == FloatToBits(-0.5f)) ||
+ (Val == FloatToBits(2.0f)) ||
+ (Val == FloatToBits(-2.0f)) ||
+ (Val == FloatToBits(4.0f)) ||
+ (Val == FloatToBits(-4.0f)) ||
+ (Val == 0x3e22f983 && HasInv2Pi);
+}
+
+bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
+ if (!HasInv2Pi)
+ return false;
+
+ if (Literal >= -16 && Literal <= 64)
+ return true;
+
+ uint16_t Val = static_cast<uint16_t>(Literal);
+ return Val == 0x3C00 || // 1.0
+ Val == 0xBC00 || // -1.0
+ Val == 0x3800 || // 0.5
+ Val == 0xB800 || // -0.5
+ Val == 0x4000 || // 2.0
+ Val == 0xC000 || // -2.0
+ Val == 0x4400 || // 4.0
+ Val == 0xC400 || // -4.0
+ Val == 0x3118; // 1/2pi
+}
+
+bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
+ assert(HasInv2Pi);
+
+ if (isInt<16>(Literal) || isUInt<16>(Literal)) {
+ int16_t Trunc = static_cast<int16_t>(Literal);
+ return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
+ }
+ if (!(Literal & 0xffff))
+ return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);
+
+ int16_t Lo16 = static_cast<int16_t>(Literal);
+ int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
+ return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
+}
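For example, the packed literal 0x3C003C00 (half-precision 1.0 in both halves) is accepted: neither 16-bit branch applies, but the two halves are equal and each is an inlinable 16-bit immediate.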
+
+bool isArgPassedInSGPR(const Argument *A) {
+ const Function *F = A->getParent();
+
+ // Arguments to compute shaders are never a source of divergence.
+ CallingConv::ID CC = F->getCallingConv();
+ switch (CC) {
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return true;
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_LS:
+ case CallingConv::AMDGPU_HS:
+ case CallingConv::AMDGPU_ES:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
+ // Everything else is in VGPRs.
+ return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
+ F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
+ default:
+ // TODO: Should calls support inreg for SGPR inputs?
+ return false;
+ }
+}
+
+static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
+ return isGCN3Encoding(ST) || isGFX10(ST);
+}
+
+int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
+ if (hasSMEMByteOffset(ST))
+ return ByteOffset;
+ return ByteOffset >> 2;
+}
+
+bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
+ int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
+ return (hasSMEMByteOffset(ST)) ?
+ isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
+}
+
+// Given Imm, split it into the values to put into the SOffset and ImmOffset
+// fields in an MUBUF instruction. Return false if it is not possible (due to a
+// hardware bug needing a workaround).
+//
+// The required alignment ensures that individual address components remain
+// aligned if they are aligned to begin with. It also ensures that additional
+// offsets within the given alignment can be added to the resulting ImmOffset.
+bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
+ const GCNSubtarget *Subtarget, uint32_t Align) {
+ const uint32_t MaxImm = alignDown(4095, Align);
+ uint32_t Overflow = 0;
+
+ if (Imm > MaxImm) {
+ if (Imm <= MaxImm + 64) {
+ // Use an SOffset inline constant for 4..64
+ Overflow = Imm - MaxImm;
+ Imm = MaxImm;
+ } else {
+ // Try to keep the same value in SOffset for adjacent loads, so that
+ // the corresponding register contents can be re-used.
+ //
+ // Load values with all low-bits (except for alignment bits) set into
+ // SOffset, so that a larger range of values can be covered using
+ // s_movk_i32.
+ //
+ // Atomic operations fail to work correctly when individual address
+ // components are unaligned, even if their sum is aligned.
+ uint32_t High = (Imm + Align) & ~4095;
+ uint32_t Low = (Imm + Align) & 4095;
+ Imm = Low;
+ Overflow = High - Align;
+ }
+ }
+
+ // There is a hardware bug in SI and CI which prevents address clamping in
+ // MUBUF instructions from working correctly with SOffsets. The immediate
+ // offset is unaffected.
+ if (Overflow > 0 &&
+ Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
+ return false;
+
+ ImmOffset = Imm;
+ SOffset = Overflow;
+ return true;
+}
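To make the overflow path concrete: with Imm = 5000 and Align = 4, MaxImm is 4092 and 5000 exceeds MaxImm + 64, so High = (5000 + 4) & ~4095 = 4096 and Low = (5000 + 4) & 4095 = 908; the result is ImmOffset = 908 and SOffset = 4096 - 4 = 4092, which still sum to 5000 and each remain 4-byte aligned.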
+
+SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
+ *this = getDefaultForCallingConv(F.getCallingConv());
+
+ StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
+ if (!IEEEAttr.empty())
+ IEEE = IEEEAttr == "true";
+
+ StringRef DX10ClampAttr
+ = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
+ if (!DX10ClampAttr.empty())
+ DX10Clamp = DX10ClampAttr == "true";
+}
+
+namespace {
+
+struct SourceOfDivergence {
+ unsigned Intr;
+};
+const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
+
+#define GET_SourcesOfDivergence_IMPL
+#include "AMDGPUGenSearchableTables.inc"
+
+} // end anonymous namespace
+
+bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
+ return lookupSourceOfDivergence(IntrID);
+}
+
+} // namespace AMDGPU
+} // namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
new file mode 100644
index 000000000000..f78dadd447ff
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -0,0 +1,691 @@
+//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
+#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
+
+#include "AMDGPU.h"
+#include "AMDKernelCodeT.h"
+#include "SIDefines.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetParser.h"
+#include <cstdint>
+#include <string>
+#include <utility>
+
+namespace llvm {
+
+class Argument;
+class AMDGPUSubtarget;
+class FeatureBitset;
+class Function;
+class GCNSubtarget;
+class GlobalValue;
+class MCContext;
+class MCRegisterClass;
+class MCRegisterInfo;
+class MCSection;
+class MCSubtargetInfo;
+class MachineMemOperand;
+class Triple;
+
+namespace AMDGPU {
+
+#define GET_MIMGBaseOpcode_DECL
+#define GET_MIMGDim_DECL
+#define GET_MIMGEncoding_DECL
+#define GET_MIMGLZMapping_DECL
+#define GET_MIMGMIPMapping_DECL
+#include "AMDGPUGenSearchableTables.inc"
+
+namespace IsaInfo {
+
+enum {
+ // The closed Vulkan driver sets 96, which limits the wave count to 8 but
+ // doesn't spill SGPRs as much as when 80 is set.
+ FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
+ TRAP_NUM_SGPRS = 16
+};
+
+/// Streams isa version string for given subtarget \p STI into \p Stream.
+void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
+
+/// \returns True if given subtarget \p STI supports code object version 3,
+/// false otherwise.
+bool hasCodeObjectV3(const MCSubtargetInfo *STI);
+
+/// \returns Wavefront size for given subtarget \p STI.
+unsigned getWavefrontSize(const MCSubtargetInfo *STI);
+
+/// \returns Local memory size in bytes for given subtarget \p STI.
+unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
+
+/// \returns Number of execution units per compute unit for given subtarget \p
+/// STI.
+unsigned getEUsPerCU(const MCSubtargetInfo *STI);
+
+/// \returns Maximum number of work groups per compute unit for given subtarget
+/// \p STI and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
+ unsigned FlatWorkGroupSize);
+
+/// \returns Maximum number of waves per compute unit for given subtarget \p
+/// STI without any kind of limitation.
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
+
+/// \returns Maximum number of waves per compute unit for given subtarget \p
+/// STI and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
+ unsigned FlatWorkGroupSize);
+
+/// \returns Minimum number of waves per execution unit for given subtarget \p
+/// STI.
+unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
+
+/// \returns Maximum number of waves per execution unit for given subtarget \p
+/// STI without any kind of limitation.
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
+
+/// \returns Maximum number of waves per execution unit for given subtarget \p
+/// STI and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
+ unsigned FlatWorkGroupSize);
+
+/// \returns Minimum flat work group size for given subtarget \p STI.
+unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
+
+/// \returns Maximum flat work group size for given subtarget \p STI.
+unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
+
+/// \returns Number of waves per work group for given subtarget \p STI and
+/// limited by given \p FlatWorkGroupSize.
+unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
+ unsigned FlatWorkGroupSize);
+
+/// \returns SGPR allocation granularity for given subtarget \p STI.
+unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
+
+/// \returns SGPR encoding granularity for given subtarget \p STI.
+unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
+
+/// \returns Total number of SGPRs for given subtarget \p STI.
+unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
+
+/// \returns Addressable number of SGPRs for given subtarget \p STI.
+unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
+
+/// \returns Minimum number of SGPRs that meets the given number of waves per
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
+
+/// \returns Maximum number of SGPRs that meets the given number of waves per
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
+ bool Addressable);
+
+/// \returns Number of extra SGPRs implicitly required by given subtarget \p
+/// STI when the given special registers are used.
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
+ bool FlatScrUsed, bool XNACKUsed);
+
+/// \returns Number of extra SGPRs implicitly required by given subtarget \p
+/// STI when the given special registers are used. XNACK is inferred from
+/// \p STI.
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
+ bool FlatScrUsed);
+
+/// \returns Number of SGPR blocks needed for given subtarget \p STI when
+/// \p NumSGPRs are used. \p NumSGPRs should already include any special
+/// register counts.
+unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
+
+/// \returns VGPR allocation granularity for given subtarget \p STI.
+///
+/// For subtargets which support it, \p EnableWavefrontSize32 should match
+/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
+unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
+ Optional<bool> EnableWavefrontSize32 = None);
+
+/// \returns VGPR encoding granularity for given subtarget \p STI.
+///
+/// For subtargets which support it, \p EnableWavefrontSize32 should match
+/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
+unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
+ Optional<bool> EnableWavefrontSize32 = None);
+
+/// \returns Total number of VGPRs for given subtarget \p STI.
+unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
+
+/// \returns Addressable number of VGPRs for given subtarget \p STI.
+unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
+
+/// \returns Minimum number of VGPRs that meets given number of waves per
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
+
+/// \returns Maximum number of VGPRs that meets given number of waves per
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
+
+/// \returns Number of VGPR blocks needed for given subtarget \p STI when
+/// \p NumVGPRs are used.
+///
+/// For subtargets which support it, \p EnableWavefrontSize32 should match the
+/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
+unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
+ Optional<bool> EnableWavefrontSize32 = None);
+
+} // end namespace IsaInfo
+
+LLVM_READONLY
+int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+
+LLVM_READONLY
+int getSOPPWithRelaxation(uint16_t Opcode);
+
+struct MIMGBaseOpcodeInfo {
+ MIMGBaseOpcode BaseOpcode;
+ bool Store;
+ bool Atomic;
+ bool AtomicX2;
+ bool Sampler;
+ bool Gather4;
+
+ uint8_t NumExtraArgs;
+ bool Gradients;
+ bool Coordinates;
+ bool LodOrClampOrMip;
+ bool HasD16;
+};
+
+LLVM_READONLY
+const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
+
+struct MIMGDimInfo {
+ MIMGDim Dim;
+ uint8_t NumCoords;
+ uint8_t NumGradients;
+ bool DA;
+ uint8_t Encoding;
+ const char *AsmSuffix;
+};
+
+LLVM_READONLY
+const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
+
+LLVM_READONLY
+const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
+
+LLVM_READONLY
+const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
+
+struct MIMGLZMappingInfo {
+ MIMGBaseOpcode L;
+ MIMGBaseOpcode LZ;
+};
+
+struct MIMGMIPMappingInfo {
+ MIMGBaseOpcode MIP;
+ MIMGBaseOpcode NONMIP;
+};
+
+LLVM_READONLY
+const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
+
+LLVM_READONLY
+const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned L);
+
+LLVM_READONLY
+int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
+ unsigned VDataDwords, unsigned VAddrDwords);
+
+LLVM_READONLY
+int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
+
+struct MIMGInfo {
+ uint16_t Opcode;
+ uint16_t BaseOpcode;
+ uint8_t MIMGEncoding;
+ uint8_t VDataDwords;
+ uint8_t VAddrDwords;
+};
+
+LLVM_READONLY
+const MIMGInfo *getMIMGInfo(unsigned Opc);
+
+LLVM_READONLY
+int getMTBUFBaseOpcode(unsigned Opc);
+
+LLVM_READONLY
+int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
+
+LLVM_READONLY
+int getMTBUFElements(unsigned Opc);
+
+LLVM_READONLY
+bool getMTBUFHasVAddr(unsigned Opc);
+
+LLVM_READONLY
+bool getMTBUFHasSrsrc(unsigned Opc);
+
+LLVM_READONLY
+bool getMTBUFHasSoffset(unsigned Opc);
+
+LLVM_READONLY
+int getMUBUFBaseOpcode(unsigned Opc);
+
+LLVM_READONLY
+int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
+
+LLVM_READONLY
+int getMUBUFElements(unsigned Opc);
+
+LLVM_READONLY
+bool getMUBUFHasVAddr(unsigned Opc);
+
+LLVM_READONLY
+bool getMUBUFHasSrsrc(unsigned Opc);
+
+LLVM_READONLY
+bool getMUBUFHasSoffset(unsigned Opc);
+
+LLVM_READONLY
+int getMCOpcode(uint16_t Opcode, unsigned Gen);
+
+void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
+ const MCSubtargetInfo *STI);
+
+amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
+ const MCSubtargetInfo *STI);
+
+bool isGroupSegment(const GlobalValue *GV);
+bool isGlobalSegment(const GlobalValue *GV);
+bool isReadOnlySegment(const GlobalValue *GV);
+
+/// \returns True if constants should be emitted to .text section for given
+/// target triple \p TT, false otherwise.
+bool shouldEmitConstantsToTextSection(const Triple &TT);
+
+/// \returns Integer value requested using \p F's \p Name attribute.
+///
+/// \returns \p Default if attribute is not present.
+///
+/// \returns \p Default and emits error if requested value cannot be converted
+/// to integer.
+int getIntegerAttribute(const Function &F, StringRef Name, int Default);
+
+/// \returns A pair of integer values requested using \p F's \p Name attribute
+/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
+/// is false).
+///
+/// \returns \p Default if attribute is not present.
+///
+/// \returns \p Default and emits error if one of the requested values cannot be
+/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
+/// not present.
+std::pair<int, int> getIntegerPairAttribute(const Function &F,
+ StringRef Name,
+ std::pair<int, int> Default,
+ bool OnlyFirstRequired = false);
+
+/// Represents the counter values to wait for in an s_waitcnt instruction.
+///
+/// Large values (including the maximum possible integer) can be used to
+/// represent "don't care" waits.
+struct Waitcnt {
+ unsigned VmCnt = ~0u;
+ unsigned ExpCnt = ~0u;
+ unsigned LgkmCnt = ~0u;
+ unsigned VsCnt = ~0u;
+
+ Waitcnt() {}
+ Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
+ : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}
+
+ static Waitcnt allZero(const IsaVersion &Version) {
+ return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u);
+ }
+ static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }
+
+ bool hasWait() const {
+ return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
+ }
+
+ bool dominates(const Waitcnt &Other) const {
+ return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
+ LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
+ }
+
+ Waitcnt combined(const Waitcnt &Other) const {
+ return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
+ std::min(LgkmCnt, Other.LgkmCnt),
+ std::min(VsCnt, Other.VsCnt));
+ }
+};
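+
+// Example (sketch): combined() takes the element-wise minimum, so merging a
+// pending wait on vmcnt(0) with a pending wait on lgkmcnt(0) yields a single
+// wait on both counters, with the remaining counters left as "don't care".
+//
+//   Waitcnt A(0, ~0u, ~0u, ~0u);  // wait for vmcnt(0) only
+//   Waitcnt B(~0u, ~0u, 0, ~0u);  // wait for lgkmcnt(0) only
+//   Waitcnt C = A.combined(B);    // waits for vmcnt(0) and lgkmcnt(0)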
+
+/// \returns Vmcnt bit mask for given isa \p Version.
+unsigned getVmcntBitMask(const IsaVersion &Version);
+
+/// \returns Expcnt bit mask for given isa \p Version.
+unsigned getExpcntBitMask(const IsaVersion &Version);
+
+/// \returns Lgkmcnt bit mask for given isa \p Version.
+unsigned getLgkmcntBitMask(const IsaVersion &Version);
+
+/// \returns Waitcnt bit mask for given isa \p Version.
+unsigned getWaitcntBitMask(const IsaVersion &Version);
+
+/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
+unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
+
+/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
+unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
+
+/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
+unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
+
+/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
+/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
+/// \p Lgkmcnt respectively.
+///
+/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
+/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only)
+/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
+/// \p Expcnt = \p Waitcnt[6:4]
+/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10 only)
+/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10+ only)
+void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
+ unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
+
+Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
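+
+// Worked example, assuming the pre-gfx9 layout documented above: the encoded
+// value 0x321 has Waitcnt[3:0] = 1, Waitcnt[6:4] = 2 and Waitcnt[11:8] = 3,
+// so it decodes to Vmcnt = 1, Expcnt = 2 and Lgkmcnt = 3.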
+
+/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
+unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
+ unsigned Vmcnt);
+
+/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
+unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
+ unsigned Expcnt);
+
+/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
+unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
+ unsigned Lgkmcnt);
+
+/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
+/// \p Version.
+///
+/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
+/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
+/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
+/// Waitcnt[6:4] = \p Expcnt
+/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10 only)
+/// Waitcnt[13:8] = \p Lgkmcnt (gfx10+ only)
+/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
+///
+/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
+/// isa \p Version.
+unsigned encodeWaitcnt(const IsaVersion &Version,
+ unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
+
+unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
+
+namespace Hwreg {
+
+LLVM_READONLY
+int64_t getHwregId(const StringRef Name);
+
+LLVM_READNONE
+bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);
+
+LLVM_READNONE
+bool isValidHwreg(int64_t Id);
+
+LLVM_READNONE
+bool isValidHwregOffset(int64_t Offset);
+
+LLVM_READNONE
+bool isValidHwregWidth(int64_t Width);
+
+LLVM_READNONE
+uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);
+
+LLVM_READNONE
+StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
+
+void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
+
+} // namespace Hwreg
+
+namespace SendMsg {
+
+LLVM_READONLY
+int64_t getMsgId(const StringRef Name);
+
+LLVM_READONLY
+int64_t getMsgOpId(int64_t MsgId, const StringRef Name);
+
+LLVM_READNONE
+StringRef getMsgName(int64_t MsgId);
+
+LLVM_READNONE
+StringRef getMsgOpName(int64_t MsgId, int64_t OpId);
+
+LLVM_READNONE
+bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);
+
+LLVM_READNONE
+bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true);
+
+LLVM_READNONE
+bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict = true);
+
+LLVM_READNONE
+bool msgRequiresOp(int64_t MsgId);
+
+LLVM_READNONE
+bool msgSupportsStream(int64_t MsgId, int64_t OpId);
+
+void decodeMsg(unsigned Val,
+ uint16_t &MsgId,
+ uint16_t &OpId,
+ uint16_t &StreamId);
+
+LLVM_READNONE
+uint64_t encodeMsg(uint64_t MsgId,
+ uint64_t OpId,
+ uint64_t StreamId);
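+
+// Illustrative sketch: building the immediate operand for
+// "s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)" from the symbolic names above.
+//
+//   int64_t MsgId = getMsgId("MSG_GS");
+//   int64_t OpId = getMsgOpId(MsgId, "GS_OP_EMIT");
+//   uint64_t Imm = encodeMsg(MsgId, OpId, /*StreamId=*/0);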
+
+} // namespace SendMsg
+
+
+unsigned getInitialPSInputAddr(const Function &F);
+
+LLVM_READNONE
+bool isShader(CallingConv::ID CC);
+
+LLVM_READNONE
+bool isCompute(CallingConv::ID CC);
+
+LLVM_READNONE
+bool isEntryFunctionCC(CallingConv::ID CC);
+
+// FIXME: Remove this when calling conventions are cleaned up
+LLVM_READNONE
+inline bool isKernel(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool hasXNACK(const MCSubtargetInfo &STI);
+bool hasSRAMECC(const MCSubtargetInfo &STI);
+bool hasMIMG_R128(const MCSubtargetInfo &STI);
+bool hasPackedD16(const MCSubtargetInfo &STI);
+
+bool isSI(const MCSubtargetInfo &STI);
+bool isCI(const MCSubtargetInfo &STI);
+bool isVI(const MCSubtargetInfo &STI);
+bool isGFX9(const MCSubtargetInfo &STI);
+bool isGFX10(const MCSubtargetInfo &STI);
+
+/// \returns true if \p Reg is a scalar (SGPR) register.
+bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
+
+/// \returns true if registers \p Reg0 and \p Reg1 overlap.
+bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
+
+/// If \p Reg is a pseudo register, return the corresponding hardware register
+/// for \p STI, otherwise return \p Reg.
+unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
+
+/// Convert hardware register \p Reg to a pseudo register
+LLVM_READNONE
+unsigned mc2PseudoReg(unsigned Reg);
+
+/// Can this operand also contain immediate values?
+bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
+
+/// Is this a floating-point operand?
+bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
+
+/// Does this operand support only inlinable literals?
+bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
+
+/// Get the size in bits of a register from the register class with ID \p RCID.
+unsigned getRegBitWidth(unsigned RCID);
+
+/// Get the size in bits of a register from the register class \p RC.
+unsigned getRegBitWidth(const MCRegisterClass &RC);
+
+/// Get the size of a register operand.
+unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
+ unsigned OpNo);
+
+LLVM_READNONE
+inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
+ switch (OpInfo.OperandType) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+ return 4;
+
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ return 8;
+
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ return 2;
+
+ default:
+ llvm_unreachable("unhandled operand type");
+ }
+}
+
+LLVM_READNONE
+inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
+ return getOperandSize(Desc.OpInfo[OpNo]);
+}
+
+/// Is this literal inlinable?
+LLVM_READNONE
+bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
+
+LLVM_READNONE
+bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
+
+LLVM_READNONE
+bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
+
+LLVM_READNONE
+bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
+
+bool isArgPassedInSGPR(const Argument *Arg);
+
+/// \returns The encoding that will be used for \p ByteOffset in the SMRD
+/// offset field.
+int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+
+/// \returns true if this offset is small enough to fit in the SMRD
+/// offset field. \p ByteOffset should be the offset in bytes and
+/// not the encoded offset.
+bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+
+bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
+ const GCNSubtarget *Subtarget, uint32_t Align = 4);
+
+/// \returns true if the intrinsic is divergent
+bool isIntrinsicSourceOfDivergence(unsigned IntrID);
+
+
+// Track defaults for fields in the MODE register.
+struct SIModeRegisterDefaults {
+  /// Floating-point opcodes that support exception flag gathering quiet and
+  /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
+  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
+  /// quieting.
+ bool IEEE : 1;
+
+ /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
+ /// clamp NaN to zero; otherwise, pass NaN through.
+ bool DX10Clamp : 1;
+
+ // TODO: FP mode fields
+
+ SIModeRegisterDefaults() :
+ IEEE(true),
+ DX10Clamp(true) {}
+
+ SIModeRegisterDefaults(const Function &F);
+
+ static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
+ SIModeRegisterDefaults Mode;
+ Mode.DX10Clamp = true;
+ Mode.IEEE = AMDGPU::isCompute(CC);
+ return Mode;
+ }
+
+ bool operator ==(const SIModeRegisterDefaults Other) const {
+ return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
+ }
+
+ // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
+ // be able to override.
+ bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
+ return *this == CalleeMode;
+ }
+};
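+
+// Example (sketch): compute entry points default to IEEE mode while graphics
+// shaders do not, so for a kernel the result below has IEEE == true and
+// DX10Clamp == true.
+//
+//   SIModeRegisterDefaults Mode =
+//       SIModeRegisterDefaults::getDefaultForCallingConv(
+//           CallingConv::AMDGPU_KERNEL);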
+
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
new file mode 100644
index 000000000000..207e4232e829
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -0,0 +1,725 @@
+//===-- AMDGPUPALMetadata.cpp - Accumulate and print AMDGPU PAL metadata -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// This class has methods called by AMDGPUAsmPrinter to accumulate and print
+/// the PAL metadata.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPUPALMetadata.h"
+#include "AMDGPU.h"
+#include "AMDGPUAsmPrinter.h"
+#include "MCTargetDesc/AMDGPUTargetStreamer.h"
+#include "SIDefines.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/AMDGPUMetadata.h"
+#include "llvm/Support/EndianStream.h"
+
+using namespace llvm;
+using namespace llvm::AMDGPU;
+
+// Read the PAL metadata from IR metadata, where it was put by the frontend.
+void AMDGPUPALMetadata::readFromIR(Module &M) {
+ auto NamedMD = M.getNamedMetadata("amdgpu.pal.metadata.msgpack");
+ if (NamedMD && NamedMD->getNumOperands()) {
+ // This is the new msgpack format for metadata. It is a NamedMD containing
+ // an MDTuple containing an MDString containing the msgpack data.
+ BlobType = ELF::NT_AMDGPU_METADATA;
+ auto MDN = dyn_cast<MDTuple>(NamedMD->getOperand(0));
+ if (MDN && MDN->getNumOperands()) {
+ if (auto MDS = dyn_cast<MDString>(MDN->getOperand(0)))
+ setFromMsgPackBlob(MDS->getString());
+ }
+ return;
+ }
+ BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA;
+ NamedMD = M.getNamedMetadata("amdgpu.pal.metadata");
+ if (!NamedMD || !NamedMD->getNumOperands())
+ return;
+ // This is the old reg=value pair format for metadata. It is a NamedMD
+ // containing an MDTuple containing a number of MDNodes each of which is an
+ // integer value, and each two integer values forms a key=value pair that we
+ // store as Registers[key]=value in the map.
+ auto Tuple = dyn_cast<MDTuple>(NamedMD->getOperand(0));
+ if (!Tuple)
+ return;
+ for (unsigned I = 0, E = Tuple->getNumOperands() & -2; I != E; I += 2) {
+ auto Key = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I));
+ auto Val = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I + 1));
+ if (!Key || !Val)
+ continue;
+ setRegister(Key->getZExtValue(), Val->getZExtValue());
+ }
+}
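+
+// Illustrative example of the legacy IR form consumed above (register numbers
+// and values are made up for the example): a named MD node whose operands
+// form reg,value pairs.
+//
+//   !amdgpu.pal.metadata = !{!0}
+//   !0 = !{i32 11274, i32 2883584, i32 11275, i32 66}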
+
+// Set PAL metadata from a binary blob from the applicable .note record.
+// Returns false if bad format. Blob must remain valid for the lifetime of the
+// Metadata.
+bool AMDGPUPALMetadata::setFromBlob(unsigned Type, StringRef Blob) {
+ BlobType = Type;
+ if (Type == ELF::NT_AMD_AMDGPU_PAL_METADATA)
+ return setFromLegacyBlob(Blob);
+ return setFromMsgPackBlob(Blob);
+}
+
+// Set PAL metadata from legacy (array of key=value pairs) blob.
+bool AMDGPUPALMetadata::setFromLegacyBlob(StringRef Blob) {
+ auto Data = reinterpret_cast<const uint32_t *>(Blob.data());
+ for (unsigned I = 0; I != Blob.size() / sizeof(uint32_t) / 2; ++I)
+ setRegister(Data[I * 2], Data[I * 2 + 1]);
+ return true;
+}
+
+// Set PAL metadata from msgpack blob.
+bool AMDGPUPALMetadata::setFromMsgPackBlob(StringRef Blob) {
+ msgpack::Reader Reader(Blob);
+ return MsgPackDoc.readFromBlob(Blob, /*Multi=*/false);
+}
+
+// Given the calling convention, calculate the register number for rsrc1. In
+// principle the register number could change in future hardware, but we know
+// it is the same for gfx6-9 (except that LS and ES don't exist on gfx9), so
+// we can use fixed values.
+static unsigned getRsrc1Reg(CallingConv::ID CC) {
+ switch (CC) {
+ default:
+ return PALMD::R_2E12_COMPUTE_PGM_RSRC1;
+ case CallingConv::AMDGPU_LS:
+ return PALMD::R_2D4A_SPI_SHADER_PGM_RSRC1_LS;
+ case CallingConv::AMDGPU_HS:
+ return PALMD::R_2D0A_SPI_SHADER_PGM_RSRC1_HS;
+ case CallingConv::AMDGPU_ES:
+ return PALMD::R_2CCA_SPI_SHADER_PGM_RSRC1_ES;
+ case CallingConv::AMDGPU_GS:
+ return PALMD::R_2C8A_SPI_SHADER_PGM_RSRC1_GS;
+ case CallingConv::AMDGPU_VS:
+ return PALMD::R_2C4A_SPI_SHADER_PGM_RSRC1_VS;
+ case CallingConv::AMDGPU_PS:
+ return PALMD::R_2C0A_SPI_SHADER_PGM_RSRC1_PS;
+ }
+}
+
+// Calculate the PAL metadata key for *S_SCRATCH_SIZE. It can be used
+// with a constant offset to access any non-register shader-specific PAL
+// metadata key.
+static unsigned getScratchSizeKey(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::AMDGPU_PS:
+ return PALMD::Key::PS_SCRATCH_SIZE;
+ case CallingConv::AMDGPU_VS:
+ return PALMD::Key::VS_SCRATCH_SIZE;
+ case CallingConv::AMDGPU_GS:
+ return PALMD::Key::GS_SCRATCH_SIZE;
+ case CallingConv::AMDGPU_ES:
+ return PALMD::Key::ES_SCRATCH_SIZE;
+ case CallingConv::AMDGPU_HS:
+ return PALMD::Key::HS_SCRATCH_SIZE;
+ case CallingConv::AMDGPU_LS:
+ return PALMD::Key::LS_SCRATCH_SIZE;
+ default:
+ return PALMD::Key::CS_SCRATCH_SIZE;
+ }
+}
+
+// Set the rsrc1 register in the metadata for a particular shader stage.
+// In fact this ORs the value into any previous setting of the register.
+void AMDGPUPALMetadata::setRsrc1(CallingConv::ID CC, unsigned Val) {
+ setRegister(getRsrc1Reg(CC), Val);
+}
+
+// Set the rsrc2 register in the metadata for a particular shader stage.
+// In fact this ORs the value into any previous setting of the register.
+void AMDGPUPALMetadata::setRsrc2(CallingConv::ID CC, unsigned Val) {
+ setRegister(getRsrc1Reg(CC) + 1, Val);
+}
+
+// Set the SPI_PS_INPUT_ENA register in the metadata.
+// In fact this ORs the value into any previous setting of the register.
+void AMDGPUPALMetadata::setSpiPsInputEna(unsigned Val) {
+ setRegister(PALMD::R_A1B3_SPI_PS_INPUT_ENA, Val);
+}
+
+// Set the SPI_PS_INPUT_ADDR register in the metadata.
+// In fact this ORs the value into any previous setting of the register.
+void AMDGPUPALMetadata::setSpiPsInputAddr(unsigned Val) {
+ setRegister(PALMD::R_A1B4_SPI_PS_INPUT_ADDR, Val);
+}
+
+// Get a register from the metadata, or 0 if not currently set.
+unsigned AMDGPUPALMetadata::getRegister(unsigned Reg) {
+ auto Regs = getRegisters();
+ auto It = Regs.find(MsgPackDoc.getNode(Reg));
+ if (It == Regs.end())
+ return 0;
+ auto N = It->second;
+ if (N.getKind() != msgpack::Type::UInt)
+ return 0;
+ return N.getUInt();
+}
+
+// Set a register in the metadata.
+// In fact this ORs the value into any previous setting of the register.
+void AMDGPUPALMetadata::setRegister(unsigned Reg, unsigned Val) {
+ if (!isLegacy()) {
+    // In the new MsgPack format, ignore registers numbered >= 0x10000000;
+    // these are PAL ABI pseudo-registers used only by the old non-MsgPack
+    // format.
+ if (Reg >= 0x10000000)
+ return;
+ }
+ auto &N = getRegisters()[MsgPackDoc.getNode(Reg)];
+ if (N.getKind() == msgpack::Type::UInt)
+ Val |= N.getUInt();
+ N = N.getDocument()->getNode(Val);
+}
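+
+// For example, setRegister(Reg, 0x1) followed by setRegister(Reg, 0x4) leaves
+// the register node holding 0x5, not 0x4.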
+
+// Set the entry point name for one shader.
+void AMDGPUPALMetadata::setEntryPoint(unsigned CC, StringRef Name) {
+ if (isLegacy())
+ return;
+ // Msgpack format.
+ getHwStage(CC)[".entry_point"] = MsgPackDoc.getNode(Name, /*Copy=*/true);
+}
+
+// Set the number of used vgprs in the metadata. This is an optional
+// advisory record for logging etc; wave dispatch actually uses the rsrc1
+// register for the shader stage to determine the number of vgprs to
+// allocate.
+void AMDGPUPALMetadata::setNumUsedVgprs(CallingConv::ID CC, unsigned Val) {
+ if (isLegacy()) {
+ // Old non-msgpack format.
+ unsigned NumUsedVgprsKey = getScratchSizeKey(CC) +
+ PALMD::Key::VS_NUM_USED_VGPRS -
+ PALMD::Key::VS_SCRATCH_SIZE;
+ setRegister(NumUsedVgprsKey, Val);
+ return;
+ }
+ // Msgpack format.
+ getHwStage(CC)[".vgpr_count"] = MsgPackDoc.getNode(Val);
+}
+
+// Set the number of used sgprs in the metadata. This is an optional advisory
+// record for logging etc; wave dispatch actually uses the rsrc1 register for
+// the shader stage to determine the number of sgprs to allocate.
+void AMDGPUPALMetadata::setNumUsedSgprs(CallingConv::ID CC, unsigned Val) {
+ if (isLegacy()) {
+ // Old non-msgpack format.
+ unsigned NumUsedSgprsKey = getScratchSizeKey(CC) +
+ PALMD::Key::VS_NUM_USED_SGPRS -
+ PALMD::Key::VS_SCRATCH_SIZE;
+ setRegister(NumUsedSgprsKey, Val);
+ return;
+ }
+ // Msgpack format.
+ getHwStage(CC)[".sgpr_count"] = MsgPackDoc.getNode(Val);
+}
+
+// Set the scratch size in the metadata.
+void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) {
+ if (isLegacy()) {
+ // Old non-msgpack format.
+ setRegister(getScratchSizeKey(CC), Val);
+ return;
+ }
+ // Msgpack format.
+ getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val);
+}
+
+// Set the hardware register bit in PAL metadata to enable wave32 on the
+// shader of the given calling convention.
+void AMDGPUPALMetadata::setWave32(unsigned CC) {
+ switch (CC) {
+ case CallingConv::AMDGPU_HS:
+ setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_HS_W32_EN(1));
+ break;
+ case CallingConv::AMDGPU_GS:
+ setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_GS_W32_EN(1));
+ break;
+ case CallingConv::AMDGPU_VS:
+ setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_VS_W32_EN(1));
+ break;
+ case CallingConv::AMDGPU_PS:
+ setRegister(PALMD::R_A1B6_SPI_PS_IN_CONTROL, S_0286D8_PS_W32_EN(1));
+ break;
+ case CallingConv::AMDGPU_CS:
+ setRegister(PALMD::R_2E00_COMPUTE_DISPATCH_INITIATOR,
+ S_00B800_CS_W32_EN(1));
+ break;
+ }
+}
+
+// Convert a register number to name, for display by toString().
+// Returns nullptr if none.
+static const char *getRegisterName(unsigned RegNum) {
+ // Table of registers.
+ static const struct RegInfo {
+ unsigned Num;
+ const char *Name;
+ } RegInfoTable[] = {
+ // Registers that code generation sets/modifies metadata for.
+ {PALMD::R_2C4A_SPI_SHADER_PGM_RSRC1_VS, "SPI_SHADER_PGM_RSRC1_VS"},
+ {PALMD::R_2C4A_SPI_SHADER_PGM_RSRC1_VS + 1, "SPI_SHADER_PGM_RSRC2_VS"},
+ {PALMD::R_2D4A_SPI_SHADER_PGM_RSRC1_LS, "SPI_SHADER_PGM_RSRC1_LS"},
+ {PALMD::R_2D4A_SPI_SHADER_PGM_RSRC1_LS + 1, "SPI_SHADER_PGM_RSRC2_LS"},
+ {PALMD::R_2D0A_SPI_SHADER_PGM_RSRC1_HS, "SPI_SHADER_PGM_RSRC1_HS"},
+ {PALMD::R_2D0A_SPI_SHADER_PGM_RSRC1_HS + 1, "SPI_SHADER_PGM_RSRC2_HS"},
+ {PALMD::R_2CCA_SPI_SHADER_PGM_RSRC1_ES, "SPI_SHADER_PGM_RSRC1_ES"},
+ {PALMD::R_2CCA_SPI_SHADER_PGM_RSRC1_ES + 1, "SPI_SHADER_PGM_RSRC2_ES"},
+ {PALMD::R_2C8A_SPI_SHADER_PGM_RSRC1_GS, "SPI_SHADER_PGM_RSRC1_GS"},
+ {PALMD::R_2C8A_SPI_SHADER_PGM_RSRC1_GS + 1, "SPI_SHADER_PGM_RSRC2_GS"},
+ {PALMD::R_2E00_COMPUTE_DISPATCH_INITIATOR, "COMPUTE_DISPATCH_INITIATOR"},
+ {PALMD::R_2E12_COMPUTE_PGM_RSRC1, "COMPUTE_PGM_RSRC1"},
+ {PALMD::R_2E12_COMPUTE_PGM_RSRC1 + 1, "COMPUTE_PGM_RSRC2"},
+ {PALMD::R_2C0A_SPI_SHADER_PGM_RSRC1_PS, "SPI_SHADER_PGM_RSRC1_PS"},
+ {PALMD::R_2C0A_SPI_SHADER_PGM_RSRC1_PS + 1, "SPI_SHADER_PGM_RSRC2_PS"},
+ {PALMD::R_A1B3_SPI_PS_INPUT_ENA, "SPI_PS_INPUT_ENA"},
+ {PALMD::R_A1B4_SPI_PS_INPUT_ADDR, "SPI_PS_INPUT_ADDR"},
+ {PALMD::R_A1B6_SPI_PS_IN_CONTROL, "SPI_PS_IN_CONTROL"},
+ {PALMD::R_A2D5_VGT_SHADER_STAGES_EN, "VGT_SHADER_STAGES_EN"},
+
+ // Registers not known to code generation.
+ {0x2c07, "SPI_SHADER_PGM_RSRC3_PS"},
+ {0x2c46, "SPI_SHADER_PGM_RSRC3_VS"},
+ {0x2c87, "SPI_SHADER_PGM_RSRC3_GS"},
+ {0x2cc7, "SPI_SHADER_PGM_RSRC3_ES"},
+ {0x2d07, "SPI_SHADER_PGM_RSRC3_HS"},
+ {0x2d47, "SPI_SHADER_PGM_RSRC3_LS"},
+
+ {0xa1c3, "SPI_SHADER_POS_FORMAT"},
+ {0xa1b1, "SPI_VS_OUT_CONFIG"},
+ {0xa207, "PA_CL_VS_OUT_CNTL"},
+ {0xa204, "PA_CL_CLIP_CNTL"},
+ {0xa206, "PA_CL_VTE_CNTL"},
+ {0xa2f9, "PA_SU_VTX_CNTL"},
+ {0xa293, "PA_SC_MODE_CNTL_1"},
+ {0xa2a1, "VGT_PRIMITIVEID_EN"},
+ {0x2c81, "SPI_SHADER_PGM_RSRC4_GS"},
+ {0x2e18, "COMPUTE_TMPRING_SIZE"},
+ {0xa1b5, "SPI_INTERP_CONTROL_0"},
+ {0xa1ba, "SPI_TMPRING_SIZE"},
+ {0xa1c4, "SPI_SHADER_Z_FORMAT"},
+ {0xa1c5, "SPI_SHADER_COL_FORMAT"},
+ {0xa203, "DB_SHADER_CONTROL"},
+ {0xa08f, "CB_SHADER_MASK"},
+ {0xa191, "SPI_PS_INPUT_CNTL_0"},
+ {0xa192, "SPI_PS_INPUT_CNTL_1"},
+ {0xa193, "SPI_PS_INPUT_CNTL_2"},
+ {0xa194, "SPI_PS_INPUT_CNTL_3"},
+ {0xa195, "SPI_PS_INPUT_CNTL_4"},
+ {0xa196, "SPI_PS_INPUT_CNTL_5"},
+ {0xa197, "SPI_PS_INPUT_CNTL_6"},
+ {0xa198, "SPI_PS_INPUT_CNTL_7"},
+ {0xa199, "SPI_PS_INPUT_CNTL_8"},
+ {0xa19a, "SPI_PS_INPUT_CNTL_9"},
+ {0xa19b, "SPI_PS_INPUT_CNTL_10"},
+ {0xa19c, "SPI_PS_INPUT_CNTL_11"},
+ {0xa19d, "SPI_PS_INPUT_CNTL_12"},
+ {0xa19e, "SPI_PS_INPUT_CNTL_13"},
+ {0xa19f, "SPI_PS_INPUT_CNTL_14"},
+ {0xa1a0, "SPI_PS_INPUT_CNTL_15"},
+ {0xa1a1, "SPI_PS_INPUT_CNTL_16"},
+ {0xa1a2, "SPI_PS_INPUT_CNTL_17"},
+ {0xa1a3, "SPI_PS_INPUT_CNTL_18"},
+ {0xa1a4, "SPI_PS_INPUT_CNTL_19"},
+ {0xa1a5, "SPI_PS_INPUT_CNTL_20"},
+ {0xa1a6, "SPI_PS_INPUT_CNTL_21"},
+ {0xa1a7, "SPI_PS_INPUT_CNTL_22"},
+ {0xa1a8, "SPI_PS_INPUT_CNTL_23"},
+ {0xa1a9, "SPI_PS_INPUT_CNTL_24"},
+ {0xa1aa, "SPI_PS_INPUT_CNTL_25"},
+ {0xa1ab, "SPI_PS_INPUT_CNTL_26"},
+ {0xa1ac, "SPI_PS_INPUT_CNTL_27"},
+ {0xa1ad, "SPI_PS_INPUT_CNTL_28"},
+ {0xa1ae, "SPI_PS_INPUT_CNTL_29"},
+ {0xa1af, "SPI_PS_INPUT_CNTL_30"},
+ {0xa1b0, "SPI_PS_INPUT_CNTL_31"},
+
+ {0xa2ce, "VGT_GS_MAX_VERT_OUT"},
+ {0xa2ab, "VGT_ESGS_RING_ITEMSIZE"},
+ {0xa290, "VGT_GS_MODE"},
+ {0xa291, "VGT_GS_ONCHIP_CNTL"},
+ {0xa2d7, "VGT_GS_VERT_ITEMSIZE"},
+ {0xa2d8, "VGT_GS_VERT_ITEMSIZE_1"},
+ {0xa2d9, "VGT_GS_VERT_ITEMSIZE_2"},
+ {0xa2da, "VGT_GS_VERT_ITEMSIZE_3"},
+ {0xa298, "VGT_GSVS_RING_OFFSET_1"},
+ {0xa299, "VGT_GSVS_RING_OFFSET_2"},
+ {0xa29a, "VGT_GSVS_RING_OFFSET_3"},
+
+ {0xa2e4, "VGT_GS_INSTANCE_CNT"},
+ {0xa297, "VGT_GS_PER_VS"},
+ {0xa29b, "VGT_GS_OUT_PRIM_TYPE"},
+ {0xa2ac, "VGT_GSVS_RING_ITEMSIZE"},
+
+ {0xa2ad, "VGT_REUSE_OFF"},
+ {0xa1b8, "SPI_BARYC_CNTL"},
+
+ {0x2c4c, "SPI_SHADER_USER_DATA_VS_0"},
+ {0x2c4d, "SPI_SHADER_USER_DATA_VS_1"},
+ {0x2c4e, "SPI_SHADER_USER_DATA_VS_2"},
+ {0x2c4f, "SPI_SHADER_USER_DATA_VS_3"},
+ {0x2c50, "SPI_SHADER_USER_DATA_VS_4"},
+ {0x2c51, "SPI_SHADER_USER_DATA_VS_5"},
+ {0x2c52, "SPI_SHADER_USER_DATA_VS_6"},
+ {0x2c53, "SPI_SHADER_USER_DATA_VS_7"},
+ {0x2c54, "SPI_SHADER_USER_DATA_VS_8"},
+ {0x2c55, "SPI_SHADER_USER_DATA_VS_9"},
+ {0x2c56, "SPI_SHADER_USER_DATA_VS_10"},
+ {0x2c57, "SPI_SHADER_USER_DATA_VS_11"},
+ {0x2c58, "SPI_SHADER_USER_DATA_VS_12"},
+ {0x2c59, "SPI_SHADER_USER_DATA_VS_13"},
+ {0x2c5a, "SPI_SHADER_USER_DATA_VS_14"},
+ {0x2c5b, "SPI_SHADER_USER_DATA_VS_15"},
+ {0x2c5c, "SPI_SHADER_USER_DATA_VS_16"},
+ {0x2c5d, "SPI_SHADER_USER_DATA_VS_17"},
+ {0x2c5e, "SPI_SHADER_USER_DATA_VS_18"},
+ {0x2c5f, "SPI_SHADER_USER_DATA_VS_19"},
+ {0x2c60, "SPI_SHADER_USER_DATA_VS_20"},
+ {0x2c61, "SPI_SHADER_USER_DATA_VS_21"},
+ {0x2c62, "SPI_SHADER_USER_DATA_VS_22"},
+ {0x2c63, "SPI_SHADER_USER_DATA_VS_23"},
+ {0x2c64, "SPI_SHADER_USER_DATA_VS_24"},
+ {0x2c65, "SPI_SHADER_USER_DATA_VS_25"},
+ {0x2c66, "SPI_SHADER_USER_DATA_VS_26"},
+ {0x2c67, "SPI_SHADER_USER_DATA_VS_27"},
+ {0x2c68, "SPI_SHADER_USER_DATA_VS_28"},
+ {0x2c69, "SPI_SHADER_USER_DATA_VS_29"},
+ {0x2c6a, "SPI_SHADER_USER_DATA_VS_30"},
+ {0x2c6b, "SPI_SHADER_USER_DATA_VS_31"},
+
+ {0x2ccc, "SPI_SHADER_USER_DATA_ES_0"},
+ {0x2ccd, "SPI_SHADER_USER_DATA_ES_1"},
+ {0x2cce, "SPI_SHADER_USER_DATA_ES_2"},
+ {0x2ccf, "SPI_SHADER_USER_DATA_ES_3"},
+ {0x2cd0, "SPI_SHADER_USER_DATA_ES_4"},
+ {0x2cd1, "SPI_SHADER_USER_DATA_ES_5"},
+ {0x2cd2, "SPI_SHADER_USER_DATA_ES_6"},
+ {0x2cd3, "SPI_SHADER_USER_DATA_ES_7"},
+ {0x2cd4, "SPI_SHADER_USER_DATA_ES_8"},
+ {0x2cd5, "SPI_SHADER_USER_DATA_ES_9"},
+ {0x2cd6, "SPI_SHADER_USER_DATA_ES_10"},
+ {0x2cd7, "SPI_SHADER_USER_DATA_ES_11"},
+ {0x2cd8, "SPI_SHADER_USER_DATA_ES_12"},
+ {0x2cd9, "SPI_SHADER_USER_DATA_ES_13"},
+ {0x2cda, "SPI_SHADER_USER_DATA_ES_14"},
+ {0x2cdb, "SPI_SHADER_USER_DATA_ES_15"},
+ {0x2cdc, "SPI_SHADER_USER_DATA_ES_16"},
+ {0x2cdd, "SPI_SHADER_USER_DATA_ES_17"},
+ {0x2cde, "SPI_SHADER_USER_DATA_ES_18"},
+ {0x2cdf, "SPI_SHADER_USER_DATA_ES_19"},
+ {0x2ce0, "SPI_SHADER_USER_DATA_ES_20"},
+ {0x2ce1, "SPI_SHADER_USER_DATA_ES_21"},
+ {0x2ce2, "SPI_SHADER_USER_DATA_ES_22"},
+ {0x2ce3, "SPI_SHADER_USER_DATA_ES_23"},
+ {0x2ce4, "SPI_SHADER_USER_DATA_ES_24"},
+ {0x2ce5, "SPI_SHADER_USER_DATA_ES_25"},
+ {0x2ce6, "SPI_SHADER_USER_DATA_ES_26"},
+ {0x2ce7, "SPI_SHADER_USER_DATA_ES_27"},
+ {0x2ce8, "SPI_SHADER_USER_DATA_ES_28"},
+ {0x2ce9, "SPI_SHADER_USER_DATA_ES_29"},
+ {0x2cea, "SPI_SHADER_USER_DATA_ES_30"},
+ {0x2ceb, "SPI_SHADER_USER_DATA_ES_31"},
+
+ {0x2c0c, "SPI_SHADER_USER_DATA_PS_0"},
+ {0x2c0d, "SPI_SHADER_USER_DATA_PS_1"},
+ {0x2c0e, "SPI_SHADER_USER_DATA_PS_2"},
+ {0x2c0f, "SPI_SHADER_USER_DATA_PS_3"},
+ {0x2c10, "SPI_SHADER_USER_DATA_PS_4"},
+ {0x2c11, "SPI_SHADER_USER_DATA_PS_5"},
+ {0x2c12, "SPI_SHADER_USER_DATA_PS_6"},
+ {0x2c13, "SPI_SHADER_USER_DATA_PS_7"},
+ {0x2c14, "SPI_SHADER_USER_DATA_PS_8"},
+ {0x2c15, "SPI_SHADER_USER_DATA_PS_9"},
+ {0x2c16, "SPI_SHADER_USER_DATA_PS_10"},
+ {0x2c17, "SPI_SHADER_USER_DATA_PS_11"},
+ {0x2c18, "SPI_SHADER_USER_DATA_PS_12"},
+ {0x2c19, "SPI_SHADER_USER_DATA_PS_13"},
+ {0x2c1a, "SPI_SHADER_USER_DATA_PS_14"},
+ {0x2c1b, "SPI_SHADER_USER_DATA_PS_15"},
+ {0x2c1c, "SPI_SHADER_USER_DATA_PS_16"},
+ {0x2c1d, "SPI_SHADER_USER_DATA_PS_17"},
+ {0x2c1e, "SPI_SHADER_USER_DATA_PS_18"},
+ {0x2c1f, "SPI_SHADER_USER_DATA_PS_19"},
+ {0x2c20, "SPI_SHADER_USER_DATA_PS_20"},
+ {0x2c21, "SPI_SHADER_USER_DATA_PS_21"},
+ {0x2c22, "SPI_SHADER_USER_DATA_PS_22"},
+ {0x2c23, "SPI_SHADER_USER_DATA_PS_23"},
+ {0x2c24, "SPI_SHADER_USER_DATA_PS_24"},
+ {0x2c25, "SPI_SHADER_USER_DATA_PS_25"},
+ {0x2c26, "SPI_SHADER_USER_DATA_PS_26"},
+ {0x2c27, "SPI_SHADER_USER_DATA_PS_27"},
+ {0x2c28, "SPI_SHADER_USER_DATA_PS_28"},
+ {0x2c29, "SPI_SHADER_USER_DATA_PS_29"},
+ {0x2c2a, "SPI_SHADER_USER_DATA_PS_30"},
+ {0x2c2b, "SPI_SHADER_USER_DATA_PS_31"},
+
+ {0x2e40, "COMPUTE_USER_DATA_0"},
+ {0x2e41, "COMPUTE_USER_DATA_1"},
+ {0x2e42, "COMPUTE_USER_DATA_2"},
+ {0x2e43, "COMPUTE_USER_DATA_3"},
+ {0x2e44, "COMPUTE_USER_DATA_4"},
+ {0x2e45, "COMPUTE_USER_DATA_5"},
+ {0x2e46, "COMPUTE_USER_DATA_6"},
+ {0x2e47, "COMPUTE_USER_DATA_7"},
+ {0x2e48, "COMPUTE_USER_DATA_8"},
+ {0x2e49, "COMPUTE_USER_DATA_9"},
+ {0x2e4a, "COMPUTE_USER_DATA_10"},
+ {0x2e4b, "COMPUTE_USER_DATA_11"},
+ {0x2e4c, "COMPUTE_USER_DATA_12"},
+ {0x2e4d, "COMPUTE_USER_DATA_13"},
+ {0x2e4e, "COMPUTE_USER_DATA_14"},
+ {0x2e4f, "COMPUTE_USER_DATA_15"},
+
+ {0x2e07, "COMPUTE_NUM_THREAD_X"},
+ {0x2e08, "COMPUTE_NUM_THREAD_Y"},
+ {0x2e09, "COMPUTE_NUM_THREAD_Z"},
+ {0xa2db, "VGT_TF_PARAM"},
+ {0xa2d6, "VGT_LS_HS_CONFIG"},
+ {0xa287, "VGT_HOS_MIN_TESS_LEVEL"},
+ {0xa286, "VGT_HOS_MAX_TESS_LEVEL"},
+ {0xa2f8, "PA_SC_AA_CONFIG"},
+ {0xa310, "PA_SC_SHADER_CONTROL"},
+ {0xa313, "PA_SC_CONSERVATIVE_RASTERIZATION_CNTL"},
+
+ {0x2d0c, "SPI_SHADER_USER_DATA_LS_0"},
+ {0x2d0d, "SPI_SHADER_USER_DATA_LS_1"},
+ {0x2d0e, "SPI_SHADER_USER_DATA_LS_2"},
+ {0x2d0f, "SPI_SHADER_USER_DATA_LS_3"},
+ {0x2d10, "SPI_SHADER_USER_DATA_LS_4"},
+ {0x2d11, "SPI_SHADER_USER_DATA_LS_5"},
+ {0x2d12, "SPI_SHADER_USER_DATA_LS_6"},
+ {0x2d13, "SPI_SHADER_USER_DATA_LS_7"},
+ {0x2d14, "SPI_SHADER_USER_DATA_LS_8"},
+ {0x2d15, "SPI_SHADER_USER_DATA_LS_9"},
+ {0x2d16, "SPI_SHADER_USER_DATA_LS_10"},
+ {0x2d17, "SPI_SHADER_USER_DATA_LS_11"},
+ {0x2d18, "SPI_SHADER_USER_DATA_LS_12"},
+ {0x2d19, "SPI_SHADER_USER_DATA_LS_13"},
+ {0x2d1a, "SPI_SHADER_USER_DATA_LS_14"},
+ {0x2d1b, "SPI_SHADER_USER_DATA_LS_15"},
+ {0x2d1c, "SPI_SHADER_USER_DATA_LS_16"},
+ {0x2d1d, "SPI_SHADER_USER_DATA_LS_17"},
+ {0x2d1e, "SPI_SHADER_USER_DATA_LS_18"},
+ {0x2d1f, "SPI_SHADER_USER_DATA_LS_19"},
+ {0x2d20, "SPI_SHADER_USER_DATA_LS_20"},
+ {0x2d21, "SPI_SHADER_USER_DATA_LS_21"},
+ {0x2d22, "SPI_SHADER_USER_DATA_LS_22"},
+ {0x2d23, "SPI_SHADER_USER_DATA_LS_23"},
+ {0x2d24, "SPI_SHADER_USER_DATA_LS_24"},
+ {0x2d25, "SPI_SHADER_USER_DATA_LS_25"},
+ {0x2d26, "SPI_SHADER_USER_DATA_LS_26"},
+ {0x2d27, "SPI_SHADER_USER_DATA_LS_27"},
+ {0x2d28, "SPI_SHADER_USER_DATA_LS_28"},
+ {0x2d29, "SPI_SHADER_USER_DATA_LS_29"},
+ {0x2d2a, "SPI_SHADER_USER_DATA_LS_30"},
+ {0x2d2b, "SPI_SHADER_USER_DATA_LS_31"},
+
+ {0xa2aa, "IA_MULTI_VGT_PARAM"},
+ {0xa2a5, "VGT_GS_MAX_PRIMS_PER_SUBGROUP"},
+ {0xa2e6, "VGT_STRMOUT_BUFFER_CONFIG"},
+ {0xa2e5, "VGT_STRMOUT_CONFIG"},
+ {0xa2b5, "VGT_STRMOUT_VTX_STRIDE_0"},
+ {0xa2b9, "VGT_STRMOUT_VTX_STRIDE_1"},
+ {0xa2bd, "VGT_STRMOUT_VTX_STRIDE_2"},
+ {0xa2c1, "VGT_STRMOUT_VTX_STRIDE_3"},
+ {0xa316, "VGT_VERTEX_REUSE_BLOCK_CNTL"},
+
+ {0, nullptr}};
+ auto Entry = RegInfoTable;
+ for (; Entry->Num && Entry->Num != RegNum; ++Entry)
+ ;
+ return Entry->Name;
+}
+
+// Convert the accumulated PAL metadata into an asm directive.
+void AMDGPUPALMetadata::toString(std::string &String) {
+ String.clear();
+ if (!BlobType)
+ return;
+ raw_string_ostream Stream(String);
+ if (isLegacy()) {
+ if (MsgPackDoc.getRoot().getKind() == msgpack::Type::Nil)
+ return;
+ // Old linear reg=val format.
+ Stream << '\t' << AMDGPU::PALMD::AssemblerDirective << ' ';
+ auto Regs = getRegisters();
+ for (auto I = Regs.begin(), E = Regs.end(); I != E; ++I) {
+ if (I != Regs.begin())
+ Stream << ',';
+ unsigned Reg = I->first.getUInt();
+ unsigned Val = I->second.getUInt();
+ Stream << "0x" << Twine::utohexstr(Reg) << ",0x" << Twine::utohexstr(Val);
+ }
+ Stream << '\n';
+ return;
+ }
+
+ // New msgpack-based format -- output as YAML (with unsigned numbers in hex),
+ // but first change the registers map to use names.
+ MsgPackDoc.setHexMode();
+ auto &RegsObj = refRegisters();
+ auto OrigRegs = RegsObj.getMap();
+ RegsObj = MsgPackDoc.getMapNode();
+ for (auto I : OrigRegs) {
+ auto Key = I.first;
+ if (const char *RegName = getRegisterName(Key.getUInt())) {
+ std::string KeyName = Key.toString();
+ KeyName += " (";
+ KeyName += RegName;
+ KeyName += ')';
+ Key = MsgPackDoc.getNode(KeyName, /*Copy=*/true);
+ }
+ RegsObj.getMap()[Key] = I.second;
+ }
+
+ // Output as YAML.
+ Stream << '\t' << AMDGPU::PALMD::AssemblerDirectiveBegin << '\n';
+ MsgPackDoc.toYAML(Stream);
+ Stream << '\t' << AMDGPU::PALMD::AssemblerDirectiveEnd << '\n';
+
+ // Restore original registers map.
+ RegsObj = OrigRegs;
+}
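+
+// Example of the legacy output (a sketch; register numbers and values are
+// illustrative): a single line starting with PALMD::AssemblerDirective,
+// followed by comma-separated hex reg,value pairs such as
+// "0x2c0a,0x2f0000,0x2c0b,0x42".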
+
+// Convert the accumulated PAL metadata into a binary blob for writing as
+// a .note record of the specified AMD type. Returns an empty blob if
+// there is no PAL metadata.
+void AMDGPUPALMetadata::toBlob(unsigned Type, std::string &Blob) {
+ if (Type == ELF::NT_AMD_AMDGPU_PAL_METADATA)
+ toLegacyBlob(Blob);
+ else if (Type)
+ toMsgPackBlob(Blob);
+}
+
+void AMDGPUPALMetadata::toLegacyBlob(std::string &Blob) {
+ Blob.clear();
+ auto Registers = getRegisters();
+ if (Registers.getMap().empty())
+ return;
+ raw_string_ostream OS(Blob);
+ support::endian::Writer EW(OS, support::endianness::little);
+ for (auto I : Registers.getMap()) {
+ EW.write(uint32_t(I.first.getUInt()));
+ EW.write(uint32_t(I.second.getUInt()));
+ }
+}
+
+void AMDGPUPALMetadata::toMsgPackBlob(std::string &Blob) {
+ Blob.clear();
+ MsgPackDoc.writeToBlob(Blob);
+}
+
+// Set PAL metadata from YAML text. Returns false if failed.
+bool AMDGPUPALMetadata::setFromString(StringRef S) {
+ BlobType = ELF::NT_AMDGPU_METADATA;
+ if (!MsgPackDoc.fromYAML(S))
+ return false;
+
+ // In the registers map, some keys may be of the form "0xa191
+ // (SPI_PS_INPUT_CNTL_0)", in which case the YAML input code made it a
+ // string. We need to turn it into a number.
+ auto &RegsObj = refRegisters();
+ auto OrigRegs = RegsObj;
+ RegsObj = MsgPackDoc.getMapNode();
+ Registers = RegsObj.getMap();
+ bool Ok = true;
+ for (auto I : OrigRegs.getMap()) {
+ auto Key = I.first;
+ if (Key.getKind() == msgpack::Type::String) {
+ StringRef S = Key.getString();
+ uint64_t Val;
+ if (S.consumeInteger(0, Val)) {
+ Ok = false;
+ errs() << "Unrecognized PAL metadata register key '" << S << "'\n";
+ continue;
+ }
+ Key = MsgPackDoc.getNode(uint64_t(Val));
+ }
+ Registers.getMap()[Key] = I.second;
+ }
+ return Ok;
+}
+
+// Reference (create if necessary) the node for the registers map.
+msgpack::DocNode &AMDGPUPALMetadata::refRegisters() {
+ auto &N =
+ MsgPackDoc.getRoot()
+ .getMap(/*Convert=*/true)[MsgPackDoc.getNode("amdpal.pipelines")]
+ .getArray(/*Convert=*/true)[0]
+ .getMap(/*Convert=*/true)[MsgPackDoc.getNode(".registers")];
+ N.getMap(/*Convert=*/true);
+ return N;
+}
+
+// Get (create if necessary) the registers map.
+msgpack::MapDocNode AMDGPUPALMetadata::getRegisters() {
+ if (Registers.isEmpty())
+ Registers = refRegisters();
+ return Registers.getMap();
+}
+
+// Return the PAL metadata hardware shader stage name.
+static const char *getStageName(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::AMDGPU_PS:
+ return ".ps";
+ case CallingConv::AMDGPU_VS:
+ return ".vs";
+ case CallingConv::AMDGPU_GS:
+ return ".gs";
+ case CallingConv::AMDGPU_ES:
+ return ".es";
+ case CallingConv::AMDGPU_HS:
+ return ".hs";
+ case CallingConv::AMDGPU_LS:
+ return ".ls";
+ default:
+ return ".cs";
+ }
+}
+
+// Get (create if necessary) the .hardware_stages entry for the given calling
+// convention.
+msgpack::MapDocNode AMDGPUPALMetadata::getHwStage(unsigned CC) {
+ if (HwStages.isEmpty())
+ HwStages = MsgPackDoc.getRoot()
+ .getMap(/*Convert=*/true)["amdpal.pipelines"]
+ .getArray(/*Convert=*/true)[0]
+ .getMap(/*Convert=*/true)[".hardware_stages"]
+ .getMap(/*Convert=*/true);
+ return HwStages.getMap()[getStageName(CC)].getMap(/*Convert=*/true);
+}
+
+// Get .note record vendor name of metadata blob to be emitted.
+const char *AMDGPUPALMetadata::getVendor() const {
+ return isLegacy() ? ElfNote::NoteNameV2 : ElfNote::NoteNameV3;
+}
+
+// Get .note record type of metadata blob to be emitted:
+// ELF::NT_AMD_AMDGPU_PAL_METADATA (legacy key=val format), or
+// ELF::NT_AMDGPU_METADATA (MsgPack format), or
+// 0 (no PAL metadata).
+unsigned AMDGPUPALMetadata::getType() const {
+ return BlobType;
+}
+
+// Return whether the blob type is legacy PAL metadata.
+bool AMDGPUPALMetadata::isLegacy() const {
+ return BlobType == ELF::NT_AMD_AMDGPU_PAL_METADATA;
+}
+
+// Set legacy PAL metadata format.
+void AMDGPUPALMetadata::setLegacy() {
+ BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA;
+}
+
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
new file mode 100644
index 000000000000..0f17c157b206
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -0,0 +1,135 @@
+//===-- AMDGPUPALMetadata.h - PAL metadata handling -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// PAL metadata handling
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include <map>
+
+namespace llvm {
+
+class AMDGPUTargetStreamer;
+class formatted_raw_ostream;
+class MCStreamer;
+class Module;
+
+class AMDGPUPALMetadata {
+ unsigned BlobType = 0;
+ msgpack::Document MsgPackDoc;
+ msgpack::DocNode Registers;
+ msgpack::DocNode HwStages;
+
+public:
+ // Read the amdgpu.pal.metadata supplied by the frontend, ready for
+ // per-function modification.
+ void readFromIR(Module &M);
+
+ // Set PAL metadata from a binary blob from the applicable .note record.
+ // Returns false if bad format. Blob must remain valid for the lifetime of
+ // the Metadata.
+ bool setFromBlob(unsigned Type, StringRef Blob);
+
+ // Set the rsrc1 register in the metadata for a particular shader stage.
+ // In fact this ORs the value into any previous setting of the register.
+ void setRsrc1(unsigned CC, unsigned Val);
+
+ // Set the rsrc2 register in the metadata for a particular shader stage.
+ // In fact this ORs the value into any previous setting of the register.
+ void setRsrc2(unsigned CC, unsigned Val);
+
+ // Set the SPI_PS_INPUT_ENA register in the metadata.
+ // In fact this ORs the value into any previous setting of the register.
+ void setSpiPsInputEna(unsigned Val);
+
+ // Set the SPI_PS_INPUT_ADDR register in the metadata.
+ // In fact this ORs the value into any previous setting of the register.
+ void setSpiPsInputAddr(unsigned Val);
+
+ // Get a register from the metadata, or 0 if not currently set.
+ unsigned getRegister(unsigned Reg);
+
+ // Set a register in the metadata.
+ // In fact this ORs the value into any previous setting of the register.
+ void setRegister(unsigned Reg, unsigned Val);
+
+ // Set the entry point name for one shader.
+ void setEntryPoint(unsigned CC, StringRef Name);
+
+ // Set the number of used vgprs in the metadata. This is an optional advisory
+ // record for logging etc; wave dispatch actually uses the rsrc1 register for
+ // the shader stage to determine the number of vgprs to allocate.
+ void setNumUsedVgprs(unsigned CC, unsigned Val);
+
+ // Set the number of used sgprs in the metadata. This is an optional advisory
+ // record for logging etc; wave dispatch actually uses the rsrc1 register for
+ // the shader stage to determine the number of sgprs to allocate.
+ void setNumUsedSgprs(unsigned CC, unsigned Val);
+
+ // Set the scratch size in the metadata.
+ void setScratchSize(unsigned CC, unsigned Val);
+
+ // Set the hardware register bit in PAL metadata to enable wave32 on the
+ // shader of the given calling convention.
+ void setWave32(unsigned CC);
+
+ // Emit the accumulated PAL metadata as asm directives.
+ // This is called from AMDGPUTargetAsmStreamer::Finish().
+ void toString(std::string &S);
+
+ // Set PAL metadata from YAML text.
+ bool setFromString(StringRef S);
+
+ // Get .note record vendor name of metadata blob to be emitted.
+ const char *getVendor() const;
+
+ // Get .note record type of metadata blob to be emitted:
+ // ELF::NT_AMD_AMDGPU_PAL_METADATA (legacy key=val format), or
+ // ELF::NT_AMDGPU_METADATA (MsgPack format), or
+ // 0 (no PAL metadata).
+ unsigned getType() const;
+
+ // Emit the accumulated PAL metadata as a binary blob.
+ // This is called from AMDGPUTargetELFStreamer::Finish().
+ void toBlob(unsigned Type, std::string &S);
+
+ // Get the msgpack::Document for the PAL metadata.
+ msgpack::Document *getMsgPackDoc() { return &MsgPackDoc; }
+
+ // Set legacy PAL metadata format.
+ void setLegacy();
+
+private:
+ // Return whether the blob type is legacy PAL metadata.
+ bool isLegacy() const;
+
+ // Reference (create if necessary) the node for the registers map.
+ msgpack::DocNode &refRegisters();
+
+ // Get (create if necessary) the registers map.
+ msgpack::MapDocNode getRegisters();
+
+ // Get (create if necessary) the .hardware_stages entry for the given calling
+ // convention.
+ msgpack::MapDocNode getHwStage(unsigned CC);
+
+ bool setFromLegacyBlob(StringRef Blob);
+ bool setFromMsgPackBlob(StringRef Blob);
+ void toLegacyBlob(std::string &Blob);
+ void toMsgPackBlob(std::string &Blob);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
new file mode 100644
index 000000000000..95ad3f35d18f
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -0,0 +1,155 @@
+//===--------------------- AMDKernelCodeTInfo.h ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file - specifies tables for amd_kernel_code_t structure parsing/printing
+//
+//===----------------------------------------------------------------------===//
+
+#define QNAME(name) amd_kernel_code_t::name
+#define FLD_T(name) decltype(QNAME(name)), &QNAME(name)
+
+#define FIELD2(sname, aname, name) \
+ RECORD(sname, aname, printField<FLD_T(name)>, parseField<FLD_T(name)>)
+
+#define FIELD(name) FIELD2(name, name, name)
+
+
+#define PRINTCODEPROP(name) \
+ printBitField<FLD_T(code_properties),\
+ AMD_CODE_PROPERTY_##name##_SHIFT,\
+ AMD_CODE_PROPERTY_##name##_WIDTH>
+
+#define PARSECODEPROP(name) \
+ parseBitField<FLD_T(code_properties),\
+ AMD_CODE_PROPERTY_##name##_SHIFT,\
+ AMD_CODE_PROPERTY_##name##_WIDTH>
+
+#define CODEPROP(name, shift) \
+ RECORD(name, name, PRINTCODEPROP(shift), PARSECODEPROP(shift))
+
+// have to define these lambdas because of Set/GetMacro
+#define PRINTCOMP(GetMacro, Shift) \
+[](StringRef Name, const amd_kernel_code_t &C, raw_ostream &OS) { \
+ printName(OS, Name) << \
+ (int)GetMacro(C.compute_pgm_resource_registers >> Shift); \
+}
+#define PARSECOMP(SetMacro, Shift) \
+[](amd_kernel_code_t &C, MCAsmParser &MCParser, raw_ostream &Err) { \
+ int64_t Value = 0; \
+ if (!expectAbsExpression(MCParser, Value, Err)) \
+ return false; \
+ C.compute_pgm_resource_registers &= ~(SetMacro(0xFFFFFFFFFFFFFFFFULL) << Shift); \
+ C.compute_pgm_resource_registers |= SetMacro(Value) << Shift; \
+ return true; \
+}
+
+#define COMPPGM(name, aname, GetMacro, SetMacro, Shift) \
+ RECORD(name, aname, PRINTCOMP(GetMacro, Shift), PARSECOMP(SetMacro, Shift))
+
+#define COMPPGM1(name, aname, AccMacro) \
+ COMPPGM(name, aname, G_00B848_##AccMacro, S_00B848_##AccMacro, 0)
+
+#define COMPPGM2(name, aname, AccMacro) \
+ COMPPGM(name, aname, G_00B84C_##AccMacro, S_00B84C_##AccMacro, 32)
+
+///////////////////////////////////////////////////////////////////////////////
+// Beginning of the table.
+// Define RECORD(name, altName, print, parse) in your code to get field
+// definitions, then include this file.
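+//
+// Example (a sketch of the intended usage; see AMDKernelCodeTUtils.cpp for
+// the real call sites):
+//
+//   #define RECORD(name, altName, print, parse) #name
+//   #include "AMDKernelCodeTInfo.h"
+//   #undef RECORD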
+
+FIELD2(amd_code_version_major, kernel_code_version_major, amd_kernel_code_version_major),
+FIELD2(amd_code_version_minor, kernel_code_version_minor, amd_kernel_code_version_minor),
+FIELD2(amd_machine_kind, machine_kind, amd_machine_kind),
+FIELD2(amd_machine_version_major, machine_version_major, amd_machine_version_major),
+FIELD2(amd_machine_version_minor, machine_version_minor, amd_machine_version_minor),
+FIELD2(amd_machine_version_stepping, machine_version_stepping, amd_machine_version_stepping),
+
+FIELD(kernel_code_entry_byte_offset),
+FIELD(kernel_code_prefetch_byte_size),
+
+COMPPGM1(granulated_workitem_vgpr_count, compute_pgm_rsrc1_vgprs, VGPRS),
+COMPPGM1(granulated_wavefront_sgpr_count, compute_pgm_rsrc1_sgprs, SGPRS),
+COMPPGM1(priority, compute_pgm_rsrc1_priority, PRIORITY),
+COMPPGM1(float_mode, compute_pgm_rsrc1_float_mode, FLOAT_MODE), // TODO: split float_mode
+COMPPGM1(priv, compute_pgm_rsrc1_priv, PRIV),
+COMPPGM1(enable_dx10_clamp, compute_pgm_rsrc1_dx10_clamp, DX10_CLAMP),
+COMPPGM1(debug_mode, compute_pgm_rsrc1_debug_mode, DEBUG_MODE),
+COMPPGM1(enable_ieee_mode, compute_pgm_rsrc1_ieee_mode, IEEE_MODE),
+COMPPGM1(enable_wgp_mode, compute_pgm_rsrc1_wgp_mode, WGP_MODE),
+COMPPGM1(enable_mem_ordered, compute_pgm_rsrc1_mem_ordered, MEM_ORDERED),
+COMPPGM1(enable_fwd_progress, compute_pgm_rsrc1_fwd_progress, FWD_PROGRESS),
+// TODO: bulky
+// TODO: cdbg_user
+COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN),
+COMPPGM2(user_sgpr_count, compute_pgm_rsrc2_user_sgpr, USER_SGPR),
+COMPPGM2(enable_trap_handler, compute_pgm_rsrc2_trap_handler, TRAP_HANDLER),
+COMPPGM2(enable_sgpr_workgroup_id_x, compute_pgm_rsrc2_tgid_x_en, TGID_X_EN),
+COMPPGM2(enable_sgpr_workgroup_id_y, compute_pgm_rsrc2_tgid_y_en, TGID_Y_EN),
+COMPPGM2(enable_sgpr_workgroup_id_z, compute_pgm_rsrc2_tgid_z_en, TGID_Z_EN),
+COMPPGM2(enable_sgpr_workgroup_info, compute_pgm_rsrc2_tg_size_en, TG_SIZE_EN),
+COMPPGM2(enable_vgpr_workitem_id, compute_pgm_rsrc2_tidig_comp_cnt, TIDIG_COMP_CNT),
+COMPPGM2(enable_exception_msb, compute_pgm_rsrc2_excp_en_msb, EXCP_EN_MSB), // TODO: split enable_exception_msb
+COMPPGM2(granulated_lds_size, compute_pgm_rsrc2_lds_size, LDS_SIZE),
+COMPPGM2(enable_exception, compute_pgm_rsrc2_excp_en, EXCP_EN), // TODO: split enable_exception
+
+CODEPROP(enable_sgpr_private_segment_buffer, ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER),
+CODEPROP(enable_sgpr_dispatch_ptr, ENABLE_SGPR_DISPATCH_PTR),
+CODEPROP(enable_sgpr_queue_ptr, ENABLE_SGPR_QUEUE_PTR),
+CODEPROP(enable_sgpr_kernarg_segment_ptr, ENABLE_SGPR_KERNARG_SEGMENT_PTR),
+CODEPROP(enable_sgpr_dispatch_id, ENABLE_SGPR_DISPATCH_ID),
+CODEPROP(enable_sgpr_flat_scratch_init, ENABLE_SGPR_FLAT_SCRATCH_INIT),
+CODEPROP(enable_sgpr_private_segment_size, ENABLE_SGPR_PRIVATE_SEGMENT_SIZE),
+CODEPROP(enable_sgpr_grid_workgroup_count_x, ENABLE_SGPR_GRID_WORKGROUP_COUNT_X),
+CODEPROP(enable_sgpr_grid_workgroup_count_y, ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y),
+CODEPROP(enable_sgpr_grid_workgroup_count_z, ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z),
+CODEPROP(enable_wavefront_size32, ENABLE_WAVEFRONT_SIZE32),
+CODEPROP(enable_ordered_append_gds, ENABLE_ORDERED_APPEND_GDS),
+CODEPROP(private_element_size, PRIVATE_ELEMENT_SIZE),
+CODEPROP(is_ptr64, IS_PTR64),
+CODEPROP(is_dynamic_callstack, IS_DYNAMIC_CALLSTACK),
+CODEPROP(is_debug_enabled, IS_DEBUG_SUPPORTED),
+CODEPROP(is_xnack_enabled, IS_XNACK_SUPPORTED),
+
+FIELD(workitem_private_segment_byte_size),
+FIELD(workgroup_group_segment_byte_size),
+FIELD(gds_segment_byte_size),
+FIELD(kernarg_segment_byte_size),
+FIELD(workgroup_fbarrier_count),
+FIELD(wavefront_sgpr_count),
+FIELD(workitem_vgpr_count),
+FIELD(reserved_vgpr_first),
+FIELD(reserved_vgpr_count),
+FIELD(reserved_sgpr_first),
+FIELD(reserved_sgpr_count),
+FIELD(debug_wavefront_private_segment_offset_sgpr),
+FIELD(debug_private_segment_buffer_sgpr),
+FIELD(kernarg_segment_alignment),
+FIELD(group_segment_alignment),
+FIELD(private_segment_alignment),
+FIELD(wavefront_size),
+FIELD(call_convention),
+FIELD(runtime_loader_kernel_symbol)
+// TODO: control_directive
+
+// end of the table
+///////////////////////////////////////////////////////////////////////////////
+
+#undef QNAME
+#undef FLD_T
+#undef FIELD2
+#undef FIELD
+#undef PRINTCODEPROP
+#undef PARSECODEPROP
+#undef CODEPROP
+#undef PRINTCOMP
+#undef PARSECOMP
+#undef COMPPGM
+#undef COMPPGM1
+#undef COMPPGM2
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
new file mode 100644
index 000000000000..443e2cc45ac0
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
@@ -0,0 +1,179 @@
+//===- AMDKernelCodeTUtils.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file - utility functions to parse/print amd_kernel_code_t structure
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDKernelCodeTUtils.h"
+#include "SIDefines.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <utility>
+
+using namespace llvm;
+
+static ArrayRef<StringRef> get_amd_kernel_code_t_FldNames() {
+ static StringRef const Table[] = {
+ "", // not found placeholder
+#define RECORD(name, altName, print, parse) #name
+#include "AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return makeArrayRef(Table);
+}
+
+static ArrayRef<StringRef> get_amd_kernel_code_t_FldAltNames() {
+ static StringRef const Table[] = {
+ "", // not found placeholder
+#define RECORD(name, altName, print, parse) #altName
+#include "AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return makeArrayRef(Table);
+}
+
+static StringMap<int> createIndexMap(const ArrayRef<StringRef> &names,
+ const ArrayRef<StringRef> &altNames) {
+ StringMap<int> map;
+ assert(names.size() == altNames.size());
+ for (unsigned i = 0; i < names.size(); ++i) {
+ map.insert(std::make_pair(names[i], i));
+ map.insert(std::make_pair(altNames[i], i));
+ }
+ return map;
+}
+
+static int get_amd_kernel_code_t_FieldIndex(StringRef name) {
+ static const auto map = createIndexMap(get_amd_kernel_code_t_FldNames(),
+ get_amd_kernel_code_t_FldAltNames());
+ return map.lookup(name) - 1; // returns -1 if not found
+}
+
+static StringRef get_amd_kernel_code_t_FieldName(int index) {
+ return get_amd_kernel_code_t_FldNames()[index + 1];
+}
+
+// Field printing
+
+static raw_ostream &printName(raw_ostream &OS, StringRef Name) {
+ return OS << Name << " = ";
+}
+
+template <typename T, T amd_kernel_code_t::*ptr>
+static void printField(StringRef Name, const amd_kernel_code_t &C,
+ raw_ostream &OS) {
+ printName(OS, Name) << (int)(C.*ptr);
+}
+
+template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
+static void printBitField(StringRef Name, const amd_kernel_code_t &c,
+ raw_ostream &OS) {
+ const auto Mask = (static_cast<T>(1) << width) - 1;
+ printName(OS, Name) << (int)((c.*ptr >> shift) & Mask);
+}
+
+using PrintFx = void(*)(StringRef, const amd_kernel_code_t &, raw_ostream &);
+
+static ArrayRef<PrintFx> getPrinterTable() {
+ static const PrintFx Table[] = {
+#define RECORD(name, altName, print, parse) print
+#include "AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return makeArrayRef(Table);
+}
+
+void llvm::printAmdKernelCodeField(const amd_kernel_code_t &C,
+ int FldIndex,
+ raw_ostream &OS) {
+ auto Printer = getPrinterTable()[FldIndex];
+ if (Printer)
+ Printer(get_amd_kernel_code_t_FieldName(FldIndex), C, OS);
+}
+
+void llvm::dumpAmdKernelCode(const amd_kernel_code_t *C,
+ raw_ostream &OS,
+ const char *tab) {
+ const int Size = getPrinterTable().size();
+ for (int i = 0; i < Size; ++i) {
+ OS << tab;
+ printAmdKernelCodeField(*C, i, OS);
+ OS << '\n';
+ }
+}
+
+// Field parsing
+
+static bool expectAbsExpression(MCAsmParser &MCParser, int64_t &Value,
+                                raw_ostream &Err) {
+ if (MCParser.getLexer().isNot(AsmToken::Equal)) {
+ Err << "expected '='";
+ return false;
+ }
+ MCParser.getLexer().Lex();
+
+ if (MCParser.parseAbsoluteExpression(Value)) {
+ Err << "integer absolute expression expected";
+ return false;
+ }
+ return true;
+}
+
+template <typename T, T amd_kernel_code_t::*ptr>
+static bool parseField(amd_kernel_code_t &C, MCAsmParser &MCParser,
+ raw_ostream &Err) {
+ int64_t Value = 0;
+ if (!expectAbsExpression(MCParser, Value, Err))
+ return false;
+ C.*ptr = (T)Value;
+ return true;
+}
+
+template <typename T, T amd_kernel_code_t::*ptr, int shift, int width = 1>
+static bool parseBitField(amd_kernel_code_t &C, MCAsmParser &MCParser,
+ raw_ostream &Err) {
+ int64_t Value = 0;
+ if (!expectAbsExpression(MCParser, Value, Err))
+ return false;
+ const uint64_t Mask = ((UINT64_C(1) << width) - 1) << shift;
+ C.*ptr &= (T)~Mask;
+ C.*ptr |= (T)((Value << shift) & Mask);
+ return true;
+}
+
+using ParseFx = bool(*)(amd_kernel_code_t &, MCAsmParser &MCParser,
+ raw_ostream &Err);
+
+static ArrayRef<ParseFx> getParserTable() {
+ static const ParseFx Table[] = {
+#define RECORD(name, altName, print, parse) parse
+#include "AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return makeArrayRef(Table);
+}
+
+bool llvm::parseAmdKernelCodeField(StringRef ID,
+ MCAsmParser &MCParser,
+ amd_kernel_code_t &C,
+ raw_ostream &Err) {
+ const int Idx = get_amd_kernel_code_t_FieldIndex(ID);
+ if (Idx < 0) {
+ Err << "unexpected amd_kernel_code_t field name " << ID;
+ return false;
+ }
+ auto Parser = getParserTable()[Idx];
+ return Parser ? Parser(C, MCParser, Err) : false;
+}
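+
+// Sketch of a typical call site (the surrounding assembler hooks such as
+// getParser(), TokError() and the KernelCode variable are assumed, not
+// defined in this file): the parser reads a field name, then lets
+// parseAmdKernelCodeField consume the "= <expression>" tail, collecting any
+// diagnostic text in a string stream.
+//
+//   std::string Err;
+//   raw_string_ostream ErrOS(Err);
+//   if (!parseAmdKernelCodeField(ID, getParser(), KernelCode, ErrOS))
+//     return TokError(ErrOS.str());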
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
new file mode 100644
index 000000000000..a87325a78df3
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
@@ -0,0 +1,35 @@
+//===- AMDKernelCodeTUtils.h - helpers for amd_kernel_code_t ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file AMDKernelCodeTUtils.h
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDKERNELCODETUTILS_H
+#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDKERNELCODETUTILS_H
+
+#include "AMDKernelCodeT.h"
+
+namespace llvm {
+
+class MCAsmParser;
+class raw_ostream;
+class StringRef;
+
+void printAmdKernelCodeField(const amd_kernel_code_t &C, int FldIndex,
+ raw_ostream &OS);
+
+void dumpAmdKernelCode(const amd_kernel_code_t *C, raw_ostream &OS,
+ const char *tab);
+
+bool parseAmdKernelCodeField(StringRef ID, MCAsmParser &Parser,
+ amd_kernel_code_t &C, raw_ostream &Err);
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDKERNELCODETUTILS_H