diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/Utils')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp | 314 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h | 56 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 684 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 133 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h | 38 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp (renamed from llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp) | 102 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h | 51 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h | 4 |
9 files changed, 1061 insertions, 326 deletions
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp index 18c348d1cf89..c0fd5bc69325 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp @@ -6,33 +6,64 @@ // //===----------------------------------------------------------------------===// #include "AMDGPUAsmUtils.h" +#include "AMDGPUBaseInfo.h" #include "SIDefines.h" -#include "llvm/ADT/StringRef.h" - namespace llvm { namespace AMDGPU { + +namespace DepCtr { + +// NOLINTBEGIN +const CustomOperandVal DepCtrInfo[] = { + // Name max dflt offset width constraint + {{"depctr_hold_cnt"}, 1, 1, 7, 1, isGFX10_BEncoding}, + {{"depctr_sa_sdst"}, 1, 1, 0, 1}, + {{"depctr_va_vdst"}, 15, 15, 12, 4}, + {{"depctr_va_sdst"}, 7, 7, 9, 3}, + {{"depctr_va_ssrc"}, 1, 1, 8, 1}, + {{"depctr_va_vcc"}, 1, 1, 1, 1}, + {{"depctr_vm_vsrc"}, 7, 7, 2, 3}, +}; +// NOLINTEND + +const int DEP_CTR_SIZE = + static_cast<int>(sizeof(DepCtrInfo) / sizeof(CustomOperandVal)); + +} // namespace DepCtr + namespace SendMsg { -// This must be in sync with llvm::AMDGPU::SendMsg::Id enum members, see SIDefines.h. -const char *const IdSymbolic[ID_GAPS_LAST_] = { - nullptr, - "MSG_INTERRUPT", - "MSG_GS", - "MSG_GS_DONE", - "MSG_SAVEWAVE", - "MSG_STALL_WAVE_GEN", - "MSG_HALT_WAVES", - "MSG_ORDERED_PS_DONE", - "MSG_EARLY_PRIM_DEALLOC", - "MSG_GS_ALLOC_REQ", - "MSG_GET_DOORBELL", - "MSG_GET_DDID", - nullptr, - nullptr, - nullptr, - "MSG_SYSMSG" +// Disable lint checking for this block since it makes the table unreadable. +// NOLINTBEGIN +const CustomOperand<const MCSubtargetInfo &> Msg[] = { + {{""}}, + {{"MSG_INTERRUPT"}, ID_INTERRUPT}, + {{"MSG_GS"}, ID_GS_PreGFX11, isNotGFX11Plus}, + {{"MSG_GS_DONE"}, ID_GS_DONE_PreGFX11, isNotGFX11Plus}, + {{"MSG_SAVEWAVE"}, ID_SAVEWAVE, isGFX8_GFX9_GFX10}, + {{"MSG_STALL_WAVE_GEN"}, ID_STALL_WAVE_GEN, isGFX9Plus}, + {{"MSG_HALT_WAVES"}, ID_HALT_WAVES, isGFX9Plus}, + {{"MSG_ORDERED_PS_DONE"}, ID_ORDERED_PS_DONE, isGFX9Plus}, + {{"MSG_EARLY_PRIM_DEALLOC"}, ID_EARLY_PRIM_DEALLOC, isGFX9_GFX10}, + {{"MSG_GS_ALLOC_REQ"}, ID_GS_ALLOC_REQ, isGFX9Plus}, + {{"MSG_GET_DOORBELL"}, ID_GET_DOORBELL, isGFX9_GFX10}, + {{"MSG_GET_DDID"}, ID_GET_DDID, isGFX10}, + {{"MSG_HS_TESSFACTOR"}, ID_HS_TESSFACTOR_GFX11Plus, isGFX11Plus}, + {{"MSG_DEALLOC_VGPRS"}, ID_DEALLOC_VGPRS_GFX11Plus, isGFX11Plus}, + {{""}}, + {{"MSG_SYSMSG"}, ID_SYSMSG}, + {{"MSG_RTN_GET_DOORBELL"}, ID_RTN_GET_DOORBELL, isGFX11Plus}, + {{"MSG_RTN_GET_DDID"}, ID_RTN_GET_DDID, isGFX11Plus}, + {{"MSG_RTN_GET_TMA"}, ID_RTN_GET_TMA, isGFX11Plus}, + {{"MSG_RTN_GET_REALTIME"}, ID_RTN_GET_REALTIME, isGFX11Plus}, + {{"MSG_RTN_SAVE_WAVE"}, ID_RTN_SAVE_WAVE, isGFX11Plus}, + {{"MSG_RTN_GET_TBA"}, ID_RTN_GET_TBA, isGFX11Plus}, }; +// NOLINTEND + +const int MSG_SIZE = static_cast<int>( + sizeof(Msg) / sizeof(CustomOperand<const MCSubtargetInfo &>)); // These two must be in sync with llvm::AMDGPU::SendMsg::Op enum members, see SIDefines.h. const char *const OpSysSymbolic[OP_SYS_LAST_] = { @@ -54,39 +85,54 @@ const char *const OpGsSymbolic[OP_GS_LAST_] = { namespace Hwreg { -// This must be in sync with llvm::AMDGPU::Hwreg::ID_SYMBOLIC_FIRST_/LAST_, see SIDefines.h. -const char* const IdSymbolic[] = { - nullptr, - "HW_REG_MODE", - "HW_REG_STATUS", - "HW_REG_TRAPSTS", - "HW_REG_HW_ID", - "HW_REG_GPR_ALLOC", - "HW_REG_LDS_ALLOC", - "HW_REG_IB_STS", - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - "HW_REG_SH_MEM_BASES", - "HW_REG_TBA_LO", - "HW_REG_TBA_HI", - "HW_REG_TMA_LO", - "HW_REG_TMA_HI", - "HW_REG_FLAT_SCR_LO", - "HW_REG_FLAT_SCR_HI", - "HW_REG_XNACK_MASK", - "HW_REG_HW_ID1", - "HW_REG_HW_ID2", - "HW_REG_POPS_PACKER", - nullptr, - nullptr, - nullptr, - "HW_REG_SHADER_CYCLES" +// Disable lint checking for this block since it makes the table unreadable. +// NOLINTBEGIN +const CustomOperand<const MCSubtargetInfo &> Opr[] = { + {{""}}, + {{"HW_REG_MODE"}, ID_MODE}, + {{"HW_REG_STATUS"}, ID_STATUS}, + {{"HW_REG_TRAPSTS"}, ID_TRAPSTS}, + {{"HW_REG_HW_ID"}, ID_HW_ID, isNotGFX10Plus}, + {{"HW_REG_GPR_ALLOC"}, ID_GPR_ALLOC}, + {{"HW_REG_LDS_ALLOC"}, ID_LDS_ALLOC}, + {{"HW_REG_IB_STS"}, ID_IB_STS}, + {{""}}, + {{""}}, + {{""}}, + {{""}}, + {{""}}, + {{""}}, + {{""}}, + {{"HW_REG_SH_MEM_BASES"}, ID_MEM_BASES, isGFX9Plus}, + {{"HW_REG_TBA_LO"}, ID_TBA_LO, isGFX9_GFX10}, + {{"HW_REG_TBA_HI"}, ID_TBA_HI, isGFX9_GFX10}, + {{"HW_REG_TMA_LO"}, ID_TMA_LO, isGFX9_GFX10}, + {{"HW_REG_TMA_HI"}, ID_TMA_HI, isGFX9_GFX10}, + {{"HW_REG_FLAT_SCR_LO"}, ID_FLAT_SCR_LO, isGFX10Plus}, + {{"HW_REG_FLAT_SCR_HI"}, ID_FLAT_SCR_HI, isGFX10Plus}, + {{"HW_REG_XNACK_MASK"}, ID_XNACK_MASK, isGFX10Before1030}, + {{"HW_REG_HW_ID1"}, ID_HW_ID1, isGFX10Plus}, + {{"HW_REG_HW_ID2"}, ID_HW_ID2, isGFX10Plus}, + {{"HW_REG_POPS_PACKER"}, ID_POPS_PACKER, isGFX10}, + {{""}}, + {{""}}, + {{""}}, + {{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_BEncoding}, + + // GFX940 specific registers + {{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940}, + {{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940}, + {{"HW_REG_SQ_PERF_SNAPSHOT_DATA1"}, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940}, + {{"HW_REG_SQ_PERF_SNAPSHOT_PC_LO"}, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940}, + {{"HW_REG_SQ_PERF_SNAPSHOT_PC_HI"}, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940}, + + // Aliases + {{"HW_REG_HW_ID"}, ID_HW_ID1, isGFX10}, }; +// NOLINTEND + +const int OPR_SIZE = static_cast<int>( + sizeof(Opr) / sizeof(CustomOperand<const MCSubtargetInfo &>)); } // namespace Hwreg @@ -144,7 +190,7 @@ StringLiteral const NfmtSymbolicVI[] = { // VI and GFX9 "BUF_NUM_FORMAT_FLOAT" }; -StringLiteral const UfmtSymbolic[] = { +StringLiteral const UfmtSymbolicGFX10[] = { "BUF_FMT_INVALID", "BUF_FMT_8_UNORM", @@ -238,7 +284,7 @@ StringLiteral const UfmtSymbolic[] = { "BUF_FMT_32_32_32_32_FLOAT" }; -unsigned const DfmtNfmt2UFmt[] = { +unsigned const DfmtNfmt2UFmtGFX10[] = { DFMT_INVALID | (NFMT_UNORM << NFMT_SHIFT), DFMT_8 | (NFMT_UNORM << NFMT_SHIFT), @@ -332,6 +378,166 @@ unsigned const DfmtNfmt2UFmt[] = { DFMT_32_32_32_32 | (NFMT_FLOAT << NFMT_SHIFT) }; +StringLiteral const UfmtSymbolicGFX11[] = { + "BUF_FMT_INVALID", + + "BUF_FMT_8_UNORM", + "BUF_FMT_8_SNORM", + "BUF_FMT_8_USCALED", + "BUF_FMT_8_SSCALED", + "BUF_FMT_8_UINT", + "BUF_FMT_8_SINT", + + "BUF_FMT_16_UNORM", + "BUF_FMT_16_SNORM", + "BUF_FMT_16_USCALED", + "BUF_FMT_16_SSCALED", + "BUF_FMT_16_UINT", + "BUF_FMT_16_SINT", + "BUF_FMT_16_FLOAT", + + "BUF_FMT_8_8_UNORM", + "BUF_FMT_8_8_SNORM", + "BUF_FMT_8_8_USCALED", + "BUF_FMT_8_8_SSCALED", + "BUF_FMT_8_8_UINT", + "BUF_FMT_8_8_SINT", + + "BUF_FMT_32_UINT", + "BUF_FMT_32_SINT", + "BUF_FMT_32_FLOAT", + + "BUF_FMT_16_16_UNORM", + "BUF_FMT_16_16_SNORM", + "BUF_FMT_16_16_USCALED", + "BUF_FMT_16_16_SSCALED", + "BUF_FMT_16_16_UINT", + "BUF_FMT_16_16_SINT", + "BUF_FMT_16_16_FLOAT", + + "BUF_FMT_10_11_11_FLOAT", + + "BUF_FMT_11_11_10_FLOAT", + + "BUF_FMT_10_10_10_2_UNORM", + "BUF_FMT_10_10_10_2_SNORM", + "BUF_FMT_10_10_10_2_UINT", + "BUF_FMT_10_10_10_2_SINT", + + "BUF_FMT_2_10_10_10_UNORM", + "BUF_FMT_2_10_10_10_SNORM", + "BUF_FMT_2_10_10_10_USCALED", + "BUF_FMT_2_10_10_10_SSCALED", + "BUF_FMT_2_10_10_10_UINT", + "BUF_FMT_2_10_10_10_SINT", + + "BUF_FMT_8_8_8_8_UNORM", + "BUF_FMT_8_8_8_8_SNORM", + "BUF_FMT_8_8_8_8_USCALED", + "BUF_FMT_8_8_8_8_SSCALED", + "BUF_FMT_8_8_8_8_UINT", + "BUF_FMT_8_8_8_8_SINT", + + "BUF_FMT_32_32_UINT", + "BUF_FMT_32_32_SINT", + "BUF_FMT_32_32_FLOAT", + + "BUF_FMT_16_16_16_16_UNORM", + "BUF_FMT_16_16_16_16_SNORM", + "BUF_FMT_16_16_16_16_USCALED", + "BUF_FMT_16_16_16_16_SSCALED", + "BUF_FMT_16_16_16_16_UINT", + "BUF_FMT_16_16_16_16_SINT", + "BUF_FMT_16_16_16_16_FLOAT", + + "BUF_FMT_32_32_32_UINT", + "BUF_FMT_32_32_32_SINT", + "BUF_FMT_32_32_32_FLOAT", + "BUF_FMT_32_32_32_32_UINT", + "BUF_FMT_32_32_32_32_SINT", + "BUF_FMT_32_32_32_32_FLOAT" +}; + +unsigned const DfmtNfmt2UFmtGFX11[] = { + DFMT_INVALID | (NFMT_UNORM << NFMT_SHIFT), + + DFMT_8 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_8 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_8 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_8 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_8 | (NFMT_UINT << NFMT_SHIFT), + DFMT_8 | (NFMT_SINT << NFMT_SHIFT), + + DFMT_16 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_16 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_16 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_16 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_16 | (NFMT_UINT << NFMT_SHIFT), + DFMT_16 | (NFMT_SINT << NFMT_SHIFT), + DFMT_16 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_8_8 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_8_8 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_8_8 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_8_8 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_8_8 | (NFMT_UINT << NFMT_SHIFT), + DFMT_8_8 | (NFMT_SINT << NFMT_SHIFT), + + DFMT_32 | (NFMT_UINT << NFMT_SHIFT), + DFMT_32 | (NFMT_SINT << NFMT_SHIFT), + DFMT_32 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_16_16 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_16_16 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_16_16 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_16_16 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_16_16 | (NFMT_UINT << NFMT_SHIFT), + DFMT_16_16 | (NFMT_SINT << NFMT_SHIFT), + DFMT_16_16 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_10_11_11 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_11_11_10 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_10_10_10_2 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_10_10_10_2 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_10_10_10_2 | (NFMT_UINT << NFMT_SHIFT), + DFMT_10_10_10_2 | (NFMT_SINT << NFMT_SHIFT), + + DFMT_2_10_10_10 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_2_10_10_10 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_2_10_10_10 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_2_10_10_10 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_2_10_10_10 | (NFMT_UINT << NFMT_SHIFT), + DFMT_2_10_10_10 | (NFMT_SINT << NFMT_SHIFT), + + DFMT_8_8_8_8 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_8_8_8_8 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_8_8_8_8 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_8_8_8_8 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_8_8_8_8 | (NFMT_UINT << NFMT_SHIFT), + DFMT_8_8_8_8 | (NFMT_SINT << NFMT_SHIFT), + + DFMT_32_32 | (NFMT_UINT << NFMT_SHIFT), + DFMT_32_32 | (NFMT_SINT << NFMT_SHIFT), + DFMT_32_32 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_16_16_16_16 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_16_16_16_16 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_16_16_16_16 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_16_16_16_16 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_16_16_16_16 | (NFMT_UINT << NFMT_SHIFT), + DFMT_16_16_16_16 | (NFMT_SINT << NFMT_SHIFT), + DFMT_16_16_16_16 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_32_32_32 | (NFMT_UINT << NFMT_SHIFT), + DFMT_32_32_32 | (NFMT_SINT << NFMT_SHIFT), + DFMT_32_32_32 | (NFMT_FLOAT << NFMT_SHIFT), + DFMT_32_32_32_32 | (NFMT_UINT << NFMT_SHIFT), + DFMT_32_32_32_32 | (NFMT_SINT << NFMT_SHIFT), + DFMT_32_32_32_32 | (NFMT_FLOAT << NFMT_SHIFT) +}; + } // namespace MTBUFFormat namespace Swizzle { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h index d1deb570a938..054e35e90f2f 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h @@ -11,15 +11,60 @@ #include "SIDefines.h" +#include "llvm/ADT/StringRef.h" + namespace llvm { class StringLiteral; +class MCSubtargetInfo; namespace AMDGPU { +const int OPR_ID_UNKNOWN = -1; +const int OPR_ID_UNSUPPORTED = -2; +const int OPR_ID_DUPLICATE = -3; +const int OPR_VAL_INVALID = -4; + +template <class T> struct CustomOperand { + StringLiteral Name; + int Encoding = 0; + bool (*Cond)(T Context) = nullptr; +}; + +struct CustomOperandVal { + StringLiteral Name; + unsigned Max; + unsigned Default; + unsigned Shift; + unsigned Width; + bool (*Cond)(const MCSubtargetInfo &STI) = nullptr; + unsigned Mask = (1 << Width) - 1; + + unsigned decode(unsigned Code) const { return (Code >> Shift) & Mask; } + + unsigned encode(unsigned Val) const { return (Val & Mask) << Shift; } + + unsigned getMask() const { return Mask << Shift; } + + bool isValid(unsigned Val) const { return Val <= Max; } + + bool isSupported(const MCSubtargetInfo &STI) const { + return !Cond || Cond(STI); + } +}; + +namespace DepCtr { + +extern const CustomOperandVal DepCtrInfo[]; +extern const int DEP_CTR_SIZE; + +} // namespace DepCtr + namespace SendMsg { // Symbolic names for the sendmsg(...) syntax. -extern const char *const IdSymbolic[ID_GAPS_LAST_]; +extern const CustomOperand<const MCSubtargetInfo &> Msg[]; +extern const int MSG_SIZE; + extern const char *const OpSysSymbolic[OP_SYS_LAST_]; extern const char *const OpGsSymbolic[OP_GS_LAST_]; @@ -27,7 +72,8 @@ extern const char *const OpGsSymbolic[OP_GS_LAST_]; namespace Hwreg { // Symbolic names for the hwreg(...) syntax. -extern const char* const IdSymbolic[]; +extern const CustomOperand<const MCSubtargetInfo &> Opr[]; +extern const int OPR_SIZE; } // namespace Hwreg @@ -37,8 +83,10 @@ extern StringLiteral const DfmtSymbolic[]; extern StringLiteral const NfmtSymbolicGFX10[]; extern StringLiteral const NfmtSymbolicSICI[]; extern StringLiteral const NfmtSymbolicVI[]; -extern StringLiteral const UfmtSymbolic[]; -extern unsigned const DfmtNfmt2UFmt[]; +extern StringLiteral const UfmtSymbolicGFX10[]; +extern StringLiteral const UfmtSymbolicGFX11[]; +extern unsigned const DfmtNfmt2UFmtGFX10[]; +extern unsigned const DfmtNfmt2UFmtGFX11[]; } // namespace MTBUFFormat diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 683be871ff82..e4ab72f1095b 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -28,10 +28,15 @@ #define GET_INSTRMAP_INFO #include "AMDGPUGenInstrInfo.inc" -static llvm::cl::opt<unsigned> AmdhsaCodeObjectVersion( - "amdhsa-code-object-version", llvm::cl::Hidden, - llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(4), - llvm::cl::ZeroOrMore); +static llvm::cl::opt<unsigned> + AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden, + llvm::cl::desc("AMDHSA Code Object Version"), + llvm::cl::init(4)); + +// TODO-GFX11: Remove this when full 16-bit codegen is implemented. +static llvm::cl::opt<bool> + LimitTo128VGPRs("amdgpu-limit-to-128-vgprs", llvm::cl::Hidden, + llvm::cl::desc("Never use more than 128 VGPRs")); namespace { @@ -44,9 +49,8 @@ unsigned getBitMask(unsigned Shift, unsigned Width) { /// /// \returns Packed \p Dst. unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) { - Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width); - Dst |= (Src << Shift) & getBitMask(Shift, Width); - return Dst; + unsigned Mask = getBitMask(Shift, Width); + return ((Src << Shift) & Mask) | (Dst & ~Mask); } /// Unpacks bits from \p Src for given bit \p Shift and bit \p Width. @@ -57,30 +61,40 @@ unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) { } /// \returns Vmcnt bit shift (lower bits). -unsigned getVmcntBitShiftLo() { return 0; } +unsigned getVmcntBitShiftLo(unsigned VersionMajor) { + return VersionMajor >= 11 ? 10 : 0; +} /// \returns Vmcnt bit width (lower bits). -unsigned getVmcntBitWidthLo() { return 4; } +unsigned getVmcntBitWidthLo(unsigned VersionMajor) { + return VersionMajor >= 11 ? 6 : 4; +} /// \returns Expcnt bit shift. -unsigned getExpcntBitShift() { return 4; } +unsigned getExpcntBitShift(unsigned VersionMajor) { + return VersionMajor >= 11 ? 0 : 4; +} /// \returns Expcnt bit width. -unsigned getExpcntBitWidth() { return 3; } +unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; } /// \returns Lgkmcnt bit shift. -unsigned getLgkmcntBitShift() { return 8; } +unsigned getLgkmcntBitShift(unsigned VersionMajor) { + return VersionMajor >= 11 ? 4 : 8; +} /// \returns Lgkmcnt bit width. unsigned getLgkmcntBitWidth(unsigned VersionMajor) { - return (VersionMajor >= 10) ? 6 : 4; + return VersionMajor >= 10 ? 6 : 4; } /// \returns Vmcnt bit shift (higher bits). -unsigned getVmcntBitShiftHi() { return 14; } +unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; } /// \returns Vmcnt bit width (higher bits). -unsigned getVmcntBitWidthHi() { return 2; } +unsigned getVmcntBitWidthHi(unsigned VersionMajor) { + return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0; +} } // end namespace anonymous @@ -136,6 +150,41 @@ bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) { isHsaAbiVersion5(STI); } +unsigned getAmdhsaCodeObjectVersion() { + return AmdhsaCodeObjectVersion; +} + +unsigned getMultigridSyncArgImplicitArgPosition() { + switch (AmdhsaCodeObjectVersion) { + case 2: + case 3: + case 4: + return 48; + case 5: + return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET; + default: + llvm_unreachable("Unexpected code object version"); + return 0; + } +} + + +// FIXME: All such magic numbers about the ABI should be in a +// central TD file. +unsigned getHostcallImplicitArgPosition() { + switch (AmdhsaCodeObjectVersion) { + case 2: + case 3: + case 4: + return 24; + case 5: + return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET; + default: + llvm_unreachable("Unexpected code object version"); + return 0; + } +} + #define GET_MIMGBaseOpcodesTable_IMPL #define GET_MIMGDimInfoTable_IMPL #define GET_MIMGInfoTable_IMPL @@ -144,6 +193,7 @@ bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) { #define GET_MIMGBiasMappingTable_IMPL #define GET_MIMGOffsetMappingTable_IMPL #define GET_MIMGG16MappingTable_IMPL +#define GET_MAIInstInfoTable_IMPL #include "AMDGPUGenSearchableTables.inc" int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, @@ -223,6 +273,10 @@ struct VOPInfo { bool IsSingle; }; +struct VOPC64DPPInfo { + uint16_t Opcode; +}; + #define GET_MTBUFInfoTable_DECL #define GET_MTBUFInfoTable_IMPL #define GET_MUBUFInfoTable_DECL @@ -235,6 +289,14 @@ struct VOPInfo { #define GET_VOP2InfoTable_IMPL #define GET_VOP3InfoTable_DECL #define GET_VOP3InfoTable_IMPL +#define GET_VOPC64DPPTable_DECL +#define GET_VOPC64DPPTable_IMPL +#define GET_VOPC64DPP8Table_DECL +#define GET_VOPC64DPP8Table_IMPL +#define GET_WMMAOpcode2AddrMappingTable_DECL +#define GET_WMMAOpcode2AddrMappingTable_IMPL +#define GET_WMMAOpcode3AddrMappingTable_DECL +#define GET_WMMAOpcode3AddrMappingTable_IMPL #include "AMDGPUGenSearchableTables.inc" int getMTBUFBaseOpcode(unsigned Opc) { @@ -322,6 +384,30 @@ bool getVOP3IsSingle(unsigned Opc) { return Info ? Info->IsSingle : false; } +bool isVOPC64DPP(unsigned Opc) { + return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc); +} + +bool getMAIIsDGEMM(unsigned Opc) { + const MAIInstInfo *Info = getMAIInstInfoHelper(Opc); + return Info ? Info->is_dgemm : false; +} + +bool getMAIIsGFX940XDL(unsigned Opc) { + const MAIInstInfo *Info = getMAIInstInfoHelper(Opc); + return Info ? Info->is_gfx940_xdl : false; +} + +unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) { + const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc); + return Info ? Info->Opcode3Addr : ~0u; +} + +unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) { + const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc); + return Info ? Info->Opcode2Addr : ~0u; +} + // Wrapper for Tablegen'd function. enum Subtarget is not defined in any // header files, so we need to wrap it in a function that takes unsigned // instead. @@ -740,6 +826,15 @@ unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) { } unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) { + if (LimitTo128VGPRs.getNumOccurrences() ? LimitTo128VGPRs + : isGFX11Plus(*STI)) { + // GFX11 changes the encoding of 16-bit operands in VOP1/2/C instructions + // such that values 128..255 no longer mean v128..v255, they mean + // v0.hi..v127.hi instead. Until the compiler understands this, it is not + // safe to use v128..v255. + // TODO-GFX11: Remove this when full 16-bit codegen is implemented. + return 128; + } if (STI->getFeatureBits().test(FeatureGFX90AInsts)) return 512; return 256; @@ -904,16 +999,13 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F, } unsigned getVmcntBitMask(const IsaVersion &Version) { - unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1; - if (Version.Major < 9) - return VmcntLo; - - unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo(); - return VmcntLo | VmcntHi; + return (1 << (getVmcntBitWidthLo(Version.Major) + + getVmcntBitWidthHi(Version.Major))) - + 1; } unsigned getExpcntBitMask(const IsaVersion &Version) { - return (1 << getExpcntBitWidth()) - 1; + return (1 << getExpcntBitWidth(Version.Major)) - 1; } unsigned getLgkmcntBitMask(const IsaVersion &Version) { @@ -921,36 +1013,32 @@ unsigned getLgkmcntBitMask(const IsaVersion &Version) { } unsigned getWaitcntBitMask(const IsaVersion &Version) { - unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo()); - unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth()); - unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), + unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major), + getVmcntBitWidthLo(Version.Major)); + unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major), + getExpcntBitWidth(Version.Major)); + unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major), getLgkmcntBitWidth(Version.Major)); - unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt; - if (Version.Major < 9) - return Waitcnt; - - unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi()); - return Waitcnt | VmcntHi; + unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major), + getVmcntBitWidthHi(Version.Major)); + return VmcntLo | Expcnt | Lgkmcnt | VmcntHi; } unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) { - unsigned VmcntLo = - unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); - if (Version.Major < 9) - return VmcntLo; - - unsigned VmcntHi = - unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi()); - VmcntHi <<= getVmcntBitWidthLo(); - return VmcntLo | VmcntHi; + unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major), + getVmcntBitWidthLo(Version.Major)); + unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major), + getVmcntBitWidthHi(Version.Major)); + return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major); } unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) { - return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); + return unpackBits(Waitcnt, getExpcntBitShift(Version.Major), + getExpcntBitWidth(Version.Major)); } unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) { - return unpackBits(Waitcnt, getLgkmcntBitShift(), + return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major), getLgkmcntBitWidth(Version.Major)); } @@ -971,24 +1059,23 @@ Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) { unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt) { - Waitcnt = - packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); - if (Version.Major < 9) - return Waitcnt; - - Vmcnt >>= getVmcntBitWidthLo(); - return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi()); + Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major), + getVmcntBitWidthLo(Version.Major)); + return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt, + getVmcntBitShiftHi(Version.Major), + getVmcntBitWidthHi(Version.Major)); } unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt) { - return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); + return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major), + getExpcntBitWidth(Version.Major)); } unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt) { - return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), - getLgkmcntBitWidth(Version.Major)); + return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major), + getLgkmcntBitWidth(Version.Major)); } unsigned encodeWaitcnt(const IsaVersion &Version, @@ -1005,43 +1092,184 @@ unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) { } //===----------------------------------------------------------------------===// -// hwreg +// Custom Operands. +// +// A table of custom operands shall describe "primary" operand names +// first followed by aliases if any. It is not required but recommended +// to arrange operands so that operand encoding match operand position +// in the table. This will make disassembly a bit more efficient. +// Unused slots in the table shall have an empty name. +// //===----------------------------------------------------------------------===// -namespace Hwreg { +template <class T> +static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize, + T Context) { + return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() && + (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context)); +} -int64_t getHwregId(const StringRef Name) { - for (int Id = ID_SYMBOLIC_FIRST_; Id < ID_SYMBOLIC_LAST_; ++Id) { - if (IdSymbolic[Id] && Name == IdSymbolic[Id]) - return Id; +template <class T> +static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test, + const CustomOperand<T> OpInfo[], int OpInfoSize, + T Context) { + int InvalidIdx = OPR_ID_UNKNOWN; + for (int Idx = 0; Idx < OpInfoSize; ++Idx) { + if (Test(OpInfo[Idx])) { + if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context)) + return Idx; + InvalidIdx = OPR_ID_UNSUPPORTED; + } } - return ID_UNKNOWN_; + return InvalidIdx; } -static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) { - if (isSI(STI) || isCI(STI) || isVI(STI)) - return ID_SYMBOLIC_FIRST_GFX9_; - else if (isGFX9(STI)) - return ID_SYMBOLIC_FIRST_GFX10_; - else if (isGFX10(STI) && !isGFX10_BEncoding(STI)) - return ID_SYMBOLIC_FIRST_GFX1030_; - else - return ID_SYMBOLIC_LAST_; +template <class T> +static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[], + int OpInfoSize, T Context) { + auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; }; + return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context); } -bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) { - switch (Id) { - case ID_HW_ID: - return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI); - case ID_HW_ID1: - case ID_HW_ID2: - return isGFX10Plus(STI); - case ID_XNACK_MASK: - return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI); - default: - return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) && - IdSymbolic[Id]; +template <class T> +static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize, + T Context, bool QuickCheck = true) { + auto Test = [=](const CustomOperand<T> &Op) { + return Op.Encoding == Id && !Op.Name.empty(); + }; + // This is an optimization that should work in most cases. + // As a side effect, it may cause selection of an alias + // instead of a primary operand name in case of sparse tables. + if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) && + OpInfo[Id].Encoding == Id) { + return Id; + } + return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context); +} + +//===----------------------------------------------------------------------===// +// Custom Operand Values +//===----------------------------------------------------------------------===// + +static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, + int Size, + const MCSubtargetInfo &STI) { + unsigned Enc = 0; + for (int Idx = 0; Idx < Size; ++Idx) { + const auto &Op = Opr[Idx]; + if (Op.isSupported(STI)) + Enc |= Op.encode(Op.Default); + } + return Enc; +} + +static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, + int Size, unsigned Code, + bool &HasNonDefaultVal, + const MCSubtargetInfo &STI) { + unsigned UsedOprMask = 0; + HasNonDefaultVal = false; + for (int Idx = 0; Idx < Size; ++Idx) { + const auto &Op = Opr[Idx]; + if (!Op.isSupported(STI)) + continue; + UsedOprMask |= Op.getMask(); + unsigned Val = Op.decode(Code); + if (!Op.isValid(Val)) + return false; + HasNonDefaultVal |= (Val != Op.Default); } + return (Code & ~UsedOprMask) == 0; +} + +static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, + unsigned Code, int &Idx, StringRef &Name, + unsigned &Val, bool &IsDefault, + const MCSubtargetInfo &STI) { + while (Idx < Size) { + const auto &Op = Opr[Idx++]; + if (Op.isSupported(STI)) { + Name = Op.Name; + Val = Op.decode(Code); + IsDefault = (Val == Op.Default); + return true; + } + } + + return false; +} + +static int encodeCustomOperandVal(const CustomOperandVal &Op, + int64_t InputVal) { + if (InputVal < 0 || InputVal > Op.Max) + return OPR_VAL_INVALID; + return Op.encode(InputVal); +} + +static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, + const StringRef Name, int64_t InputVal, + unsigned &UsedOprMask, + const MCSubtargetInfo &STI) { + int InvalidId = OPR_ID_UNKNOWN; + for (int Idx = 0; Idx < Size; ++Idx) { + const auto &Op = Opr[Idx]; + if (Op.Name == Name) { + if (!Op.isSupported(STI)) { + InvalidId = OPR_ID_UNSUPPORTED; + continue; + } + auto OprMask = Op.getMask(); + if (OprMask & UsedOprMask) + return OPR_ID_DUPLICATE; + UsedOprMask |= OprMask; + return encodeCustomOperandVal(Op, InputVal); + } + } + return InvalidId; +} + +//===----------------------------------------------------------------------===// +// DepCtr +//===----------------------------------------------------------------------===// + +namespace DepCtr { + +int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) { + static int Default = -1; + if (Default == -1) + Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI); + return Default; +} + +bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, + const MCSubtargetInfo &STI) { + return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code, + HasNonDefaultVal, STI); +} + +bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, + bool &IsDefault, const MCSubtargetInfo &STI) { + return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val, + IsDefault, STI); +} + +int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, + const MCSubtargetInfo &STI) { + return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask, + STI); +} + +} // namespace DepCtr + +//===----------------------------------------------------------------------===// +// hwreg +//===----------------------------------------------------------------------===// + +namespace Hwreg { + +int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) { + int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI); + return (Idx < 0) ? Idx : Opr[Idx].Encoding; } bool isValidHwreg(int64_t Id) { @@ -1063,7 +1291,8 @@ uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) { } StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) { - return isValidHwreg(Id, STI) ? IdSymbolic[Id] : ""; + int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI); + return (Idx < 0) ? "" : Opr[Idx].Name; } void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) { @@ -1087,12 +1316,13 @@ struct ExpTgt { }; static constexpr ExpTgt ExpTgtInfo[] = { - {{"null"}, ET_NULL, ET_NULL_MAX_IDX}, - {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX}, - {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX}, - {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX}, - {{"pos"}, ET_POS0, ET_POS_MAX_IDX}, - {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX}, + {{"null"}, ET_NULL, ET_NULL_MAX_IDX}, + {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX}, + {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX}, + {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX}, + {{"pos"}, ET_POS0, ET_POS_MAX_IDX}, + {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX}, + {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX}, }; bool getTgtName(unsigned Id, StringRef &Name, int &Index) { @@ -1130,7 +1360,20 @@ unsigned getTgtId(const StringRef Name) { } bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) { - return (Id != ET_POS4 && Id != ET_PRIM) || isGFX10Plus(STI); + switch (Id) { + case ET_NULL: + return !isGFX11Plus(STI); + case ET_POS4: + case ET_PRIM: + return isGFX10Plus(STI); + case ET_DUAL_SRC_BLEND0: + case ET_DUAL_SRC_BLEND1: + return isGFX11Plus(STI); + default: + if (Id >= ET_PARAM0 && Id <= ET_PARAM31) + return !isGFX11Plus(STI); + return true; + } } } // namespace Exp @@ -1196,27 +1439,44 @@ void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) { Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK; } -int64_t getUnifiedFormat(const StringRef Name) { - for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) { - if (Name == UfmtSymbolic[Id]) - return Id; +int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) { + if (isGFX11Plus(STI)) { + for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { + if (Name == UfmtSymbolicGFX11[Id]) + return Id; + } + } else { + for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { + if (Name == UfmtSymbolicGFX10[Id]) + return Id; + } } return UFMT_UNDEF; } -StringRef getUnifiedFormatName(unsigned Id) { - return isValidUnifiedFormat(Id) ? UfmtSymbolic[Id] : ""; +StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) { + if(isValidUnifiedFormat(Id, STI)) + return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id]; + return ""; } -bool isValidUnifiedFormat(unsigned Id) { - return Id <= UFMT_LAST; +bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) { + return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST; } -int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt) { +int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, + const MCSubtargetInfo &STI) { int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt); - for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) { - if (Fmt == DfmtNfmt2UFmt[Id]) - return Id; + if (isGFX11Plus(STI)) { + for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { + if (Fmt == DfmtNfmt2UFmtGFX11[Id]) + return Id; + } + } else { + for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { + if (Fmt == DfmtNfmt2UFmtGFX10[Id]) + return Id; + } } return UFMT_UNDEF; } @@ -1239,40 +1499,22 @@ unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) { namespace SendMsg { -int64_t getMsgId(const StringRef Name) { - for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) { - if (IdSymbolic[i] && Name == IdSymbolic[i]) - return i; - } - return ID_UNKNOWN_; +static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) { + return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_; } -bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) { - if (Strict) { - switch (MsgId) { - case ID_SAVEWAVE: - return isVI(STI) || isGFX9Plus(STI); - case ID_STALL_WAVE_GEN: - case ID_HALT_WAVES: - case ID_ORDERED_PS_DONE: - case ID_GS_ALLOC_REQ: - case ID_GET_DOORBELL: - return isGFX9Plus(STI); - case ID_EARLY_PRIM_DEALLOC: - return isGFX9(STI); - case ID_GET_DDID: - return isGFX10Plus(STI); - default: - return 0 <= MsgId && MsgId < ID_GAPS_LAST_ && IdSymbolic[MsgId]; - } - } else { - return 0 <= MsgId && isUInt<ID_WIDTH_>(MsgId); - } +int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) { + int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI); + return (Idx < 0) ? Idx : Msg[Idx].Encoding; +} + +bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) { + return (MsgId & ~(getMsgIdMask(STI))) == 0; } -StringRef getMsgName(int64_t MsgId) { - assert(0 <= MsgId && MsgId < ID_GAPS_LAST_); - return IdSymbolic[MsgId]; +StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) { + int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI); + return (Idx < 0) ? "" : Msg[Idx].Name; } int64_t getMsgOpId(int64_t MsgId, const StringRef Name) { @@ -1289,26 +1531,27 @@ int64_t getMsgOpId(int64_t MsgId, const StringRef Name) { bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict) { - assert(isValidMsgId(MsgId, STI, Strict)); + assert(isValidMsgId(MsgId, STI)); if (!Strict) return 0 <= OpId && isUInt<OP_WIDTH_>(OpId); - switch(MsgId) - { - case ID_GS: - return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP; - case ID_GS_DONE: - return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_; - case ID_SYSMSG: + if (MsgId == ID_SYSMSG) return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_; - default: - return OpId == OP_NONE_; + if (!isGFX11Plus(STI)) { + switch (MsgId) { + case ID_GS_PreGFX11: + return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP; + case ID_GS_DONE_PreGFX11: + return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_; + } } + return OpId == OP_NONE_; } -StringRef getMsgOpName(int64_t MsgId, int64_t OpId) { - assert(msgRequiresOp(MsgId)); +StringRef getMsgOpName(int64_t MsgId, int64_t OpId, + const MCSubtargetInfo &STI) { + assert(msgRequiresOp(MsgId, STI)); return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId]; } @@ -1319,42 +1562,48 @@ bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, if (!Strict) return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId); - switch(MsgId) - { - case ID_GS: - return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_; - case ID_GS_DONE: - return (OpId == OP_GS_NOP)? - (StreamId == STREAM_ID_NONE_) : - (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_); - default: - return StreamId == STREAM_ID_NONE_; + if (!isGFX11Plus(STI)) { + switch (MsgId) { + case ID_GS_PreGFX11: + return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_; + case ID_GS_DONE_PreGFX11: + return (OpId == OP_GS_NOP) ? + (StreamId == STREAM_ID_NONE_) : + (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_); + } } + return StreamId == STREAM_ID_NONE_; } -bool msgRequiresOp(int64_t MsgId) { - return MsgId == ID_GS || MsgId == ID_GS_DONE || MsgId == ID_SYSMSG; +bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) { + return MsgId == ID_SYSMSG || + (!isGFX11Plus(STI) && + (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11)); } -bool msgSupportsStream(int64_t MsgId, int64_t OpId) { - return (MsgId == ID_GS || MsgId == ID_GS_DONE) && OpId != OP_GS_NOP; +bool msgSupportsStream(int64_t MsgId, int64_t OpId, + const MCSubtargetInfo &STI) { + return !isGFX11Plus(STI) && + (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) && + OpId != OP_GS_NOP; } -void decodeMsg(unsigned Val, - uint16_t &MsgId, - uint16_t &OpId, - uint16_t &StreamId) { - MsgId = Val & ID_MASK_; - OpId = (Val & OP_MASK_) >> OP_SHIFT_; - StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_; +void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, + uint16_t &StreamId, const MCSubtargetInfo &STI) { + MsgId = Val & getMsgIdMask(STI); + if (isGFX11Plus(STI)) { + OpId = 0; + StreamId = 0; + } else { + OpId = (Val & OP_MASK_) >> OP_SHIFT_; + StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_; + } } uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId) { - return (MsgId << ID_SHIFT_) | - (OpId << OP_SHIFT_) | - (StreamId << STREAM_ID_SHIFT_); + return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_); } } // namespace SendMsg @@ -1427,6 +1676,10 @@ bool isModuleEntryFunctionCC(CallingConv::ID CC) { } } +bool isKernelCC(const Function *Func) { + return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv()); +} + bool hasXNACK(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureXNACK]; } @@ -1448,7 +1701,8 @@ bool hasG16(const MCSubtargetInfo &STI) { } bool hasPackedD16(const MCSubtargetInfo &STI) { - return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]; + return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem] && !isCI(STI) && + !isSI(STI); } bool isSI(const MCSubtargetInfo &STI) { @@ -1467,6 +1721,18 @@ bool isGFX9(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureGFX9]; } +bool isGFX9_GFX10(const MCSubtargetInfo &STI) { + return isGFX9(STI) || isGFX10(STI); +} + +bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) { + return isVI(STI) || isGFX9(STI) || isGFX10(STI); +} + +bool isGFX8Plus(const MCSubtargetInfo &STI) { + return isVI(STI) || isGFX9Plus(STI); +} + bool isGFX9Plus(const MCSubtargetInfo &STI) { return isGFX9(STI) || isGFX10Plus(STI); } @@ -1475,7 +1741,29 @@ bool isGFX10(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureGFX10]; } -bool isGFX10Plus(const MCSubtargetInfo &STI) { return isGFX10(STI); } +bool isGFX10Plus(const MCSubtargetInfo &STI) { + return isGFX10(STI) || isGFX11Plus(STI); +} + +bool isGFX11(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureGFX11]; +} + +bool isGFX11Plus(const MCSubtargetInfo &STI) { + return isGFX11(STI); +} + +bool isNotGFX11Plus(const MCSubtargetInfo &STI) { + return !isGFX11Plus(STI); +} + +bool isNotGFX10Plus(const MCSubtargetInfo &STI) { + return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI); +} + +bool isGFX10Before1030(const MCSubtargetInfo &STI) { + return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI); +} bool isGCN3Encoding(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding]; @@ -1497,10 +1785,29 @@ bool isGFX90A(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]; } +bool isGFX940(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureGFX940Insts]; +} + bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; } +bool hasMAIInsts(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureMAIInsts]; +} + +bool hasVOPD(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureVOPD]; +} + +int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, + int32_t ArgNumVGPR) { + if (has90AInsts && ArgNumAGPR) + return alignTo(ArgNumVGPR, 4) + ArgNumAGPR; + return std::max(ArgNumVGPR, ArgNumAGPR); +} + bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) { const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0); @@ -1508,13 +1815,6 @@ bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) { Reg == AMDGPU::SCC; } -bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) { - for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) { - if (*R == Reg1) return true; - } - return false; -} - #define MAP_REG2REG \ using namespace AMDGPU; \ switch(Reg) { \ @@ -1554,6 +1854,9 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) { CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \ CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ + CASE_GFXPRE11_GFX11PLUS(M0) \ + CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \ + CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \ } #define CASE_CI_VI(node) \ @@ -1563,6 +1866,12 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) { #define CASE_VI_GFX9PLUS(node) \ case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi; +#define CASE_GFXPRE11_GFX11PLUS(node) \ + case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11; + +#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \ + case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11; + unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { if (STI.getTargetTriple().getArch() == Triple::r600) return Reg; @@ -1571,9 +1880,13 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { #undef CASE_CI_VI #undef CASE_VI_GFX9PLUS +#undef CASE_GFXPRE11_GFX11PLUS +#undef CASE_GFXPRE11_GFX11PLUS_TO #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node; #define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node; +#define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node; +#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) unsigned mc2PseudoReg(unsigned Reg) { MAP_REG2REG @@ -1581,6 +1894,8 @@ unsigned mc2PseudoReg(unsigned Reg) { #undef CASE_CI_VI #undef CASE_VI_GFX9PLUS +#undef CASE_GFXPRE11_GFX11PLUS +#undef CASE_GFXPRE11_GFX11PLUS_TO #undef MAP_REG2REG bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { @@ -1934,7 +2249,7 @@ Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, } unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed) { - // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. + // Address offset is 12-bit signed for GFX10, 13-bit for GFX9 and GFX11+. if (AMDGPU::isGFX10(ST)) return Signed ? 12 : 11; @@ -2029,7 +2344,8 @@ const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr); #define GET_SourcesOfDivergence_IMPL #define GET_Gfx9BufferFormat_IMPL -#define GET_Gfx10PlusBufferFormat_IMPL +#define GET_Gfx10BufferFormat_IMPL +#define GET_Gfx11PlusBufferFormat_IMPL #include "AMDGPUGenSearchableTables.inc" } // end anonymous namespace @@ -2042,16 +2358,20 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI) { - return isGFX10Plus(STI) - ? getGfx10PlusBufferFormatInfo(BitsPerComp, NumComponents, + return isGFX11Plus(STI) + ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents, NumFormat) - : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat); + : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp, + NumComponents, NumFormat) + : getGfx9BufferFormatInfo(BitsPerComp, + NumComponents, NumFormat); } const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, const MCSubtargetInfo &STI) { - return isGFX10Plus(STI) ? getGfx10PlusBufferFormatInfo(Format) - : getGfx9BufferFormatInfo(Format); + return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format) + : isGFX10(STI) ? getGfx10BufferFormatInfo(Format) + : getGfx9BufferFormatInfo(Format); } } // namespace AMDGPU diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 4516b511f3c8..dffeec10a14a 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -50,10 +50,19 @@ bool isHsaAbiVersion4(const MCSubtargetInfo *STI); /// \returns True if HSA OS ABI Version identification is 5, /// false otherwise. bool isHsaAbiVersion5(const MCSubtargetInfo *STI); -/// \returns True if HSA OS ABI Version identification is 3 or 4, +/// \returns True if HSA OS ABI Version identification is 3 and above, /// false otherwise. bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI); +/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr +unsigned getMultigridSyncArgImplicitArgPosition(); + +/// \returns The offset of the hostcall pointer argument from implicitarg_ptr +unsigned getHostcallImplicitArgPosition(); + +/// \returns Code object version. +unsigned getAmdhsaCodeObjectVersion(); + struct GcnBufferFormatInfo { unsigned Format; unsigned BitsPerComp; @@ -62,12 +71,19 @@ struct GcnBufferFormatInfo { unsigned DataFormat; }; +struct MAIInstInfo { + uint16_t Opcode; + bool is_dgemm; + bool is_gfx940_xdl; +}; + #define GET_MIMGBaseOpcode_DECL #define GET_MIMGDim_DECL #define GET_MIMGEncoding_DECL #define GET_MIMGLZMapping_DECL #define GET_MIMGMIPMapping_DECL #define GET_MIMGBiASMapping_DECL +#define GET_MAIInstInfoTable_DECL #include "AMDGPUGenSearchableTables.inc" namespace IsaInfo { @@ -352,6 +368,11 @@ struct MIMGG16MappingInfo { LLVM_READONLY const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L); +struct WMMAOpcodeMappingInfo { + unsigned Opcode2Addr; + unsigned Opcode3Addr; +}; + LLVM_READONLY const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP); @@ -382,6 +403,7 @@ struct MIMGInfo { uint8_t MIMGEncoding; uint8_t VDataDwords; uint8_t VAddrDwords; + uint8_t VAddrOperands; }; LLVM_READONLY @@ -439,6 +461,16 @@ LLVM_READONLY bool getVOP3IsSingle(unsigned Opc); LLVM_READONLY +bool isVOPC64DPP(unsigned Opc); + +/// Returns true if MAI operation is a double precision GEMM. +LLVM_READONLY +bool getMAIIsDGEMM(unsigned Opc); + +LLVM_READONLY +bool getMAIIsGFX940XDL(unsigned Opc); + +LLVM_READONLY const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, @@ -450,6 +482,12 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, LLVM_READONLY int getMCOpcode(uint16_t Opcode, unsigned Gen); +LLVM_READONLY +unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc); + +LLVM_READONLY +unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc); + void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const MCSubtargetInfo *STI); @@ -496,7 +534,7 @@ struct Waitcnt { unsigned LgkmCnt = ~0u; unsigned VsCnt = ~0u; - Waitcnt() {} + Waitcnt() = default; Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt) : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {} @@ -555,11 +593,14 @@ unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt); /// \p Lgkmcnt respectively. /// /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows: -/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only) -/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only) -/// \p Expcnt = \p Waitcnt[6:4] -/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10 only) -/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10+ only) +/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9) +/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10) +/// \p Vmcnt = \p Waitcnt[15:10] (gfx11+) +/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11) +/// \p Expcnt = \p Waitcnt[2:0] (gfx11+) +/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10) +/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10) +/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11+) void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt); @@ -581,12 +622,15 @@ unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, /// \p Version. /// /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows: -/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only) -/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only) -/// Waitcnt[6:4] = \p Expcnt -/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10 only) -/// Waitcnt[13:8] = \p Lgkmcnt (gfx10+ only) -/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only) +/// Waitcnt[2:0] = \p Expcnt (gfx11+) +/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9) +/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10) +/// Waitcnt[6:4] = \p Expcnt (pre-gfx11) +/// Waitcnt[9:4] = \p Lgkmcnt (gfx11+) +/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10) +/// Waitcnt[13:8] = \p Lgkmcnt (gfx10) +/// Waitcnt[15:10] = \p Vmcnt (gfx11+) +/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10) /// /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given /// isa \p Version. @@ -598,10 +642,7 @@ unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded); namespace Hwreg { LLVM_READONLY -int64_t getHwregId(const StringRef Name); - -LLVM_READNONE -bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI); +int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI); LLVM_READNONE bool isValidHwreg(int64_t Id); @@ -622,6 +663,18 @@ void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width); } // namespace Hwreg +namespace DepCtr { + +int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI); +int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, + const MCSubtargetInfo &STI); +bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, + const MCSubtargetInfo &STI); +bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, + bool &IsDefault, const MCSubtargetInfo &STI); + +} // namespace DepCtr + namespace Exp { bool getTgtName(unsigned Id, StringRef &Name, int &Index); @@ -653,13 +706,14 @@ bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI); bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI); -int64_t getUnifiedFormat(const StringRef Name); +int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI); -StringRef getUnifiedFormatName(unsigned Id); +StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI); -bool isValidUnifiedFormat(unsigned Val); +bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI); -int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt); +int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, + const MCSubtargetInfo &STI); bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI); @@ -670,19 +724,19 @@ unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI); namespace SendMsg { LLVM_READONLY -int64_t getMsgId(const StringRef Name); +int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI); LLVM_READONLY int64_t getMsgOpId(int64_t MsgId, const StringRef Name); LLVM_READNONE -StringRef getMsgName(int64_t MsgId); +StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI); LLVM_READNONE -StringRef getMsgOpName(int64_t MsgId, int64_t OpId); +StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI); LLVM_READNONE -bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true); +bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI); LLVM_READNONE bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, @@ -693,15 +747,13 @@ bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict = true); LLVM_READNONE -bool msgRequiresOp(int64_t MsgId); +bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI); LLVM_READNONE -bool msgSupportsStream(int64_t MsgId, int64_t OpId); +bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI); -void decodeMsg(unsigned Val, - uint16_t &MsgId, - uint16_t &OpId, - uint16_t &StreamId); +void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, + uint16_t &StreamId, const MCSubtargetInfo &STI); LLVM_READNONE uint64_t encodeMsg(uint64_t MsgId, @@ -738,6 +790,8 @@ bool isEntryFunctionCC(CallingConv::ID CC); LLVM_READNONE bool isModuleEntryFunctionCC(CallingConv::ID CC); +bool isKernelCC(const Function *Func); + // FIXME: Remove this when calling conventions cleaned up LLVM_READNONE inline bool isKernel(CallingConv::ID CC) { @@ -761,22 +815,31 @@ bool isSI(const MCSubtargetInfo &STI); bool isCI(const MCSubtargetInfo &STI); bool isVI(const MCSubtargetInfo &STI); bool isGFX9(const MCSubtargetInfo &STI); +bool isGFX9_GFX10(const MCSubtargetInfo &STI); +bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI); +bool isGFX8Plus(const MCSubtargetInfo &STI); bool isGFX9Plus(const MCSubtargetInfo &STI); bool isGFX10(const MCSubtargetInfo &STI); bool isGFX10Plus(const MCSubtargetInfo &STI); +bool isNotGFX10Plus(const MCSubtargetInfo &STI); +bool isGFX10Before1030(const MCSubtargetInfo &STI); +bool isGFX11(const MCSubtargetInfo &STI); +bool isGFX11Plus(const MCSubtargetInfo &STI); +bool isNotGFX11Plus(const MCSubtargetInfo &STI); bool isGCN3Encoding(const MCSubtargetInfo &STI); bool isGFX10_AEncoding(const MCSubtargetInfo &STI); bool isGFX10_BEncoding(const MCSubtargetInfo &STI); bool hasGFX10_3Insts(const MCSubtargetInfo &STI); bool isGFX90A(const MCSubtargetInfo &STI); +bool isGFX940(const MCSubtargetInfo &STI); bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI); +bool hasMAIInsts(const MCSubtargetInfo &STI); +bool hasVOPD(const MCSubtargetInfo &STI); +int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR); /// Is Reg - scalar register bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI); -/// Is there any intersection between registers -bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI); - /// If \p Reg is a pseudo reg, return the correct hardware register given /// \p STI otherwise return \p Reg. unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI); @@ -931,7 +994,7 @@ inline bool isLegal64BitDPPControl(unsigned DC) { /// \returns true if the intrinsic is divergent bool isIntrinsicSourceOfDivergence(unsigned IntrID); -// Track defaults for fields in the MODE registser. +// Track defaults for fields in the MODE register. struct SIModeRegisterDefaults { /// Floating point opcodes that support exception flag gathering quiet and /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10 diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h deleted file mode 100644 index 83ef68cc3f60..000000000000 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h +++ /dev/null @@ -1,38 +0,0 @@ -//===- AMDGPULDSUtils.h - LDS related helper functions -*- C++ -*----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// AMDGPU LDS related helper utility functions. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H -#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/IR/Constants.h" - -namespace llvm { - -class ConstantExpr; - -namespace AMDGPU { - -bool isKernelCC(const Function *Func); - -Align getAlign(DataLayout const &DL, const GlobalVariable *GV); - -std::vector<GlobalVariable *> findVariablesToLower(Module &M, - const Function *F = nullptr); - -/// Replace all uses of constant \p C with instructions in \p F. -void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F); -} // end namespace AMDGPU - -} // end namespace llvm - -#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp index a83ff6667956..83d7cbdb183c 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp @@ -1,33 +1,32 @@ -//===- AMDGPULDSUtils.cpp -------------------------------------------------===// +//===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// AMDGPU LDS related helper utility functions. -// -//===----------------------------------------------------------------------===// -#include "AMDGPULDSUtils.h" +#include "AMDGPUMemoryUtils.h" #include "AMDGPU.h" -#include "Utils/AMDGPUBaseInfo.h" -#include "llvm/ADT/DepthFirstIterator.h" +#include "AMDGPUBaseInfo.h" #include "llvm/ADT/SetVector.h" -#include "llvm/IR/Constants.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/ReplaceConstant.h" +#define DEBUG_TYPE "amdgpu-memory-utils" + using namespace llvm; namespace llvm { namespace AMDGPU { -bool isKernelCC(const Function *Func) { - return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv()); -} - Align getAlign(DataLayout const &DL, const GlobalVariable *GV) { return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL), GV->getValueType()); @@ -139,6 +138,83 @@ std::vector<GlobalVariable *> findVariablesToLower(Module &M, return LocalVars; } +bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) { + Instruction *DefInst = Def->getMemoryInst(); + + if (isa<FenceInst>(DefInst)) + return false; + + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) { + switch (II->getIntrinsicID()) { + case Intrinsic::amdgcn_s_barrier: + case Intrinsic::amdgcn_wave_barrier: + case Intrinsic::amdgcn_sched_barrier: + return false; + default: + break; + } + } + + // Ignore atomics not aliasing with the original load, any atomic is a + // universal MemoryDef from MSSA's point of view too, just like a fence. + const auto checkNoAlias = [AA, Ptr](auto I) -> bool { + return I && AA->isNoAlias(I->getPointerOperand(), Ptr); + }; + + if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) || + checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst))) + return false; + + return true; +} + +bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA, + AAResults *AA) { + MemorySSAWalker *Walker = MSSA->getWalker(); + SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)}; + SmallSet<MemoryAccess *, 8> Visited; + MemoryLocation Loc(MemoryLocation::get(Load)); + + LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n'); + + // Start with a nearest dominating clobbering access, it will be either + // live on entry (nothing to do, load is not clobbered), MemoryDef, or + // MemoryPhi if several MemoryDefs can define this memory state. In that + // case add all Defs to WorkList and continue going up and checking all + // the definitions of this memory location until the root. When all the + // defs are exhausted and came to the entry state we have no clobber. + // Along the scan ignore barriers and fences which are considered clobbers + // by the MemorySSA, but not really writing anything into the memory. + while (!WorkList.empty()) { + MemoryAccess *MA = WorkList.pop_back_val(); + if (!Visited.insert(MA).second) + continue; + + if (MSSA->isLiveOnEntryDef(MA)) + continue; + + if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) { + LLVM_DEBUG(dbgs() << " Def: " << *Def->getMemoryInst() << '\n'); + + if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) { + LLVM_DEBUG(dbgs() << " -> load is clobbered\n"); + return true; + } + + WorkList.push_back( + Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc)); + continue; + } + + const MemoryPhi *Phi = cast<MemoryPhi>(MA); + for (auto &Use : Phi->incoming_values()) + WorkList.push_back(cast<MemoryAccess>(&Use)); + } + + LLVM_DEBUG(dbgs() << " -> no clobber\n"); + return false; +} + } // end namespace AMDGPU } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h new file mode 100644 index 000000000000..65ed02ca62de --- /dev/null +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h @@ -0,0 +1,51 @@ +//===- AMDGPUMemoryUtils.h - Memory related helper functions -*- C++ -*----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H +#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H + +#include <vector> + +namespace llvm { + +struct Align; +class AAResults; +class ConstantExpr; +class DataLayout; +class Function; +class GlobalVariable; +class LoadInst; +class MemoryDef; +class MemorySSA; +class Module; +class Value; + +namespace AMDGPU { + +Align getAlign(DataLayout const &DL, const GlobalVariable *GV); + +std::vector<GlobalVariable *> findVariablesToLower(Module &M, + const Function *F = nullptr); + +/// Replace all uses of constant \p C with instructions in \p F. +void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F); + +/// Given a \p Def clobbering a load from \p Ptr according to the MSSA check +/// if this is actually a memory update or an artificial clobber to facilitate +/// ordering constraints. +bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA); + +/// Check is a \p Load is clobbered in its function. +bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA, + AAResults *AA); + +} // end namespace AMDGPU + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp index f6b5975f1934..4ad93f7b0b68 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp @@ -209,6 +209,11 @@ void AMDGPUPALMetadata::setNumUsedVgprs(CallingConv::ID CC, unsigned Val) { getHwStage(CC)[".vgpr_count"] = MsgPackDoc.getNode(Val); } +// Set the number of used agprs in the metadata. +void AMDGPUPALMetadata::setNumUsedAgprs(CallingConv::ID CC, unsigned Val) { + getHwStage(CC)[".agpr_count"] = Val; +} + // Set the number of used sgprs in the metadata. This is an optional advisory // record for logging etc; wave dispatch actually uses the rsrc1 register for // the shader stage to determine the number of sgprs to allocate. diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h index 7fdd9a8429c1..a45a799e38a9 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h @@ -69,6 +69,10 @@ public: // the shader stage to determine the number of vgprs to allocate. void setNumUsedVgprs(unsigned CC, unsigned Val); + // Set the number of used agprs in the metadata. This is an optional advisory + // record for logging etc; + void setNumUsedAgprs(unsigned CC, unsigned Val); + // Set the number of used sgprs in the metadata. This is an optional advisory // record for logging etc; wave dispatch actually uses the rsrc1 register for // the shader stage to determine the number of sgprs to allocate. |
