Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp      | 247
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h        |  15
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp      | 460
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h        | 188
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp   |  50
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h     |  18
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp |   4
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h   |   2
8 files changed, 875 insertions(+), 109 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index 5819a621f55d..c8a85d76a55b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -6,6 +6,9 @@
//
//===----------------------------------------------------------------------===//
#include "AMDGPUAsmUtils.h"
+#include "SIDefines.h"
+
+#include "llvm/ADT/StringRef.h"
namespace llvm {
namespace AMDGPU {
@@ -87,6 +90,250 @@ const char* const IdSymbolic[] = {
} // namespace Hwreg
+namespace MTBUFFormat {
+
+StringLiteral const DfmtSymbolic[] = {
+ "BUF_DATA_FORMAT_INVALID",
+ "BUF_DATA_FORMAT_8",
+ "BUF_DATA_FORMAT_16",
+ "BUF_DATA_FORMAT_8_8",
+ "BUF_DATA_FORMAT_32",
+ "BUF_DATA_FORMAT_16_16",
+ "BUF_DATA_FORMAT_10_11_11",
+ "BUF_DATA_FORMAT_11_11_10",
+ "BUF_DATA_FORMAT_10_10_10_2",
+ "BUF_DATA_FORMAT_2_10_10_10",
+ "BUF_DATA_FORMAT_8_8_8_8",
+ "BUF_DATA_FORMAT_32_32",
+ "BUF_DATA_FORMAT_16_16_16_16",
+ "BUF_DATA_FORMAT_32_32_32",
+ "BUF_DATA_FORMAT_32_32_32_32",
+ "BUF_DATA_FORMAT_RESERVED_15"
+};
+
+StringLiteral const NfmtSymbolicGFX10[] = {
+ "BUF_NUM_FORMAT_UNORM",
+ "BUF_NUM_FORMAT_SNORM",
+ "BUF_NUM_FORMAT_USCALED",
+ "BUF_NUM_FORMAT_SSCALED",
+ "BUF_NUM_FORMAT_UINT",
+ "BUF_NUM_FORMAT_SINT",
+ "",
+ "BUF_NUM_FORMAT_FLOAT"
+};
+
+StringLiteral const NfmtSymbolicSICI[] = {
+ "BUF_NUM_FORMAT_UNORM",
+ "BUF_NUM_FORMAT_SNORM",
+ "BUF_NUM_FORMAT_USCALED",
+ "BUF_NUM_FORMAT_SSCALED",
+ "BUF_NUM_FORMAT_UINT",
+ "BUF_NUM_FORMAT_SINT",
+ "BUF_NUM_FORMAT_SNORM_OGL",
+ "BUF_NUM_FORMAT_FLOAT"
+};
+
+StringLiteral const NfmtSymbolicVI[] = { // VI and GFX9
+ "BUF_NUM_FORMAT_UNORM",
+ "BUF_NUM_FORMAT_SNORM",
+ "BUF_NUM_FORMAT_USCALED",
+ "BUF_NUM_FORMAT_SSCALED",
+ "BUF_NUM_FORMAT_UINT",
+ "BUF_NUM_FORMAT_SINT",
+ "BUF_NUM_FORMAT_RESERVED_6",
+ "BUF_NUM_FORMAT_FLOAT"
+};
+
+StringLiteral const UfmtSymbolic[] = {
+ "BUF_FMT_INVALID",
+
+ "BUF_FMT_8_UNORM",
+ "BUF_FMT_8_SNORM",
+ "BUF_FMT_8_USCALED",
+ "BUF_FMT_8_SSCALED",
+ "BUF_FMT_8_UINT",
+ "BUF_FMT_8_SINT",
+
+ "BUF_FMT_16_UNORM",
+ "BUF_FMT_16_SNORM",
+ "BUF_FMT_16_USCALED",
+ "BUF_FMT_16_SSCALED",
+ "BUF_FMT_16_UINT",
+ "BUF_FMT_16_SINT",
+ "BUF_FMT_16_FLOAT",
+
+ "BUF_FMT_8_8_UNORM",
+ "BUF_FMT_8_8_SNORM",
+ "BUF_FMT_8_8_USCALED",
+ "BUF_FMT_8_8_SSCALED",
+ "BUF_FMT_8_8_UINT",
+ "BUF_FMT_8_8_SINT",
+
+ "BUF_FMT_32_UINT",
+ "BUF_FMT_32_SINT",
+ "BUF_FMT_32_FLOAT",
+
+ "BUF_FMT_16_16_UNORM",
+ "BUF_FMT_16_16_SNORM",
+ "BUF_FMT_16_16_USCALED",
+ "BUF_FMT_16_16_SSCALED",
+ "BUF_FMT_16_16_UINT",
+ "BUF_FMT_16_16_SINT",
+ "BUF_FMT_16_16_FLOAT",
+
+ "BUF_FMT_10_11_11_UNORM",
+ "BUF_FMT_10_11_11_SNORM",
+ "BUF_FMT_10_11_11_USCALED",
+ "BUF_FMT_10_11_11_SSCALED",
+ "BUF_FMT_10_11_11_UINT",
+ "BUF_FMT_10_11_11_SINT",
+ "BUF_FMT_10_11_11_FLOAT",
+
+ "BUF_FMT_11_11_10_UNORM",
+ "BUF_FMT_11_11_10_SNORM",
+ "BUF_FMT_11_11_10_USCALED",
+ "BUF_FMT_11_11_10_SSCALED",
+ "BUF_FMT_11_11_10_UINT",
+ "BUF_FMT_11_11_10_SINT",
+ "BUF_FMT_11_11_10_FLOAT",
+
+ "BUF_FMT_10_10_10_2_UNORM",
+ "BUF_FMT_10_10_10_2_SNORM",
+ "BUF_FMT_10_10_10_2_USCALED",
+ "BUF_FMT_10_10_10_2_SSCALED",
+ "BUF_FMT_10_10_10_2_UINT",
+ "BUF_FMT_10_10_10_2_SINT",
+
+ "BUF_FMT_2_10_10_10_UNORM",
+ "BUF_FMT_2_10_10_10_SNORM",
+ "BUF_FMT_2_10_10_10_USCALED",
+ "BUF_FMT_2_10_10_10_SSCALED",
+ "BUF_FMT_2_10_10_10_UINT",
+ "BUF_FMT_2_10_10_10_SINT",
+
+ "BUF_FMT_8_8_8_8_UNORM",
+ "BUF_FMT_8_8_8_8_SNORM",
+ "BUF_FMT_8_8_8_8_USCALED",
+ "BUF_FMT_8_8_8_8_SSCALED",
+ "BUF_FMT_8_8_8_8_UINT",
+ "BUF_FMT_8_8_8_8_SINT",
+
+ "BUF_FMT_32_32_UINT",
+ "BUF_FMT_32_32_SINT",
+ "BUF_FMT_32_32_FLOAT",
+
+ "BUF_FMT_16_16_16_16_UNORM",
+ "BUF_FMT_16_16_16_16_SNORM",
+ "BUF_FMT_16_16_16_16_USCALED",
+ "BUF_FMT_16_16_16_16_SSCALED",
+ "BUF_FMT_16_16_16_16_UINT",
+ "BUF_FMT_16_16_16_16_SINT",
+ "BUF_FMT_16_16_16_16_FLOAT",
+
+ "BUF_FMT_32_32_32_UINT",
+ "BUF_FMT_32_32_32_SINT",
+ "BUF_FMT_32_32_32_FLOAT",
+ "BUF_FMT_32_32_32_32_UINT",
+ "BUF_FMT_32_32_32_32_SINT",
+ "BUF_FMT_32_32_32_32_FLOAT"
+};
+
+unsigned const DfmtNfmt2UFmt[] = {
+ DFMT_INVALID | (NFMT_UNORM << NFMT_SHIFT),
+
+ DFMT_8 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_8 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_8 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_8 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_8 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_8 | (NFMT_SINT << NFMT_SHIFT),
+
+ DFMT_16 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_16 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_16 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_16 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_16 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_16 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_16 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_8_8 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_8_8 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_8_8 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_8_8 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_8_8 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_8_8 | (NFMT_SINT << NFMT_SHIFT),
+
+ DFMT_32 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_32 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_32 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_16_16 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_16_16 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_16_16 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_16_16 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_16_16 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_16_16 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_16_16 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_10_11_11 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_10_11_11 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_10_11_11 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_10_11_11 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_10_11_11 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_10_11_11 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_10_11_11 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_11_11_10 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_11_11_10 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_11_11_10 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_11_11_10 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_11_11_10 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_11_11_10 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_11_11_10 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_10_10_10_2 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_10_10_10_2 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_10_10_10_2 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_10_10_10_2 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_10_10_10_2 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_10_10_10_2 | (NFMT_SINT << NFMT_SHIFT),
+
+ DFMT_2_10_10_10 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_2_10_10_10 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_2_10_10_10 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_2_10_10_10 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_2_10_10_10 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_2_10_10_10 | (NFMT_SINT << NFMT_SHIFT),
+
+ DFMT_8_8_8_8 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_8_8_8_8 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_8_8_8_8 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_8_8_8_8 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_8_8_8_8 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_8_8_8_8 | (NFMT_SINT << NFMT_SHIFT),
+
+ DFMT_32_32 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_32_32 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_32_32 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_16_16_16_16 | (NFMT_UNORM << NFMT_SHIFT),
+ DFMT_16_16_16_16 | (NFMT_SNORM << NFMT_SHIFT),
+ DFMT_16_16_16_16 | (NFMT_USCALED << NFMT_SHIFT),
+ DFMT_16_16_16_16 | (NFMT_SSCALED << NFMT_SHIFT),
+ DFMT_16_16_16_16 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_16_16_16_16 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_16_16_16_16 | (NFMT_FLOAT << NFMT_SHIFT),
+
+ DFMT_32_32_32 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_32_32_32 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_32_32_32 | (NFMT_FLOAT << NFMT_SHIFT),
+ DFMT_32_32_32_32 | (NFMT_UINT << NFMT_SHIFT),
+ DFMT_32_32_32_32 | (NFMT_SINT << NFMT_SHIFT),
+ DFMT_32_32_32_32 | (NFMT_FLOAT << NFMT_SHIFT)
+};
+
+} // namespace MTBUFFormat
+
namespace Swizzle {
// This must be in sync with llvm::AMDGPU::Swizzle::Id enum members, see SIDefines.h.
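Aside (not part of the diff above): the DfmtNfmt2UFmt table is parallel to UfmtSymbolic, and each entry packs a dfmt/nfmt pair with the same shifts that encodeDfmtNfmt() uses in AMDGPUBaseInfo.cpp further down. A minimal sketch of the intended lookup, assuming the MTBUFFormat enums from SIDefines.h are in scope; the wrapper function is hypothetical:

  // Sketch only: mirrors what convertDfmtNfmt2Ufmt() in AMDGPUBaseInfo.cpp does.
  #include "AMDGPUAsmUtils.h"
  #include "SIDefines.h"
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t lookupUnifiedFormat(unsigned Dfmt, unsigned Nfmt) { // hypothetical helper
    // Pack the pair exactly the way the table entries above are built.
    unsigned Packed = (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
    // Scan the parallel tables; the matching index is the unified format id,
    // and UfmtSymbolic[Id] is its printable name.
    for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id)
      if (DfmtNfmt2UFmt[Id] == Packed)
        return Id;
    return UFMT_UNDEF;
  }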
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
index cd91c5f6edd5..3eb27c5e5f42 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
@@ -10,7 +10,11 @@
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUASMUTILS_H
namespace llvm {
+
+class StringLiteral;
+
namespace AMDGPU {
+
namespace SendMsg { // Symbolic names for the sendmsg(...) syntax.
extern const char* const IdSymbolic[];
@@ -25,6 +29,17 @@ extern const char* const IdSymbolic[];
} // namespace Hwreg
+namespace MTBUFFormat {
+
+extern StringLiteral const DfmtSymbolic[];
+extern StringLiteral const NfmtSymbolicGFX10[];
+extern StringLiteral const NfmtSymbolicSICI[];
+extern StringLiteral const NfmtSymbolicVI[];
+extern StringLiteral const UfmtSymbolic[];
+extern unsigned const DfmtNfmt2UFmt[];
+
+} // namespace MTBUFFormat
+
namespace Swizzle { // Symbolic names for the swizzle(...) syntax.
extern const char* const IdSymbolic[];
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 3df2157fc402..4c1e4dec7ecb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -9,44 +9,28 @@
#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
-#include "AMDGPUTargetTransformInfo.h"
-#include "SIDefines.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
+#include "AMDKernelCodeT.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <cstring>
-#include <utility>
-
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetParser.h"
#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
-#undef GET_INSTRMAP_INFO
-#undef GET_INSTRINFO_NAMED_OPS
+
+static llvm::cl::opt<unsigned> AmdhsaCodeObjectVersion(
+ "amdhsa-code-object-version", llvm::cl::Hidden,
+ llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(3));
namespace {
@@ -103,6 +87,32 @@ namespace llvm {
namespace AMDGPU {
+Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
+ if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
+ return None;
+
+ switch (AmdhsaCodeObjectVersion) {
+ case 2:
+ return ELF::ELFABIVERSION_AMDGPU_HSA_V2;
+ case 3:
+ return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
+ default:
+ return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
+ }
+}
+
+bool isHsaAbiVersion2(const MCSubtargetInfo *STI) {
+ if (const auto &&HsaAbiVer = getHsaAbiVersion(STI))
+ return HsaAbiVer.getValue() == ELF::ELFABIVERSION_AMDGPU_HSA_V2;
+ return false;
+}
+
+bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
+ if (const auto &&HsaAbiVer = getHsaAbiVersion(STI))
+ return HsaAbiVer.getValue() == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
+ return false;
+}
+
#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
@@ -236,6 +246,94 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen) {
namespace IsaInfo {
+AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
+ : XnackSetting(TargetIDSetting::Any), SramEccSetting(TargetIDSetting::Any) {
+ if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
+ XnackSetting = TargetIDSetting::Unsupported;
+ if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
+ SramEccSetting = TargetIDSetting::Unsupported;
+}
+
+void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
+ // Check if xnack or sramecc is explicitly enabled or disabled. In the
+ // absence of the target features we assume we must generate code that can run
+ // in any environment.
+ SubtargetFeatures Features(FS);
+ Optional<bool> XnackRequested;
+ Optional<bool> SramEccRequested;
+
+ for (const std::string &Feature : Features.getFeatures()) {
+ if (Feature == "+xnack")
+ XnackRequested = true;
+ else if (Feature == "-xnack")
+ XnackRequested = false;
+ else if (Feature == "+sramecc")
+ SramEccRequested = true;
+ else if (Feature == "-sramecc")
+ SramEccRequested = false;
+ }
+
+ bool XnackSupported = isXnackSupported();
+ bool SramEccSupported = isSramEccSupported();
+
+ if (XnackRequested) {
+ if (XnackSupported) {
+ XnackSetting =
+ *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
+ } else {
+ // If a specific xnack setting was requested and this GPU does not support
+ // xnack emit a warning. Setting will remain set to "Unsupported".
+ if (*XnackRequested) {
+ errs() << "warning: xnack 'On' was requested for a processor that does "
+ "not support it!\n";
+ } else {
+ errs() << "warning: xnack 'Off' was requested for a processor that "
+ "does not support it!\n";
+ }
+ }
+ }
+
+ if (SramEccRequested) {
+ if (SramEccSupported) {
+ SramEccSetting =
+ *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
+ } else {
+ // If a specific sramecc setting was requested and this GPU does not
+ // support sramecc emit a warning. Setting will remain set to
+ // "Unsupported".
+ if (*SramEccRequested) {
+ errs() << "warning: sramecc 'On' was requested for a processor that "
+ "does not support it!\n";
+ } else {
+ errs() << "warning: sramecc 'Off' was requested for a processor that "
+ "does not support it!\n";
+ }
+ }
+ }
+}
+
+static TargetIDSetting
+getTargetIDSettingFromFeatureString(StringRef FeatureString) {
+ if (FeatureString.endswith("-"))
+ return TargetIDSetting::Off;
+ if (FeatureString.endswith("+"))
+ return TargetIDSetting::On;
+
+ llvm_unreachable("Malformed feature string");
+}
+
+void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
+ SmallVector<StringRef, 3> TargetIDSplit;
+ TargetID.split(TargetIDSplit, ':');
+
+ for (const auto &FeatureString : TargetIDSplit) {
+ if (FeatureString.startswith("xnack"))
+ XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
+ if (FeatureString.startswith("sramecc"))
+ SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
+ }
+}
+
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
auto TargetTriple = STI->getTargetTriple();
auto Version = getIsaVersion(STI->getCPU());
@@ -252,16 +350,11 @@ void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
if (hasXNACK(*STI))
Stream << "+xnack";
if (hasSRAMECC(*STI))
- Stream << "+sram-ecc";
+ Stream << "+sramecc";
Stream.flush();
}
-bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
- return STI->getTargetTriple().getOS() == Triple::AMDHSA &&
- STI->getFeatureBits().test(FeatureCodeObjectV3);
-}
-
unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
if (STI->getFeatureBits().test(FeatureWavefrontSize16))
return 16;
@@ -284,7 +377,7 @@ unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
// "Per CU" really means "per whatever functional block the waves of a
// workgroup must share". For gfx10 in CU mode this is the CU, which contains
// two SIMDs.
- if (isGFX10(*STI) && STI->getFeatureBits().test(FeatureCuMode))
+ if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
return 2;
// Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
// two CUs, so a total of four SIMDs.
@@ -309,7 +402,7 @@ unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
// FIXME: Need to take scratch memory into account.
- if (!isGFX10(*STI))
+ if (!isGFX10Plus(*STI))
return 10;
return hasGFX10_3Insts(*STI) ? 16 : 20;
}
@@ -459,7 +552,7 @@ unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
}
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
- if (!isGFX10(*STI))
+ if (!isGFX10Plus(*STI))
return 256;
return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512;
}
@@ -578,7 +671,7 @@ bool isReadOnlySegment(const GlobalValue *GV) {
}
bool shouldEmitConstantsToTextSection(const Triple &TT) {
- return TT.getOS() == Triple::AMDPAL || TT.getArch() == Triple::r600;
+ return TT.getArch() == Triple::r600;
}
int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
@@ -784,6 +877,165 @@ void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width)
} // namespace Hwreg
//===----------------------------------------------------------------------===//
+// exp tgt
+//===----------------------------------------------------------------------===//
+
+namespace Exp {
+
+struct ExpTgt {
+ StringLiteral Name;
+ unsigned Tgt;
+ unsigned MaxIndex;
+};
+
+static constexpr ExpTgt ExpTgtInfo[] = {
+ {{"null"}, ET_NULL, ET_NULL_MAX_IDX},
+ {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX},
+ {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX},
+ {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX},
+ {{"pos"}, ET_POS0, ET_POS_MAX_IDX},
+ {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
+};
+
+bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
+ for (const ExpTgt &Val : ExpTgtInfo) {
+ if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
+ Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
+ Name = Val.Name;
+ return true;
+ }
+ }
+ return false;
+}
+
+unsigned getTgtId(const StringRef Name) {
+
+ for (const ExpTgt &Val : ExpTgtInfo) {
+ if (Val.MaxIndex == 0 && Name == Val.Name)
+ return Val.Tgt;
+
+ if (Val.MaxIndex > 0 && Name.startswith(Val.Name)) {
+ StringRef Suffix = Name.drop_front(Val.Name.size());
+
+ unsigned Id;
+ if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
+ return ET_INVALID;
+
+ // Disable leading zeroes
+ if (Suffix.size() > 1 && Suffix[0] == '0')
+ return ET_INVALID;
+
+ return Val.Tgt + Id;
+ }
+ }
+ return ET_INVALID;
+}
+
+bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
+ return (Id != ET_POS4 && Id != ET_PRIM) || isGFX10Plus(STI);
+}
+
+} // namespace Exp
+
+//===----------------------------------------------------------------------===//
+// MTBUF Format
+//===----------------------------------------------------------------------===//
+
+namespace MTBUFFormat {
+
+int64_t getDfmt(const StringRef Name) {
+ for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
+ if (Name == DfmtSymbolic[Id])
+ return Id;
+ }
+ return DFMT_UNDEF;
+}
+
+StringRef getDfmtName(unsigned Id) {
+ assert(Id <= DFMT_MAX);
+ return DfmtSymbolic[Id];
+}
+
+static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
+ if (isSI(STI) || isCI(STI))
+ return NfmtSymbolicSICI;
+ if (isVI(STI) || isGFX9(STI))
+ return NfmtSymbolicVI;
+ return NfmtSymbolicGFX10;
+}
+
+int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
+ auto lookupTable = getNfmtLookupTable(STI);
+ for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
+ if (Name == lookupTable[Id])
+ return Id;
+ }
+ return NFMT_UNDEF;
+}
+
+StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
+ assert(Id <= NFMT_MAX);
+ return getNfmtLookupTable(STI)[Id];
+}
+
+bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
+ unsigned Dfmt;
+ unsigned Nfmt;
+ decodeDfmtNfmt(Id, Dfmt, Nfmt);
+ return isValidNfmt(Nfmt, STI);
+}
+
+bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
+ return !getNfmtName(Id, STI).empty();
+}
+
+int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
+ return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
+}
+
+void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
+ Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
+ Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
+}
+
+int64_t getUnifiedFormat(const StringRef Name) {
+ for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) {
+ if (Name == UfmtSymbolic[Id])
+ return Id;
+ }
+ return UFMT_UNDEF;
+}
+
+StringRef getUnifiedFormatName(unsigned Id) {
+ return isValidUnifiedFormat(Id) ? UfmtSymbolic[Id] : "";
+}
+
+bool isValidUnifiedFormat(unsigned Id) {
+ return Id <= UFMT_LAST;
+}
+
+int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt) {
+ int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
+ for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) {
+ if (Fmt == DfmtNfmt2UFmt[Id])
+ return Id;
+ }
+ return UFMT_UNDEF;
+}
+
+bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
+ return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
+}
+
+unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
+ if (isGFX10Plus(STI))
+ return UFMT_DEFAULT;
+ return DFMT_NFMT_DEFAULT;
+}
+
+} // namespace MTBUFFormat
+
+//===----------------------------------------------------------------------===//
// SendMsg
//===----------------------------------------------------------------------===//
@@ -804,7 +1056,7 @@ static bool isValidMsgId(int64_t MsgId) {
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
if (Strict) {
if (MsgId == ID_GS_ALLOC_REQ || MsgId == ID_GET_DOORBELL)
- return isGFX9(STI) || isGFX10(STI);
+ return isGFX9Plus(STI);
else
return isValidMsgId(MsgId);
} else {
@@ -919,8 +1171,12 @@ bool isShader(CallingConv::ID cc) {
}
}
+bool isGraphics(CallingConv::ID cc) {
+ return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
+}
+
bool isCompute(CallingConv::ID cc) {
- return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
+ return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
}
bool isEntryFunctionCC(CallingConv::ID CC) {
@@ -940,6 +1196,15 @@ bool isEntryFunctionCC(CallingConv::ID CC) {
}
}
+bool isModuleEntryFunctionCC(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::AMDGPU_Gfx:
+ return true;
+ default:
+ return isEntryFunctionCC(CC);
+ }
+}
+
bool hasXNACK(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}
@@ -980,10 +1245,16 @@ bool isGFX9(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}
+bool isGFX9Plus(const MCSubtargetInfo &STI) {
+ return isGFX9(STI) || isGFX10Plus(STI);
+}
+
bool isGFX10(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
}
+bool isGFX10Plus(const MCSubtargetInfo &STI) { return isGFX10(STI); }
+
bool isGCN3Encoding(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}
@@ -1017,46 +1288,46 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
CASE_CI_VI(FLAT_SCR) \
CASE_CI_VI(FLAT_SCR_LO) \
CASE_CI_VI(FLAT_SCR_HI) \
- CASE_VI_GFX9_GFX10(TTMP0) \
- CASE_VI_GFX9_GFX10(TTMP1) \
- CASE_VI_GFX9_GFX10(TTMP2) \
- CASE_VI_GFX9_GFX10(TTMP3) \
- CASE_VI_GFX9_GFX10(TTMP4) \
- CASE_VI_GFX9_GFX10(TTMP5) \
- CASE_VI_GFX9_GFX10(TTMP6) \
- CASE_VI_GFX9_GFX10(TTMP7) \
- CASE_VI_GFX9_GFX10(TTMP8) \
- CASE_VI_GFX9_GFX10(TTMP9) \
- CASE_VI_GFX9_GFX10(TTMP10) \
- CASE_VI_GFX9_GFX10(TTMP11) \
- CASE_VI_GFX9_GFX10(TTMP12) \
- CASE_VI_GFX9_GFX10(TTMP13) \
- CASE_VI_GFX9_GFX10(TTMP14) \
- CASE_VI_GFX9_GFX10(TTMP15) \
- CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \
- CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \
- CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \
- CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \
- CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \
- CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \
- CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \
- CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \
- CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \
- CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \
- CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \
- CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \
- CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
- CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
- CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
- CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
+ CASE_VI_GFX9PLUS(TTMP0) \
+ CASE_VI_GFX9PLUS(TTMP1) \
+ CASE_VI_GFX9PLUS(TTMP2) \
+ CASE_VI_GFX9PLUS(TTMP3) \
+ CASE_VI_GFX9PLUS(TTMP4) \
+ CASE_VI_GFX9PLUS(TTMP5) \
+ CASE_VI_GFX9PLUS(TTMP6) \
+ CASE_VI_GFX9PLUS(TTMP7) \
+ CASE_VI_GFX9PLUS(TTMP8) \
+ CASE_VI_GFX9PLUS(TTMP9) \
+ CASE_VI_GFX9PLUS(TTMP10) \
+ CASE_VI_GFX9PLUS(TTMP11) \
+ CASE_VI_GFX9PLUS(TTMP12) \
+ CASE_VI_GFX9PLUS(TTMP13) \
+ CASE_VI_GFX9PLUS(TTMP14) \
+ CASE_VI_GFX9PLUS(TTMP15) \
+ CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
+ CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
+ CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
+ CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
+ CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
+ CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
+ CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
+ CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
+ CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
+ CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
+ CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
+ CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
+ CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
+ CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
+ CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
+ CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
}
#define CASE_CI_VI(node) \
assert(!isSI(STI)); \
case node: return isCI(STI) ? node##_ci : node##_vi;
-#define CASE_VI_GFX9_GFX10(node) \
- case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi;
+#define CASE_VI_GFX9PLUS(node) \
+ case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
if (STI.getTargetTriple().getArch() == Triple::r600)
@@ -1065,17 +1336,17 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
}
#undef CASE_CI_VI
-#undef CASE_VI_GFX9_GFX10
+#undef CASE_VI_GFX9PLUS
#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
-#define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node;
+#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
unsigned mc2PseudoReg(unsigned Reg) {
MAP_REG2REG
}
#undef CASE_CI_VI
-#undef CASE_VI_GFX9_GFX10
+#undef CASE_VI_GFX9PLUS
#undef MAP_REG2REG
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
@@ -1311,6 +1582,7 @@ bool isArgPassedInSGPR(const Argument *A) {
case CallingConv::AMDGPU_GS:
case CallingConv::AMDGPU_PS:
case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_Gfx:
// For non-compute shaders, SGPR inputs are marked with either inreg or byval.
// Everything else is in VGPRs.
return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
@@ -1322,11 +1594,11 @@ bool isArgPassedInSGPR(const Argument *A) {
}
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
- return isGCN3Encoding(ST) || isGFX10(ST);
+ return isGCN3Encoding(ST) || isGFX10Plus(ST);
}
static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
- return isGFX9(ST) || isGFX10(ST);
+ return isGFX9Plus(ST);
}
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
@@ -1382,6 +1654,14 @@ Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
return isUInt<32>(EncodedOffset) ? Optional<int64_t>(EncodedOffset) : None;
}
+unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed) {
+ // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
+ if (AMDGPU::isGFX10(ST))
+ return Signed ? 12 : 11;
+
+ return Signed ? 13 : 12;
+}
+
// Given Imm, split it into the values to put into the SOffset and ImmOffset
// fields in an MUBUF instruction. Return false if it is not possible (due to a
// hardware bug needing a workaround).
@@ -1483,7 +1763,7 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
uint8_t NumComponents,
uint8_t NumFormat,
const MCSubtargetInfo &STI) {
- return isGFX10(STI)
+ return isGFX10Plus(STI)
? getGfx10PlusBufferFormatInfo(BitsPerComp, NumComponents,
NumFormat)
: getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
@@ -1491,9 +1771,29 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
const MCSubtargetInfo &STI) {
- return isGFX10(STI) ? getGfx10PlusBufferFormatInfo(Format)
- : getGfx9BufferFormatInfo(Format);
+ return isGFX10Plus(STI) ? getGfx10PlusBufferFormatInfo(Format)
+ : getGfx9BufferFormatInfo(Format);
}
} // namespace AMDGPU
+
+raw_ostream &operator<<(raw_ostream &OS,
+ const AMDGPU::IsaInfo::TargetIDSetting S) {
+ switch (S) {
+ case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
+ OS << "Unsupported";
+ break;
+ case (AMDGPU::IsaInfo::TargetIDSetting::Any):
+ OS << "Any";
+ break;
+ case (AMDGPU::IsaInfo::TargetIDSetting::Off):
+ OS << "Off";
+ break;
+ case (AMDGPU::IsaInfo::TargetIDSetting::On):
+ OS << "On";
+ break;
+ }
+ return OS;
+}
+
} // namespace llvm
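Aside (not part of the diff above): a minimal sketch of how the new AMDGPUTargetID helper is meant to be driven. STI is assumed to be an already-initialized MCSubtargetInfo for an AMDGPU target, and the wrapper function is hypothetical:

  #include "Utils/AMDGPUBaseInfo.h"
  #include "llvm/MC/MCSubtargetInfo.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;
  using namespace llvm::AMDGPU::IsaInfo;

  void reportTargetID(const MCSubtargetInfo &STI) { // hypothetical
    // Settings start as Any, or Unsupported if the subtarget lacks the feature.
    AMDGPUTargetID TargetID(STI);
    // Resolve explicit requests from a feature string (e.g. from -mattr).
    TargetID.setTargetIDFromFeaturesString("+xnack,-sramecc");
    // The patch also adds operator<< for TargetIDSetting, so this prints
    // "On", "Off", "Any", or "Unsupported".
    errs() << "xnack=" << TargetID.getXnackSetting()
           << " sramecc=" << TargetID.getSramEccSetting() << '\n';
  }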
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 26bb77f4b4c7..f9378693cf48 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -9,22 +9,15 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
-#include "AMDGPU.h"
-#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/IR/CallingConv.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Alignment.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetParser.h"
-#include <cstdint>
-#include <string>
-#include <utility>
+
+struct amd_kernel_code_t;
namespace llvm {
+struct Align;
class Argument;
class Function;
class GCNSubtarget;
@@ -35,8 +28,23 @@ class MCSubtargetInfo;
class StringRef;
class Triple;
+namespace amdhsa {
+struct kernel_descriptor_t;
+}
+
namespace AMDGPU {
+struct IsaVersion;
+
+/// \returns HSA OS ABI Version identification.
+Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
+/// \returns True if HSA OS ABI Version identification is 2,
+/// false otherwise.
+bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
+/// \returns True if HSA OS ABI Version identification is 3,
+/// false otherwise.
+bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
+
struct GcnBufferFormatInfo {
unsigned Format;
unsigned BitsPerComp;
@@ -61,13 +69,87 @@ enum {
TRAP_NUM_SGPRS = 16
};
+enum class TargetIDSetting {
+ Unsupported,
+ Any,
+ Off,
+ On
+};
+
+class AMDGPUTargetID {
+private:
+ TargetIDSetting XnackSetting;
+ TargetIDSetting SramEccSetting;
+
+public:
+ explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
+ ~AMDGPUTargetID() = default;
+
+ /// \return True if the current xnack setting is not "Unsupported".
+ bool isXnackSupported() const {
+ return XnackSetting != TargetIDSetting::Unsupported;
+ }
+
+ /// \returns True if the current xnack setting is "On" or "Any".
+ bool isXnackOnOrAny() const {
+ return XnackSetting == TargetIDSetting::On ||
+ XnackSetting == TargetIDSetting::Any;
+ }
+
+ /// \returns True if current xnack setting is "On" or "Off",
+ /// false otherwise.
+ bool isXnackOnOrOff() const {
+ return getXnackSetting() == TargetIDSetting::On ||
+ getXnackSetting() == TargetIDSetting::Off;
+ }
+
+ /// \returns The current xnack TargetIDSetting, possible options are
+ /// "Unsupported", "Any", "Off", and "On".
+ TargetIDSetting getXnackSetting() const {
+ return XnackSetting;
+ }
+
+ /// Sets xnack setting to \p NewXnackSetting.
+ void setXnackSetting(TargetIDSetting NewXnackSetting) {
+ XnackSetting = NewXnackSetting;
+ }
+
+ /// \return True if the current sramecc setting is not "Unsupported".
+ bool isSramEccSupported() const {
+ return SramEccSetting != TargetIDSetting::Unsupported;
+ }
+
+ /// \returns True if the current sramecc setting is "On" or "Any".
+ bool isSramEccOnOrAny() const {
+ return SramEccSetting == TargetIDSetting::On ||
+ SramEccSetting == TargetIDSetting::Any;
+ }
+
+ /// \returns True if current sramecc setting is "On" or "Off",
+ /// false otherwise.
+ bool isSramEccOnOrOff() const {
+ return getSramEccSetting() == TargetIDSetting::On ||
+ getSramEccSetting() == TargetIDSetting::Off;
+ }
+
+ /// \returns The current sramecc TargetIDSetting, possible options are
+ /// "Unsupported", "Any", "Off", and "On".
+ TargetIDSetting getSramEccSetting() const {
+ return SramEccSetting;
+ }
+
+ /// Sets sramecc setting to \p NewSramEccSetting.
+ void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
+ SramEccSetting = NewSramEccSetting;
+ }
+
+ void setTargetIDFromFeaturesString(StringRef FS);
+ void setTargetIDFromTargetIDStream(StringRef TargetID);
+};
+
/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
-/// \returns True if given subtarget \p STI supports code object version 3,
-/// false otherwise.
-bool hasCodeObjectV3(const MCSubtargetInfo *STI);
-
/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);
@@ -368,8 +450,8 @@ struct Waitcnt {
Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
: VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}
- static Waitcnt allZero(const IsaVersion &Version) {
- return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u);
+ static Waitcnt allZero(bool HasVscnt) {
+ return Waitcnt(0, 0, 0, HasVscnt ? 0 : ~0u);
}
static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }
@@ -482,6 +564,51 @@ void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
} // namespace Hwreg
+namespace Exp {
+
+bool getTgtName(unsigned Id, StringRef &Name, int &Index);
+
+LLVM_READONLY
+unsigned getTgtId(const StringRef Name);
+
+LLVM_READNONE
+bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
+
+} // namespace Exp
+
+namespace MTBUFFormat {
+
+LLVM_READNONE
+int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
+
+void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
+
+int64_t getDfmt(const StringRef Name);
+
+StringRef getDfmtName(unsigned Id);
+
+int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
+
+StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
+
+bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
+
+bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
+
+int64_t getUnifiedFormat(const StringRef Name);
+
+StringRef getUnifiedFormatName(unsigned Id);
+
+bool isValidUnifiedFormat(unsigned Val);
+
+int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt);
+
+bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
+
+unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
+
+} // namespace MTBUFFormat
+
namespace SendMsg {
LLVM_READONLY
@@ -530,11 +657,23 @@ LLVM_READNONE
bool isShader(CallingConv::ID CC);
LLVM_READNONE
+bool isGraphics(CallingConv::ID CC);
+
+LLVM_READNONE
bool isCompute(CallingConv::ID CC);
LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);
+// These functions are considered entrypoints into the current module, i.e. they
+// are allowed to be called from outside the current module. This is different
+// from isEntryFunctionCC, which is only true for functions that are entered by
+// the hardware. Module entry points include all entry functions but also
+// include functions that can be called from other functions inside or outside
+// the current module. Module entry functions are allowed to allocate LDS.
+LLVM_READNONE
+bool isModuleEntryFunctionCC(CallingConv::ID CC);
+
// FIXME: Remove this when calling conventions cleaned up
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
@@ -558,7 +697,9 @@ bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
+bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
+bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
@@ -690,6 +831,13 @@ Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
int64_t ByteOffset);
+/// For FLAT segment the offset must be positive;
+/// MSB is ignored and forced to zero.
+///
+/// \return The number of bits available for the offset field in flat
+/// instructions.
+unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed);
+
/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
@@ -735,10 +883,8 @@ struct SIModeRegisterDefaults {
SIModeRegisterDefaults(const Function &F);
static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
- const bool IsCompute = AMDGPU::isCompute(CC);
-
SIModeRegisterDefaults Mode;
- Mode.IEEE = IsCompute;
+ Mode.IEEE = !AMDGPU::isShader(CC);
return Mode;
}
@@ -805,6 +951,10 @@ struct SIModeRegisterDefaults {
};
} // end namespace AMDGPU
+
+raw_ostream &operator<<(raw_ostream &OS,
+ const AMDGPU::IsaInfo::TargetIDSetting S);
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
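Aside (not part of the diff above): how the calling-convention predicates added to this header relate to one another, restating the module-entry-point comment above as code. The wrapper function is hypothetical:

  #include "Utils/AMDGPUBaseInfo.h"
  using namespace llvm;

  void classifyCC(CallingConv::ID CC) { // hypothetical
    // isGraphics: any shader CC, plus the callable AMDGPU_Gfx CC.
    bool Graphics = AMDGPU::isGraphics(CC);
    // isEntryFunctionCC: functions entered directly by the hardware.
    bool HwEntry = AMDGPU::isEntryFunctionCC(CC);
    // isModuleEntryFunctionCC: hardware entry functions plus AMDGPU_Gfx,
    // i.e. anything callable from outside the module (may allocate LDS).
    bool ModuleEntry = AMDGPU::isModuleEntryFunctionCC(CC);
    (void)Graphics; (void)HwEntry; (void)ModuleEntry;
  }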
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index ef010a7ac157..b7dd757a8af3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -15,12 +15,10 @@
//
#include "AMDGPUPALMetadata.h"
-#include "AMDGPU.h"
-#include "AMDGPUAsmPrinter.h"
-#include "MCTargetDesc/AMDGPUTargetStreamer.h"
+#include "AMDGPUPTNote.h"
#include "SIDefines.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/IR/CallingConv.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/AMDGPUMetadata.h"
@@ -45,8 +43,11 @@ void AMDGPUPALMetadata::readFromIR(Module &M) {
}
BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA;
NamedMD = M.getNamedMetadata("amdgpu.pal.metadata");
- if (!NamedMD || !NamedMD->getNumOperands())
+ if (!NamedMD || !NamedMD->getNumOperands()) {
+ // Emit msgpack metadata by default
+ BlobType = ELF::NT_AMDGPU_METADATA;
return;
+ }
// This is the old reg=value pair format for metadata. It is a NamedMD
// containing an MDTuple containing a number of MDNodes each of which is an
// integer value, and each two integer values forms a key=value pair that we
@@ -235,6 +236,13 @@ void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) {
getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val);
}
+// Set the stack frame size of a function in the metadata.
+void AMDGPUPALMetadata::setFunctionScratchSize(const MachineFunction &MF,
+ unsigned Val) {
+ auto Node = getShaderFunction(MF.getFunction().getName());
+ Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val);
+}
+
// Set the hardware register bit in PAL metadata to enable wave32 on the
// shader of the given calling convention.
void AMDGPUPALMetadata::setWave32(unsigned CC) {
@@ -718,6 +726,30 @@ msgpack::MapDocNode AMDGPUPALMetadata::getRegisters() {
return Registers.getMap();
}
+// Reference (create if necessary) the node for the shader functions map.
+msgpack::DocNode &AMDGPUPALMetadata::refShaderFunctions() {
+ auto &N =
+ MsgPackDoc.getRoot()
+ .getMap(/*Convert=*/true)[MsgPackDoc.getNode("amdpal.pipelines")]
+ .getArray(/*Convert=*/true)[0]
+ .getMap(/*Convert=*/true)[MsgPackDoc.getNode(".shader_functions")];
+ N.getMap(/*Convert=*/true);
+ return N;
+}
+
+// Get (create if necessary) the shader functions map.
+msgpack::MapDocNode AMDGPUPALMetadata::getShaderFunctions() {
+ if (ShaderFunctions.isEmpty())
+ ShaderFunctions = refShaderFunctions();
+ return ShaderFunctions.getMap();
+}
+
+// Get (create if necessary) a function in the shader functions map.
+msgpack::MapDocNode AMDGPUPALMetadata::getShaderFunction(StringRef Name) {
+ auto Functions = getShaderFunctions();
+ return Functions[Name].getMap(/*Convert=*/true);
+}
+
// Return the PAL metadata hardware shader stage name.
static const char *getStageName(CallingConv::ID CC) {
switch (CC) {
@@ -733,6 +765,8 @@ static const char *getStageName(CallingConv::ID CC) {
return ".hs";
case CallingConv::AMDGPU_LS:
return ".ls";
+ case CallingConv::AMDGPU_Gfx:
+ llvm_unreachable("Callable shader has no hardware stage");
default:
return ".cs";
}
@@ -773,3 +807,9 @@ void AMDGPUPALMetadata::setLegacy() {
BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA;
}
+// Erase all PAL metadata.
+void AMDGPUPALMetadata::reset() {
+ MsgPackDoc.clear();
+ Registers = MsgPackDoc.getEmptyNode();
+ HwStages = MsgPackDoc.getEmptyNode();
+}
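Aside (not part of the diff above): the shape of the PAL metadata that setFunctionScratchSize() writes, following the node path built by refShaderFunctions(). The function name and size are example values, and the YAML rendering in the comment is purely illustrative:

  // Sketch only: MF is assumed to be a MachineFunction whose IR function is "foo".
  AMDGPUPALMetadata PALMetadata;
  PALMetadata.setFunctionScratchSize(MF, /*Val=*/64);
  // Resulting msgpack document, rendered as YAML:
  //   amdpal.pipelines:
  //     - .shader_functions:
  //         foo:
  //           .stack_frame_size_in_bytes: 64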
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
index 544ab669d9ae..8fa1f738487c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -13,11 +13,11 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
-
#include "llvm/BinaryFormat/MsgPackDocument.h"
namespace llvm {
+class MachineFunction;
class Module;
class StringRef;
@@ -26,6 +26,7 @@ class AMDGPUPALMetadata {
msgpack::Document MsgPackDoc;
msgpack::DocNode Registers;
msgpack::DocNode HwStages;
+ msgpack::DocNode ShaderFunctions;
public:
// Read the amdgpu.pal.metadata supplied by the frontend, ready for
@@ -76,6 +77,9 @@ public:
// Set the scratch size in the metadata.
void setScratchSize(unsigned CC, unsigned Val);
+ // Set the stack frame size of a function in the metadata.
+ void setFunctionScratchSize(const MachineFunction &MF, unsigned Val);
+
// Set the hardware register bit in PAL metadata to enable wave32 on the
// shader of the given calling convention.
void setWave32(unsigned CC);
@@ -106,6 +110,9 @@ public:
// Set legacy PAL metadata format.
void setLegacy();
+ // Erase all PAL metadata.
+ void reset();
+
private:
// Return whether the blob type is legacy PAL metadata.
bool isLegacy() const;
@@ -116,6 +123,15 @@ private:
// Get (create if necessary) the registers map.
msgpack::MapDocNode getRegisters();
+ // Reference (create if necessary) the node for the shader functions map.
+ msgpack::DocNode &refShaderFunctions();
+
+ // Get (create if necessary) the shader functions map.
+ msgpack::MapDocNode getShaderFunctions();
+
+ // Get (create if necessary) a function in the shader functions map.
+ msgpack::MapDocNode getShaderFunction(StringRef Name);
+
// Get (create if necessary) the .hardware_stages entry for the given calling
// convention.
msgpack::MapDocNode getHwStage(unsigned CC);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
index 443e2cc45ac0..45eb6c321476 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "AMDKernelCodeTUtils.h"
+#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringMap.h"
@@ -18,9 +19,6 @@
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <cstdint>
-#include <utility>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
index a87325a78df3..41d0e0d745e5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
@@ -13,7 +13,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDKERNELCODETUTILS_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDKERNELCODETUTILS_H
-#include "AMDKernelCodeT.h"
+struct amd_kernel_code_t;
namespace llvm {