diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils')
8 files changed, 875 insertions, 109 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp index 5819a621f55d..c8a85d76a55b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// #include "AMDGPUAsmUtils.h" +#include "SIDefines.h" + +#include "llvm/ADT/StringRef.h" namespace llvm { namespace AMDGPU { @@ -87,6 +90,250 @@ const char* const IdSymbolic[] = { } // namespace Hwreg +namespace MTBUFFormat { + +StringLiteral const DfmtSymbolic[] = { + "BUF_DATA_FORMAT_INVALID", + "BUF_DATA_FORMAT_8", + "BUF_DATA_FORMAT_16", + "BUF_DATA_FORMAT_8_8", + "BUF_DATA_FORMAT_32", + "BUF_DATA_FORMAT_16_16", + "BUF_DATA_FORMAT_10_11_11", + "BUF_DATA_FORMAT_11_11_10", + "BUF_DATA_FORMAT_10_10_10_2", + "BUF_DATA_FORMAT_2_10_10_10", + "BUF_DATA_FORMAT_8_8_8_8", + "BUF_DATA_FORMAT_32_32", + "BUF_DATA_FORMAT_16_16_16_16", + "BUF_DATA_FORMAT_32_32_32", + "BUF_DATA_FORMAT_32_32_32_32", + "BUF_DATA_FORMAT_RESERVED_15" +}; + +StringLiteral const NfmtSymbolicGFX10[] = { + "BUF_NUM_FORMAT_UNORM", + "BUF_NUM_FORMAT_SNORM", + "BUF_NUM_FORMAT_USCALED", + "BUF_NUM_FORMAT_SSCALED", + "BUF_NUM_FORMAT_UINT", + "BUF_NUM_FORMAT_SINT", + "", + "BUF_NUM_FORMAT_FLOAT" +}; + +StringLiteral const NfmtSymbolicSICI[] = { + "BUF_NUM_FORMAT_UNORM", + "BUF_NUM_FORMAT_SNORM", + "BUF_NUM_FORMAT_USCALED", + "BUF_NUM_FORMAT_SSCALED", + "BUF_NUM_FORMAT_UINT", + "BUF_NUM_FORMAT_SINT", + "BUF_NUM_FORMAT_SNORM_OGL", + "BUF_NUM_FORMAT_FLOAT" +}; + +StringLiteral const NfmtSymbolicVI[] = { // VI and GFX9 + "BUF_NUM_FORMAT_UNORM", + "BUF_NUM_FORMAT_SNORM", + "BUF_NUM_FORMAT_USCALED", + "BUF_NUM_FORMAT_SSCALED", + "BUF_NUM_FORMAT_UINT", + "BUF_NUM_FORMAT_SINT", + "BUF_NUM_FORMAT_RESERVED_6", + "BUF_NUM_FORMAT_FLOAT" +}; + +StringLiteral const UfmtSymbolic[] = { + "BUF_FMT_INVALID", + + "BUF_FMT_8_UNORM", + "BUF_FMT_8_SNORM", + "BUF_FMT_8_USCALED", + "BUF_FMT_8_SSCALED", + "BUF_FMT_8_UINT", + "BUF_FMT_8_SINT", + + "BUF_FMT_16_UNORM", + "BUF_FMT_16_SNORM", + "BUF_FMT_16_USCALED", + "BUF_FMT_16_SSCALED", + "BUF_FMT_16_UINT", + "BUF_FMT_16_SINT", + "BUF_FMT_16_FLOAT", + + "BUF_FMT_8_8_UNORM", + "BUF_FMT_8_8_SNORM", + "BUF_FMT_8_8_USCALED", + "BUF_FMT_8_8_SSCALED", + "BUF_FMT_8_8_UINT", + "BUF_FMT_8_8_SINT", + + "BUF_FMT_32_UINT", + "BUF_FMT_32_SINT", + "BUF_FMT_32_FLOAT", + + "BUF_FMT_16_16_UNORM", + "BUF_FMT_16_16_SNORM", + "BUF_FMT_16_16_USCALED", + "BUF_FMT_16_16_SSCALED", + "BUF_FMT_16_16_UINT", + "BUF_FMT_16_16_SINT", + "BUF_FMT_16_16_FLOAT", + + "BUF_FMT_10_11_11_UNORM", + "BUF_FMT_10_11_11_SNORM", + "BUF_FMT_10_11_11_USCALED", + "BUF_FMT_10_11_11_SSCALED", + "BUF_FMT_10_11_11_UINT", + "BUF_FMT_10_11_11_SINT", + "BUF_FMT_10_11_11_FLOAT", + + "BUF_FMT_11_11_10_UNORM", + "BUF_FMT_11_11_10_SNORM", + "BUF_FMT_11_11_10_USCALED", + "BUF_FMT_11_11_10_SSCALED", + "BUF_FMT_11_11_10_UINT", + "BUF_FMT_11_11_10_SINT", + "BUF_FMT_11_11_10_FLOAT", + + "BUF_FMT_10_10_10_2_UNORM", + "BUF_FMT_10_10_10_2_SNORM", + "BUF_FMT_10_10_10_2_USCALED", + "BUF_FMT_10_10_10_2_SSCALED", + "BUF_FMT_10_10_10_2_UINT", + "BUF_FMT_10_10_10_2_SINT", + + "BUF_FMT_2_10_10_10_UNORM", + "BUF_FMT_2_10_10_10_SNORM", + "BUF_FMT_2_10_10_10_USCALED", + "BUF_FMT_2_10_10_10_SSCALED", + "BUF_FMT_2_10_10_10_UINT", + "BUF_FMT_2_10_10_10_SINT", + + "BUF_FMT_8_8_8_8_UNORM", + "BUF_FMT_8_8_8_8_SNORM", + "BUF_FMT_8_8_8_8_USCALED", + "BUF_FMT_8_8_8_8_SSCALED", + "BUF_FMT_8_8_8_8_UINT", + "BUF_FMT_8_8_8_8_SINT", + + "BUF_FMT_32_32_UINT", + "BUF_FMT_32_32_SINT", + "BUF_FMT_32_32_FLOAT", + + "BUF_FMT_16_16_16_16_UNORM", + "BUF_FMT_16_16_16_16_SNORM", + "BUF_FMT_16_16_16_16_USCALED", + "BUF_FMT_16_16_16_16_SSCALED", + "BUF_FMT_16_16_16_16_UINT", + "BUF_FMT_16_16_16_16_SINT", + "BUF_FMT_16_16_16_16_FLOAT", + + "BUF_FMT_32_32_32_UINT", + "BUF_FMT_32_32_32_SINT", + "BUF_FMT_32_32_32_FLOAT", + "BUF_FMT_32_32_32_32_UINT", + "BUF_FMT_32_32_32_32_SINT", + "BUF_FMT_32_32_32_32_FLOAT" +}; + +unsigned const DfmtNfmt2UFmt[] = { + DFMT_INVALID | (NFMT_UNORM << NFMT_SHIFT), + + DFMT_8 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_8 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_8 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_8 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_8 | (NFMT_UINT << NFMT_SHIFT), + DFMT_8 | (NFMT_SINT << NFMT_SHIFT), + + DFMT_16 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_16 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_16 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_16 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_16 | (NFMT_UINT << NFMT_SHIFT), + DFMT_16 | (NFMT_SINT << NFMT_SHIFT), + DFMT_16 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_8_8 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_8_8 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_8_8 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_8_8 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_8_8 | (NFMT_UINT << NFMT_SHIFT), + DFMT_8_8 | (NFMT_SINT << NFMT_SHIFT), + + DFMT_32 | (NFMT_UINT << NFMT_SHIFT), + DFMT_32 | (NFMT_SINT << NFMT_SHIFT), + DFMT_32 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_16_16 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_16_16 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_16_16 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_16_16 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_16_16 | (NFMT_UINT << NFMT_SHIFT), + DFMT_16_16 | (NFMT_SINT << NFMT_SHIFT), + DFMT_16_16 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_10_11_11 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_10_11_11 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_10_11_11 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_10_11_11 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_10_11_11 | (NFMT_UINT << NFMT_SHIFT), + DFMT_10_11_11 | (NFMT_SINT << NFMT_SHIFT), + DFMT_10_11_11 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_11_11_10 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_11_11_10 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_11_11_10 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_11_11_10 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_11_11_10 | (NFMT_UINT << NFMT_SHIFT), + DFMT_11_11_10 | (NFMT_SINT << NFMT_SHIFT), + DFMT_11_11_10 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_10_10_10_2 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_10_10_10_2 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_10_10_10_2 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_10_10_10_2 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_10_10_10_2 | (NFMT_UINT << NFMT_SHIFT), + DFMT_10_10_10_2 | (NFMT_SINT << NFMT_SHIFT), + + DFMT_2_10_10_10 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_2_10_10_10 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_2_10_10_10 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_2_10_10_10 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_2_10_10_10 | (NFMT_UINT << NFMT_SHIFT), + DFMT_2_10_10_10 | (NFMT_SINT << NFMT_SHIFT), + + DFMT_8_8_8_8 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_8_8_8_8 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_8_8_8_8 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_8_8_8_8 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_8_8_8_8 | (NFMT_UINT << NFMT_SHIFT), + DFMT_8_8_8_8 | (NFMT_SINT << NFMT_SHIFT), + + DFMT_32_32 | (NFMT_UINT << NFMT_SHIFT), + DFMT_32_32 | (NFMT_SINT << NFMT_SHIFT), + DFMT_32_32 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_16_16_16_16 | (NFMT_UNORM << NFMT_SHIFT), + DFMT_16_16_16_16 | (NFMT_SNORM << NFMT_SHIFT), + DFMT_16_16_16_16 | (NFMT_USCALED << NFMT_SHIFT), + DFMT_16_16_16_16 | (NFMT_SSCALED << NFMT_SHIFT), + DFMT_16_16_16_16 | (NFMT_UINT << NFMT_SHIFT), + DFMT_16_16_16_16 | (NFMT_SINT << NFMT_SHIFT), + DFMT_16_16_16_16 | (NFMT_FLOAT << NFMT_SHIFT), + + DFMT_32_32_32 | (NFMT_UINT << NFMT_SHIFT), + DFMT_32_32_32 | (NFMT_SINT << NFMT_SHIFT), + DFMT_32_32_32 | (NFMT_FLOAT << NFMT_SHIFT), + DFMT_32_32_32_32 | (NFMT_UINT << NFMT_SHIFT), + DFMT_32_32_32_32 | (NFMT_SINT << NFMT_SHIFT), + DFMT_32_32_32_32 | (NFMT_FLOAT << NFMT_SHIFT) +}; + +} // namespace MTBUFFormat + namespace Swizzle { // This must be in sync with llvm::AMDGPU::Swizzle::Id enum members, see SIDefines.h. diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h index cd91c5f6edd5..3eb27c5e5f42 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h @@ -10,7 +10,11 @@ #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUASMUTILS_H namespace llvm { + +class StringLiteral; + namespace AMDGPU { + namespace SendMsg { // Symbolic names for the sendmsg(...) syntax. extern const char* const IdSymbolic[]; @@ -25,6 +29,17 @@ extern const char* const IdSymbolic[]; } // namespace Hwreg +namespace MTBUFFormat { + +extern StringLiteral const DfmtSymbolic[]; +extern StringLiteral const NfmtSymbolicGFX10[]; +extern StringLiteral const NfmtSymbolicSICI[]; +extern StringLiteral const NfmtSymbolicVI[]; +extern StringLiteral const UfmtSymbolic[]; +extern unsigned const DfmtNfmt2UFmt[]; + +} // namespace MTBUFFormat + namespace Swizzle { // Symbolic names for the swizzle(...) syntax. extern const char* const IdSymbolic[]; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 3df2157fc402..4c1e4dec7ecb 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -9,44 +9,28 @@ #include "AMDGPUBaseInfo.h" #include "AMDGPU.h" #include "AMDGPUAsmUtils.h" -#include "AMDGPUTargetTransformInfo.h" -#include "SIDefines.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Triple.h" +#include "AMDKernelCodeT.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/BinaryFormat/ELF.h" -#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/SubtargetFeature.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include <algorithm> -#include <cassert> -#include <cstdint> -#include <cstring> -#include <utility> - -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/Support/AMDHSAKernelDescriptor.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/TargetParser.h" #define GET_INSTRINFO_NAMED_OPS #define GET_INSTRMAP_INFO #include "AMDGPUGenInstrInfo.inc" -#undef GET_INSTRMAP_INFO -#undef GET_INSTRINFO_NAMED_OPS + +static llvm::cl::opt<unsigned> AmdhsaCodeObjectVersion( + "amdhsa-code-object-version", llvm::cl::Hidden, + llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(3)); namespace { @@ -103,6 +87,32 @@ namespace llvm { namespace AMDGPU { +Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) { + if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA) + return None; + + switch (AmdhsaCodeObjectVersion) { + case 2: + return ELF::ELFABIVERSION_AMDGPU_HSA_V2; + case 3: + return ELF::ELFABIVERSION_AMDGPU_HSA_V3; + default: + return ELF::ELFABIVERSION_AMDGPU_HSA_V3; + } +} + +bool isHsaAbiVersion2(const MCSubtargetInfo *STI) { + if (const auto &&HsaAbiVer = getHsaAbiVersion(STI)) + return HsaAbiVer.getValue() == ELF::ELFABIVERSION_AMDGPU_HSA_V2; + return false; +} + +bool isHsaAbiVersion3(const MCSubtargetInfo *STI) { + if (const auto &&HsaAbiVer = getHsaAbiVersion(STI)) + return HsaAbiVer.getValue() == ELF::ELFABIVERSION_AMDGPU_HSA_V3; + return false; +} + #define GET_MIMGBaseOpcodesTable_IMPL #define GET_MIMGDimInfoTable_IMPL #define GET_MIMGInfoTable_IMPL @@ -236,6 +246,94 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen) { namespace IsaInfo { +AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI) + : XnackSetting(TargetIDSetting::Any), SramEccSetting(TargetIDSetting::Any) { + if (!STI.getFeatureBits().test(FeatureSupportsXNACK)) + XnackSetting = TargetIDSetting::Unsupported; + if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC)) + SramEccSetting = TargetIDSetting::Unsupported; +} + +void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) { + // Check if xnack or sramecc is explicitly enabled or disabled. In the + // absence of the target features we assume we must generate code that can run + // in any environment. + SubtargetFeatures Features(FS); + Optional<bool> XnackRequested; + Optional<bool> SramEccRequested; + + for (const std::string &Feature : Features.getFeatures()) { + if (Feature == "+xnack") + XnackRequested = true; + else if (Feature == "-xnack") + XnackRequested = false; + else if (Feature == "+sramecc") + SramEccRequested = true; + else if (Feature == "-sramecc") + SramEccRequested = false; + } + + bool XnackSupported = isXnackSupported(); + bool SramEccSupported = isSramEccSupported(); + + if (XnackRequested) { + if (XnackSupported) { + XnackSetting = + *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off; + } else { + // If a specific xnack setting was requested and this GPU does not support + // xnack emit a warning. Setting will remain set to "Unsupported". + if (*XnackRequested) { + errs() << "warning: xnack 'On' was requested for a processor that does " + "not support it!\n"; + } else { + errs() << "warning: xnack 'Off' was requested for a processor that " + "does not support it!\n"; + } + } + } + + if (SramEccRequested) { + if (SramEccSupported) { + SramEccSetting = + *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off; + } else { + // If a specific sramecc setting was requested and this GPU does not + // support sramecc emit a warning. Setting will remain set to + // "Unsupported". + if (*SramEccRequested) { + errs() << "warning: sramecc 'On' was requested for a processor that " + "does not support it!\n"; + } else { + errs() << "warning: sramecc 'Off' was requested for a processor that " + "does not support it!\n"; + } + } + } +} + +static TargetIDSetting +getTargetIDSettingFromFeatureString(StringRef FeatureString) { + if (FeatureString.endswith("-")) + return TargetIDSetting::Off; + if (FeatureString.endswith("+")) + return TargetIDSetting::On; + + llvm_unreachable("Malformed feature string"); +} + +void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) { + SmallVector<StringRef, 3> TargetIDSplit; + TargetID.split(TargetIDSplit, ':'); + + for (const auto &FeatureString : TargetIDSplit) { + if (FeatureString.startswith("xnack")) + XnackSetting = getTargetIDSettingFromFeatureString(FeatureString); + if (FeatureString.startswith("sramecc")) + SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString); + } +} + void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) { auto TargetTriple = STI->getTargetTriple(); auto Version = getIsaVersion(STI->getCPU()); @@ -252,16 +350,11 @@ void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) { if (hasXNACK(*STI)) Stream << "+xnack"; if (hasSRAMECC(*STI)) - Stream << "+sram-ecc"; + Stream << "+sramecc"; Stream.flush(); } -bool hasCodeObjectV3(const MCSubtargetInfo *STI) { - return STI->getTargetTriple().getOS() == Triple::AMDHSA && - STI->getFeatureBits().test(FeatureCodeObjectV3); -} - unsigned getWavefrontSize(const MCSubtargetInfo *STI) { if (STI->getFeatureBits().test(FeatureWavefrontSize16)) return 16; @@ -284,7 +377,7 @@ unsigned getEUsPerCU(const MCSubtargetInfo *STI) { // "Per CU" really means "per whatever functional block the waves of a // workgroup must share". For gfx10 in CU mode this is the CU, which contains // two SIMDs. - if (isGFX10(*STI) && STI->getFeatureBits().test(FeatureCuMode)) + if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode)) return 2; // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains // two CUs, so a total of four SIMDs. @@ -309,7 +402,7 @@ unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) { // FIXME: Need to take scratch memory into account. - if (!isGFX10(*STI)) + if (!isGFX10Plus(*STI)) return 10; return hasGFX10_3Insts(*STI) ? 16 : 20; } @@ -459,7 +552,7 @@ unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, } unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) { - if (!isGFX10(*STI)) + if (!isGFX10Plus(*STI)) return 256; return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512; } @@ -578,7 +671,7 @@ bool isReadOnlySegment(const GlobalValue *GV) { } bool shouldEmitConstantsToTextSection(const Triple &TT) { - return TT.getOS() == Triple::AMDPAL || TT.getArch() == Triple::r600; + return TT.getArch() == Triple::r600; } int getIntegerAttribute(const Function &F, StringRef Name, int Default) { @@ -784,6 +877,165 @@ void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) } // namespace Hwreg //===----------------------------------------------------------------------===// +// exp tgt +//===----------------------------------------------------------------------===// + +namespace Exp { + +struct ExpTgt { + StringLiteral Name; + unsigned Tgt; + unsigned MaxIndex; +}; + +static constexpr ExpTgt ExpTgtInfo[] = { + {{"null"}, ET_NULL, ET_NULL_MAX_IDX}, + {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX}, + {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX}, + {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX}, + {{"pos"}, ET_POS0, ET_POS_MAX_IDX}, + {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX}, +}; + +bool getTgtName(unsigned Id, StringRef &Name, int &Index) { + for (const ExpTgt &Val : ExpTgtInfo) { + if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) { + Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt); + Name = Val.Name; + return true; + } + } + return false; +} + +unsigned getTgtId(const StringRef Name) { + + for (const ExpTgt &Val : ExpTgtInfo) { + if (Val.MaxIndex == 0 && Name == Val.Name) + return Val.Tgt; + + if (Val.MaxIndex > 0 && Name.startswith(Val.Name)) { + StringRef Suffix = Name.drop_front(Val.Name.size()); + + unsigned Id; + if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex) + return ET_INVALID; + + // Disable leading zeroes + if (Suffix.size() > 1 && Suffix[0] == '0') + return ET_INVALID; + + return Val.Tgt + Id; + } + } + return ET_INVALID; +} + +bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) { + return (Id != ET_POS4 && Id != ET_PRIM) || isGFX10Plus(STI); +} + +} // namespace Exp + +//===----------------------------------------------------------------------===// +// MTBUF Format +//===----------------------------------------------------------------------===// + +namespace MTBUFFormat { + +int64_t getDfmt(const StringRef Name) { + for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) { + if (Name == DfmtSymbolic[Id]) + return Id; + } + return DFMT_UNDEF; +} + +StringRef getDfmtName(unsigned Id) { + assert(Id <= DFMT_MAX); + return DfmtSymbolic[Id]; +} + +static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) { + if (isSI(STI) || isCI(STI)) + return NfmtSymbolicSICI; + if (isVI(STI) || isGFX9(STI)) + return NfmtSymbolicVI; + return NfmtSymbolicGFX10; +} + +int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) { + auto lookupTable = getNfmtLookupTable(STI); + for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) { + if (Name == lookupTable[Id]) + return Id; + } + return NFMT_UNDEF; +} + +StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) { + assert(Id <= NFMT_MAX); + return getNfmtLookupTable(STI)[Id]; +} + +bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) { + unsigned Dfmt; + unsigned Nfmt; + decodeDfmtNfmt(Id, Dfmt, Nfmt); + return isValidNfmt(Nfmt, STI); +} + +bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) { + return !getNfmtName(Id, STI).empty(); +} + +int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) { + return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT); +} + +void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) { + Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK; + Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK; +} + +int64_t getUnifiedFormat(const StringRef Name) { + for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) { + if (Name == UfmtSymbolic[Id]) + return Id; + } + return UFMT_UNDEF; +} + +StringRef getUnifiedFormatName(unsigned Id) { + return isValidUnifiedFormat(Id) ? UfmtSymbolic[Id] : ""; +} + +bool isValidUnifiedFormat(unsigned Id) { + return Id <= UFMT_LAST; +} + +int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt) { + int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt); + for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) { + if (Fmt == DfmtNfmt2UFmt[Id]) + return Id; + } + return UFMT_UNDEF; +} + +bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) { + return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX); +} + +unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) { + if (isGFX10Plus(STI)) + return UFMT_DEFAULT; + return DFMT_NFMT_DEFAULT; +} + +} // namespace MTBUFFormat + +//===----------------------------------------------------------------------===// // SendMsg //===----------------------------------------------------------------------===// @@ -804,7 +1056,7 @@ static bool isValidMsgId(int64_t MsgId) { bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) { if (Strict) { if (MsgId == ID_GS_ALLOC_REQ || MsgId == ID_GET_DOORBELL) - return isGFX9(STI) || isGFX10(STI); + return isGFX9Plus(STI); else return isValidMsgId(MsgId); } else { @@ -919,8 +1171,12 @@ bool isShader(CallingConv::ID cc) { } } +bool isGraphics(CallingConv::ID cc) { + return isShader(cc) || cc == CallingConv::AMDGPU_Gfx; +} + bool isCompute(CallingConv::ID cc) { - return !isShader(cc) || cc == CallingConv::AMDGPU_CS; + return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS; } bool isEntryFunctionCC(CallingConv::ID CC) { @@ -940,6 +1196,15 @@ bool isEntryFunctionCC(CallingConv::ID CC) { } } +bool isModuleEntryFunctionCC(CallingConv::ID CC) { + switch (CC) { + case CallingConv::AMDGPU_Gfx: + return true; + default: + return isEntryFunctionCC(CC); + } +} + bool hasXNACK(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureXNACK]; } @@ -980,10 +1245,16 @@ bool isGFX9(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureGFX9]; } +bool isGFX9Plus(const MCSubtargetInfo &STI) { + return isGFX9(STI) || isGFX10Plus(STI); +} + bool isGFX10(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureGFX10]; } +bool isGFX10Plus(const MCSubtargetInfo &STI) { return isGFX10(STI); } + bool isGCN3Encoding(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding]; } @@ -1017,46 +1288,46 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) { CASE_CI_VI(FLAT_SCR) \ CASE_CI_VI(FLAT_SCR_LO) \ CASE_CI_VI(FLAT_SCR_HI) \ - CASE_VI_GFX9_GFX10(TTMP0) \ - CASE_VI_GFX9_GFX10(TTMP1) \ - CASE_VI_GFX9_GFX10(TTMP2) \ - CASE_VI_GFX9_GFX10(TTMP3) \ - CASE_VI_GFX9_GFX10(TTMP4) \ - CASE_VI_GFX9_GFX10(TTMP5) \ - CASE_VI_GFX9_GFX10(TTMP6) \ - CASE_VI_GFX9_GFX10(TTMP7) \ - CASE_VI_GFX9_GFX10(TTMP8) \ - CASE_VI_GFX9_GFX10(TTMP9) \ - CASE_VI_GFX9_GFX10(TTMP10) \ - CASE_VI_GFX9_GFX10(TTMP11) \ - CASE_VI_GFX9_GFX10(TTMP12) \ - CASE_VI_GFX9_GFX10(TTMP13) \ - CASE_VI_GFX9_GFX10(TTMP14) \ - CASE_VI_GFX9_GFX10(TTMP15) \ - CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \ - CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \ - CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \ - CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \ - CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \ - CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \ - CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \ - CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \ - CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \ - CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \ - CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \ - CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \ - CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \ - CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \ - CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ - CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ + CASE_VI_GFX9PLUS(TTMP0) \ + CASE_VI_GFX9PLUS(TTMP1) \ + CASE_VI_GFX9PLUS(TTMP2) \ + CASE_VI_GFX9PLUS(TTMP3) \ + CASE_VI_GFX9PLUS(TTMP4) \ + CASE_VI_GFX9PLUS(TTMP5) \ + CASE_VI_GFX9PLUS(TTMP6) \ + CASE_VI_GFX9PLUS(TTMP7) \ + CASE_VI_GFX9PLUS(TTMP8) \ + CASE_VI_GFX9PLUS(TTMP9) \ + CASE_VI_GFX9PLUS(TTMP10) \ + CASE_VI_GFX9PLUS(TTMP11) \ + CASE_VI_GFX9PLUS(TTMP12) \ + CASE_VI_GFX9PLUS(TTMP13) \ + CASE_VI_GFX9PLUS(TTMP14) \ + CASE_VI_GFX9PLUS(TTMP15) \ + CASE_VI_GFX9PLUS(TTMP0_TTMP1) \ + CASE_VI_GFX9PLUS(TTMP2_TTMP3) \ + CASE_VI_GFX9PLUS(TTMP4_TTMP5) \ + CASE_VI_GFX9PLUS(TTMP6_TTMP7) \ + CASE_VI_GFX9PLUS(TTMP8_TTMP9) \ + CASE_VI_GFX9PLUS(TTMP10_TTMP11) \ + CASE_VI_GFX9PLUS(TTMP12_TTMP13) \ + CASE_VI_GFX9PLUS(TTMP14_TTMP15) \ + CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \ + CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \ + CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \ + CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \ + CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \ + CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \ + CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ + CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ } #define CASE_CI_VI(node) \ assert(!isSI(STI)); \ case node: return isCI(STI) ? node##_ci : node##_vi; -#define CASE_VI_GFX9_GFX10(node) \ - case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi; +#define CASE_VI_GFX9PLUS(node) \ + case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi; unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { if (STI.getTargetTriple().getArch() == Triple::r600) @@ -1065,17 +1336,17 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { } #undef CASE_CI_VI -#undef CASE_VI_GFX9_GFX10 +#undef CASE_VI_GFX9PLUS #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node; -#define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node; +#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node; unsigned mc2PseudoReg(unsigned Reg) { MAP_REG2REG } #undef CASE_CI_VI -#undef CASE_VI_GFX9_GFX10 +#undef CASE_VI_GFX9PLUS #undef MAP_REG2REG bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { @@ -1311,6 +1582,7 @@ bool isArgPassedInSGPR(const Argument *A) { case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: + case CallingConv::AMDGPU_Gfx: // For non-compute shaders, SGPR inputs are marked with either inreg or byval. // Everything else is in VGPRs. return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) || @@ -1322,11 +1594,11 @@ bool isArgPassedInSGPR(const Argument *A) { } static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) { - return isGCN3Encoding(ST) || isGFX10(ST); + return isGCN3Encoding(ST) || isGFX10Plus(ST); } static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) { - return isGFX9(ST) || isGFX10(ST); + return isGFX9Plus(ST); } bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, @@ -1382,6 +1654,14 @@ Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, return isUInt<32>(EncodedOffset) ? Optional<int64_t>(EncodedOffset) : None; } +unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed) { + // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. + if (AMDGPU::isGFX10(ST)) + return Signed ? 12 : 11; + + return Signed ? 13 : 12; +} + // Given Imm, split it into the values to put into the SOffset and ImmOffset // fields in an MUBUF instruction. Return false if it is not possible (due to a // hardware bug needing a workaround). @@ -1483,7 +1763,7 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI) { - return isGFX10(STI) + return isGFX10Plus(STI) ? getGfx10PlusBufferFormatInfo(BitsPerComp, NumComponents, NumFormat) : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat); @@ -1491,9 +1771,29 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, const MCSubtargetInfo &STI) { - return isGFX10(STI) ? getGfx10PlusBufferFormatInfo(Format) - : getGfx9BufferFormatInfo(Format); + return isGFX10Plus(STI) ? getGfx10PlusBufferFormatInfo(Format) + : getGfx9BufferFormatInfo(Format); } } // namespace AMDGPU + +raw_ostream &operator<<(raw_ostream &OS, + const AMDGPU::IsaInfo::TargetIDSetting S) { + switch (S) { + case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported): + OS << "Unsupported"; + break; + case (AMDGPU::IsaInfo::TargetIDSetting::Any): + OS << "Any"; + break; + case (AMDGPU::IsaInfo::TargetIDSetting::Off): + OS << "Off"; + break; + case (AMDGPU::IsaInfo::TargetIDSetting::On): + OS << "On"; + break; + } + return OS; +} + } // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 26bb77f4b4c7..f9378693cf48 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -9,22 +9,15 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H -#include "AMDGPU.h" -#include "AMDKernelCodeT.h" #include "SIDefines.h" #include "llvm/IR/CallingConv.h" -#include "llvm/MC/MCInstrDesc.h" -#include "llvm/Support/AMDHSAKernelDescriptor.h" #include "llvm/Support/Alignment.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetParser.h" -#include <cstdint> -#include <string> -#include <utility> + +struct amd_kernel_code_t; namespace llvm { +struct Align; class Argument; class Function; class GCNSubtarget; @@ -35,8 +28,23 @@ class MCSubtargetInfo; class StringRef; class Triple; +namespace amdhsa { +struct kernel_descriptor_t; +} + namespace AMDGPU { +struct IsaVersion; + +/// \returns HSA OS ABI Version identification. +Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI); +/// \returns True if HSA OS ABI Version identification is 2, +/// false otherwise. +bool isHsaAbiVersion2(const MCSubtargetInfo *STI); +/// \returns True if HSA OS ABI Version identification is 3, +/// false otherwise. +bool isHsaAbiVersion3(const MCSubtargetInfo *STI); + struct GcnBufferFormatInfo { unsigned Format; unsigned BitsPerComp; @@ -61,13 +69,87 @@ enum { TRAP_NUM_SGPRS = 16 }; +enum class TargetIDSetting { + Unsupported, + Any, + Off, + On +}; + +class AMDGPUTargetID { +private: + TargetIDSetting XnackSetting; + TargetIDSetting SramEccSetting; + +public: + explicit AMDGPUTargetID(const MCSubtargetInfo &STI); + ~AMDGPUTargetID() = default; + + /// \return True if the current xnack setting is not "Unsupported". + bool isXnackSupported() const { + return XnackSetting != TargetIDSetting::Unsupported; + } + + /// \returns True if the current xnack setting is "On" or "Any". + bool isXnackOnOrAny() const { + return XnackSetting == TargetIDSetting::On || + XnackSetting == TargetIDSetting::Any; + } + + /// \returns True if current xnack setting is "On" or "Off", + /// false otherwise. + bool isXnackOnOrOff() const { + return getXnackSetting() == TargetIDSetting::On || + getXnackSetting() == TargetIDSetting::Off; + } + + /// \returns The current xnack TargetIDSetting, possible options are + /// "Unsupported", "Any", "Off", and "On". + TargetIDSetting getXnackSetting() const { + return XnackSetting; + } + + /// Sets xnack setting to \p NewXnackSetting. + void setXnackSetting(TargetIDSetting NewXnackSetting) { + XnackSetting = NewXnackSetting; + } + + /// \return True if the current sramecc setting is not "Unsupported". + bool isSramEccSupported() const { + return SramEccSetting != TargetIDSetting::Unsupported; + } + + /// \returns True if the current sramecc setting is "On" or "Any". + bool isSramEccOnOrAny() const { + return SramEccSetting == TargetIDSetting::On || + SramEccSetting == TargetIDSetting::Any; + } + + /// \returns True if current sramecc setting is "On" or "Off", + /// false otherwise. + bool isSramEccOnOrOff() const { + return getSramEccSetting() == TargetIDSetting::On || + getSramEccSetting() == TargetIDSetting::Off; + } + + /// \returns The current sramecc TargetIDSetting, possible options are + /// "Unsupported", "Any", "Off", and "On". + TargetIDSetting getSramEccSetting() const { + return SramEccSetting; + } + + /// Sets sramecc setting to \p NewSramEccSetting. + void setSramEccSetting(TargetIDSetting NewSramEccSetting) { + SramEccSetting = NewSramEccSetting; + } + + void setTargetIDFromFeaturesString(StringRef FS); + void setTargetIDFromTargetIDStream(StringRef TargetID); +}; + /// Streams isa version string for given subtarget \p STI into \p Stream. void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream); -/// \returns True if given subtarget \p STI supports code object version 3, -/// false otherwise. -bool hasCodeObjectV3(const MCSubtargetInfo *STI); - /// \returns Wavefront size for given subtarget \p STI. unsigned getWavefrontSize(const MCSubtargetInfo *STI); @@ -368,8 +450,8 @@ struct Waitcnt { Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt) : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {} - static Waitcnt allZero(const IsaVersion &Version) { - return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u); + static Waitcnt allZero(bool HasVscnt) { + return Waitcnt(0, 0, 0, HasVscnt ? 0 : ~0u); } static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); } @@ -482,6 +564,51 @@ void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width); } // namespace Hwreg +namespace Exp { + +bool getTgtName(unsigned Id, StringRef &Name, int &Index); + +LLVM_READONLY +unsigned getTgtId(const StringRef Name); + +LLVM_READNONE +bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI); + +} // namespace Exp + +namespace MTBUFFormat { + +LLVM_READNONE +int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt); + +void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt); + +int64_t getDfmt(const StringRef Name); + +StringRef getDfmtName(unsigned Id); + +int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI); + +StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI); + +bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI); + +bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI); + +int64_t getUnifiedFormat(const StringRef Name); + +StringRef getUnifiedFormatName(unsigned Id); + +bool isValidUnifiedFormat(unsigned Val); + +int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt); + +bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI); + +unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI); + +} // namespace MTBUFFormat + namespace SendMsg { LLVM_READONLY @@ -530,11 +657,23 @@ LLVM_READNONE bool isShader(CallingConv::ID CC); LLVM_READNONE +bool isGraphics(CallingConv::ID CC); + +LLVM_READNONE bool isCompute(CallingConv::ID CC); LLVM_READNONE bool isEntryFunctionCC(CallingConv::ID CC); +// These functions are considered entrypoints into the current module, i.e. they +// are allowed to be called from outside the current module. This is different +// from isEntryFunctionCC, which is only true for functions that are entered by +// the hardware. Module entry points include all entry functions but also +// include functions that can be called from other functions inside or outside +// the current module. Module entry functions are allowed to allocate LDS. +LLVM_READNONE +bool isModuleEntryFunctionCC(CallingConv::ID CC); + // FIXME: Remove this when calling conventions cleaned up LLVM_READNONE inline bool isKernel(CallingConv::ID CC) { @@ -558,7 +697,9 @@ bool isSI(const MCSubtargetInfo &STI); bool isCI(const MCSubtargetInfo &STI); bool isVI(const MCSubtargetInfo &STI); bool isGFX9(const MCSubtargetInfo &STI); +bool isGFX9Plus(const MCSubtargetInfo &STI); bool isGFX10(const MCSubtargetInfo &STI); +bool isGFX10Plus(const MCSubtargetInfo &STI); bool isGCN3Encoding(const MCSubtargetInfo &STI); bool isGFX10_BEncoding(const MCSubtargetInfo &STI); bool hasGFX10_3Insts(const MCSubtargetInfo &STI); @@ -690,6 +831,13 @@ Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset); +/// For FLAT segment the offset must be positive; +/// MSB is ignored and forced to zero. +/// +/// \return The number of bits available for the offset field in flat +/// instructions. +unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed); + /// \returns true if this offset is small enough to fit in the SMRD /// offset field. \p ByteOffset should be the offset in bytes and /// not the encoded offset. @@ -735,10 +883,8 @@ struct SIModeRegisterDefaults { SIModeRegisterDefaults(const Function &F); static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) { - const bool IsCompute = AMDGPU::isCompute(CC); - SIModeRegisterDefaults Mode; - Mode.IEEE = IsCompute; + Mode.IEEE = !AMDGPU::isShader(CC); return Mode; } @@ -805,6 +951,10 @@ struct SIModeRegisterDefaults { }; } // end namespace AMDGPU + +raw_ostream &operator<<(raw_ostream &OS, + const AMDGPU::IsaInfo::TargetIDSetting S); + } // end namespace llvm #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp index ef010a7ac157..b7dd757a8af3 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp @@ -15,12 +15,10 @@ // #include "AMDGPUPALMetadata.h" -#include "AMDGPU.h" -#include "AMDGPUAsmPrinter.h" -#include "MCTargetDesc/AMDGPUTargetStreamer.h" +#include "AMDGPUPTNote.h" #include "SIDefines.h" #include "llvm/BinaryFormat/ELF.h" -#include "llvm/IR/CallingConv.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Module.h" #include "llvm/Support/AMDGPUMetadata.h" @@ -45,8 +43,11 @@ void AMDGPUPALMetadata::readFromIR(Module &M) { } BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA; NamedMD = M.getNamedMetadata("amdgpu.pal.metadata"); - if (!NamedMD || !NamedMD->getNumOperands()) + if (!NamedMD || !NamedMD->getNumOperands()) { + // Emit msgpack metadata by default + BlobType = ELF::NT_AMDGPU_METADATA; return; + } // This is the old reg=value pair format for metadata. It is a NamedMD // containing an MDTuple containing a number of MDNodes each of which is an // integer value, and each two integer values forms a key=value pair that we @@ -235,6 +236,13 @@ void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) { getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val); } +// Set the stack frame size of a function in the metadata. +void AMDGPUPALMetadata::setFunctionScratchSize(const MachineFunction &MF, + unsigned Val) { + auto Node = getShaderFunction(MF.getFunction().getName()); + Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val); +} + // Set the hardware register bit in PAL metadata to enable wave32 on the // shader of the given calling convention. void AMDGPUPALMetadata::setWave32(unsigned CC) { @@ -718,6 +726,30 @@ msgpack::MapDocNode AMDGPUPALMetadata::getRegisters() { return Registers.getMap(); } +// Reference (create if necessary) the node for the shader functions map. +msgpack::DocNode &AMDGPUPALMetadata::refShaderFunctions() { + auto &N = + MsgPackDoc.getRoot() + .getMap(/*Convert=*/true)[MsgPackDoc.getNode("amdpal.pipelines")] + .getArray(/*Convert=*/true)[0] + .getMap(/*Convert=*/true)[MsgPackDoc.getNode(".shader_functions")]; + N.getMap(/*Convert=*/true); + return N; +} + +// Get (create if necessary) the shader functions map. +msgpack::MapDocNode AMDGPUPALMetadata::getShaderFunctions() { + if (ShaderFunctions.isEmpty()) + ShaderFunctions = refShaderFunctions(); + return ShaderFunctions.getMap(); +} + +// Get (create if necessary) a function in the shader functions map. +msgpack::MapDocNode AMDGPUPALMetadata::getShaderFunction(StringRef Name) { + auto Functions = getShaderFunctions(); + return Functions[Name].getMap(/*Convert=*/true); +} + // Return the PAL metadata hardware shader stage name. static const char *getStageName(CallingConv::ID CC) { switch (CC) { @@ -733,6 +765,8 @@ static const char *getStageName(CallingConv::ID CC) { return ".hs"; case CallingConv::AMDGPU_LS: return ".ls"; + case CallingConv::AMDGPU_Gfx: + llvm_unreachable("Callable shader has no hardware stage"); default: return ".cs"; } @@ -773,3 +807,9 @@ void AMDGPUPALMetadata::setLegacy() { BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA; } +// Erase all PAL metadata. +void AMDGPUPALMetadata::reset() { + MsgPackDoc.clear(); + Registers = MsgPackDoc.getEmptyNode(); + HwStages = MsgPackDoc.getEmptyNode(); +} diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h index 544ab669d9ae..8fa1f738487c 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h @@ -13,11 +13,11 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H - #include "llvm/BinaryFormat/MsgPackDocument.h" namespace llvm { +class MachineFunction; class Module; class StringRef; @@ -26,6 +26,7 @@ class AMDGPUPALMetadata { msgpack::Document MsgPackDoc; msgpack::DocNode Registers; msgpack::DocNode HwStages; + msgpack::DocNode ShaderFunctions; public: // Read the amdgpu.pal.metadata supplied by the frontend, ready for @@ -76,6 +77,9 @@ public: // Set the scratch size in the metadata. void setScratchSize(unsigned CC, unsigned Val); + // Set the stack frame size of a function in the metadata. + void setFunctionScratchSize(const MachineFunction &MF, unsigned Val); + // Set the hardware register bit in PAL metadata to enable wave32 on the // shader of the given calling convention. void setWave32(unsigned CC); @@ -106,6 +110,9 @@ public: // Set legacy PAL metadata format. void setLegacy(); + // Erase all PAL metadata. + void reset(); + private: // Return whether the blob type is legacy PAL metadata. bool isLegacy() const; @@ -116,6 +123,15 @@ private: // Get (create if necessary) the registers map. msgpack::MapDocNode getRegisters(); + // Reference (create if necessary) the node for the shader functions map. + msgpack::DocNode &refShaderFunctions(); + + // Get (create if necessary) the shader functions map. + msgpack::MapDocNode getShaderFunctions(); + + // Get (create if necessary) a function in the shader functions map. + msgpack::MapDocNode getShaderFunction(StringRef Name); + // Get (create if necessary) the .hardware_stages entry for the given calling // convention. msgpack::MapDocNode getHwStage(unsigned CC); diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp index 443e2cc45ac0..45eb6c321476 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "AMDKernelCodeTUtils.h" +#include "AMDKernelCodeT.h" #include "SIDefines.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringMap.h" @@ -18,9 +19,6 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/Support/raw_ostream.h" -#include <cassert> -#include <cstdint> -#include <utility> using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h index a87325a78df3..41d0e0d745e5 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h @@ -13,7 +13,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDKERNELCODETUTILS_H #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDKERNELCODETUTILS_H -#include "AMDKernelCodeT.h" +struct amd_kernel_code_t; namespace llvm { |
