diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp | 77 |
1 files changed, 65 insertions, 12 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp index 125f006a1d1d..50f8ad4433c6 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp @@ -35,6 +35,7 @@ #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFunctionPass.h" using namespace llvm; @@ -42,11 +43,39 @@ using namespace llvm; namespace { +// A clause length of 64 instructions could be encoded in the s_clause +// instruction, but the hardware documentation (at least for GFX11) says that +// 63 is the maximum allowed. +constexpr unsigned MaxInstructionsInClause = 63; + enum HardClauseType { + // For GFX10: + // Texture, buffer, global or scratch memory instructions. HARDCLAUSE_VMEM, // Flat (not global or scratch) memory instructions. HARDCLAUSE_FLAT, + + // For GFX11: + + // Texture memory instructions. + HARDCLAUSE_MIMG_LOAD, + HARDCLAUSE_MIMG_STORE, + HARDCLAUSE_MIMG_ATOMIC, + HARDCLAUSE_MIMG_SAMPLE, + // Buffer, global or scratch memory instructions. + HARDCLAUSE_VMEM_LOAD, + HARDCLAUSE_VMEM_STORE, + HARDCLAUSE_VMEM_ATOMIC, + // Flat (not global or scratch) memory instructions. + HARDCLAUSE_FLAT_LOAD, + HARDCLAUSE_FLAT_STORE, + HARDCLAUSE_FLAT_ATOMIC, + // BVH instructions. + HARDCLAUSE_BVH, + + // Common: + // Instructions that access LDS. HARDCLAUSE_LDS, // Scalar memory instructions. @@ -78,19 +107,43 @@ public: } HardClauseType getHardClauseType(const MachineInstr &MI) { - - // On current architectures we only get a benefit from clausing loads. - if (MI.mayLoad()) { - if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) { - if (ST->hasNSAClauseBug()) { + if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) { + if (ST->getGeneration() == AMDGPUSubtarget::GFX10) { + if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) { + if (ST->hasNSAClauseBug()) { + const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); + if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA) + return HARDCLAUSE_ILLEGAL; + } + return HARDCLAUSE_VMEM; + } + if (SIInstrInfo::isFLAT(MI)) + return HARDCLAUSE_FLAT; + } else { + assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11); + if (SIInstrInfo::isMIMG(MI)) { const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); - if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA) - return HARDCLAUSE_ILLEGAL; + const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo = + AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); + if (BaseInfo->BVH) + return HARDCLAUSE_BVH; + if (BaseInfo->Sampler) + return HARDCLAUSE_MIMG_SAMPLE; + return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC + : HARDCLAUSE_MIMG_LOAD + : HARDCLAUSE_MIMG_STORE; + } + if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) { + return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC + : HARDCLAUSE_VMEM_LOAD + : HARDCLAUSE_VMEM_STORE; + } + if (SIInstrInfo::isFLAT(MI)) { + return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC + : HARDCLAUSE_FLAT_LOAD + : HARDCLAUSE_FLAT_STORE; } - return HARDCLAUSE_VMEM; } - if (SIInstrInfo::isFLAT(MI)) - return HARDCLAUSE_FLAT; // TODO: LDS if (SIInstrInfo::isSMRD(MI)) return HARDCLAUSE_SMEM; @@ -129,7 +182,7 @@ public: bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) { if (CI.First == CI.Last) return false; - assert(CI.Length <= 64 && "Hard clause is too long!"); + assert(CI.Length <= MaxInstructionsInClause && "Hard clause is too long!"); auto &MBB = *CI.First->getParent(); auto ClauseMI = @@ -170,7 +223,7 @@ public: } } - if (CI.Length == 64 || + if (CI.Length == MaxInstructionsInClause || (CI.Length && Type != HARDCLAUSE_INTERNAL && Type != HARDCLAUSE_IGNORE && (Type != CI.Type || |