Diffstat (limited to 'lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h')
-rw-r--r--  lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h  203
1 file changed, 187 insertions(+), 16 deletions(-)
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 20123ed4ac81..209ef7eef749 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1,9 +1,8 @@
//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -46,6 +45,7 @@ namespace AMDGPU {
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
+#define GET_MIMGMIPMapping_DECL
#include "AMDGPUGenSearchableTables.inc"
namespace IsaInfo {
@@ -150,10 +150,18 @@ unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
/// \returns VGPR allocation granularity for given subtarget \p STI.
-unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
+///
+/// For subtargets which support it, \p EnableWavefrontSize32 should match
+/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
+unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
+ Optional<bool> EnableWavefrontSize32 = None);
/// \returns VGPR encoding granularity for given subtarget \p STI.
-unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
+///
+/// For subtargets which support it, \p EnableWavefrontSize32 should match
+/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
+unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
+ Optional<bool> EnableWavefrontSize32 = None);
/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
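For illustration, a minimal sketch of threading the wave32 flag through these queries; STI and NumVGPRs are assumed to come from the caller, and FeatureWavefrontSize32 is taken to be the subtarget feature backing the kernel-descriptor field:

    // Sketch only: on subtargets without wave32 support the Optional is
    // ignored, so leaving it as None is always safe.
    Optional<bool> Wave32;
    if (STI->getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
      Wave32 = true;
    unsigned Granule = IsaInfo::getVGPRAllocGranule(STI, Wave32);
    unsigned Blocks = IsaInfo::getNumVGPRBlocks(STI, NumVGPRs, Wave32);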
@@ -171,13 +179,20 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
-unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
+///
+/// For subtargets which support it, \p EnableWavefrontSize32 should match the
+/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
+unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
+ Optional<bool> EnableWavefrontSize32 = None);
} // end namespace IsaInfo
LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+LLVM_READONLY
+int getSOPPWithRelaxation(uint16_t Opcode);
+
struct MIMGBaseOpcodeInfo {
MIMGBaseOpcode BaseOpcode;
bool Store;
@@ -201,26 +216,53 @@ struct MIMGDimInfo {
uint8_t NumCoords;
uint8_t NumGradients;
bool DA;
+ uint8_t Encoding;
+ const char *AsmSuffix;
};
LLVM_READONLY
-const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);
+const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
+
+LLVM_READONLY
+const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
+
+LLVM_READONLY
+const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
struct MIMGLZMappingInfo {
MIMGBaseOpcode L;
MIMGBaseOpcode LZ;
};
+struct MIMGMIPMappingInfo {
+ MIMGBaseOpcode MIP;
+ MIMGBaseOpcode NONMIP;
+};
+
LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
LLVM_READONLY
+const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
+
+LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
unsigned VDataDwords, unsigned VAddrDwords);
LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
+struct MIMGInfo {
+ uint16_t Opcode;
+ uint16_t BaseOpcode;
+ uint8_t MIMGEncoding;
+ uint8_t VDataDwords;
+ uint8_t VAddrDwords;
+};
+
+LLVM_READONLY
+const MIMGInfo *getMIMGInfo(unsigned Opc);
+
LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);
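Taken together, getMIMGInfo and getMIMGOpcode let a pass re-select an image instruction with a different vdata width while keeping its base opcode and encoding. A small sketch, with Opc and NewVDataDwords as assumed inputs:

    // Sketch: find the current opcode's table entry, then ask for the
    // sibling opcode with a different vdata size; -1 means no such variant.
    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
    int NewOpc = AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding,
                                       NewVDataDwords, Info->VAddrDwords);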
@@ -245,7 +287,8 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const MCSubtargetInfo *STI);
-amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
+amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
+ const MCSubtargetInfo *STI);
bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
@@ -285,21 +328,30 @@ struct Waitcnt {
unsigned VmCnt = ~0u;
unsigned ExpCnt = ~0u;
unsigned LgkmCnt = ~0u;
+ unsigned VsCnt = ~0u;
Waitcnt() {}
- Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt)
- : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt) {}
+ Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
+ : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}
+
+ static Waitcnt allZero(const IsaVersion &Version) {
+ return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u);
+ }
+ static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }
- static Waitcnt allZero() { return Waitcnt(0, 0, 0); }
+ bool hasWait() const {
+ return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
+ }
bool dominates(const Waitcnt &Other) const {
return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
- LgkmCnt <= Other.LgkmCnt;
+ LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
}
Waitcnt combined(const Waitcnt &Other) const {
return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
- std::min(LgkmCnt, Other.LgkmCnt));
+ std::min(LgkmCnt, Other.LgkmCnt),
+ std::min(VsCnt, Other.VsCnt));
}
};
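The ~0u initializers act as "no wait" sentinels, so dominates() and combined() reduce to per-counter minima. A brief sketch of the intended use, with Version as an assumed IsaVersion for the target:

    // Sketch: A only waits on vmcnt; B waits on everything the target has.
    AMDGPU::Waitcnt A(0, ~0u, ~0u, ~0u);
    AMDGPU::Waitcnt B = AMDGPU::Waitcnt::allZero(Version);
    AMDGPU::Waitcnt C = A.combined(B); // per-field std::min
    assert(C.dominates(A) && C.dominates(B) && C.hasWait());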
@@ -332,7 +384,8 @@ unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only)
/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
/// \p Expcnt = \p Waitcnt[6:4]
-/// \p Lgkmcnt = \p Waitcnt[11:8]
+/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10 only)
+/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10+ only)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
@@ -357,7 +410,8 @@ unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
/// Waitcnt[6:4] = \p Expcnt
-/// Waitcnt[11:8] = \p Lgkmcnt
+/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10 only)
+/// Waitcnt[13:8] = \p Lgkmcnt (gfx10+ only)
/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
@@ -367,6 +421,75 @@ unsigned encodeWaitcnt(const IsaVersion &Version,
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
+namespace Hwreg {
+
+LLVM_READONLY
+int64_t getHwregId(const StringRef Name);
+
+LLVM_READNONE
+bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);
+
+LLVM_READNONE
+bool isValidHwreg(int64_t Id);
+
+LLVM_READNONE
+bool isValidHwregOffset(int64_t Offset);
+
+LLVM_READNONE
+bool isValidHwregWidth(int64_t Width);
+
+LLVM_READNONE
+uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);
+
+LLVM_READNONE
+StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
+
+void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
+
+} // namespace Hwreg
+
+namespace SendMsg {
+
+LLVM_READONLY
+int64_t getMsgId(const StringRef Name);
+
+LLVM_READONLY
+int64_t getMsgOpId(int64_t MsgId, const StringRef Name);
+
+LLVM_READNONE
+StringRef getMsgName(int64_t MsgId);
+
+LLVM_READNONE
+StringRef getMsgOpName(int64_t MsgId, int64_t OpId);
+
+LLVM_READNONE
+bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);
+
+LLVM_READNONE
+bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true);
+
+LLVM_READNONE
+bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict = true);
+
+LLVM_READNONE
+bool msgRequiresOp(int64_t MsgId);
+
+LLVM_READNONE
+bool msgSupportsStream(int64_t MsgId, int64_t OpId);
+
+void decodeMsg(unsigned Val,
+ uint16_t &MsgId,
+ uint16_t &OpId,
+ uint16_t &StreamId);
+
+LLVM_READNONE
+uint64_t encodeMsg(uint64_t MsgId,
+ uint64_t OpId,
+ uint64_t StreamId);
+
+} // namespace SendMsg
+
unsigned getInitialPSInputAddr(const Function &F);
LLVM_READNONE
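Both new namespaces above follow the same pattern: name-to-id lookup, validation, then a pack/unpack pair. A sketch under the assumption that the symbolic names match the assembler's tables (HW_REG_TRAPSTS and MSG_GS_DONE/GS_OP_NOP are illustrative, not exhaustive):

    // Sketch: pack an s_setreg hwreg(...) operand and unpack it again.
    int64_t Id = AMDGPU::Hwreg::getHwregId("HW_REG_TRAPSTS");
    if (AMDGPU::Hwreg::isValidHwreg(Id, STI)) {
      uint64_t Packed = AMDGPU::Hwreg::encodeHwreg(Id, /*Offset=*/0,
                                                   /*Width=*/32);
      unsigned DecId, DecOffset, DecWidth;
      AMDGPU::Hwreg::decodeHwreg(Packed, DecId, DecOffset, DecWidth);
    }
    // Likewise for s_sendmsg operands.
    int64_t MsgId = AMDGPU::SendMsg::getMsgId("MSG_GS_DONE");
    int64_t OpId = AMDGPU::SendMsg::getMsgOpId(MsgId, "GS_OP_NOP");
    uint64_t Imm = AMDGPU::SendMsg::encodeMsg(MsgId, OpId, /*StreamId=*/0);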
@@ -399,6 +522,7 @@ bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
+bool isGFX10(const MCSubtargetInfo &STI);
/// Is Reg - scalar register
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
@@ -440,6 +564,8 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
return 4;
case AMDGPU::OPERAND_REG_IMM_INT64:
@@ -454,6 +580,12 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
return 2;
default:
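These cases let generic code query an operand's byte size by type alone; note the packed V2INT16/V2FP16 immediates size as 2 bytes like the scalar 16-bit forms. A sketch, assuming the companion getOperandSize(const MCInstrDesc &, unsigned) overload this header also provides, with MII and Opcode as assumed inputs:

    // Sketch: byte size of every operand of an instruction.
    const MCInstrDesc &Desc = MII.get(Opcode);
    for (unsigned I = 0, E = Desc.getNumOperands(); I != E; ++I) {
      // 2 for 16-bit and packed 2x16 operands, 4/8 for 32/64-bit ones.
      unsigned Bytes = AMDGPU::getOperandSize(Desc, I);
      (void)Bytes;
    }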
@@ -496,6 +628,45 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
+
+// Track defaults for fields in the MODE register.
+struct SIModeRegisterDefaults {
+ /// Floating point opcodes that support exception flag gathering, and that
+ /// quiet and propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and
+ /// max_dx10 become IEEE 754-2008 compliant due to signaling NaN propagation
+ /// and quieting.
+ bool IEEE : 1;
+
+ /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
+ /// clamp NaN to zero; otherwise, pass NaN through.
+ bool DX10Clamp : 1;
+
+ // TODO: FP mode fields
+
+ SIModeRegisterDefaults() :
+ IEEE(true),
+ DX10Clamp(true) {}
+
+ SIModeRegisterDefaults(const Function &F);
+
+ static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
+ SIModeRegisterDefaults Mode;
+ Mode.DX10Clamp = true;
+ Mode.IEEE = AMDGPU::isCompute(CC);
+ return Mode;
+ }
+
+ bool operator ==(const SIModeRegisterDefaults Other) const {
+ return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
+ }
+
+ // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
+ // be able to override.
+ bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
+ return *this == CalleeMode;
+ }
+};
+
} // end namespace AMDGPU
} // end namespace llvm
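A closing sketch of how a pass might consume SIModeRegisterDefaults, assuming CC is a calling convention and Callee a Function whose mode-related attributes the one-argument constructor reads:

    // Sketch: compare caller and callee MODE defaults before inlining
    // (see the FIXME above about dx10-clamp being relaxable).
    AMDGPU::SIModeRegisterDefaults CallerMode =
        AMDGPU::SIModeRegisterDefaults::getDefaultForCallingConv(CC);
    AMDGPU::SIModeRegisterDefaults CalleeMode(Callee);
    bool CanInline = CallerMode.isInlineCompatible(CalleeMode);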