Diffstat (limited to 'llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h')
-rw-r--r--  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 691
1 file changed, 691 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
new file mode 100644
index 000000000000..f78dadd447ff
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -0,0 +1,691 @@
+//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
+#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
+
+#include "AMDGPU.h"
+#include "AMDKernelCodeT.h"
+#include "SIDefines.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/AMDHSAKernelDescriptor.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetParser.h"
+#include <cstdint>
+#include <string>
+#include <utility>
+
+namespace llvm {
+
+class Argument;
+class AMDGPUSubtarget;
+class FeatureBitset;
+class Function;
+class GCNSubtarget;
+class GlobalValue;
+class MCContext;
+class MCRegisterClass;
+class MCRegisterInfo;
+class MCSection;
+class MCSubtargetInfo;
+class MachineMemOperand;
+class Triple;
+
+namespace AMDGPU {
+
+#define GET_MIMGBaseOpcode_DECL
+#define GET_MIMGDim_DECL
+#define GET_MIMGEncoding_DECL
+#define GET_MIMGLZMapping_DECL
+#define GET_MIMGMIPMapping_DECL
+#include "AMDGPUGenSearchableTables.inc"
+
+namespace IsaInfo {
+
+enum {
+  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
+  // doesn't spill SGPRs as much as when 80 is set.
+  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
+  TRAP_NUM_SGPRS = 16
+};
+
+/// Streams isa version string for given subtarget \p STI into \p Stream.
+void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
+
+/// \returns True if given subtarget \p STI supports code object version 3,
+/// false otherwise.
+bool hasCodeObjectV3(const MCSubtargetInfo *STI);
+
+/// \returns Wavefront size for given subtarget \p STI.
+unsigned getWavefrontSize(const MCSubtargetInfo *STI);
+
+/// \returns Local memory size in bytes for given subtarget \p STI.
+unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
+
+/// \returns Number of execution units per compute unit for given subtarget \p
+/// STI.
+unsigned getEUsPerCU(const MCSubtargetInfo *STI);
+
+/// \returns Maximum number of work groups per compute unit for given subtarget
+/// \p STI and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
+                               unsigned FlatWorkGroupSize);
+
+/// \returns Maximum number of waves per compute unit for given subtarget \p
+/// STI without any kind of limitation.
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
+
+/// \returns Maximum number of waves per compute unit for given subtarget \p
+/// STI and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
+                          unsigned FlatWorkGroupSize);
+
+/// \returns Minimum number of waves per execution unit for given subtarget \p
+/// STI.
+unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
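The queries above are thin wrappers over subtarget facts. A hypothetical usage sketch, assuming a valid AMDGPU `MCSubtargetInfo` created elsewhere and that the header is reachable on the include path:

```cpp
#include "AMDGPUBaseInfo.h" // path simplified for illustration
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical helper (not part of this header): dump a few IsaInfo queries.
void dumpIsaInfo(const llvm::MCSubtargetInfo *STI) {
  using namespace llvm::AMDGPU::IsaInfo;
  llvm::errs() << "wave size:    " << getWavefrontSize(STI) << "\n"
               << "LDS bytes:    " << getLocalMemorySize(STI) << "\n"
               << "EUs per CU:   " << getEUsPerCU(STI) << "\n"
               << "min waves/EU: " << getMinWavesPerEU(STI) << "\n";
}
```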
+/// \returns Maximum number of waves per execution unit for given subtarget \p
+/// STI without any kind of limitation.
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
+
+/// \returns Maximum number of waves per execution unit for given subtarget \p
+/// STI and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
+                          unsigned FlatWorkGroupSize);
+
+/// \returns Minimum flat work group size for given subtarget \p STI.
+unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
+
+/// \returns Maximum flat work group size for given subtarget \p STI.
+unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
+
+/// \returns Number of waves per work group for given subtarget \p STI and
+/// limited by given \p FlatWorkGroupSize.
+unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
+                              unsigned FlatWorkGroupSize);
+
+/// \returns SGPR allocation granularity for given subtarget \p STI.
+unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
+
+/// \returns SGPR encoding granularity for given subtarget \p STI.
+unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
+
+/// \returns Total number of SGPRs for given subtarget \p STI.
+unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
+
+/// \returns Addressable number of SGPRs for given subtarget \p STI.
+unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
+
+/// \returns Minimum number of SGPRs that meets the given number of waves per
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
+
+/// \returns Maximum number of SGPRs that meets the given number of waves per
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
+                        bool Addressable);
+
+/// \returns Number of extra SGPRs implicitly required by given subtarget \p
+/// STI when the given special registers are used.
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
+                          bool FlatScrUsed, bool XNACKUsed);
+
+/// \returns Number of extra SGPRs implicitly required by given subtarget \p
+/// STI when the given special registers are used. XNACK is inferred from
+/// \p STI.
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
+                          bool FlatScrUsed);
+
+/// \returns Number of SGPR blocks needed for given subtarget \p STI when
+/// \p NumSGPRs are used. \p NumSGPRs should already include any special
+/// register counts.
+unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
+
+/// \returns VGPR allocation granularity for given subtarget \p STI.
+///
+/// For subtargets which support it, \p EnableWavefrontSize32 should match
+/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
+unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
+                             Optional<bool> EnableWavefrontSize32 = None);
+
+/// \returns VGPR encoding granularity for given subtarget \p STI.
+///
+/// For subtargets which support it, \p EnableWavefrontSize32 should match
+/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
+unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
+                                Optional<bool> EnableWavefrontSize32 = None);
+
+/// \returns Total number of VGPRs for given subtarget \p STI.
+unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
+
+/// \returns Addressable number of VGPRs for given subtarget \p STI.
+unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
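getNumSGPRBlocks above (and its VGPR counterpart just below) reports granule counts. A minimal standalone sketch, assuming the kernel-descriptor convention that the granulated count fields store granules minus one; that convention is an assumption here, not something these declarations pin down:

```cpp
#include <algorithm>

// Sketch of minus-one granule encoding: round the register count up to the
// granule, count granules, subtract one. The real helpers take the granule
// from the subtarget instead of as a parameter.
unsigned numRegBlocksSketch(unsigned NumRegs, unsigned Granule) {
  unsigned Rounded = ((std::max(1u, NumRegs) + Granule - 1) / Granule) * Granule;
  return Rounded / Granule - 1; // e.g. 10 SGPRs, granule 8 -> 2 granules -> 1
}
```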
+/// \returns Minimum number of VGPRs that meets given number of waves per
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
+
+/// \returns Maximum number of VGPRs that meets given number of waves per
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
+
+/// \returns Number of VGPR blocks needed for given subtarget \p STI when
+/// \p NumVGPRs are used.
+///
+/// For subtargets which support it, \p EnableWavefrontSize32 should match the
+/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
+unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
+                          Optional<bool> EnableWavefrontSize32 = None);
+
+} // end namespace IsaInfo
+
+LLVM_READONLY
+int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+
+LLVM_READONLY
+int getSOPPWithRelaxation(uint16_t Opcode);
+
+struct MIMGBaseOpcodeInfo {
+  MIMGBaseOpcode BaseOpcode;
+  bool Store;
+  bool Atomic;
+  bool AtomicX2;
+  bool Sampler;
+  bool Gather4;
+
+  uint8_t NumExtraArgs;
+  bool Gradients;
+  bool Coordinates;
+  bool LodOrClampOrMip;
+  bool HasD16;
+};
+
+LLVM_READONLY
+const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
+
+struct MIMGDimInfo {
+  MIMGDim Dim;
+  uint8_t NumCoords;
+  uint8_t NumGradients;
+  bool DA;
+  uint8_t Encoding;
+  const char *AsmSuffix;
+};
+
+LLVM_READONLY
+const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
+
+LLVM_READONLY
+const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
+
+LLVM_READONLY
+const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
+
+struct MIMGLZMappingInfo {
+  MIMGBaseOpcode L;
+  MIMGBaseOpcode LZ;
+};
+
+struct MIMGMIPMappingInfo {
+  MIMGBaseOpcode MIP;
+  MIMGBaseOpcode NONMIP;
+};
+
+LLVM_READONLY
+const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
+
+LLVM_READONLY
+const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
+
+LLVM_READONLY
+int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
+                  unsigned VDataDwords, unsigned VAddrDwords);
+
+LLVM_READONLY
+int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
+
+struct MIMGInfo {
+  uint16_t Opcode;
+  uint16_t BaseOpcode;
+  uint8_t MIMGEncoding;
+  uint8_t VDataDwords;
+  uint8_t VAddrDwords;
+};
+
+LLVM_READONLY
+const MIMGInfo *getMIMGInfo(unsigned Opc);
+
+LLVM_READONLY
+int getMTBUFBaseOpcode(unsigned Opc);
+
+LLVM_READONLY
+int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
+
+LLVM_READONLY
+int getMTBUFElements(unsigned Opc);
+
+LLVM_READONLY
+bool getMTBUFHasVAddr(unsigned Opc);
+
+LLVM_READONLY
+bool getMTBUFHasSrsrc(unsigned Opc);
+
+LLVM_READONLY
+bool getMTBUFHasSoffset(unsigned Opc);
+
+LLVM_READONLY
+int getMUBUFBaseOpcode(unsigned Opc);
+
+LLVM_READONLY
+int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
+
+LLVM_READONLY
+int getMUBUFElements(unsigned Opc);
+
+LLVM_READONLY
+bool getMUBUFHasVAddr(unsigned Opc);
+
+LLVM_READONLY
+bool getMUBUFHasSrsrc(unsigned Opc);
+
+LLVM_READONLY
+bool getMUBUFHasSoffset(unsigned Opc);
+
+LLVM_READONLY
+int getMCOpcode(uint16_t Opcode, unsigned Gen);
+
+void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
+                               const MCSubtargetInfo *STI);
+
+amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
+    const MCSubtargetInfo *STI);
+
+bool isGroupSegment(const GlobalValue *GV);
+bool isGlobalSegment(const GlobalValue *GV);
+bool isReadOnlySegment(const GlobalValue *GV);
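The MIMG tables support round-tripping between a concrete opcode and its (base opcode, encoding, dwords) key. A hypothetical rewrite helper built only from the declarations above; `shrinkVData` is an illustrative name, not part of this API:

```cpp
#include "AMDGPUBaseInfo.h" // path simplified for illustration

// Sketch: pick the MIMG opcode with the same base opcode, encoding, and
// address size as Opc but a different result size. Returns -1 when either
// the table lookup fails or no such encoding exists.
int shrinkVData(unsigned Opc, unsigned NewVDataDwords) {
  using namespace llvm::AMDGPU;
  const MIMGInfo *Info = getMIMGInfo(Opc);
  if (!Info)
    return -1;
  return getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, NewVDataDwords,
                       Info->VAddrDwords);
}
```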
+/// \returns True if constants should be emitted to .text section for given
+/// target triple \p TT, false otherwise.
+bool shouldEmitConstantsToTextSection(const Triple &TT);
+
+/// \returns Integer value requested using \p F's \p Name attribute.
+///
+/// \returns \p Default if attribute is not present.
+///
+/// \returns \p Default and emits error if requested value cannot be converted
+/// to integer.
+int getIntegerAttribute(const Function &F, StringRef Name, int Default);
+
+/// \returns A pair of integer values requested using \p F's \p Name attribute
+/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
+/// is false).
+///
+/// \returns \p Default if attribute is not present.
+///
+/// \returns \p Default and emits error if one of the requested values cannot be
+/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
+/// not present.
+std::pair<int, int> getIntegerPairAttribute(const Function &F,
+                                            StringRef Name,
+                                            std::pair<int, int> Default,
+                                            bool OnlyFirstRequired = false);
+
+/// Represents the counter values to wait for in an s_waitcnt instruction.
+///
+/// Large values (including the maximum possible integer) can be used to
+/// represent "don't care" waits.
+struct Waitcnt {
+  unsigned VmCnt = ~0u;
+  unsigned ExpCnt = ~0u;
+  unsigned LgkmCnt = ~0u;
+  unsigned VsCnt = ~0u;
+
+  Waitcnt() {}
+  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
+      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}
+
+  static Waitcnt allZero(const IsaVersion &Version) {
+    return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u);
+  }
+  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }
+
+  bool hasWait() const {
+    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
+  }
+
+  bool dominates(const Waitcnt &Other) const {
+    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
+           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
+  }
+
+  Waitcnt combined(const Waitcnt &Other) const {
+    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
+                   std::min(LgkmCnt, Other.LgkmCnt),
+                   std::min(VsCnt, Other.VsCnt));
+  }
+};
+
+/// \returns Vmcnt bit mask for given isa \p Version.
+unsigned getVmcntBitMask(const IsaVersion &Version);
+
+/// \returns Expcnt bit mask for given isa \p Version.
+unsigned getExpcntBitMask(const IsaVersion &Version);
+
+/// \returns Lgkmcnt bit mask for given isa \p Version.
+unsigned getLgkmcntBitMask(const IsaVersion &Version);
+
+/// \returns Waitcnt bit mask for given isa \p Version.
+unsigned getWaitcntBitMask(const IsaVersion &Version);
+
+/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
+unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
+
+/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
+unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
+
+/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
+unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
+
+/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
+/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
+/// \p Lgkmcnt respectively.
+///
+/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
+///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
+///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
+///     \p Expcnt = \p Waitcnt[6:4]
+///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
+///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
+void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
+                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
+
+Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
+
+/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
+unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
+                     unsigned Vmcnt);
+
+/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
+unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
+                      unsigned Expcnt);
+
+/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
+unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
+                       unsigned Lgkmcnt);
+
+/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
+/// \p Version.
+///
+/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
+///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
+///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
+///     Waitcnt[6:4]   = \p Expcnt
+///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
+///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
+///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
+///
+/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
+/// isa \p Version.
+unsigned encodeWaitcnt(const IsaVersion &Version,
+                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
+
+unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
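A worked example of the pre-gfx9 layout from the \details comments above. This is a standalone re-derivation for illustration only; the real encoders also respect the per-version bit masks:

```cpp
#include <cstdio>

// Pre-gfx9 packing per the documented layout:
// vmcnt -> bits [3:0], expcnt -> bits [6:4], lgkmcnt -> bits [11:8].
unsigned encodeWaitcntPreGfx9(unsigned Vmcnt, unsigned Expcnt,
                              unsigned Lgkmcnt) {
  return (Vmcnt & 0xf) | ((Expcnt & 0x7) << 4) | ((Lgkmcnt & 0xf) << 8);
}

int main() {
  // vmcnt=3, expcnt=1, lgkmcnt=2 packs to 0x213.
  std::printf("0x%x\n", encodeWaitcntPreGfx9(3, 1, 2));
}
```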
+namespace Hwreg {
+
+LLVM_READONLY
+int64_t getHwregId(const StringRef Name);
+
+LLVM_READNONE
+bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);
+
+LLVM_READNONE
+bool isValidHwreg(int64_t Id);
+
+LLVM_READNONE
+bool isValidHwregOffset(int64_t Offset);
+
+LLVM_READNONE
+bool isValidHwregWidth(int64_t Width);
+
+LLVM_READNONE
+uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);
+
+LLVM_READNONE
+StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
+
+void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
+
+} // namespace Hwreg
+
+namespace SendMsg {
+
+LLVM_READONLY
+int64_t getMsgId(const StringRef Name);
+
+LLVM_READONLY
+int64_t getMsgOpId(int64_t MsgId, const StringRef Name);
+
+LLVM_READNONE
+StringRef getMsgName(int64_t MsgId);
+
+LLVM_READNONE
+StringRef getMsgOpName(int64_t MsgId, int64_t OpId);
+
+LLVM_READNONE
+bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);
+
+LLVM_READNONE
+bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true);
+
+LLVM_READNONE
+bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict = true);
+
+LLVM_READNONE
+bool msgRequiresOp(int64_t MsgId);
+
+LLVM_READNONE
+bool msgSupportsStream(int64_t MsgId, int64_t OpId);
+
+void decodeMsg(unsigned Val,
+               uint16_t &MsgId,
+               uint16_t &OpId,
+               uint16_t &StreamId);
+
+LLVM_READNONE
+uint64_t encodeMsg(uint64_t MsgId,
+                   uint64_t OpId,
+                   uint64_t StreamId);
+
+} // namespace SendMsg
+
+unsigned getInitialPSInputAddr(const Function &F);
+
+LLVM_READNONE
+bool isShader(CallingConv::ID CC);
+
+LLVM_READNONE
+bool isCompute(CallingConv::ID CC);
+
+LLVM_READNONE
+bool isEntryFunctionCC(CallingConv::ID CC);
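For intuition, a sketch of the immediate layout the Hwreg helpers above presumably pack; the bit positions come from the ISA documentation for s_getreg_b32/s_setreg_b32 rather than from this header, so treat the correspondence as an assumption:

```cpp
#include <cstdint>

// Assumed s_getreg/s_setreg immediate layout: hwreg id in bits [5:0],
// bit offset in [10:6], width-minus-one in [15:11].
uint64_t encodeHwregSketch(uint64_t Id, uint64_t Offset, uint64_t Width) {
  return Id | (Offset << 6) | ((Width - 1) << 11);
}
// e.g. reading all 32 bits of hwreg 1: id=1, offset=0, width=32 -> 0xf801.
```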
+// FIXME: Remove this when calling conventions cleaned up
+LLVM_READNONE
+inline bool isKernel(CallingConv::ID CC) {
+  switch (CC) {
+  case CallingConv::AMDGPU_KERNEL:
+  case CallingConv::SPIR_KERNEL:
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool hasXNACK(const MCSubtargetInfo &STI);
+bool hasSRAMECC(const MCSubtargetInfo &STI);
+bool hasMIMG_R128(const MCSubtargetInfo &STI);
+bool hasPackedD16(const MCSubtargetInfo &STI);
+
+bool isSI(const MCSubtargetInfo &STI);
+bool isCI(const MCSubtargetInfo &STI);
+bool isVI(const MCSubtargetInfo &STI);
+bool isGFX9(const MCSubtargetInfo &STI);
+bool isGFX10(const MCSubtargetInfo &STI);
+
+/// Is \p Reg a scalar register?
+bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
+
+/// Do registers \p Reg0 and \p Reg1 intersect?
+bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
+
+/// If \p Reg is a pseudo reg, return the correct hardware register given
+/// \p STI, otherwise return \p Reg.
+unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
+
+/// Convert hardware register \p Reg to a pseudo register.
+LLVM_READNONE
+unsigned mc2PseudoReg(unsigned Reg);
+
+/// Can this operand also contain immediate values?
+bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
+
+/// Is this a floating-point operand?
+bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
+
+/// Does this operand support only inlinable literals?
+bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
+
+/// Get the size in bits of a register from the register class \p RC.
+unsigned getRegBitWidth(unsigned RCID);
+
+/// Get the size in bits of a register from the register class \p RC.
+unsigned getRegBitWidth(const MCRegisterClass &RC);
+
+/// Get the size of a register operand.
+unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
+                           unsigned OpNo);
+
+LLVM_READNONE
+inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
+  switch (OpInfo.OperandType) {
+  case AMDGPU::OPERAND_REG_IMM_INT32:
+  case AMDGPU::OPERAND_REG_IMM_FP32:
+  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+    return 4;
+
+  case AMDGPU::OPERAND_REG_IMM_INT64:
+  case AMDGPU::OPERAND_REG_IMM_FP64:
+  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+    return 8;
+
+  case AMDGPU::OPERAND_REG_IMM_INT16:
+  case AMDGPU::OPERAND_REG_IMM_FP16:
+  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2INT16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP16:
+    return 2;
+
+  default:
+    llvm_unreachable("unhandled operand type");
+  }
+}
+
+LLVM_READNONE
+inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
+  return getOperandSize(Desc.OpInfo[OpNo]);
+}
+
+/// Is this literal inlinable?
+LLVM_READNONE
+bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
+
+LLVM_READNONE
+bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
+
+LLVM_READNONE
+bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
+
+LLVM_READNONE
+bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
+
+bool isArgPassedInSGPR(const Argument *Arg);
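The inline-literal predicates encode the well-known AMDGPU inline-constant set: integers in [-16, 64] plus the bit patterns of a few floating-point constants (0.0, ±0.5, ±1.0, ±2.0, ±4.0, and 1/(2*pi) only when \p HasInv2Pi is set). A deliberately partial sketch for intuition, not the implementation:

```cpp
#include <cstdint>

// Illustrative subset of the 32-bit check: small signed integers are always
// inlinable; a handful of float bit patterns (only 1.0f shown for brevity)
// are as well. The literal is compared as raw bits, not as a float value.
bool isInlinableLiteral32Sketch(int32_t Literal) {
  if (Literal >= -16 && Literal <= 64)
    return true;
  return static_cast<uint32_t>(Literal) == 0x3f800000u; // bits of 1.0f
}
```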
+/// \returns The encoding that will be used for \p ByteOffset in the SMRD
+/// offset field.
+int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+
+/// \returns true if this offset is small enough to fit in the SMRD
+/// offset field. \p ByteOffset should be the offset in bytes and
+/// not the encoded offset.
+bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+
+bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
+                      const GCNSubtarget *Subtarget, uint32_t Align = 4);
+
+/// \returns true if the intrinsic is divergent.
+bool isIntrinsicSourceOfDivergence(unsigned IntrID);
+
+// Track defaults for fields in the MODE register.
+struct SIModeRegisterDefaults {
+  /// Floating point opcodes that support exception flag gathering quiet and
+  /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
+  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
+  /// quieting.
+  bool IEEE : 1;
+
+  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
+  /// clamp NaN to zero; otherwise, pass NaN through.
+  bool DX10Clamp : 1;
+
+  // TODO: FP mode fields
+
+  SIModeRegisterDefaults() :
+    IEEE(true),
+    DX10Clamp(true) {}
+
+  SIModeRegisterDefaults(const Function &F);
+
+  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
+    SIModeRegisterDefaults Mode;
+    Mode.DX10Clamp = true;
+    Mode.IEEE = AMDGPU::isCompute(CC);
+    return Mode;
+  }
+
+  bool operator ==(const SIModeRegisterDefaults Other) const {
+    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
+  }
+
+  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode
+  // should be able to override.
+  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
+    return *this == CalleeMode;
+  }
+};
+
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
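A small usage sketch of the mode defaults above. The IEEE values follow from getDefaultForCallingConv's use of AMDGPU::isCompute, which is assumed here to return true for AMDGPU_KERNEL and false for the AMDGPU_PS shader convention:

```cpp
#include "AMDGPUBaseInfo.h" // path simplified for illustration
#include <cassert>

// Compute entry points default to IEEE mode; graphics shaders do not.
// DX10Clamp defaults to true for both.
void checkModeDefaults() {
  using llvm::AMDGPU::SIModeRegisterDefaults;
  SIModeRegisterDefaults Kernel =
      SIModeRegisterDefaults::getDefaultForCallingConv(
          llvm::CallingConv::AMDGPU_KERNEL);
  SIModeRegisterDefaults Pixel =
      SIModeRegisterDefaults::getDefaultForCallingConv(
          llvm::CallingConv::AMDGPU_PS);
  assert(Kernel.IEEE && Kernel.DX10Clamp);
  assert(!Pixel.IEEE && Pixel.DX10Clamp);
  assert(!Kernel.isInlineCompatible(Pixel)); // differing IEEE blocks inlining
}
```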
