diff options
Diffstat (limited to 'lib/Target/AMDGPU/Utils')
-rw-r--r-- | lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 448 | ||||
-rw-r--r-- | lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 217 | ||||
-rw-r--r-- | lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h | 2 |
3 files changed, 573 insertions, 94 deletions
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 5f651d4da5d2..86095a8e1142 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1,4 +1,4 @@ -//===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information--------------===// +//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===// // // The LLVM Compiler Infrastructure // @@ -6,32 +6,42 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -#include "AMDGPUBaseInfo.h" + #include "AMDGPU.h" +#include "AMDGPUBaseInfo.h" #include "SIDefines.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <utility> -#define GET_SUBTARGETINFO_ENUM -#include "AMDGPUGenSubtargetInfo.inc" -#undef GET_SUBTARGETINFO_ENUM +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#define GET_REGINFO_ENUM -#include "AMDGPUGenRegisterInfo.inc" -#undef GET_REGINFO_ENUM #define GET_INSTRINFO_NAMED_OPS -#define GET_INSTRINFO_ENUM #include "AMDGPUGenInstrInfo.inc" #undef GET_INSTRINFO_NAMED_OPS -#undef GET_INSTRINFO_ENUM namespace { @@ -56,11 +66,11 @@ unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) { return (Src & getBitMask(Shift, Width)) >> Shift; } -/// \returns Vmcnt bit shift. -unsigned getVmcntBitShift() { return 0; } +/// \returns Vmcnt bit shift (lower bits). +unsigned getVmcntBitShiftLo() { return 0; } -/// \returns Vmcnt bit width. -unsigned getVmcntBitWidth() { return 4; } +/// \returns Vmcnt bit width (lower bits). +unsigned getVmcntBitWidthLo() { return 4; } /// \returns Expcnt bit shift. unsigned getExpcntBitShift() { return 4; } @@ -74,52 +84,224 @@ unsigned getLgkmcntBitShift() { return 8; } /// \returns Lgkmcnt bit width. unsigned getLgkmcntBitWidth() { return 4; } -} // anonymous namespace +/// \returns Vmcnt bit shift (higher bits). +unsigned getVmcntBitShiftHi() { return 14; } + +/// \returns Vmcnt bit width (higher bits). +unsigned getVmcntBitWidthHi() { return 2; } + +} // end namespace anonymous namespace llvm { namespace AMDGPU { -IsaVersion getIsaVersion(const FeatureBitset &Features) { +namespace IsaInfo { +IsaVersion getIsaVersion(const FeatureBitset &Features) { + // CI. if (Features.test(FeatureISAVersion7_0_0)) return {7, 0, 0}; - if (Features.test(FeatureISAVersion7_0_1)) return {7, 0, 1}; - if (Features.test(FeatureISAVersion7_0_2)) return {7, 0, 2}; + // VI. if (Features.test(FeatureISAVersion8_0_0)) return {8, 0, 0}; - if (Features.test(FeatureISAVersion8_0_1)) return {8, 0, 1}; - if (Features.test(FeatureISAVersion8_0_2)) return {8, 0, 2}; - if (Features.test(FeatureISAVersion8_0_3)) return {8, 0, 3}; - if (Features.test(FeatureISAVersion8_0_4)) return {8, 0, 4}; - if (Features.test(FeatureISAVersion8_1_0)) return {8, 1, 0}; - return {0, 0, 0}; + // GFX9. + if (Features.test(FeatureISAVersion9_0_0)) + return {9, 0, 0}; + if (Features.test(FeatureISAVersion9_0_1)) + return {9, 0, 1}; + + if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands)) + return {0, 0, 0}; + return {7, 0, 0}; +} + +unsigned getWavefrontSize(const FeatureBitset &Features) { + if (Features.test(FeatureWavefrontSize16)) + return 16; + if (Features.test(FeatureWavefrontSize32)) + return 32; + + return 64; +} + +unsigned getLocalMemorySize(const FeatureBitset &Features) { + if (Features.test(FeatureLocalMemorySize32768)) + return 32768; + if (Features.test(FeatureLocalMemorySize65536)) + return 65536; + + return 0; +} + +unsigned getEUsPerCU(const FeatureBitset &Features) { + return 4; +} + +unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features, + unsigned FlatWorkGroupSize) { + if (!Features.test(FeatureGCN)) + return 8; + unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize); + if (N == 1) + return 40; + N = 40 / N; + return std::min(N, 16u); +} + +unsigned getMaxWavesPerCU(const FeatureBitset &Features) { + return getMaxWavesPerEU(Features) * getEUsPerCU(Features); +} + +unsigned getMaxWavesPerCU(const FeatureBitset &Features, + unsigned FlatWorkGroupSize) { + return getWavesPerWorkGroup(Features, FlatWorkGroupSize); +} + +unsigned getMinWavesPerEU(const FeatureBitset &Features) { + return 1; +} + +unsigned getMaxWavesPerEU(const FeatureBitset &Features) { + if (!Features.test(FeatureGCN)) + return 8; + // FIXME: Need to take scratch memory into account. + return 10; +} + +unsigned getMaxWavesPerEU(const FeatureBitset &Features, + unsigned FlatWorkGroupSize) { + return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize), + getEUsPerCU(Features)) / getEUsPerCU(Features); +} + +unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) { + return 1; +} + +unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) { + return 2048; +} + +unsigned getWavesPerWorkGroup(const FeatureBitset &Features, + unsigned FlatWorkGroupSize) { + return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) / + getWavefrontSize(Features); +} + +unsigned getSGPRAllocGranule(const FeatureBitset &Features) { + IsaVersion Version = getIsaVersion(Features); + if (Version.Major >= 8) + return 16; + return 8; +} + +unsigned getSGPREncodingGranule(const FeatureBitset &Features) { + return 8; +} + +unsigned getTotalNumSGPRs(const FeatureBitset &Features) { + IsaVersion Version = getIsaVersion(Features); + if (Version.Major >= 8) + return 800; + return 512; +} + +unsigned getAddressableNumSGPRs(const FeatureBitset &Features) { + if (Features.test(FeatureSGPRInitBug)) + return FIXED_NUM_SGPRS_FOR_INIT_BUG; + + IsaVersion Version = getIsaVersion(Features); + if (Version.Major >= 8) + return 102; + return 104; +} + +unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { + assert(WavesPerEU != 0); + + if (WavesPerEU >= getMaxWavesPerEU(Features)) + return 0; + unsigned MinNumSGPRs = + alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1), + getSGPRAllocGranule(Features)) + 1; + return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features)); +} + +unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, + bool Addressable) { + assert(WavesPerEU != 0); + + IsaVersion Version = getIsaVersion(Features); + unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU, + getSGPRAllocGranule(Features)); + unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features); + if (Version.Major >= 8 && !Addressable) + AddressableNumSGPRs = 112; + return std::min(MaxNumSGPRs, AddressableNumSGPRs); +} + +unsigned getVGPRAllocGranule(const FeatureBitset &Features) { + return 4; +} + +unsigned getVGPREncodingGranule(const FeatureBitset &Features) { + return getVGPRAllocGranule(Features); +} + +unsigned getTotalNumVGPRs(const FeatureBitset &Features) { + return 256; } +unsigned getAddressableNumVGPRs(const FeatureBitset &Features) { + return getTotalNumVGPRs(Features); +} + +unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { + assert(WavesPerEU != 0); + + if (WavesPerEU >= getMaxWavesPerEU(Features)) + return 0; + unsigned MinNumVGPRs = + alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1), + getVGPRAllocGranule(Features)) + 1; + return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features)); +} + +unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { + assert(WavesPerEU != 0); + + unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU, + getVGPRAllocGranule(Features)); + unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features); + return std::min(MaxNumVGPRs, AddressableNumVGPRs); +} + +} // end namespace IsaInfo + void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features) { - - IsaVersion ISA = getIsaVersion(Features); + IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features); memset(&Header, 0, sizeof(Header)); Header.amd_kernel_code_version_major = 1; - Header.amd_kernel_code_version_minor = 0; + Header.amd_kernel_code_version_minor = 1; Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU Header.amd_machine_version_major = ISA.Major; Header.amd_machine_version_minor = ISA.Minor; @@ -127,6 +309,11 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, Header.kernel_code_entry_byte_offset = sizeof(Header); // wavefront_size is specified as a power of 2: 2^6 = 64 threads. Header.wavefront_size = 6; + + // If the code object does not support indirect functions, then the value must + // be 0xffffffff. + Header.call_convention = -1; + // These alignment values are specified in powers of two, so alignment = // 2^n. The minimum alignment is 2^4 = 16. Header.kernarg_segment_alignment = 4; @@ -161,16 +348,16 @@ MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) { ELF::SHF_AMDGPU_HSA_AGENT); } -bool isGroupSegment(const GlobalValue *GV) { - return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) { + return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS; } -bool isGlobalSegment(const GlobalValue *GV) { - return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; +bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) { + return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS; } -bool isReadOnlySegment(const GlobalValue *GV) { - return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; +bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) { + return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS; } bool shouldEmitConstantsToTextSection(const Triple &TT) { @@ -208,7 +395,7 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F, return Default; } if (Strs.second.trim().getAsInteger(0, Ints.second)) { - if (!OnlyFirstRequired || Strs.second.trim().size()) { + if (!OnlyFirstRequired || !Strs.second.trim().empty()) { Ctx.emitError("can't parse second integer attribute " + Name); return Default; } @@ -217,57 +404,84 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F, return Ints; } -unsigned getWaitcntBitMask(IsaVersion Version) { - unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth()); - unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth()); - unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth()); - return Vmcnt | Expcnt | Lgkmcnt; -} +unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) { + unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1; + if (Version.Major < 9) + return VmcntLo; -unsigned getVmcntBitMask(IsaVersion Version) { - return (1 << getVmcntBitWidth()) - 1; + unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo(); + return VmcntLo | VmcntHi; } -unsigned getExpcntBitMask(IsaVersion Version) { +unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) { return (1 << getExpcntBitWidth()) - 1; } -unsigned getLgkmcntBitMask(IsaVersion Version) { +unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) { return (1 << getLgkmcntBitWidth()) - 1; } -unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) { - return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth()); +unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) { + unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo()); + unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth()); + unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth()); + unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt; + if (Version.Major < 9) + return Waitcnt; + + unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi()); + return Waitcnt | VmcntHi; +} + +unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { + unsigned VmcntLo = + unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); + if (Version.Major < 9) + return VmcntLo; + + unsigned VmcntHi = + unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi()); + VmcntHi <<= getVmcntBitWidthLo(); + return VmcntLo | VmcntHi; } -unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) { +unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); } -unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) { +unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); } -void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt, +void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) { Vmcnt = decodeVmcnt(Version, Waitcnt); Expcnt = decodeExpcnt(Version, Waitcnt); Lgkmcnt = decodeLgkmcnt(Version, Waitcnt); } -unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) { - return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth()); +unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, + unsigned Vmcnt) { + Waitcnt = + packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); + if (Version.Major < 9) + return Waitcnt; + + Vmcnt >>= getVmcntBitWidthLo(); + return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi()); } -unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) { +unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, + unsigned Expcnt) { return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); } -unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) { +unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, + unsigned Lgkmcnt) { return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); } -unsigned encodeWaitcnt(IsaVersion Version, +unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) { unsigned Waitcnt = getWaitcntBitMask(Version); Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt); @@ -296,6 +510,10 @@ bool isCompute(CallingConv::ID cc) { return !isShader(cc) || cc == CallingConv::AMDGPU_CS; } +bool isEntryFunctionCC(CallingConv::ID CC) { + return true; +} + bool isSI(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands]; } @@ -327,13 +545,34 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { return Reg; } +unsigned mc2PseudoReg(unsigned Reg) { + switch (Reg) { + case AMDGPU::FLAT_SCR_ci: + case AMDGPU::FLAT_SCR_vi: + return FLAT_SCR; + + case AMDGPU::FLAT_SCR_LO_ci: + case AMDGPU::FLAT_SCR_LO_vi: + return AMDGPU::FLAT_SCR_LO; + + case AMDGPU::FLAT_SCR_HI_ci: + case AMDGPU::FLAT_SCR_HI_vi: + return AMDGPU::FLAT_SCR_HI; + + default: + return Reg; + } +} + bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { + assert(OpNo < Desc.NumOperands); unsigned OpType = Desc.OpInfo[OpNo].OperandType; return OpType >= AMDGPU::OPERAND_SRC_FIRST && OpType <= AMDGPU::OPERAND_SRC_LAST; } bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) { + assert(OpNo < Desc.NumOperands); unsigned OpType = Desc.OpInfo[OpNo].OperandType; switch (OpType) { case AMDGPU::OPERAND_REG_IMM_FP32: @@ -342,6 +581,7 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) { case AMDGPU::OPERAND_REG_INLINE_C_FP32: case AMDGPU::OPERAND_REG_INLINE_C_FP64: case AMDGPU::OPERAND_REG_INLINE_C_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: return true; default: return false; @@ -349,6 +589,7 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) { } bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) { + assert(OpNo < Desc.NumOperands); unsigned OpType = Desc.OpInfo[OpNo].OperandType; return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST && OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST; @@ -392,6 +633,7 @@ unsigned getRegBitWidth(const MCRegisterClass &RC) { unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, unsigned OpNo) { + assert(OpNo < Desc.NumOperands); unsigned RCID = Desc.OpInfo[OpNo].RegClass; return getRegBitWidth(MRI->getRegClass(RCID)) / 8; } @@ -440,7 +682,8 @@ bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) { } bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) { - assert(HasInv2Pi); + if (!HasInv2Pi) + return false; if (Literal >= -16 && Literal <= 64) return true; @@ -457,5 +700,92 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) { Val == 0x3118; // 1/2pi } -} // End namespace AMDGPU -} // End namespace llvm +bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) { + assert(HasInv2Pi); + + int16_t Lo16 = static_cast<int16_t>(Literal); + int16_t Hi16 = static_cast<int16_t>(Literal >> 16); + return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi); +} + +bool isUniformMMO(const MachineMemOperand *MMO) { + const Value *Ptr = MMO->getValue(); + // UndefValue means this is a load of a kernel input. These are uniform. + // Sometimes LDS instructions have constant pointers. + // If Ptr is null, then that means this mem operand contains a + // PseudoSourceValue like GOT. + if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || + isa<Constant>(Ptr) || isa<GlobalValue>(Ptr)) + return true; + + const Instruction *I = dyn_cast<Instruction>(Ptr); + return I && I->getMetadata("amdgpu.uniform"); +} + +int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { + if (isSI(ST) || isCI(ST)) + return ByteOffset >> 2; + + return ByteOffset; +} + +bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { + int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset); + return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) : + isUInt<20>(EncodedOffset); +} +} // end namespace AMDGPU + +} // end namespace llvm + +const unsigned AMDGPUAS::MAX_COMMON_ADDRESS; +const unsigned AMDGPUAS::GLOBAL_ADDRESS; +const unsigned AMDGPUAS::LOCAL_ADDRESS; +const unsigned AMDGPUAS::PARAM_D_ADDRESS; +const unsigned AMDGPUAS::PARAM_I_ADDRESS; +const unsigned AMDGPUAS::CONSTANT_BUFFER_0; +const unsigned AMDGPUAS::CONSTANT_BUFFER_1; +const unsigned AMDGPUAS::CONSTANT_BUFFER_2; +const unsigned AMDGPUAS::CONSTANT_BUFFER_3; +const unsigned AMDGPUAS::CONSTANT_BUFFER_4; +const unsigned AMDGPUAS::CONSTANT_BUFFER_5; +const unsigned AMDGPUAS::CONSTANT_BUFFER_6; +const unsigned AMDGPUAS::CONSTANT_BUFFER_7; +const unsigned AMDGPUAS::CONSTANT_BUFFER_8; +const unsigned AMDGPUAS::CONSTANT_BUFFER_9; +const unsigned AMDGPUAS::CONSTANT_BUFFER_10; +const unsigned AMDGPUAS::CONSTANT_BUFFER_11; +const unsigned AMDGPUAS::CONSTANT_BUFFER_12; +const unsigned AMDGPUAS::CONSTANT_BUFFER_13; +const unsigned AMDGPUAS::CONSTANT_BUFFER_14; +const unsigned AMDGPUAS::CONSTANT_BUFFER_15; +const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE; + +namespace llvm { +namespace AMDGPU { + +AMDGPUAS getAMDGPUAS(Triple T) { + auto Env = T.getEnvironmentName(); + AMDGPUAS AS; + if (Env == "amdgiz" || Env == "amdgizcl") { + AS.FLAT_ADDRESS = 0; + AS.PRIVATE_ADDRESS = 5; + AS.REGION_ADDRESS = 4; + } + else { + AS.FLAT_ADDRESS = 4; + AS.PRIVATE_ADDRESS = 0; + AS.REGION_ADDRESS = 5; + } + return AS; +} + +AMDGPUAS getAMDGPUAS(const TargetMachine &M) { + return getAMDGPUAS(M.getTargetTriple()); +} + +AMDGPUAS getAMDGPUAS(const Module &M) { + return getAMDGPUAS(Triple(M.getTargetTriple())); +} +} // namespace AMDGPU +} // namespace llvm diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index ea5fc366d205..d6c836eb748b 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1,4 +1,4 @@ -//===-- AMDGPUBaseInfo.h - Top level definitions for AMDGPU -----*- C++ -*-===// +//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,39 +10,143 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H +#include "AMDGPU.h" #include "AMDKernelCodeT.h" -#include "llvm/IR/CallingConv.h" - #include "SIDefines.h" - -#define GET_INSTRINFO_OPERAND_ENUM -#include "AMDGPUGenInstrInfo.inc" -#undef GET_INSTRINFO_OPERAND_ENUM +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include <cstdint> +#include <utility> namespace llvm { class FeatureBitset; class Function; class GlobalValue; +class MachineMemOperand; class MCContext; -class MCInstrDesc; class MCRegisterClass; class MCRegisterInfo; class MCSection; class MCSubtargetInfo; +class Triple; namespace AMDGPU { +namespace IsaInfo { -LLVM_READONLY -int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); +enum { + // The closed Vulkan driver sets 96, which limits the wave count to 8 but + // doesn't spill SGPRs as much as when 80 is set. + FIXED_NUM_SGPRS_FOR_INIT_BUG = 96 +}; +/// \brief Instruction set architecture version. struct IsaVersion { unsigned Major; unsigned Minor; unsigned Stepping; }; +/// \returns Isa version for given subtarget \p Features. IsaVersion getIsaVersion(const FeatureBitset &Features); + +/// \returns Wavefront size for given subtarget \p Features. +unsigned getWavefrontSize(const FeatureBitset &Features); + +/// \returns Local memory size in bytes for given subtarget \p Features. +unsigned getLocalMemorySize(const FeatureBitset &Features); + +/// \returns Number of execution units per compute unit for given subtarget \p +/// Features. +unsigned getEUsPerCU(const FeatureBitset &Features); + +/// \returns Maximum number of work groups per compute unit for given subtarget +/// \p Features and limited by given \p FlatWorkGroupSize. +unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features, + unsigned FlatWorkGroupSize); + +/// \returns Maximum number of waves per compute unit for given subtarget \p +/// Features without any kind of limitation. +unsigned getMaxWavesPerCU(const FeatureBitset &Features); + +/// \returns Maximum number of waves per compute unit for given subtarget \p +/// Features and limited by given \p FlatWorkGroupSize. +unsigned getMaxWavesPerCU(const FeatureBitset &Features, + unsigned FlatWorkGroupSize); + +/// \returns Minimum number of waves per execution unit for given subtarget \p +/// Features. +unsigned getMinWavesPerEU(const FeatureBitset &Features); + +/// \returns Maximum number of waves per execution unit for given subtarget \p +/// Features without any kind of limitation. +unsigned getMaxWavesPerEU(const FeatureBitset &Features); + +/// \returns Maximum number of waves per execution unit for given subtarget \p +/// Features and limited by given \p FlatWorkGroupSize. +unsigned getMaxWavesPerEU(const FeatureBitset &Features, + unsigned FlatWorkGroupSize); + +/// \returns Minimum flat work group size for given subtarget \p Features. +unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features); + +/// \returns Maximum flat work group size for given subtarget \p Features. +unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features); + +/// \returns Number of waves per work group for given subtarget \p Features and +/// limited by given \p FlatWorkGroupSize. +unsigned getWavesPerWorkGroup(const FeatureBitset &Features, + unsigned FlatWorkGroupSize); + +/// \returns SGPR allocation granularity for given subtarget \p Features. +unsigned getSGPRAllocGranule(const FeatureBitset &Features); + +/// \returns SGPR encoding granularity for given subtarget \p Features. +unsigned getSGPREncodingGranule(const FeatureBitset &Features); + +/// \returns Total number of SGPRs for given subtarget \p Features. +unsigned getTotalNumSGPRs(const FeatureBitset &Features); + +/// \returns Addressable number of SGPRs for given subtarget \p Features. +unsigned getAddressableNumSGPRs(const FeatureBitset &Features); + +/// \returns Minimum number of SGPRs that meets the given number of waves per +/// execution unit requirement for given subtarget \p Features. +unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU); + +/// \returns Maximum number of SGPRs that meets the given number of waves per +/// execution unit requirement for given subtarget \p Features. +unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, + bool Addressable); + +/// \returns VGPR allocation granularity for given subtarget \p Features. +unsigned getVGPRAllocGranule(const FeatureBitset &Features); + +/// \returns VGPR encoding granularity for given subtarget \p Features. +unsigned getVGPREncodingGranule(const FeatureBitset &Features); + +/// \returns Total number of VGPRs for given subtarget \p Features. +unsigned getTotalNumVGPRs(const FeatureBitset &Features); + +/// \returns Addressable number of VGPRs for given subtarget \p Features. +unsigned getAddressableNumVGPRs(const FeatureBitset &Features); + +/// \returns Minimum number of VGPRs that meets given number of waves per +/// execution unit requirement for given subtarget \p Features. +unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU); + +/// \returns Maximum number of VGPRs that meets given number of waves per +/// execution unit requirement for given subtarget \p Features. +unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU); + +} // end namespace IsaInfo + +LLVM_READONLY +int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); + void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features); MCSection *getHSATextSection(MCContext &Ctx); @@ -53,9 +157,9 @@ MCSection *getHSADataGlobalProgramSection(MCContext &Ctx); MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx); -bool isGroupSegment(const GlobalValue *GV); -bool isGlobalSegment(const GlobalValue *GV); -bool isReadOnlySegment(const GlobalValue *GV); +bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS); +bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS); +bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS); /// \returns True if constants should be emitted to .text section for given /// target triple \p TT, false otherwise. @@ -83,64 +187,89 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F, std::pair<int, int> Default, bool OnlyFirstRequired = false); -/// \returns Waitcnt bit mask for given isa \p Version. -unsigned getWaitcntBitMask(IsaVersion Version); - /// \returns Vmcnt bit mask for given isa \p Version. -unsigned getVmcntBitMask(IsaVersion Version); +unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version); /// \returns Expcnt bit mask for given isa \p Version. -unsigned getExpcntBitMask(IsaVersion Version); +unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version); /// \returns Lgkmcnt bit mask for given isa \p Version. -unsigned getLgkmcntBitMask(IsaVersion Version); +unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version); + +/// \returns Waitcnt bit mask for given isa \p Version. +unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version); /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. -unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt); +unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt); /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. -unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt); +unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt); /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version. -unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt); +unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt); /// \brief Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and /// \p Lgkmcnt respectively. /// /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows: -/// \p Vmcnt = \p Waitcnt[3:0] +/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only) +/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only) /// \p Expcnt = \p Waitcnt[6:4] /// \p Lgkmcnt = \p Waitcnt[11:8] -void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt, +void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt); /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version. -unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt); +unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, + unsigned Vmcnt); /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version. -unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt); +unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, + unsigned Expcnt); /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version. -unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt); +unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, + unsigned Lgkmcnt); /// \brief Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa /// \p Version. /// /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows: -/// Waitcnt[3:0] = \p Vmcnt -/// Waitcnt[6:4] = \p Expcnt -/// Waitcnt[11:8] = \p Lgkmcnt +/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only) +/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only) +/// Waitcnt[6:4] = \p Expcnt +/// Waitcnt[11:8] = \p Lgkmcnt +/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only) /// /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given /// isa \p Version. -unsigned encodeWaitcnt(IsaVersion Version, +unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt); unsigned getInitialPSInputAddr(const Function &F); -bool isShader(CallingConv::ID cc); -bool isCompute(CallingConv::ID cc); +LLVM_READNONE +bool isShader(CallingConv::ID CC); + +LLVM_READNONE +bool isCompute(CallingConv::ID CC); + +LLVM_READNONE +bool isEntryFunctionCC(CallingConv::ID CC); + +// FIXME: Remove this when calling conventions cleaned up +LLVM_READNONE +inline bool isKernel(CallingConv::ID CC) { + switch (CC) { + case CallingConv::C: + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: + return true; + default: + return false; + } +} bool isSI(const MCSubtargetInfo &STI); bool isCI(const MCSubtargetInfo &STI); @@ -150,6 +279,10 @@ bool isVI(const MCSubtargetInfo &STI); /// \p STI otherwise return \p Reg. unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI); +/// \brief Convert hardware register \p Reg to a pseudo register +LLVM_READNONE +unsigned mc2PseudoReg(unsigned Reg); + /// \brief Can this operand also contain immediate values? bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo); @@ -188,6 +321,8 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) { case AMDGPU::OPERAND_REG_IMM_FP16: case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: return 2; default: @@ -210,7 +345,21 @@ bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi); LLVM_READNONE bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi); +LLVM_READNONE +bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi); + +bool isUniformMMO(const MachineMemOperand *MMO); + +/// \returns The encoding that will be used for \p ByteOffset in the SMRD +/// offset field. +int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset); + +/// \returns true if this offset is small enough to fit in the SMRD +/// offset field. \p ByteOffset should be the offset in bytes and +/// not the encoded offset. +bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset); + } // end namespace AMDGPU } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H diff --git a/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h index c55eaab077d1..991408c81c92 100644 --- a/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h +++ b/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h @@ -87,7 +87,7 @@ COMPPGM1(enable_ieee_mode, compute_pgm_rsrc1_ieee_mode, IEEE // TODO: cdbg_user COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN), COMPPGM2(user_sgpr_count, compute_pgm_rsrc2_user_sgpr, USER_SGPR), -// TODO: enable_trap_handler +COMPPGM2(enable_trap_handler, compute_pgm_rsrc2_trap_handler, TRAP_HANDLER), COMPPGM2(enable_sgpr_workgroup_id_x, compute_pgm_rsrc2_tgid_x_en, TGID_X_EN), COMPPGM2(enable_sgpr_workgroup_id_y, compute_pgm_rsrc2_tgid_y_en, TGID_Y_EN), COMPPGM2(enable_sgpr_workgroup_id_z, compute_pgm_rsrc2_tgid_z_en, TGID_Z_EN), |