aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/Utils
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/Utils')
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp448
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h217
-rw-r--r--lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h2
3 files changed, 573 insertions, 94 deletions
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 5f651d4da5d2..86095a8e1142 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1,4 +1,4 @@
-//===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information--------------===//
+//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,32 +6,42 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUBaseInfo.h"
+
#include "AMDGPU.h"
+#include "AMDGPUBaseInfo.h"
#include "SIDefines.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <utility>
-#define GET_SUBTARGETINFO_ENUM
-#include "AMDGPUGenSubtargetInfo.inc"
-#undef GET_SUBTARGETINFO_ENUM
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#define GET_REGINFO_ENUM
-#include "AMDGPUGenRegisterInfo.inc"
-#undef GET_REGINFO_ENUM
#define GET_INSTRINFO_NAMED_OPS
-#define GET_INSTRINFO_ENUM
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_NAMED_OPS
-#undef GET_INSTRINFO_ENUM
namespace {
@@ -56,11 +66,11 @@ unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
return (Src & getBitMask(Shift, Width)) >> Shift;
}
-/// \returns Vmcnt bit shift.
-unsigned getVmcntBitShift() { return 0; }
+/// \returns Vmcnt bit shift (lower bits).
+unsigned getVmcntBitShiftLo() { return 0; }
-/// \returns Vmcnt bit width.
-unsigned getVmcntBitWidth() { return 4; }
+/// \returns Vmcnt bit width (lower bits).
+unsigned getVmcntBitWidthLo() { return 4; }
/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }
@@ -74,52 +84,224 @@ unsigned getLgkmcntBitShift() { return 8; }
/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }
-} // anonymous namespace
+/// \returns Vmcnt bit shift (higher bits).
+unsigned getVmcntBitShiftHi() { return 14; }
+
+/// \returns Vmcnt bit width (higher bits).
+unsigned getVmcntBitWidthHi() { return 2; }
+
+} // end namespace anonymous
namespace llvm {
namespace AMDGPU {
-IsaVersion getIsaVersion(const FeatureBitset &Features) {
+namespace IsaInfo {
+IsaVersion getIsaVersion(const FeatureBitset &Features) {
+ // CI.
if (Features.test(FeatureISAVersion7_0_0))
return {7, 0, 0};
-
if (Features.test(FeatureISAVersion7_0_1))
return {7, 0, 1};
-
if (Features.test(FeatureISAVersion7_0_2))
return {7, 0, 2};
+ // VI.
if (Features.test(FeatureISAVersion8_0_0))
return {8, 0, 0};
-
if (Features.test(FeatureISAVersion8_0_1))
return {8, 0, 1};
-
if (Features.test(FeatureISAVersion8_0_2))
return {8, 0, 2};
-
if (Features.test(FeatureISAVersion8_0_3))
return {8, 0, 3};
-
if (Features.test(FeatureISAVersion8_0_4))
return {8, 0, 4};
-
if (Features.test(FeatureISAVersion8_1_0))
return {8, 1, 0};
- return {0, 0, 0};
+ // GFX9.
+ if (Features.test(FeatureISAVersion9_0_0))
+ return {9, 0, 0};
+ if (Features.test(FeatureISAVersion9_0_1))
+ return {9, 0, 1};
+
+ if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
+ return {0, 0, 0};
+ return {7, 0, 0};
+}
+
+unsigned getWavefrontSize(const FeatureBitset &Features) {
+ if (Features.test(FeatureWavefrontSize16))
+ return 16;
+ if (Features.test(FeatureWavefrontSize32))
+ return 32;
+
+ return 64;
+}
+
+unsigned getLocalMemorySize(const FeatureBitset &Features) {
+ if (Features.test(FeatureLocalMemorySize32768))
+ return 32768;
+ if (Features.test(FeatureLocalMemorySize65536))
+ return 65536;
+
+ return 0;
+}
+
+unsigned getEUsPerCU(const FeatureBitset &Features) {
+ return 4;
+}
+
+unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize) {
+ if (!Features.test(FeatureGCN))
+ return 8;
+ unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+ if (N == 1)
+ return 40;
+ N = 40 / N;
+ return std::min(N, 16u);
+}
+
+unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
+ return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
+}
+
+unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize) {
+ return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+}
+
+unsigned getMinWavesPerEU(const FeatureBitset &Features) {
+ return 1;
+}
+
+unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
+ if (!Features.test(FeatureGCN))
+ return 8;
+ // FIXME: Need to take scratch memory into account.
+ return 10;
+}
+
+unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize) {
+ return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
+ getEUsPerCU(Features)) / getEUsPerCU(Features);
+}
+
+unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
+ return 1;
+}
+
+unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
+ return 2048;
+}
+
+unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize) {
+ return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
+ getWavefrontSize(Features);
+}
+
+unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
+ IsaVersion Version = getIsaVersion(Features);
+ if (Version.Major >= 8)
+ return 16;
+ return 8;
+}
+
+unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
+ return 8;
+}
+
+unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
+ IsaVersion Version = getIsaVersion(Features);
+ if (Version.Major >= 8)
+ return 800;
+ return 512;
+}
+
+unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
+ if (Features.test(FeatureSGPRInitBug))
+ return FIXED_NUM_SGPRS_FOR_INIT_BUG;
+
+ IsaVersion Version = getIsaVersion(Features);
+ if (Version.Major >= 8)
+ return 102;
+ return 104;
+}
+
+unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+ assert(WavesPerEU != 0);
+
+ if (WavesPerEU >= getMaxWavesPerEU(Features))
+ return 0;
+ unsigned MinNumSGPRs =
+ alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
+ getSGPRAllocGranule(Features)) + 1;
+ return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
+}
+
+unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+ bool Addressable) {
+ assert(WavesPerEU != 0);
+
+ IsaVersion Version = getIsaVersion(Features);
+ unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
+ getSGPRAllocGranule(Features));
+ unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
+ if (Version.Major >= 8 && !Addressable)
+ AddressableNumSGPRs = 112;
+ return std::min(MaxNumSGPRs, AddressableNumSGPRs);
+}
+
+unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
+ return 4;
+}
+
+unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
+ return getVGPRAllocGranule(Features);
+}
+
+unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
+ return 256;
}
+unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
+ return getTotalNumVGPRs(Features);
+}
+
+unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+ assert(WavesPerEU != 0);
+
+ if (WavesPerEU >= getMaxWavesPerEU(Features))
+ return 0;
+ unsigned MinNumVGPRs =
+ alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
+ getVGPRAllocGranule(Features)) + 1;
+ return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
+}
+
+unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+ assert(WavesPerEU != 0);
+
+ unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
+ getVGPRAllocGranule(Features));
+ unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
+ return std::min(MaxNumVGPRs, AddressableNumVGPRs);
+}
+
+} // end namespace IsaInfo
+
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const FeatureBitset &Features) {
-
- IsaVersion ISA = getIsaVersion(Features);
+ IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
memset(&Header, 0, sizeof(Header));
Header.amd_kernel_code_version_major = 1;
- Header.amd_kernel_code_version_minor = 0;
+ Header.amd_kernel_code_version_minor = 1;
Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
Header.amd_machine_version_major = ISA.Major;
Header.amd_machine_version_minor = ISA.Minor;
@@ -127,6 +309,11 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
Header.kernel_code_entry_byte_offset = sizeof(Header);
// wavefront_size is specified as a power of 2: 2^6 = 64 threads.
Header.wavefront_size = 6;
+
+ // If the code object does not support indirect functions, then the value must
+ // be 0xffffffff.
+ Header.call_convention = -1;
+
// These alignment values are specified in powers of two, so alignment =
// 2^n. The minimum alignment is 2^4 = 16.
Header.kernarg_segment_alignment = 4;
@@ -161,16 +348,16 @@ MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
ELF::SHF_AMDGPU_HSA_AGENT);
}
-bool isGroupSegment(const GlobalValue *GV) {
- return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
+ return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
}
-bool isGlobalSegment(const GlobalValue *GV) {
- return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
+ return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
}
-bool isReadOnlySegment(const GlobalValue *GV) {
- return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
+bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
+ return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
}
bool shouldEmitConstantsToTextSection(const Triple &TT) {
@@ -208,7 +395,7 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
return Default;
}
if (Strs.second.trim().getAsInteger(0, Ints.second)) {
- if (!OnlyFirstRequired || Strs.second.trim().size()) {
+ if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
Ctx.emitError("can't parse second integer attribute " + Name);
return Default;
}
@@ -217,57 +404,84 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
return Ints;
}
-unsigned getWaitcntBitMask(IsaVersion Version) {
- unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
- unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
- unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
- return Vmcnt | Expcnt | Lgkmcnt;
-}
+unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
+ unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
+ if (Version.Major < 9)
+ return VmcntLo;
-unsigned getVmcntBitMask(IsaVersion Version) {
- return (1 << getVmcntBitWidth()) - 1;
+ unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
+ return VmcntLo | VmcntHi;
}
-unsigned getExpcntBitMask(IsaVersion Version) {
+unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
return (1 << getExpcntBitWidth()) - 1;
}
-unsigned getLgkmcntBitMask(IsaVersion Version) {
+unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
return (1 << getLgkmcntBitWidth()) - 1;
}
-unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) {
- return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
+unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
+ unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
+ unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
+ unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
+ unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
+ if (Version.Major < 9)
+ return Waitcnt;
+
+ unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
+ return Waitcnt | VmcntHi;
+}
+
+unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
+ unsigned VmcntLo =
+ unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
+ if (Version.Major < 9)
+ return VmcntLo;
+
+ unsigned VmcntHi =
+ unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
+ VmcntHi <<= getVmcntBitWidthLo();
+ return VmcntLo | VmcntHi;
}
-unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) {
+unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) {
+unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
Vmcnt = decodeVmcnt(Version, Waitcnt);
Expcnt = decodeExpcnt(Version, Waitcnt);
Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}
-unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) {
- return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
+unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Vmcnt) {
+ Waitcnt =
+ packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
+ if (Version.Major < 9)
+ return Waitcnt;
+
+ Vmcnt >>= getVmcntBitWidthLo();
+ return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}
-unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) {
+unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Expcnt) {
return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
+unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Lgkmcnt) {
return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-unsigned encodeWaitcnt(IsaVersion Version,
+unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
unsigned Waitcnt = getWaitcntBitMask(Version);
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
@@ -296,6 +510,10 @@ bool isCompute(CallingConv::ID cc) {
return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}
+bool isEntryFunctionCC(CallingConv::ID CC) {
+ return true;
+}
+
bool isSI(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}
@@ -327,13 +545,34 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
return Reg;
}
+unsigned mc2PseudoReg(unsigned Reg) {
+ switch (Reg) {
+ case AMDGPU::FLAT_SCR_ci:
+ case AMDGPU::FLAT_SCR_vi:
+ return FLAT_SCR;
+
+ case AMDGPU::FLAT_SCR_LO_ci:
+ case AMDGPU::FLAT_SCR_LO_vi:
+ return AMDGPU::FLAT_SCR_LO;
+
+ case AMDGPU::FLAT_SCR_HI_ci:
+ case AMDGPU::FLAT_SCR_HI_vi:
+ return AMDGPU::FLAT_SCR_HI;
+
+ default:
+ return Reg;
+ }
+}
+
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+ assert(OpNo < Desc.NumOperands);
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
OpType <= AMDGPU::OPERAND_SRC_LAST;
}
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+ assert(OpNo < Desc.NumOperands);
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
switch (OpType) {
case AMDGPU::OPERAND_REG_IMM_FP32:
@@ -342,6 +581,7 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
return true;
default:
return false;
@@ -349,6 +589,7 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
}
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+ assert(OpNo < Desc.NumOperands);
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
@@ -392,6 +633,7 @@ unsigned getRegBitWidth(const MCRegisterClass &RC) {
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
unsigned OpNo) {
+ assert(OpNo < Desc.NumOperands);
unsigned RCID = Desc.OpInfo[OpNo].RegClass;
return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}
@@ -440,7 +682,8 @@ bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
}
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
- assert(HasInv2Pi);
+ if (!HasInv2Pi)
+ return false;
if (Literal >= -16 && Literal <= 64)
return true;
@@ -457,5 +700,92 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
Val == 0x3118; // 1/2pi
}
-} // End namespace AMDGPU
-} // End namespace llvm
+bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
+ assert(HasInv2Pi);
+
+ int16_t Lo16 = static_cast<int16_t>(Literal);
+ int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
+ return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
+}
+
+bool isUniformMMO(const MachineMemOperand *MMO) {
+ const Value *Ptr = MMO->getValue();
+ // UndefValue means this is a load of a kernel input. These are uniform.
+ // Sometimes LDS instructions have constant pointers.
+ // If Ptr is null, then that means this mem operand contains a
+ // PseudoSourceValue like GOT.
+ if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
+ isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
+ return true;
+
+ const Instruction *I = dyn_cast<Instruction>(Ptr);
+ return I && I->getMetadata("amdgpu.uniform");
+}
+
+int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
+ if (isSI(ST) || isCI(ST))
+ return ByteOffset >> 2;
+
+ return ByteOffset;
+}
+
+bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
+ int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
+ return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) :
+ isUInt<20>(EncodedOffset);
+}
+} // end namespace AMDGPU
+
+} // end namespace llvm
+
+const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
+const unsigned AMDGPUAS::GLOBAL_ADDRESS;
+const unsigned AMDGPUAS::LOCAL_ADDRESS;
+const unsigned AMDGPUAS::PARAM_D_ADDRESS;
+const unsigned AMDGPUAS::PARAM_I_ADDRESS;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
+const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
+
+namespace llvm {
+namespace AMDGPU {
+
+AMDGPUAS getAMDGPUAS(Triple T) {
+ auto Env = T.getEnvironmentName();
+ AMDGPUAS AS;
+ if (Env == "amdgiz" || Env == "amdgizcl") {
+ AS.FLAT_ADDRESS = 0;
+ AS.PRIVATE_ADDRESS = 5;
+ AS.REGION_ADDRESS = 4;
+ }
+ else {
+ AS.FLAT_ADDRESS = 4;
+ AS.PRIVATE_ADDRESS = 0;
+ AS.REGION_ADDRESS = 5;
+ }
+ return AS;
+}
+
+AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
+ return getAMDGPUAS(M.getTargetTriple());
+}
+
+AMDGPUAS getAMDGPUAS(const Module &M) {
+ return getAMDGPUAS(Triple(M.getTargetTriple()));
+}
+} // namespace AMDGPU
+} // namespace llvm
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index ea5fc366d205..d6c836eb748b 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1,4 +1,4 @@
-//===-- AMDGPUBaseInfo.h - Top level definitions for AMDGPU -----*- C++ -*-===//
+//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,39 +10,143 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
+#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
-#include "llvm/IR/CallingConv.h"
-
#include "SIDefines.h"
-
-#define GET_INSTRINFO_OPERAND_ENUM
-#include "AMDGPUGenInstrInfo.inc"
-#undef GET_INSTRINFO_OPERAND_ENUM
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstdint>
+#include <utility>
namespace llvm {
class FeatureBitset;
class Function;
class GlobalValue;
+class MachineMemOperand;
class MCContext;
-class MCInstrDesc;
class MCRegisterClass;
class MCRegisterInfo;
class MCSection;
class MCSubtargetInfo;
+class Triple;
namespace AMDGPU {
+namespace IsaInfo {
-LLVM_READONLY
-int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+enum {
+ // The closed Vulkan driver sets 96, which limits the wave count to 8 but
+ // doesn't spill SGPRs as much as when 80 is set.
+ FIXED_NUM_SGPRS_FOR_INIT_BUG = 96
+};
+/// \brief Instruction set architecture version.
struct IsaVersion {
unsigned Major;
unsigned Minor;
unsigned Stepping;
};
+/// \returns Isa version for given subtarget \p Features.
IsaVersion getIsaVersion(const FeatureBitset &Features);
+
+/// \returns Wavefront size for given subtarget \p Features.
+unsigned getWavefrontSize(const FeatureBitset &Features);
+
+/// \returns Local memory size in bytes for given subtarget \p Features.
+unsigned getLocalMemorySize(const FeatureBitset &Features);
+
+/// \returns Number of execution units per compute unit for given subtarget \p
+/// Features.
+unsigned getEUsPerCU(const FeatureBitset &Features);
+
+/// \returns Maximum number of work groups per compute unit for given subtarget
+/// \p Features and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize);
+
+/// \returns Maximum number of waves per compute unit for given subtarget \p
+/// Features without any kind of limitation.
+unsigned getMaxWavesPerCU(const FeatureBitset &Features);
+
+/// \returns Maximum number of waves per compute unit for given subtarget \p
+/// Features and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize);
+
+/// \returns Minimum number of waves per execution unit for given subtarget \p
+/// Features.
+unsigned getMinWavesPerEU(const FeatureBitset &Features);
+
+/// \returns Maximum number of waves per execution unit for given subtarget \p
+/// Features without any kind of limitation.
+unsigned getMaxWavesPerEU(const FeatureBitset &Features);
+
+/// \returns Maximum number of waves per execution unit for given subtarget \p
+/// Features and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize);
+
+/// \returns Minimum flat work group size for given subtarget \p Features.
+unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
+
+/// \returns Maximum flat work group size for given subtarget \p Features.
+unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
+
+/// \returns Number of waves per work group for given subtarget \p Features and
+/// limited by given \p FlatWorkGroupSize.
+unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize);
+
+/// \returns SGPR allocation granularity for given subtarget \p Features.
+unsigned getSGPRAllocGranule(const FeatureBitset &Features);
+
+/// \returns SGPR encoding granularity for given subtarget \p Features.
+unsigned getSGPREncodingGranule(const FeatureBitset &Features);
+
+/// \returns Total number of SGPRs for given subtarget \p Features.
+unsigned getTotalNumSGPRs(const FeatureBitset &Features);
+
+/// \returns Addressable number of SGPRs for given subtarget \p Features.
+unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
+
+/// \returns Minimum number of SGPRs that meets the given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+
+/// \returns Maximum number of SGPRs that meets the given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+ bool Addressable);
+
+/// \returns VGPR allocation granularity for given subtarget \p Features.
+unsigned getVGPRAllocGranule(const FeatureBitset &Features);
+
+/// \returns VGPR encoding granularity for given subtarget \p Features.
+unsigned getVGPREncodingGranule(const FeatureBitset &Features);
+
+/// \returns Total number of VGPRs for given subtarget \p Features.
+unsigned getTotalNumVGPRs(const FeatureBitset &Features);
+
+/// \returns Addressable number of VGPRs for given subtarget \p Features.
+unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
+
+/// \returns Minimum number of VGPRs that meets given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+
+/// \returns Maximum number of VGPRs that meets given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+
+} // end namespace IsaInfo
+
+LLVM_READONLY
+int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const FeatureBitset &Features);
MCSection *getHSATextSection(MCContext &Ctx);
@@ -53,9 +157,9 @@ MCSection *getHSADataGlobalProgramSection(MCContext &Ctx);
MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx);
-bool isGroupSegment(const GlobalValue *GV);
-bool isGlobalSegment(const GlobalValue *GV);
-bool isReadOnlySegment(const GlobalValue *GV);
+bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS);
+bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS);
+bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS);
/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
@@ -83,64 +187,89 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
std::pair<int, int> Default,
bool OnlyFirstRequired = false);
-/// \returns Waitcnt bit mask for given isa \p Version.
-unsigned getWaitcntBitMask(IsaVersion Version);
-
/// \returns Vmcnt bit mask for given isa \p Version.
-unsigned getVmcntBitMask(IsaVersion Version);
+unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
/// \returns Expcnt bit mask for given isa \p Version.
-unsigned getExpcntBitMask(IsaVersion Version);
+unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
/// \returns Lgkmcnt bit mask for given isa \p Version.
-unsigned getLgkmcntBitMask(IsaVersion Version);
+unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
+
+/// \returns Waitcnt bit mask for given isa \p Version.
+unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt);
+unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt);
+unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt);
+unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
/// \brief Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
-/// \p Vmcnt = \p Waitcnt[3:0]
+/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only)
+/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
/// \p Expcnt = \p Waitcnt[6:4]
/// \p Lgkmcnt = \p Waitcnt[11:8]
-void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
-unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt);
+unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Vmcnt);
/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
-unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt);
+unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Expcnt);
/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
-unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt);
+unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Lgkmcnt);
/// \brief Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
-/// Waitcnt[3:0] = \p Vmcnt
-/// Waitcnt[6:4] = \p Expcnt
-/// Waitcnt[11:8] = \p Lgkmcnt
+/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
+/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
+/// Waitcnt[6:4] = \p Expcnt
+/// Waitcnt[11:8] = \p Lgkmcnt
+/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
-unsigned encodeWaitcnt(IsaVersion Version,
+unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
unsigned getInitialPSInputAddr(const Function &F);
-bool isShader(CallingConv::ID cc);
-bool isCompute(CallingConv::ID cc);
+LLVM_READNONE
+bool isShader(CallingConv::ID CC);
+
+LLVM_READNONE
+bool isCompute(CallingConv::ID CC);
+
+LLVM_READNONE
+bool isEntryFunctionCC(CallingConv::ID CC);
+
+// FIXME: Remove this when calling conventions cleaned up
+LLVM_READNONE
+inline bool isKernel(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::C:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return true;
+ default:
+ return false;
+ }
+}
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
@@ -150,6 +279,10 @@ bool isVI(const MCSubtargetInfo &STI);
/// \p STI otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
+/// \brief Convert hardware register \p Reg to a pseudo register
+LLVM_READNONE
+unsigned mc2PseudoReg(unsigned Reg);
+
/// \brief Can this operand also contain immediate values?
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
@@ -188,6 +321,8 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
return 2;
default:
@@ -210,7 +345,21 @@ bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
+LLVM_READNONE
+bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
+
+bool isUniformMMO(const MachineMemOperand *MMO);
+
+/// \returns The encoding that will be used for \p ByteOffset in the SMRD
+/// offset field.
+int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+
+/// \returns true if this offset is small enough to fit in the SMRD
+/// offset field. \p ByteOffset should be the offset in bytes and
+/// not the encoded offset.
+bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+
} // end namespace AMDGPU
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
diff --git a/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
index c55eaab077d1..991408c81c92 100644
--- a/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -87,7 +87,7 @@ COMPPGM1(enable_ieee_mode, compute_pgm_rsrc1_ieee_mode, IEEE
// TODO: cdbg_user
COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN),
COMPPGM2(user_sgpr_count, compute_pgm_rsrc2_user_sgpr, USER_SGPR),
-// TODO: enable_trap_handler
+COMPPGM2(enable_trap_handler, compute_pgm_rsrc2_trap_handler, TRAP_HANDLER),
COMPPGM2(enable_sgpr_workgroup_id_x, compute_pgm_rsrc2_tgid_x_en, TGID_X_EN),
COMPPGM2(enable_sgpr_workgroup_id_y, compute_pgm_rsrc2_tgid_y_en, TGID_Y_EN),
COMPPGM2(enable_sgpr_workgroup_id_z, compute_pgm_rsrc2_tgid_z_en, TGID_Z_EN),