aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp')
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp352
1 files changed, 194 insertions, 158 deletions
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 4eba19382315..54c866bdc63c 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -128,6 +128,49 @@ int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
return NewInfo ? NewInfo->Opcode : -1;
}
+struct MUBUFInfo {
+ uint16_t Opcode;
+ uint16_t BaseOpcode;
+ uint8_t dwords;
+ bool has_vaddr;
+ bool has_srsrc;
+ bool has_soffset;
+};
+
+#define GET_MUBUFInfoTable_DECL
+#define GET_MUBUFInfoTable_IMPL
+#include "AMDGPUGenSearchableTables.inc"
+
+int getMUBUFBaseOpcode(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
+ return Info ? Info->BaseOpcode : -1;
+}
+
+int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords) {
+ const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndDwords(BaseOpc, Dwords);
+ return Info ? Info->Opcode : -1;
+}
+
+int getMUBUFDwords(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->dwords : 0;
+}
+
+bool getMUBUFHasVAddr(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->has_vaddr : false;
+}
+
+bool getMUBUFHasSrsrc(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->has_srsrc : false;
+}
+
+bool getMUBUFHasSoffset(unsigned Opc) {
+ const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
+ return Info ? Info->has_soffset : false;
+}
+
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
@@ -137,122 +180,75 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen) {
namespace IsaInfo {
-IsaVersion getIsaVersion(const FeatureBitset &Features) {
- // GCN GFX6 (Southern Islands (SI)).
- if (Features.test(FeatureISAVersion6_0_0))
- return {6, 0, 0};
- if (Features.test(FeatureISAVersion6_0_1))
- return {6, 0, 1};
-
- // GCN GFX7 (Sea Islands (CI)).
- if (Features.test(FeatureISAVersion7_0_0))
- return {7, 0, 0};
- if (Features.test(FeatureISAVersion7_0_1))
- return {7, 0, 1};
- if (Features.test(FeatureISAVersion7_0_2))
- return {7, 0, 2};
- if (Features.test(FeatureISAVersion7_0_3))
- return {7, 0, 3};
- if (Features.test(FeatureISAVersion7_0_4))
- return {7, 0, 4};
- if (Features.test(FeatureSeaIslands))
- return {7, 0, 0};
-
- // GCN GFX8 (Volcanic Islands (VI)).
- if (Features.test(FeatureISAVersion8_0_1))
- return {8, 0, 1};
- if (Features.test(FeatureISAVersion8_0_2))
- return {8, 0, 2};
- if (Features.test(FeatureISAVersion8_0_3))
- return {8, 0, 3};
- if (Features.test(FeatureISAVersion8_1_0))
- return {8, 1, 0};
- if (Features.test(FeatureVolcanicIslands))
- return {8, 0, 0};
-
- // GCN GFX9.
- if (Features.test(FeatureISAVersion9_0_0))
- return {9, 0, 0};
- if (Features.test(FeatureISAVersion9_0_2))
- return {9, 0, 2};
- if (Features.test(FeatureISAVersion9_0_4))
- return {9, 0, 4};
- if (Features.test(FeatureISAVersion9_0_6))
- return {9, 0, 6};
- if (Features.test(FeatureGFX9))
- return {9, 0, 0};
-
- if (Features.test(FeatureSouthernIslands))
- return {0, 0, 0};
- return {7, 0, 0};
-}
-
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
auto TargetTriple = STI->getTargetTriple();
- auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
+ auto Version = getIsaVersion(STI->getCPU());
Stream << TargetTriple.getArchName() << '-'
<< TargetTriple.getVendorName() << '-'
<< TargetTriple.getOSName() << '-'
<< TargetTriple.getEnvironmentName() << '-'
<< "gfx"
- << ISAVersion.Major
- << ISAVersion.Minor
- << ISAVersion.Stepping;
+ << Version.Major
+ << Version.Minor
+ << Version.Stepping;
if (hasXNACK(*STI))
Stream << "+xnack";
+ if (hasSRAMECC(*STI))
+ Stream << "+sram-ecc";
Stream.flush();
}
bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
- return STI->getFeatureBits().test(FeatureCodeObjectV3);
+ return STI->getTargetTriple().getOS() == Triple::AMDHSA &&
+ STI->getFeatureBits().test(FeatureCodeObjectV3);
}
-unsigned getWavefrontSize(const FeatureBitset &Features) {
- if (Features.test(FeatureWavefrontSize16))
+unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureWavefrontSize16))
return 16;
- if (Features.test(FeatureWavefrontSize32))
+ if (STI->getFeatureBits().test(FeatureWavefrontSize32))
return 32;
return 64;
}
-unsigned getLocalMemorySize(const FeatureBitset &Features) {
- if (Features.test(FeatureLocalMemorySize32768))
+unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
return 32768;
- if (Features.test(FeatureLocalMemorySize65536))
+ if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
return 65536;
return 0;
}
-unsigned getEUsPerCU(const FeatureBitset &Features) {
+unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
return 4;
}
-unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- if (!Features.test(FeatureGCN))
+ if (!STI->getFeatureBits().test(FeatureGCN))
return 8;
- unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+ unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
if (N == 1)
return 40;
N = 40 / N;
return std::min(N, 16u);
}
-unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
- return getMaxWavesPerEU() * getEUsPerCU(Features);
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
+ return getMaxWavesPerEU() * getEUsPerCU(STI);
}
-unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+ return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
}
-unsigned getMinWavesPerEU(const FeatureBitset &Features) {
+unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
return 1;
}
@@ -261,89 +257,89 @@ unsigned getMaxWavesPerEU() {
return 10;
}
-unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
- getEUsPerCU(Features)) / getEUsPerCU(Features);
+ return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
+ getEUsPerCU(STI)) / getEUsPerCU(STI);
}
-unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
+unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
return 1;
}
-unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
+unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
return 2048;
}
-unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
- getWavefrontSize(Features);
+ return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
+ getWavefrontSize(STI);
}
-unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
- IsaVersion Version = getIsaVersion(Features);
+unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major >= 8)
return 16;
return 8;
}
-unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
+unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
return 8;
}
-unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
- IsaVersion Version = getIsaVersion(Features);
+unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major >= 8)
return 800;
return 512;
}
-unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
- if (Features.test(FeatureSGPRInitBug))
+unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureSGPRInitBug))
return FIXED_NUM_SGPRS_FOR_INIT_BUG;
- IsaVersion Version = getIsaVersion(Features);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major >= 8)
return 102;
return 104;
}
-unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
if (WavesPerEU >= getMaxWavesPerEU())
return 0;
- unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
- if (Features.test(FeatureTrapHandler))
+ unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
+ if (STI->getFeatureBits().test(FeatureTrapHandler))
MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
- MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
- return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
+ MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
+ return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}
-unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
bool Addressable) {
assert(WavesPerEU != 0);
- IsaVersion Version = getIsaVersion(Features);
- unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+ unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
if (Version.Major >= 8 && !Addressable)
AddressableNumSGPRs = 112;
- unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
- if (Features.test(FeatureTrapHandler))
+ unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
+ if (STI->getFeatureBits().test(FeatureTrapHandler))
MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
- MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
+ MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
-unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
bool FlatScrUsed, bool XNACKUsed) {
unsigned ExtraSGPRs = 0;
if (VCCUsed)
ExtraSGPRs = 2;
- IsaVersion Version = getIsaVersion(Features);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major < 8) {
if (FlatScrUsed)
ExtraSGPRs = 4;
@@ -358,74 +354,74 @@ unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
return ExtraSGPRs;
}
-unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
bool FlatScrUsed) {
- return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed,
- Features[AMDGPU::FeatureXNACK]);
+ return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
+ STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}
-unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) {
- NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features));
+unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
+ NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
// SGPRBlocks is actual number of SGPR blocks minus 1.
- return NumSGPRs / getSGPREncodingGranule(Features) - 1;
+ return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}
-unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
+unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
return 4;
}
-unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
- return getVGPRAllocGranule(Features);
+unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
+ return getVGPRAllocGranule(STI);
}
-unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
+unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
return 256;
}
-unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
- return getTotalNumVGPRs(Features);
+unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
+ return getTotalNumVGPRs(STI);
}
-unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
if (WavesPerEU >= getMaxWavesPerEU())
return 0;
unsigned MinNumVGPRs =
- alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
- getVGPRAllocGranule(Features)) + 1;
- return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
+ alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
+ getVGPRAllocGranule(STI)) + 1;
+ return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
}
-unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
- unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
- getVGPRAllocGranule(Features));
- unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
+ unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
+ getVGPRAllocGranule(STI));
+ unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
-unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) {
- NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features));
+unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
+ NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
// VGPRBlocks is actual number of VGPR blocks minus 1.
- return NumVGPRs / getVGPREncodingGranule(Features) - 1;
+ return NumVGPRs / getVGPREncodingGranule(STI) - 1;
}
} // end namespace IsaInfo
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
- const FeatureBitset &Features) {
- IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
+ const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
memset(&Header, 0, sizeof(Header));
Header.amd_kernel_code_version_major = 1;
Header.amd_kernel_code_version_minor = 2;
Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
- Header.amd_machine_version_major = ISA.Major;
- Header.amd_machine_version_minor = ISA.Minor;
- Header.amd_machine_version_stepping = ISA.Stepping;
+ Header.amd_machine_version_major = Version.Major;
+ Header.amd_machine_version_minor = Version.Minor;
+ Header.amd_machine_version_stepping = Version.Stepping;
Header.kernel_code_entry_byte_offset = sizeof(Header);
// wavefront_size is specified as a power of 2: 2^6 = 64 threads.
Header.wavefront_size = 6;
@@ -513,7 +509,7 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
return Ints;
}
-unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getVmcntBitMask(const IsaVersion &Version) {
unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
if (Version.Major < 9)
return VmcntLo;
@@ -522,15 +518,15 @@ unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
return VmcntLo | VmcntHi;
}
-unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getExpcntBitMask(const IsaVersion &Version) {
return (1 << getExpcntBitWidth()) - 1;
}
-unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getLgkmcntBitMask(const IsaVersion &Version) {
return (1 << getLgkmcntBitWidth()) - 1;
}
-unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getWaitcntBitMask(const IsaVersion &Version) {
unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
@@ -542,7 +538,7 @@ unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
return Waitcnt | VmcntHi;
}
-unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
+unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
unsigned VmcntLo =
unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
if (Version.Major < 9)
@@ -554,22 +550,30 @@ unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
return VmcntLo | VmcntHi;
}
-unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
+unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
+unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
Vmcnt = decodeVmcnt(Version, Waitcnt);
Expcnt = decodeExpcnt(Version, Waitcnt);
Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}
-unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
+ Waitcnt Decoded;
+ Decoded.VmCnt = decodeVmcnt(Version, Encoded);
+ Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
+ Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
+ return Decoded;
+}
+
+unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Vmcnt) {
Waitcnt =
packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
@@ -580,17 +584,17 @@ unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}
-unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Expcnt) {
return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Lgkmcnt) {
return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
+unsigned encodeWaitcnt(const IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
unsigned Waitcnt = getWaitcntBitMask(Version);
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
@@ -599,6 +603,10 @@ unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
return Waitcnt;
}
+unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
+ return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
+}
+
unsigned getInitialPSInputAddr(const Function &F) {
return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}
@@ -643,6 +651,10 @@ bool hasXNACK(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}
+bool hasSRAMECC(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
+}
+
bool hasMIMG_R128(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
}
@@ -798,6 +810,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::VS_64RegClassID:
case AMDGPU::SReg_64RegClassID:
case AMDGPU::VReg_64RegClassID:
+ case AMDGPU::SReg_64_XEXECRegClassID:
return 64;
case AMDGPU::VReg_96RegClassID:
return 96;
@@ -935,27 +948,50 @@ bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}
-} // end namespace AMDGPU
-
-} // end namespace llvm
-
-namespace llvm {
-namespace AMDGPU {
-
-AMDGPUAS getAMDGPUAS(Triple T) {
- AMDGPUAS AS;
- AS.FLAT_ADDRESS = 0;
- AS.PRIVATE_ADDRESS = 5;
- AS.REGION_ADDRESS = 2;
- return AS;
-}
+// Given Imm, split it into the values to put into the SOffset and ImmOffset
+// fields in an MUBUF instruction. Return false if it is not possible (due to a
+// hardware bug needing a workaround).
+//
+// The required alignment ensures that individual address components remain
+// aligned if they are aligned to begin with. It also ensures that additional
+// offsets within the given alignment can be added to the resulting ImmOffset.
+bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
+ const GCNSubtarget *Subtarget, uint32_t Align) {
+ const uint32_t MaxImm = alignDown(4095, Align);
+ uint32_t Overflow = 0;
+
+ if (Imm > MaxImm) {
+ if (Imm <= MaxImm + 64) {
+ // Use an SOffset inline constant for 4..64
+ Overflow = Imm - MaxImm;
+ Imm = MaxImm;
+ } else {
+ // Try to keep the same value in SOffset for adjacent loads, so that
+ // the corresponding register contents can be re-used.
+ //
+ // Load values with all low-bits (except for alignment bits) set into
+ // SOffset, so that a larger range of values can be covered using
+ // s_movk_i32.
+ //
+ // Atomic operations fail to work correctly when individual address
+ // components are unaligned, even if their sum is aligned.
+ uint32_t High = (Imm + Align) & ~4095;
+ uint32_t Low = (Imm + Align) & 4095;
+ Imm = Low;
+ Overflow = High - Align;
+ }
+ }
-AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
- return getAMDGPUAS(M.getTargetTriple());
-}
+ // There is a hardware bug in SI and CI which prevents address clamping in
+ // MUBUF instructions from working correctly with SOffsets. The immediate
+ // offset is unaffected.
+ if (Overflow > 0 &&
+ Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
+ return false;
-AMDGPUAS getAMDGPUAS(const Module &M) {
- return getAMDGPUAS(Triple(M.getTargetTriple()));
+ ImmOffset = Imm;
+ SOffset = Overflow;
+ return true;
}
namespace {