diff options
Diffstat (limited to 'lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp')
| -rw-r--r-- | lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 352 |
1 files changed, 194 insertions, 158 deletions
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 4eba19382315..54c866bdc63c 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -128,6 +128,49 @@ int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) { return NewInfo ? NewInfo->Opcode : -1; } +struct MUBUFInfo { + uint16_t Opcode; + uint16_t BaseOpcode; + uint8_t dwords; + bool has_vaddr; + bool has_srsrc; + bool has_soffset; +}; + +#define GET_MUBUFInfoTable_DECL +#define GET_MUBUFInfoTable_IMPL +#include "AMDGPUGenSearchableTables.inc" + +int getMUBUFBaseOpcode(unsigned Opc) { + const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc); + return Info ? Info->BaseOpcode : -1; +} + +int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords) { + const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndDwords(BaseOpc, Dwords); + return Info ? Info->Opcode : -1; +} + +int getMUBUFDwords(unsigned Opc) { + const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); + return Info ? Info->dwords : 0; +} + +bool getMUBUFHasVAddr(unsigned Opc) { + const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); + return Info ? Info->has_vaddr : false; +} + +bool getMUBUFHasSrsrc(unsigned Opc) { + const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); + return Info ? Info->has_srsrc : false; +} + +bool getMUBUFHasSoffset(unsigned Opc) { + const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); + return Info ? Info->has_soffset : false; +} + // Wrapper for Tablegen'd function. enum Subtarget is not defined in any // header files, so we need to wrap it in a function that takes unsigned // instead. @@ -137,122 +180,75 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen) { namespace IsaInfo { -IsaVersion getIsaVersion(const FeatureBitset &Features) { - // GCN GFX6 (Southern Islands (SI)). - if (Features.test(FeatureISAVersion6_0_0)) - return {6, 0, 0}; - if (Features.test(FeatureISAVersion6_0_1)) - return {6, 0, 1}; - - // GCN GFX7 (Sea Islands (CI)). - if (Features.test(FeatureISAVersion7_0_0)) - return {7, 0, 0}; - if (Features.test(FeatureISAVersion7_0_1)) - return {7, 0, 1}; - if (Features.test(FeatureISAVersion7_0_2)) - return {7, 0, 2}; - if (Features.test(FeatureISAVersion7_0_3)) - return {7, 0, 3}; - if (Features.test(FeatureISAVersion7_0_4)) - return {7, 0, 4}; - if (Features.test(FeatureSeaIslands)) - return {7, 0, 0}; - - // GCN GFX8 (Volcanic Islands (VI)). - if (Features.test(FeatureISAVersion8_0_1)) - return {8, 0, 1}; - if (Features.test(FeatureISAVersion8_0_2)) - return {8, 0, 2}; - if (Features.test(FeatureISAVersion8_0_3)) - return {8, 0, 3}; - if (Features.test(FeatureISAVersion8_1_0)) - return {8, 1, 0}; - if (Features.test(FeatureVolcanicIslands)) - return {8, 0, 0}; - - // GCN GFX9. - if (Features.test(FeatureISAVersion9_0_0)) - return {9, 0, 0}; - if (Features.test(FeatureISAVersion9_0_2)) - return {9, 0, 2}; - if (Features.test(FeatureISAVersion9_0_4)) - return {9, 0, 4}; - if (Features.test(FeatureISAVersion9_0_6)) - return {9, 0, 6}; - if (Features.test(FeatureGFX9)) - return {9, 0, 0}; - - if (Features.test(FeatureSouthernIslands)) - return {0, 0, 0}; - return {7, 0, 0}; -} - void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) { auto TargetTriple = STI->getTargetTriple(); - auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits()); + auto Version = getIsaVersion(STI->getCPU()); Stream << TargetTriple.getArchName() << '-' << TargetTriple.getVendorName() << '-' << TargetTriple.getOSName() << '-' << TargetTriple.getEnvironmentName() << '-' << "gfx" - << ISAVersion.Major - << ISAVersion.Minor - << ISAVersion.Stepping; + << Version.Major + << Version.Minor + << Version.Stepping; if (hasXNACK(*STI)) Stream << "+xnack"; + if (hasSRAMECC(*STI)) + Stream << "+sram-ecc"; Stream.flush(); } bool hasCodeObjectV3(const MCSubtargetInfo *STI) { - return STI->getFeatureBits().test(FeatureCodeObjectV3); + return STI->getTargetTriple().getOS() == Triple::AMDHSA && + STI->getFeatureBits().test(FeatureCodeObjectV3); } -unsigned getWavefrontSize(const FeatureBitset &Features) { - if (Features.test(FeatureWavefrontSize16)) +unsigned getWavefrontSize(const MCSubtargetInfo *STI) { + if (STI->getFeatureBits().test(FeatureWavefrontSize16)) return 16; - if (Features.test(FeatureWavefrontSize32)) + if (STI->getFeatureBits().test(FeatureWavefrontSize32)) return 32; return 64; } -unsigned getLocalMemorySize(const FeatureBitset &Features) { - if (Features.test(FeatureLocalMemorySize32768)) +unsigned getLocalMemorySize(const MCSubtargetInfo *STI) { + if (STI->getFeatureBits().test(FeatureLocalMemorySize32768)) return 32768; - if (Features.test(FeatureLocalMemorySize65536)) + if (STI->getFeatureBits().test(FeatureLocalMemorySize65536)) return 65536; return 0; } -unsigned getEUsPerCU(const FeatureBitset &Features) { +unsigned getEUsPerCU(const MCSubtargetInfo *STI) { return 4; } -unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features, +unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize) { - if (!Features.test(FeatureGCN)) + if (!STI->getFeatureBits().test(FeatureGCN)) return 8; - unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize); + unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize); if (N == 1) return 40; N = 40 / N; return std::min(N, 16u); } -unsigned getMaxWavesPerCU(const FeatureBitset &Features) { - return getMaxWavesPerEU() * getEUsPerCU(Features); +unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) { + return getMaxWavesPerEU() * getEUsPerCU(STI); } -unsigned getMaxWavesPerCU(const FeatureBitset &Features, +unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize) { - return getWavesPerWorkGroup(Features, FlatWorkGroupSize); + return getWavesPerWorkGroup(STI, FlatWorkGroupSize); } -unsigned getMinWavesPerEU(const FeatureBitset &Features) { +unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { return 1; } @@ -261,89 +257,89 @@ unsigned getMaxWavesPerEU() { return 10; } -unsigned getMaxWavesPerEU(const FeatureBitset &Features, +unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize) { - return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize), - getEUsPerCU(Features)) / getEUsPerCU(Features); + return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize), + getEUsPerCU(STI)) / getEUsPerCU(STI); } -unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) { +unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { return 1; } -unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) { +unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) { return 2048; } -unsigned getWavesPerWorkGroup(const FeatureBitset &Features, +unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize) { - return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) / - getWavefrontSize(Features); + return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) / + getWavefrontSize(STI); } -unsigned getSGPRAllocGranule(const FeatureBitset &Features) { - IsaVersion Version = getIsaVersion(Features); +unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) { + IsaVersion Version = getIsaVersion(STI->getCPU()); if (Version.Major >= 8) return 16; return 8; } -unsigned getSGPREncodingGranule(const FeatureBitset &Features) { +unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { return 8; } -unsigned getTotalNumSGPRs(const FeatureBitset &Features) { - IsaVersion Version = getIsaVersion(Features); +unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) { + IsaVersion Version = getIsaVersion(STI->getCPU()); if (Version.Major >= 8) return 800; return 512; } -unsigned getAddressableNumSGPRs(const FeatureBitset &Features) { - if (Features.test(FeatureSGPRInitBug)) +unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) { + if (STI->getFeatureBits().test(FeatureSGPRInitBug)) return FIXED_NUM_SGPRS_FOR_INIT_BUG; - IsaVersion Version = getIsaVersion(Features); + IsaVersion Version = getIsaVersion(STI->getCPU()); if (Version.Major >= 8) return 102; return 104; } -unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { +unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { assert(WavesPerEU != 0); if (WavesPerEU >= getMaxWavesPerEU()) return 0; - unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1); - if (Features.test(FeatureTrapHandler)) + unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1); + if (STI->getFeatureBits().test(FeatureTrapHandler)) MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS); - MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1; - return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features)); + MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1; + return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI)); } -unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU, +unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable) { assert(WavesPerEU != 0); - IsaVersion Version = getIsaVersion(Features); - unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features); + IsaVersion Version = getIsaVersion(STI->getCPU()); + unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI); if (Version.Major >= 8 && !Addressable) AddressableNumSGPRs = 112; - unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU; - if (Features.test(FeatureTrapHandler)) + unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU; + if (STI->getFeatureBits().test(FeatureTrapHandler)) MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS); - MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features)); + MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI)); return std::min(MaxNumSGPRs, AddressableNumSGPRs); } -unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, +unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed) { unsigned ExtraSGPRs = 0; if (VCCUsed) ExtraSGPRs = 2; - IsaVersion Version = getIsaVersion(Features); + IsaVersion Version = getIsaVersion(STI->getCPU()); if (Version.Major < 8) { if (FlatScrUsed) ExtraSGPRs = 4; @@ -358,74 +354,74 @@ unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, return ExtraSGPRs; } -unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed, +unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed) { - return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, - Features[AMDGPU::FeatureXNACK]); + return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed, + STI->getFeatureBits().test(AMDGPU::FeatureXNACK)); } -unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) { - NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features)); +unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) { + NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI)); // SGPRBlocks is actual number of SGPR blocks minus 1. - return NumSGPRs / getSGPREncodingGranule(Features) - 1; + return NumSGPRs / getSGPREncodingGranule(STI) - 1; } -unsigned getVGPRAllocGranule(const FeatureBitset &Features) { +unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) { return 4; } -unsigned getVGPREncodingGranule(const FeatureBitset &Features) { - return getVGPRAllocGranule(Features); +unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) { + return getVGPRAllocGranule(STI); } -unsigned getTotalNumVGPRs(const FeatureBitset &Features) { +unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) { return 256; } -unsigned getAddressableNumVGPRs(const FeatureBitset &Features) { - return getTotalNumVGPRs(Features); +unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) { + return getTotalNumVGPRs(STI); } -unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { +unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { assert(WavesPerEU != 0); if (WavesPerEU >= getMaxWavesPerEU()) return 0; unsigned MinNumVGPRs = - alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1), - getVGPRAllocGranule(Features)) + 1; - return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features)); + alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1), + getVGPRAllocGranule(STI)) + 1; + return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI)); } -unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) { +unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { assert(WavesPerEU != 0); - unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU, - getVGPRAllocGranule(Features)); - unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features); + unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU, + getVGPRAllocGranule(STI)); + unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI); return std::min(MaxNumVGPRs, AddressableNumVGPRs); } -unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) { - NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features)); +unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) { + NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI)); // VGPRBlocks is actual number of VGPR blocks minus 1. - return NumVGPRs / getVGPREncodingGranule(Features) - 1; + return NumVGPRs / getVGPREncodingGranule(STI) - 1; } } // end namespace IsaInfo void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, - const FeatureBitset &Features) { - IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features); + const MCSubtargetInfo *STI) { + IsaVersion Version = getIsaVersion(STI->getCPU()); memset(&Header, 0, sizeof(Header)); Header.amd_kernel_code_version_major = 1; Header.amd_kernel_code_version_minor = 2; Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU - Header.amd_machine_version_major = ISA.Major; - Header.amd_machine_version_minor = ISA.Minor; - Header.amd_machine_version_stepping = ISA.Stepping; + Header.amd_machine_version_major = Version.Major; + Header.amd_machine_version_minor = Version.Minor; + Header.amd_machine_version_stepping = Version.Stepping; Header.kernel_code_entry_byte_offset = sizeof(Header); // wavefront_size is specified as a power of 2: 2^6 = 64 threads. Header.wavefront_size = 6; @@ -513,7 +509,7 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F, return Ints; } -unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) { +unsigned getVmcntBitMask(const IsaVersion &Version) { unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1; if (Version.Major < 9) return VmcntLo; @@ -522,15 +518,15 @@ unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) { return VmcntLo | VmcntHi; } -unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) { +unsigned getExpcntBitMask(const IsaVersion &Version) { return (1 << getExpcntBitWidth()) - 1; } -unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) { +unsigned getLgkmcntBitMask(const IsaVersion &Version) { return (1 << getLgkmcntBitWidth()) - 1; } -unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) { +unsigned getWaitcntBitMask(const IsaVersion &Version) { unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo()); unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth()); unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth()); @@ -542,7 +538,7 @@ unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) { return Waitcnt | VmcntHi; } -unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { +unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) { unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); if (Version.Major < 9) @@ -554,22 +550,30 @@ unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { return VmcntLo | VmcntHi; } -unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { +unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) { return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); } -unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) { +unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) { return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); } -void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) { Vmcnt = decodeVmcnt(Version, Waitcnt); Expcnt = decodeExpcnt(Version, Waitcnt); Lgkmcnt = decodeLgkmcnt(Version, Waitcnt); } -unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) { + Waitcnt Decoded; + Decoded.VmCnt = decodeVmcnt(Version, Encoded); + Decoded.ExpCnt = decodeExpcnt(Version, Encoded); + Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded); + return Decoded; +} + +unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt) { Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo()); @@ -580,17 +584,17 @@ unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi()); } -unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt) { return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); } -unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt, +unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt) { return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); } -unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version, +unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) { unsigned Waitcnt = getWaitcntBitMask(Version); Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt); @@ -599,6 +603,10 @@ unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version, return Waitcnt; } +unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) { + return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt); +} + unsigned getInitialPSInputAddr(const Function &F) { return getIntegerAttribute(F, "InitialPSInputAddr", 0); } @@ -643,6 +651,10 @@ bool hasXNACK(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureXNACK]; } +bool hasSRAMECC(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC]; +} + bool hasMIMG_R128(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128]; } @@ -798,6 +810,7 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::VS_64RegClassID: case AMDGPU::SReg_64RegClassID: case AMDGPU::VReg_64RegClassID: + case AMDGPU::SReg_64_XEXECRegClassID: return 64; case AMDGPU::VReg_96RegClassID: return 96; @@ -935,27 +948,50 @@ bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset); } -} // end namespace AMDGPU - -} // end namespace llvm - -namespace llvm { -namespace AMDGPU { - -AMDGPUAS getAMDGPUAS(Triple T) { - AMDGPUAS AS; - AS.FLAT_ADDRESS = 0; - AS.PRIVATE_ADDRESS = 5; - AS.REGION_ADDRESS = 2; - return AS; -} +// Given Imm, split it into the values to put into the SOffset and ImmOffset +// fields in an MUBUF instruction. Return false if it is not possible (due to a +// hardware bug needing a workaround). +// +// The required alignment ensures that individual address components remain +// aligned if they are aligned to begin with. It also ensures that additional +// offsets within the given alignment can be added to the resulting ImmOffset. +bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, + const GCNSubtarget *Subtarget, uint32_t Align) { + const uint32_t MaxImm = alignDown(4095, Align); + uint32_t Overflow = 0; + + if (Imm > MaxImm) { + if (Imm <= MaxImm + 64) { + // Use an SOffset inline constant for 4..64 + Overflow = Imm - MaxImm; + Imm = MaxImm; + } else { + // Try to keep the same value in SOffset for adjacent loads, so that + // the corresponding register contents can be re-used. + // + // Load values with all low-bits (except for alignment bits) set into + // SOffset, so that a larger range of values can be covered using + // s_movk_i32. + // + // Atomic operations fail to work correctly when individual address + // components are unaligned, even if their sum is aligned. + uint32_t High = (Imm + Align) & ~4095; + uint32_t Low = (Imm + Align) & 4095; + Imm = Low; + Overflow = High - Align; + } + } -AMDGPUAS getAMDGPUAS(const TargetMachine &M) { - return getAMDGPUAS(M.getTargetTriple()); -} + // There is a hardware bug in SI and CI which prevents address clamping in + // MUBUF instructions from working correctly with SOffsets. The immediate + // offset is unaffected. + if (Overflow > 0 && + Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) + return false; -AMDGPUAS getAMDGPUAS(const Module &M) { - return getAMDGPUAS(Triple(M.getTargetTriple())); + ImmOffset = Imm; + SOffset = Overflow; + return true; } namespace { |
