Diffstat (limited to 'llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 204 |
1 file changed, 165 insertions, 39 deletions
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 5271bc3aacc65..00e6d517bde58 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -108,6 +108,7 @@ namespace AMDGPU {
 #define GET_MIMGInfoTable_IMPL
 #define GET_MIMGLZMappingTable_IMPL
 #define GET_MIMGMIPMappingTable_IMPL
+#define GET_MIMGG16MappingTable_IMPL
 #include "AMDGPUGenSearchableTables.inc"

 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
@@ -148,10 +149,17 @@ struct MTBUFInfo {
   bool has_soffset;
 };

+struct SMInfo {
+  uint16_t Opcode;
+  bool IsBuffer;
+};
+
 #define GET_MTBUFInfoTable_DECL
 #define GET_MTBUFInfoTable_IMPL
 #define GET_MUBUFInfoTable_DECL
 #define GET_MUBUFInfoTable_IMPL
+#define GET_SMInfoTable_DECL
+#define GET_SMInfoTable_IMPL
 #include "AMDGPUGenSearchableTables.inc"

 int getMTBUFBaseOpcode(unsigned Opc) {
@@ -214,6 +222,11 @@ bool getMUBUFHasSoffset(unsigned Opc) {
   return Info ? Info->has_soffset : false;
 }

+bool getSMEMIsBuffer(unsigned Opc) {
+  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
+  return Info ? Info->IsBuffer : false;
+}
+
 // Wrapper for Tablegen'd function. enum Subtarget is not defined in any
 // header files, so we need to wrap it in a function that takes unsigned
 // instead.
@@ -268,6 +281,13 @@ unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
 }

 unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
+  // "Per CU" really means "per whatever functional block the waves of a
+  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
+  // two SIMDs.
+  if (isGFX10(*STI) && STI->getFeatureBits().test(FeatureCuMode))
+    return 2;
+  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
+  // two CUs, so a total of four SIMDs.
   return 4;
 }

@@ -283,15 +303,6 @@ unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
   return std::min(N, 16u);
 }

-unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
-  return getMaxWavesPerEU(STI) * getEUsPerCU(STI);
-}
-
-unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
-                          unsigned FlatWorkGroupSize) {
-  return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
-}
-
 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
   return 1;
 }
@@ -300,13 +311,13 @@ unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
   // FIXME: Need to take scratch memory into account.
   if (!isGFX10(*STI))
     return 10;
-  return 20;
+  return hasGFX10_3Insts(*STI) ? 16 : 20;
 }

-unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
-                          unsigned FlatWorkGroupSize) {
-  return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
-                 getEUsPerCU(STI)) / getEUsPerCU(STI);
+unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
+                                   unsigned FlatWorkGroupSize) {
+  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
+                    getEUsPerCU(STI));
 }

 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
@@ -320,8 +331,7 @@ unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {

 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
-  return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
-           getWavefrontSize(STI);
+  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
 }

 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
@@ -431,12 +441,21 @@ unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
   bool IsWave32 = EnableWavefrontSize32 ?
       *EnableWavefrontSize32 :
       STI->getFeatureBits().test(FeatureWavefrontSize32);
+
+  if (hasGFX10_3Insts(*STI))
+    return IsWave32 ? 16 : 8;
+
   return IsWave32 ? 8 : 4;
 }

 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                 Optional<bool> EnableWavefrontSize32) {
-  return getVGPRAllocGranule(STI, EnableWavefrontSize32);
+
+  bool IsWave32 = EnableWavefrontSize32 ?
+      *EnableWavefrontSize32 :
+      STI->getFeatureBits().test(FeatureWavefrontSize32);
+
+  return IsWave32 ? 8 : 4;
 }

 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
@@ -559,7 +578,7 @@ bool isReadOnlySegment(const GlobalValue *GV) {
 }

 bool shouldEmitConstantsToTextSection(const Triple &TT) {
-  return TT.getOS() == Triple::AMDPAL;
+  return TT.getOS() == Triple::AMDPAL || TT.getArch() == Triple::r600;
 }

 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
@@ -722,13 +741,16 @@ static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
     return ID_SYMBOLIC_FIRST_GFX9_;
   else if (isGFX9(STI))
     return ID_SYMBOLIC_FIRST_GFX10_;
+  else if (isGFX10(STI) && !isGFX10_BEncoding(STI))
+    return ID_SYMBOLIC_FIRST_GFX1030_;
   else
     return ID_SYMBOLIC_LAST_;
 }

 bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
-  return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
-         IdSymbolic[Id];
+  return
+    ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
+    IdSymbolic[Id] && (Id != ID_XNACK_MASK || !AMDGPU::isGFX10_BEncoding(STI));
 }

 bool isValidHwreg(int64_t Id) {
@@ -927,7 +949,15 @@ bool hasSRAMECC(const MCSubtargetInfo &STI) {
 }

 bool hasMIMG_R128(const MCSubtargetInfo &STI) {
-  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
+  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] && !STI.getFeatureBits()[AMDGPU::FeatureR128A16];
+}
+
+bool hasGFX10A16(const MCSubtargetInfo &STI) {
+  return STI.getFeatureBits()[AMDGPU::FeatureGFX10A16];
+}
+
+bool hasG16(const MCSubtargetInfo &STI) {
+  return STI.getFeatureBits()[AMDGPU::FeatureG16];
 }

 bool hasPackedD16(const MCSubtargetInfo &STI) {
@@ -958,9 +988,17 @@ bool isGCN3Encoding(const MCSubtargetInfo &STI) {
   return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
 }

+bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
+  return STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding];
+}
+
+bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
+  return STI.getFeatureBits()[AMDGPU::FeatureGFX10_3Insts];
+}
+
 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
   const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
-  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
+  const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
   return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
     Reg == AMDGPU::SCC;
 }
@@ -1082,6 +1120,11 @@ bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
 // (move from MC* level to Target* level). Return size in bits.
 unsigned getRegBitWidth(unsigned RCID) {
   switch (RCID) {
+  case AMDGPU::VGPR_LO16RegClassID:
+  case AMDGPU::VGPR_HI16RegClassID:
+  case AMDGPU::SGPR_LO16RegClassID:
+  case AMDGPU::AGPR_LO16RegClassID:
+    return 16;
   case AMDGPU::SGPR_32RegClassID:
   case AMDGPU::VGPR_32RegClassID:
   case AMDGPU::VRegOrLds_32RegClassID:
@@ -1103,6 +1146,7 @@ unsigned getRegBitWidth(unsigned RCID) {
   case AMDGPU::SGPR_96RegClassID:
   case AMDGPU::SReg_96RegClassID:
   case AMDGPU::VReg_96RegClassID:
+  case AMDGPU::AReg_96RegClassID:
     return 96;
   case AMDGPU::SGPR_128RegClassID:
   case AMDGPU::SReg_128RegClassID:
@@ -1112,14 +1156,24 @@ unsigned getRegBitWidth(unsigned RCID) {
   case AMDGPU::SGPR_160RegClassID:
   case AMDGPU::SReg_160RegClassID:
   case AMDGPU::VReg_160RegClassID:
+  case AMDGPU::AReg_160RegClassID:
     return 160;
+  case AMDGPU::SGPR_192RegClassID:
+  case AMDGPU::SReg_192RegClassID:
+  case AMDGPU::VReg_192RegClassID:
+  case AMDGPU::AReg_192RegClassID:
+    return 192;
+  case AMDGPU::SGPR_256RegClassID:
   case AMDGPU::SReg_256RegClassID:
   case AMDGPU::VReg_256RegClassID:
+  case AMDGPU::AReg_256RegClassID:
     return 256;
+  case AMDGPU::SGPR_512RegClassID:
   case AMDGPU::SReg_512RegClassID:
   case AMDGPU::VReg_512RegClassID:
   case AMDGPU::AReg_512RegClassID:
     return 512;
+  case AMDGPU::SGPR_1024RegClassID:
   case AMDGPU::SReg_1024RegClassID:
   case AMDGPU::VReg_1024RegClassID:
   case AMDGPU::AReg_1024RegClassID:
@@ -1141,7 +1195,7 @@ unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
 }

 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
-  if (Literal >= -16 && Literal <= 64)
+  if (isInlinableIntLiteral(Literal))
     return true;

   uint64_t Val = static_cast<uint64_t>(Literal);
@@ -1158,7 +1212,7 @@ bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
 }

 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
-  if (Literal >= -16 && Literal <= 64)
+  if (isInlinableIntLiteral(Literal))
     return true;

   // The actual type of the operand does not seem to matter as long
@@ -1187,7 +1241,7 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
   if (!HasInv2Pi)
     return false;

-  if (Literal >= -16 && Literal <= 64)
+  if (isInlinableIntLiteral(Literal))
     return true;

   uint16_t Val = static_cast<uint16_t>(Literal);
@@ -1217,6 +1271,17 @@ bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
   return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
 }

+bool isInlinableIntLiteralV216(int32_t Literal) {
+  int16_t Lo16 = static_cast<int16_t>(Literal);
+  if (isInt<16>(Literal) || isUInt<16>(Literal))
+    return isInlinableIntLiteral(Lo16);
+
+  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
+  if (!(Literal & 0xffff))
+    return isInlinableIntLiteral(Hi16);
+  return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
+}
+
 bool isArgPassedInSGPR(const Argument *A) {
   const Function *F = A->getParent();

@@ -1247,16 +1312,61 @@ static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
   return isGCN3Encoding(ST) || isGFX10(ST);
 }

-int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
+static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
+  return isGFX9(ST) || isGFX10(ST);
+}
+
+bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
+                                      int64_t EncodedOffset) {
+  return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
+                               : isUInt<8>(EncodedOffset);
+}
+
+bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
+                                    int64_t EncodedOffset,
+                                    bool IsBuffer) {
+  return !IsBuffer &&
+         hasSMRDSignedImmOffset(ST) &&
+         isInt<21>(EncodedOffset);
+}
+
+static bool isDwordAligned(uint64_t ByteOffset) {
+  return (ByteOffset & 3) == 0;
+}
+
+uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
+                                uint64_t ByteOffset) {
   if (hasSMEMByteOffset(ST))
     return ByteOffset;
+
+  assert(isDwordAligned(ByteOffset));
   return ByteOffset >> 2;
 }

-bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
-  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
-  return (hasSMEMByteOffset(ST)) ?
-    isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
+Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
+                                       int64_t ByteOffset, bool IsBuffer) {
+  // The signed version is always a byte offset.
+  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
+    assert(hasSMEMByteOffset(ST));
+    return isInt<20>(ByteOffset) ? Optional<int64_t>(ByteOffset) : None;
+  }
+
+  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
+    return None;
+
+  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
+  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
+             ? Optional<int64_t>(EncodedOffset)
+             : None;
+}
+
+Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
+                                                int64_t ByteOffset) {
+  if (!isCI(ST) || !isDwordAligned(ByteOffset))
+    return None;
+
+  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
+  return isUInt<32>(EncodedOffset) ? Optional<int64_t>(EncodedOffset) : None;
 }

 // Given Imm, split it into the values to put into the SOffset and ImmOffset
@@ -1267,8 +1377,8 @@ bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
 // aligned if they are aligned to begin with. It also ensures that additional
 // offsets within the given alignment can be added to the resulting ImmOffset.
 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
-                      const GCNSubtarget *Subtarget, uint32_t Align) {
-  const uint32_t MaxImm = alignDown(4095, Align);
+                      const GCNSubtarget *Subtarget, Align Alignment) {
+  const uint32_t MaxImm = alignDown(4095, Alignment.value());
   uint32_t Overflow = 0;

   if (Imm > MaxImm) {
@@ -1286,10 +1396,10 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
       //
       // Atomic operations fail to work correctly when individual address
       // components are unaligned, even if their sum is aligned.
-      uint32_t High = (Imm + Align) & ~4095;
-      uint32_t Low = (Imm + Align) & 4095;
+      uint32_t High = (Imm + Alignment.value()) & ~4095;
+      uint32_t Low = (Imm + Alignment.value()) & 4095;
       Imm = Low;
-      Overflow = High - Align;
+      Overflow = High - Alignment.value();
     }
   }

@@ -1305,8 +1415,7 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
   return true;
 }

-SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
-                                               const GCNSubtarget &ST) {
+SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
   *this = getDefaultForCallingConv(F.getCallingConv());

   StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
@@ -1318,8 +1427,25 @@ SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
   if (!DX10ClampAttr.empty())
     DX10Clamp = DX10ClampAttr == "true";

-  FP32Denormals = ST.hasFP32Denormals(F);
-  FP64FP16Denormals = ST.hasFP64FP16Denormals(F);
+  StringRef DenormF32Attr = F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
+  if (!DenormF32Attr.empty()) {
+    DenormalMode DenormMode = parseDenormalFPAttribute(DenormF32Attr);
+    FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+    FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+  }
+
+  StringRef DenormAttr = F.getFnAttribute("denormal-fp-math").getValueAsString();
+  if (!DenormAttr.empty()) {
+    DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);
+
+    if (DenormF32Attr.empty()) {
+      FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+      FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+    }
+
+    FP64FP16InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+    FP64FP16OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+  }
 }

 namespace {
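A note on the occupancy hunks above: getWavesPerWorkGroup() and the renamed getWavesPerEUForWorkGroup() are now expressed with divideCeil(), and getEUsPerCU() returns 2 for gfx10 in CU mode. The short sketch below only works through that arithmetic for an assumed wave64 target with a flat workgroup size of 256; the example function and the chosen numbers are illustrative, not part of the patch.

#include <cassert>
#include "llvm/Support/MathExtras.h"

// Hypothetical illustration of the arithmetic performed by the rewritten
// helpers, assuming wave64 and FlatWorkGroupSize = 256.
inline void wavesPerEUExample() {
  unsigned WavesPerWG = llvm::divideCeil(256, 64); // getWavesPerWorkGroup -> 4
  assert(WavesPerWG == 4);
  // gfx10 in CU mode: getEUsPerCU() now returns 2 (two SIMDs share the CU).
  assert(llvm::divideCeil(WavesPerWG, 2) == 2);    // getWavesPerEUForWorkGroup
  // Pre-gfx10, or gfx10 in WGP mode: four SIMDs share the workgroup.
  assert(llvm::divideCeil(WavesPerWG, 4) == 1);
}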
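Similarly, the SMRD hunks replace the old int64_t getSMRDEncodedOffset() / isLegalSMRDImmOffset() pair with helpers that return Optional<int64_t> and fold the legality check and the byte-to-dword unit conversion into one call. A minimal caller-side sketch of the new pattern follows; the selectSMRDImmOffset() wrapper and its parameters are hypothetical and only show how the Optional result might be consumed.

#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/MC/MCSubtargetInfo.h"

using namespace llvm;

// Hypothetical caller, not part of this patch: the wrapper name and signature
// are illustrative. AMDGPU::getSMRDEncodedOffset() is the helper introduced
// in the diff above.
static bool selectSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset,
                                bool IsBuffer, int64_t &EncodedOffset) {
  // getSMRDEncodedOffset() returns None when the offset is misaligned or out
  // of range for the target's SMEM encoding, so no separate
  // isLegalSMRDImmOffset() call is needed.
  if (Optional<int64_t> Encoded =
          AMDGPU::getSMRDEncodedOffset(ST, ByteOffset, IsBuffer)) {
    EncodedOffset = *Encoded;
    return true;
  }
  return false;
}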