Diffstat (limited to 'llvm/lib/Target/AMDGPU/Utils')
 llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp    |   6
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    | 204
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h      | 153
 llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp | 114
 llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h   |   6
5 files changed, 364 insertions, 119 deletions
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index 075e08986c0c0..5819a621f55d6 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -78,7 +78,11 @@ const char* const IdSymbolic[] = {
   "HW_REG_XNACK_MASK",
   nullptr, // HW_ID1, no predictable values
   nullptr, // HW_ID2, no predictable values
-  "HW_REG_POPS_PACKER"
+  "HW_REG_POPS_PACKER",
+  nullptr,
+  nullptr,
+  nullptr,
+  "HW_REG_SHADER_CYCLES"
 };
 
 } // namespace Hwreg
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 5271bc3aacc65..00e6d517bde58 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -108,6 +108,7 @@ namespace AMDGPU {
 #define GET_MIMGInfoTable_IMPL
 #define GET_MIMGLZMappingTable_IMPL
 #define GET_MIMGMIPMappingTable_IMPL
+#define GET_MIMGG16MappingTable_IMPL
 #include "AMDGPUGenSearchableTables.inc"
 
 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
@@ -148,10 +149,17 @@ struct MTBUFInfo {
   bool has_soffset;
 };
 
+struct SMInfo {
+  uint16_t Opcode;
+  bool IsBuffer;
+};
+
 #define GET_MTBUFInfoTable_DECL
 #define GET_MTBUFInfoTable_IMPL
 #define GET_MUBUFInfoTable_DECL
 #define GET_MUBUFInfoTable_IMPL
+#define GET_SMInfoTable_DECL
+#define GET_SMInfoTable_IMPL
 #include "AMDGPUGenSearchableTables.inc"
 
 int getMTBUFBaseOpcode(unsigned Opc) {
@@ -214,6 +222,11 @@ bool getMUBUFHasSoffset(unsigned Opc) {
   return Info ? Info->has_soffset : false;
 }
 
+bool getSMEMIsBuffer(unsigned Opc) {
+  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
+  return Info ? Info->IsBuffer : false;
+}
+
 // Wrapper for Tablegen'd function. enum Subtarget is not defined in any
 // header files, so we need to wrap it in a function that takes unsigned
 // instead.
@@ -268,6 +281,13 @@ unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
 }
 
 unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
+  // "Per CU" really means "per whatever functional block the waves of a
+  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
+  // two SIMDs.
+  if (isGFX10(*STI) && STI->getFeatureBits().test(FeatureCuMode))
+    return 2;
+  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
+  // two CUs, so a total of four SIMDs.
   return 4;
 }
 
@@ -283,15 +303,6 @@ unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
   return std::min(N, 16u);
 }
 
-unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
-  return getMaxWavesPerEU(STI) * getEUsPerCU(STI);
-}
-
-unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
-                          unsigned FlatWorkGroupSize) {
-  return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
-}
-
 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
   return 1;
 }
@@ -300,13 +311,13 @@ unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
   // FIXME: Need to take scratch memory into account.
   if (!isGFX10(*STI))
     return 10;
-  return 20;
+  return hasGFX10_3Insts(*STI) ? 16 : 20;
 }
 
-unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
-                          unsigned FlatWorkGroupSize) {
-  return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
-                 getEUsPerCU(STI)) / getEUsPerCU(STI);
+unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
+                                   unsigned FlatWorkGroupSize) {
+  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
+                    getEUsPerCU(STI));
 }
 
 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
@@ -320,8 +331,7 @@ unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
 
 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
-  return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
-         getWavefrontSize(STI);
+  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
 }
 
 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
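Aside: the occupancy helpers above now round up with divideCeil instead of the alignTo/divide dance. A minimal standalone sketch of that rounding, with hypothetical inputs (the constants are example values, not from the patch; divideCeil mirrors llvm::divideCeil):

    // Standalone illustration of the wave-occupancy rounding above.
    #include <cstdio>

    static unsigned divideCeil(unsigned Numerator, unsigned Denominator) {
      return (Numerator + Denominator - 1) / Denominator;
    }

    int main() {
      unsigned WavefrontSize = 32;     // wave32 on gfx10
      unsigned EUsPerCU = 2;           // gfx10 in CU mode, per getEUsPerCU above
      unsigned FlatWorkGroupSize = 96; // hypothetical workgroup size

      // getWavesPerWorkGroup: ceil(96 / 32) == 3 waves per workgroup.
      unsigned WavesPerWG = divideCeil(FlatWorkGroupSize, WavefrontSize);
      // getWavesPerEUForWorkGroup: ceil(3 / 2) == 2 waves per EU.
      unsigned WavesPerEU = divideCeil(WavesPerWG, EUsPerCU);
      printf("%u waves/workgroup, %u waves/EU\n", WavesPerWG, WavesPerEU);
      return 0;
    }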
@@ -431,12 +441,21 @@ unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
   bool IsWave32 = EnableWavefrontSize32 ?
       *EnableWavefrontSize32 :
       STI->getFeatureBits().test(FeatureWavefrontSize32);
+
+  if (hasGFX10_3Insts(*STI))
+    return IsWave32 ? 16 : 8;
+
   return IsWave32 ? 8 : 4;
 }
 
 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                 Optional<bool> EnableWavefrontSize32) {
-  return getVGPRAllocGranule(STI, EnableWavefrontSize32);
+  bool IsWave32 = EnableWavefrontSize32 ?
+      *EnableWavefrontSize32 :
+      STI->getFeatureBits().test(FeatureWavefrontSize32);
+
+  return IsWave32 ? 8 : 4;
 }
 
 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
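Note the split above: on gfx10.3 the allocation granule becomes 16 (wave32) or 8 (wave64) while the encoding granule stays 8/4. A minimal sketch of what an allocation granule means in practice, with hypothetical register counts (alignTo mirrors llvm::alignTo; nothing here is from the patch itself):

    // Round a VGPR demand up to the allocation granule.
    #include <cstdio>

    static unsigned alignTo(unsigned Value, unsigned Align) {
      return (Value + Align - 1) / Align * Align;
    }

    int main() {
      unsigned AllocGranule = 16; // gfx10.3 wave32, per the change above
      unsigned NumVGPRs = 37;     // hypothetical demand from a shader
      // The hardware hands out VGPRs in granule-sized blocks: 37 -> 48.
      printf("allocated VGPRs: %u\n", alignTo(NumVGPRs, AllocGranule));
      return 0;
    }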
@@ -559,7 +578,7 @@ bool isReadOnlySegment(const GlobalValue *GV) {
 }
 
 bool shouldEmitConstantsToTextSection(const Triple &TT) {
-  return TT.getOS() == Triple::AMDPAL;
+  return TT.getOS() == Triple::AMDPAL || TT.getArch() == Triple::r600;
 }
 
 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
@@ -722,13 +741,16 @@ static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
     return ID_SYMBOLIC_FIRST_GFX9_;
   else if (isGFX9(STI))
     return ID_SYMBOLIC_FIRST_GFX10_;
+  else if (isGFX10(STI) && !isGFX10_BEncoding(STI))
+    return ID_SYMBOLIC_FIRST_GFX1030_;
   else
     return ID_SYMBOLIC_LAST_;
 }
 
 bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
-  return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
-         IdSymbolic[Id];
+  return
+    ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
+    IdSymbolic[Id] && (Id != ID_XNACK_MASK || !AMDGPU::isGFX10_BEncoding(STI));
 }
 
 bool isValidHwreg(int64_t Id) {
@@ -927,7 +949,15 @@ bool hasSRAMECC(const MCSubtargetInfo &STI) {
 }
 
 bool hasMIMG_R128(const MCSubtargetInfo &STI) {
-  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
+  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] && !STI.getFeatureBits()[AMDGPU::FeatureR128A16];
+}
+
+bool hasGFX10A16(const MCSubtargetInfo &STI) {
+  return STI.getFeatureBits()[AMDGPU::FeatureGFX10A16];
+}
+
+bool hasG16(const MCSubtargetInfo &STI) {
+  return STI.getFeatureBits()[AMDGPU::FeatureG16];
 }
 
 bool hasPackedD16(const MCSubtargetInfo &STI) {
@@ -958,9 +988,17 @@ bool isGCN3Encoding(const MCSubtargetInfo &STI) {
   return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
 }
 
+bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
+  return STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding];
+}
+
+bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
+  return STI.getFeatureBits()[AMDGPU::FeatureGFX10_3Insts];
+}
+
 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
   const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
-  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
+  const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
   return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
     Reg == AMDGPU::SCC;
 }
@@ -1082,6 +1120,11 @@ bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
 // (move from MC* level to Target* level). Return size in bits.
 unsigned getRegBitWidth(unsigned RCID) {
   switch (RCID) {
+  case AMDGPU::VGPR_LO16RegClassID:
+  case AMDGPU::VGPR_HI16RegClassID:
+  case AMDGPU::SGPR_LO16RegClassID:
+  case AMDGPU::AGPR_LO16RegClassID:
+    return 16;
   case AMDGPU::SGPR_32RegClassID:
   case AMDGPU::VGPR_32RegClassID:
   case AMDGPU::VRegOrLds_32RegClassID:
@@ -1103,6 +1146,7 @@ unsigned getRegBitWidth(unsigned RCID) {
   case AMDGPU::SGPR_96RegClassID:
   case AMDGPU::SReg_96RegClassID:
   case AMDGPU::VReg_96RegClassID:
+  case AMDGPU::AReg_96RegClassID:
     return 96;
   case AMDGPU::SGPR_128RegClassID:
   case AMDGPU::SReg_128RegClassID:
@@ -1112,14 +1156,24 @@ unsigned getRegBitWidth(unsigned RCID) {
   case AMDGPU::SGPR_160RegClassID:
   case AMDGPU::SReg_160RegClassID:
   case AMDGPU::VReg_160RegClassID:
+  case AMDGPU::AReg_160RegClassID:
     return 160;
+  case AMDGPU::SGPR_192RegClassID:
+  case AMDGPU::SReg_192RegClassID:
+  case AMDGPU::VReg_192RegClassID:
+  case AMDGPU::AReg_192RegClassID:
+    return 192;
+  case AMDGPU::SGPR_256RegClassID:
   case AMDGPU::SReg_256RegClassID:
   case AMDGPU::VReg_256RegClassID:
+  case AMDGPU::AReg_256RegClassID:
     return 256;
+  case AMDGPU::SGPR_512RegClassID:
   case AMDGPU::SReg_512RegClassID:
   case AMDGPU::VReg_512RegClassID:
   case AMDGPU::AReg_512RegClassID:
     return 512;
+  case AMDGPU::SGPR_1024RegClassID:
   case AMDGPU::SReg_1024RegClassID:
   case AMDGPU::VReg_1024RegClassID:
   case AMDGPU::AReg_1024RegClassID:
@@ -1141,7 +1195,7 @@ unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
 }
 
 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
-  if (Literal >= -16 && Literal <= 64)
+  if (isInlinableIntLiteral(Literal))
     return true;
 
   uint64_t Val = static_cast<uint64_t>(Literal);
@@ -1158,7 +1212,7 @@ bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
 }
 
 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
-  if (Literal >= -16 && Literal <= 64)
+  if (isInlinableIntLiteral(Literal))
     return true;
 
   // The actual type of the operand does not seem to matter as long
@@ -1187,7 +1241,7 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
   if (!HasInv2Pi)
     return false;
 
-  if (Literal >= -16 && Literal <= 64)
+  if (isInlinableIntLiteral(Literal))
     return true;
 
   uint16_t Val = static_cast<uint16_t>(Literal);
@@ -1217,6 +1271,17 @@ bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
   return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
 }
 
+bool isInlinableIntLiteralV216(int32_t Literal) {
+  int16_t Lo16 = static_cast<int16_t>(Literal);
+  if (isInt<16>(Literal) || isUInt<16>(Literal))
+    return isInlinableIntLiteral(Lo16);
+
+  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
+  if (!(Literal & 0xffff))
+    return isInlinableIntLiteral(Hi16);
+  return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
+}
+
 bool isArgPassedInSGPR(const Argument *A) {
   const Function *F = A->getParent();
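A standalone restatement of the new v2i16 check with a few example values; the isInt<16>/isUInt<16> templates are expanded into a plain range check, and nothing below is part of the patch itself:

    // Mirrors isInlinableIntLiteralV216 above.
    #include <cstdint>
    #include <cstdio>

    static bool isInlinableIntLiteral(int64_t Literal) {
      return Literal >= -16 && Literal <= 64;
    }

    static bool isInlinableIntLiteralV216(int32_t Literal) {
      int16_t Lo16 = static_cast<int16_t>(Literal);
      if (Literal >= -32768 && Literal <= 65535) // isInt<16> || isUInt<16>
        return isInlinableIntLiteral(Lo16);
      int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
      if (!(Literal & 0xffff)) // low half is zero: only the high half matters
        return isInlinableIntLiteral(Hi16);
      return Lo16 == Hi16 && isInlinableIntLiteral(Lo16); // splat value
    }

    int main() {
      printf("%d\n", isInlinableIntLiteralV216(64));         // 1: fits one half
      printf("%d\n", isInlinableIntLiteralV216(0x00400040)); // 1: splat of 64
      printf("%d\n", isInlinableIntLiteralV216(0x00410040)); // 0: halves differ
      return 0;
    }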
@@ -1247,16 +1312,61 @@ static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
   return isGCN3Encoding(ST) || isGFX10(ST);
 }
 
-int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
+static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
+  return isGFX9(ST) || isGFX10(ST);
+}
+
+bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
+                                      int64_t EncodedOffset) {
+  return hasSMEMByteOffset(ST) ?
+    isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
+}
+
+bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
+                                    int64_t EncodedOffset,
+                                    bool IsBuffer) {
+  return !IsBuffer &&
+         hasSMRDSignedImmOffset(ST) &&
+         isInt<21>(EncodedOffset);
+}
+
+static bool isDwordAligned(uint64_t ByteOffset) {
+  return (ByteOffset & 3) == 0;
+}
+
+uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
+                                uint64_t ByteOffset) {
   if (hasSMEMByteOffset(ST))
     return ByteOffset;
+
+  assert(isDwordAligned(ByteOffset));
   return ByteOffset >> 2;
 }
 
-bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
-  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
-  return (hasSMEMByteOffset(ST)) ?
-    isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
+Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
+                                       int64_t ByteOffset, bool IsBuffer) {
+  // The signed version is always a byte offset.
+  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
+    assert(hasSMEMByteOffset(ST));
+    return isInt<20>(ByteOffset) ? Optional<int64_t>(ByteOffset) : None;
+  }
+
+  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
+    return None;
+
+  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
+  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
+             ? Optional<int64_t>(EncodedOffset)
+             : None;
+}
+
+Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
+                                                int64_t ByteOffset) {
+  if (!isCI(ST) || !isDwordAligned(ByteOffset))
+    return None;
+
+  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
+  return isUInt<32>(EncodedOffset) ? Optional<int64_t>(EncodedOffset) : None;
 }
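A sketch of the byte-to-dword conversion and range check these helpers perform; the subtarget query is reduced to one boolean and the offset is a hypothetical example:

    // Mirrors convertSMRDOffsetUnits plus the unsigned legality check above.
    #include <cstdint>
    #include <cstdio>

    int main() {
      bool HasSMEMByteOffset = false; // e.g. SI: the offset field is in dwords
      int64_t ByteOffset = 1024;      // hypothetical, dword-aligned

      // 1024 bytes -> 256 dwords on subtargets with dword offsets.
      int64_t Encoded = HasSMEMByteOffset ? ByteOffset : ByteOffset >> 2;

      // SI-style isUInt<8> check: 256 does not fit the 8-bit field, so the
      // real helper would return None for this offset.
      bool Fits = Encoded >= 0 && Encoded <= 0xff;
      printf("encoded=%lld fits=%d\n", (long long)Encoded, Fits);
      return 0;
    }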
 // Given Imm, split it into the values to put into the SOffset and ImmOffset
@@ -1267,8 +1377,8 @@ bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
 // aligned if they are aligned to begin with. It also ensures that additional
 // offsets within the given alignment can be added to the resulting ImmOffset.
 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
-                      const GCNSubtarget *Subtarget, uint32_t Align) {
-  const uint32_t MaxImm = alignDown(4095, Align);
+                      const GCNSubtarget *Subtarget, Align Alignment) {
+  const uint32_t MaxImm = alignDown(4095, Alignment.value());
   uint32_t Overflow = 0;
 
   if (Imm > MaxImm) {
@@ -1286,10 +1396,10 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
       //
       // Atomic operations fail to work correctly when individual address
       // components are unaligned, even if their sum is aligned.
-      uint32_t High = (Imm + Align) & ~4095;
-      uint32_t Low = (Imm + Align) & 4095;
+      uint32_t High = (Imm + Alignment.value()) & ~4095;
+      uint32_t Low = (Imm + Alignment.value()) & 4095;
       Imm = Low;
-      Overflow = High - Align;
+      Overflow = High - Alignment.value();
     }
   }
 
@@ -1305,8 +1415,7 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
   return true;
 }
 
-SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
-                                               const GCNSubtarget &ST) {
+SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
   *this = getDefaultForCallingConv(F.getCallingConv());
 
   StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
@@ -1318,8 +1427,25 @@ SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
   if (!DX10ClampAttr.empty())
     DX10Clamp = DX10ClampAttr == "true";
 
-  FP32Denormals = ST.hasFP32Denormals(F);
-  FP64FP16Denormals = ST.hasFP64FP16Denormals(F);
+  StringRef DenormF32Attr = F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
+  if (!DenormF32Attr.empty()) {
+    DenormalMode DenormMode = parseDenormalFPAttribute(DenormF32Attr);
+    FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+    FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+  }
+
+  StringRef DenormAttr = F.getFnAttribute("denormal-fp-math").getValueAsString();
+  if (!DenormAttr.empty()) {
+    DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);
+
+    if (DenormF32Attr.empty()) {
+      FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+      FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+    }
+
+    FP64FP16InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+    FP64FP16OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+  }
 }
 
 namespace {
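The constructor above now derives the mode from the denormal-fp-math / denormal-fp-math-f32 function attributes instead of the subtarget. A sketch of how an attribute string maps onto the split flags, assuming the "<output>,<input>" spelling that parseDenormalFPAttribute accepts; the mini-parser below is illustrative only, not LLVM's:

    // Illustrative mapping from a "denormal-fp-math" attribute string to the
    // split input/output flags.
    #include <cstdio>
    #include <cstring>

    struct DenormFlags { bool InputDenormals, OutputDenormals; };

    static DenormFlags parseDenormAttr(const char *Attr) {
      DenormFlags F;
      const char *Comma = strchr(Attr, ',');
      // "ieee" means denormals are preserved in that direction.
      F.OutputDenormals = strncmp(Attr, "ieee", 4) == 0;
      F.InputDenormals = Comma ? strcmp(Comma + 1, "ieee") == 0
                               : F.OutputDenormals;
      return F;
    }

    int main() {
      // "preserve-sign,preserve-sign" flushes both inputs and outputs.
      DenormFlags F = parseDenormAttr("preserve-sign,preserve-sign");
      printf("InputDenormals=%d OutputDenormals=%d\n",
             F.InputDenormals, F.OutputDenormals);
      return 0;
    }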
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index a5bada2890d2c..e71554575f6af 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -12,10 +12,10 @@
 #include "AMDGPU.h"
 #include "AMDKernelCodeT.h"
 #include "SIDefines.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/AMDHSAKernelDescriptor.h"
+#include "llvm/Support/Alignment.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetParser.h"
@@ -26,17 +26,13 @@
 namespace llvm {
 
 class Argument;
-class AMDGPUSubtarget;
-class FeatureBitset;
 class Function;
 class GCNSubtarget;
 class GlobalValue;
-class MCContext;
 class MCRegisterClass;
 class MCRegisterInfo;
-class MCSection;
 class MCSubtargetInfo;
-class MachineMemOperand;
+class StringRef;
 class Triple;
 
 namespace AMDGPU {
@@ -87,15 +83,6 @@ unsigned getEUsPerCU(const MCSubtargetInfo *STI);
 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                                unsigned FlatWorkGroupSize);
 
-/// \returns Maximum number of waves per compute unit for given subtarget \p
-/// STI without any kind of limitation.
-unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
-
-/// \returns Maximum number of waves per compute unit for given subtarget \p
-/// STI and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
-                          unsigned FlatWorkGroupSize);
-
 /// \returns Minimum number of waves per execution unit for given subtarget \p
 /// STI.
 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
@@ -104,10 +91,10 @@
 /// STI without any kind of limitation.
 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
 
-/// \returns Maximum number of waves per execution unit for given subtarget \p
-/// STI and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
-                          unsigned FlatWorkGroupSize);
+/// \returns Number of waves per execution unit required to support the given
+/// \p FlatWorkGroupSize.
+unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
+                                   unsigned FlatWorkGroupSize);
 
 /// \returns Minimum flat work group size for given subtarget \p STI.
 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
@@ -116,7 +103,7 @@
 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
 
 /// \returns Number of waves per work group for given subtarget \p STI and
-/// limited by given \p FlatWorkGroupSize.
+/// \p FlatWorkGroupSize.
 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);
 
@@ -211,6 +198,7 @@ struct MIMGBaseOpcodeInfo {
   uint8_t NumExtraArgs;
 
   bool Gradients;
+  bool G16;
   bool Coordinates;
   bool LodOrClampOrMip;
   bool HasD16;
@@ -247,11 +235,19 @@ struct MIMGMIPMappingInfo {
   MIMGBaseOpcode NONMIP;
 };
 
+struct MIMGG16MappingInfo {
+  MIMGBaseOpcode G;
+  MIMGBaseOpcode G16;
+};
+
 LLVM_READONLY
 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
 
 LLVM_READONLY
-const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned L);
+const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
+
+LLVM_READONLY
+const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
 
 LLVM_READONLY
 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
@@ -308,6 +304,9 @@ LLVM_READONLY
 bool getMUBUFHasSoffset(unsigned Opc);
 
 LLVM_READONLY
+bool getSMEMIsBuffer(unsigned Opc);
+
+LLVM_READONLY
 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                   uint8_t NumComponents,
                                                   uint8_t NumFormat,
@@ -551,6 +550,8 @@ inline bool isKernel(CallingConv::ID CC) {
 bool hasXNACK(const MCSubtargetInfo &STI);
 bool hasSRAMECC(const MCSubtargetInfo &STI);
 bool hasMIMG_R128(const MCSubtargetInfo &STI);
+bool hasGFX10A16(const MCSubtargetInfo &STI);
+bool hasG16(const MCSubtargetInfo &STI);
 bool hasPackedD16(const MCSubtargetInfo &STI);
 
 bool isSI(const MCSubtargetInfo &STI);
@@ -558,6 +559,9 @@ bool isCI(const MCSubtargetInfo &STI);
 bool isVI(const MCSubtargetInfo &STI);
 bool isGFX9(const MCSubtargetInfo &STI);
 bool isGFX10(const MCSubtargetInfo &STI);
+bool isGCN3Encoding(const MCSubtargetInfo &STI);
+bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
+bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
 
 /// Is Reg - scalar register
 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
@@ -633,6 +637,13 @@ inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
   return getOperandSize(Desc.OpInfo[OpNo]);
 }
 
+/// Is this literal inlinable, and not one of the values intended for floating
+/// point values.
+LLVM_READNONE
+inline bool isInlinableIntLiteral(int64_t Literal) {
+  return Literal >= -16 && Literal <= 64;
+}
+
 /// Is this literal inlinable
 LLVM_READNONE
 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
@@ -646,11 +657,35 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
 LLVM_READNONE
 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
 
+LLVM_READNONE
+bool isInlinableIntLiteralV216(int32_t Literal);
+
 bool isArgPassedInSGPR(const Argument *Arg);
 
-/// \returns The encoding that will be used for \p ByteOffset in the SMRD
-/// offset field.
-int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+LLVM_READONLY
+bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
+                                      int64_t EncodedOffset);
+
+LLVM_READONLY
+bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
+                                    int64_t EncodedOffset,
+                                    bool IsBuffer);
+
+/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
+/// offsets.
+uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
+
+/// \returns The encoding that will be used for \p ByteOffset in the
+/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
+/// S_LOAD instructions have a signed offset, on other subtargets it is
+/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
+Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
+                                       int64_t ByteOffset, bool IsBuffer);
+
+/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
+/// instruction. This is only useful on CI.
+Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
+                                                int64_t ByteOffset);
 
 /// \returns true if this offset is small enough to fit in the SMRD
 /// offset field. \p ByteOffset should be the offset in bytes and
@@ -658,7 +693,8 @@ int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
 
 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
-                      const GCNSubtarget *Subtarget, uint32_t Align = 4);
+                      const GCNSubtarget *Subtarget,
+                      Align Alignment = Align(4));
 
 /// \returns true if the intrinsic is divergent
 bool isIntrinsicSourceOfDivergence(unsigned IntrID);
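The declaration above now takes an Align defaulting to Align(4). A standalone walk-through of the split for one hypothetical input, mirroring the splitMUBUFOffset implementation earlier in this diff (the subtarget-specific SOffset restrictions are omitted):

    // Mirrors the splitMUBUFOffset arithmetic from the .cpp hunk above for a
    // single hypothetical input.
    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint32_t Align = 4;                     // Align(4), the default
      const uint32_t MaxImm = 4095 / Align * Align; // alignDown(4095, 4) == 4092
      uint32_t Imm = 5000;                          // hypothetical total offset
      uint32_t Overflow = 0;

      if (Imm > MaxImm) {
        if (Imm <= MaxImm + 64) {
          // A small spill fits an SOffset inline constant.
          Overflow = Imm - MaxImm;
          Imm = MaxImm;
        } else {
          // Spill a large, reusable value into SOffset; see the comments in
          // the implementation hunk above.
          uint32_t High = (Imm + Align) & ~4095u;
          uint32_t Low = (Imm + Align) & 4095u;
          Imm = Low;
          Overflow = High - Align;
        }
      }
      // 5000 -> ImmOffset 908, SOffset 4092 (908 + 4092 == 5000).
      printf("ImmOffset=%u SOffset=%u\n", Imm, Overflow);
      return 0;
    }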
@@ -677,45 +713,76 @@ struct SIModeRegisterDefaults {
 
   /// If this is set, neither input nor output denormals are flushed for most
   /// f32 instructions.
-  ///
-  /// TODO: Split into separate input and output fields if necessary like the
-  /// control bits really provide?
-  bool FP32Denormals : 1;
+  bool FP32InputDenormals : 1;
+  bool FP32OutputDenormals : 1;
 
   /// If this is set, neither input nor output denormals are flushed for both
   /// f64 and f16/v2f16 instructions.
-  bool FP64FP16Denormals : 1;
+  bool FP64FP16InputDenormals : 1;
+  bool FP64FP16OutputDenormals : 1;
 
   SIModeRegisterDefaults() :
     IEEE(true),
     DX10Clamp(true),
-    FP32Denormals(true),
-    FP64FP16Denormals(true) {}
+    FP32InputDenormals(true),
+    FP32OutputDenormals(true),
+    FP64FP16InputDenormals(true),
+    FP64FP16OutputDenormals(true) {}
 
-  // FIXME: Should not depend on the subtarget
-  SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);
+  SIModeRegisterDefaults(const Function &F);
 
   static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
     const bool IsCompute = AMDGPU::isCompute(CC);
 
     SIModeRegisterDefaults Mode;
-    Mode.DX10Clamp = true;
     Mode.IEEE = IsCompute;
-    Mode.FP32Denormals = false; // FIXME: Should be on by default.
-    Mode.FP64FP16Denormals = true;
     return Mode;
   }
 
   bool operator ==(const SIModeRegisterDefaults Other) const {
     return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
-           FP32Denormals == Other.FP32Denormals &&
-           FP64FP16Denormals == Other.FP64FP16Denormals;
+           FP32InputDenormals == Other.FP32InputDenormals &&
+           FP32OutputDenormals == Other.FP32OutputDenormals &&
+           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
+           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
+  }
+
+  bool allFP32Denormals() const {
+    return FP32InputDenormals && FP32OutputDenormals;
+  }
+
+  bool allFP64FP16Denormals() const {
+    return FP64FP16InputDenormals && FP64FP16OutputDenormals;
+  }
+
+  /// Get the encoding value for the FP_DENORM bits of the mode register for
+  /// the FP32 denormal mode.
+  uint32_t fpDenormModeSPValue() const {
+    if (FP32InputDenormals && FP32OutputDenormals)
+      return FP_DENORM_FLUSH_NONE;
+    if (FP32InputDenormals)
+      return FP_DENORM_FLUSH_OUT;
+    if (FP32OutputDenormals)
+      return FP_DENORM_FLUSH_IN;
+    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
+  }
+
+  /// Get the encoding value for the FP_DENORM bits of the mode register for
+  /// the FP64/FP16 denormal mode.
+  uint32_t fpDenormModeDPValue() const {
+    if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
+      return FP_DENORM_FLUSH_NONE;
+    if (FP64FP16InputDenormals)
+      return FP_DENORM_FLUSH_OUT;
+    if (FP64FP16OutputDenormals)
+      return FP_DENORM_FLUSH_IN;
+    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
   }
 
   /// Returns true if a flag is compatible: it may be enabled in the callee
   /// but disabled in the caller.
   static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
-    return CallerMode == CalleeMode || (CallerMode && !CalleeMode);
+    return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
   }
 
   // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
@@ -727,8 +794,10 @@ struct SIModeRegisterDefaults {
       return false;
 
     // Allow inlining denormals enabled into denormals flushed functions.
-    return oneWayCompatible(FP64FP16Denormals, CalleeMode.FP64FP16Denormals) &&
-           oneWayCompatible(FP32Denormals, CalleeMode.FP32Denormals);
+    return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) &&
+           oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) &&
+           oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) &&
+           oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals);
   }
 };
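The fpDenormModeSPValue/fpDenormModeDPValue helpers above collapse each input/output flag pair into the two-bit FP_DENORM field of the MODE register. A small truth-table program; the enumerator values mirror SIDefines.h and should be treated as illustrative:

    // Truth table for the FP_DENORM encoding computed above.
    #include <cstdio>

    enum {
      FP_DENORM_FLUSH_IN_FLUSH_OUT = 0, // flush inputs and outputs
      FP_DENORM_FLUSH_OUT = 1,          // keep inputs, flush outputs
      FP_DENORM_FLUSH_IN = 2,           // flush inputs, keep outputs
      FP_DENORM_FLUSH_NONE = 3          // keep everything
    };

    static unsigned fpDenormModeValue(bool In, bool Out) {
      if (In && Out) return FP_DENORM_FLUSH_NONE;
      if (In) return FP_DENORM_FLUSH_OUT;
      if (Out) return FP_DENORM_FLUSH_IN;
      return FP_DENORM_FLUSH_IN_FLUSH_OUT;
    }

    int main() {
      for (int In = 0; In < 2; ++In)
        for (int Out = 0; Out < 2; ++Out)
          printf("InputDenormals=%d OutputDenormals=%d -> FP_DENORM=%u\n",
                 In, Out, fpDenormModeValue(In, Out));
      return 0;
    }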
"SPI_SHADER_USER_DATA_LS_25"}, - {0x2d26, "SPI_SHADER_USER_DATA_LS_26"}, - {0x2d27, "SPI_SHADER_USER_DATA_LS_27"}, - {0x2d28, "SPI_SHADER_USER_DATA_LS_28"}, - {0x2d29, "SPI_SHADER_USER_DATA_LS_29"}, - {0x2d2a, "SPI_SHADER_USER_DATA_LS_30"}, - {0x2d2b, "SPI_SHADER_USER_DATA_LS_31"}, + {0x2d0c, "SPI_SHADER_USER_DATA_HS_0"}, + {0x2d0d, "SPI_SHADER_USER_DATA_HS_1"}, + {0x2d0e, "SPI_SHADER_USER_DATA_HS_2"}, + {0x2d0f, "SPI_SHADER_USER_DATA_HS_3"}, + {0x2d10, "SPI_SHADER_USER_DATA_HS_4"}, + {0x2d11, "SPI_SHADER_USER_DATA_HS_5"}, + {0x2d12, "SPI_SHADER_USER_DATA_HS_6"}, + {0x2d13, "SPI_SHADER_USER_DATA_HS_7"}, + {0x2d14, "SPI_SHADER_USER_DATA_HS_8"}, + {0x2d15, "SPI_SHADER_USER_DATA_HS_9"}, + {0x2d16, "SPI_SHADER_USER_DATA_HS_10"}, + {0x2d17, "SPI_SHADER_USER_DATA_HS_11"}, + {0x2d18, "SPI_SHADER_USER_DATA_HS_12"}, + {0x2d19, "SPI_SHADER_USER_DATA_HS_13"}, + {0x2d1a, "SPI_SHADER_USER_DATA_HS_14"}, + {0x2d1b, "SPI_SHADER_USER_DATA_HS_15"}, + {0x2d1c, "SPI_SHADER_USER_DATA_HS_16"}, + {0x2d1d, "SPI_SHADER_USER_DATA_HS_17"}, + {0x2d1e, "SPI_SHADER_USER_DATA_HS_18"}, + {0x2d1f, "SPI_SHADER_USER_DATA_HS_19"}, + {0x2d20, "SPI_SHADER_USER_DATA_HS_20"}, + {0x2d21, "SPI_SHADER_USER_DATA_HS_21"}, + {0x2d22, "SPI_SHADER_USER_DATA_HS_22"}, + {0x2d23, "SPI_SHADER_USER_DATA_HS_23"}, + {0x2d24, "SPI_SHADER_USER_DATA_HS_24"}, + {0x2d25, "SPI_SHADER_USER_DATA_HS_25"}, + {0x2d26, "SPI_SHADER_USER_DATA_HS_26"}, + {0x2d27, "SPI_SHADER_USER_DATA_HS_27"}, + {0x2d28, "SPI_SHADER_USER_DATA_HS_28"}, + {0x2d29, "SPI_SHADER_USER_DATA_HS_29"}, + {0x2d2a, "SPI_SHADER_USER_DATA_HS_30"}, + {0x2d2b, "SPI_SHADER_USER_DATA_HS_31"}, + + {0x2d4c, "SPI_SHADER_USER_DATA_LS_0"}, + {0x2d4d, "SPI_SHADER_USER_DATA_LS_1"}, + {0x2d4e, "SPI_SHADER_USER_DATA_LS_2"}, + {0x2d4f, "SPI_SHADER_USER_DATA_LS_3"}, + {0x2d50, "SPI_SHADER_USER_DATA_LS_4"}, + {0x2d51, "SPI_SHADER_USER_DATA_LS_5"}, + {0x2d52, "SPI_SHADER_USER_DATA_LS_6"}, + {0x2d53, "SPI_SHADER_USER_DATA_LS_7"}, + {0x2d54, "SPI_SHADER_USER_DATA_LS_8"}, + {0x2d55, "SPI_SHADER_USER_DATA_LS_9"}, + {0x2d56, "SPI_SHADER_USER_DATA_LS_10"}, + {0x2d57, "SPI_SHADER_USER_DATA_LS_11"}, + {0x2d58, "SPI_SHADER_USER_DATA_LS_12"}, + {0x2d59, "SPI_SHADER_USER_DATA_LS_13"}, + {0x2d5a, "SPI_SHADER_USER_DATA_LS_14"}, + {0x2d5b, "SPI_SHADER_USER_DATA_LS_15"}, {0xa2aa, "IA_MULTI_VGT_PARAM"}, {0xa2a5, "VGT_GS_MAX_PRIMS_PER_SUBGROUP"}, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h index 0f17c157b2062..544ab669d9ae2 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h @@ -14,16 +14,12 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H -#include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/MsgPackDocument.h" -#include <map> namespace llvm { -class AMDGPUTargetStreamer; -class formatted_raw_ostream; -class MCStreamer; class Module; +class StringRef; class AMDGPUPALMetadata { unsigned BlobType = 0; |