Diffstat (limited to 'llvm/lib/Target/AMDGPU/Utils')
 llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp    |   6
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    | 204
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h      | 153
 llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp | 114
 llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h   |   6
 5 files changed, 364 insertions(+), 119 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index 075e08986c0c0..5819a621f55d6 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -78,7 +78,11 @@ const char* const IdSymbolic[] = {
"HW_REG_XNACK_MASK",
nullptr, // HW_ID1, no predictable values
nullptr, // HW_ID2, no predictable values
- "HW_REG_POPS_PACKER"
+ "HW_REG_POPS_PACKER",
+ nullptr,
+ nullptr,
+ nullptr,
+ "HW_REG_SHADER_CYCLES"
};
} // namespace Hwreg
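
The IdSymbolic table above is position-sensitive: it is indexed by the hwreg ID defined in SIDefines.h, so the three nullptr entries reserve the IDs between HW_REG_POPS_PACKER and HW_REG_SHADER_CYCLES. A minimal sketch of how such a parallel table is consumed (the helper name is hypothetical; real callers additionally apply the isValidHwreg checks in AMDGPUBaseInfo.cpp):

  // Hypothetical lookup: a nullptr entry means the ID has no symbolic name
  // and is printed as a raw number instead.
  static const char *lookupHwregName(unsigned Id) {
    return Id < ID_SYMBOLIC_LAST_ ? IdSymbolic[Id] : nullptr;
  }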
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 5271bc3aacc65..00e6d517bde58 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -108,6 +108,7 @@ namespace AMDGPU {
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
+#define GET_MIMGG16MappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
@@ -148,10 +149,17 @@ struct MTBUFInfo {
bool has_soffset;
};
+struct SMInfo {
+ uint16_t Opcode;
+ bool IsBuffer;
+};
+
#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
+#define GET_SMInfoTable_DECL
+#define GET_SMInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
int getMTBUFBaseOpcode(unsigned Opc) {
@@ -214,6 +222,11 @@ bool getMUBUFHasSoffset(unsigned Opc) {
return Info ? Info->has_soffset : false;
}
+bool getSMEMIsBuffer(unsigned Opc) {
+ const SMInfo *Info = getSMEMOpcodeHelper(Opc);
+ return Info ? Info->IsBuffer : false;
+}
+
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
@@ -268,6 +281,13 @@ unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
}
unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
+ // "Per CU" really means "per whatever functional block the waves of a
+ // workgroup must share". For gfx10 in CU mode this is the CU, which contains
+ // two SIMDs.
+ if (isGFX10(*STI) && STI->getFeatureBits().test(FeatureCuMode))
+ return 2;
+ // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
+ // two CUs, so a total of four SIMDs.
return 4;
}
@@ -283,15 +303,6 @@ unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
return std::min(N, 16u);
}
-unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
- return getMaxWavesPerEU(STI) * getEUsPerCU(STI);
-}
-
-unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
- unsigned FlatWorkGroupSize) {
- return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
-}
-
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
return 1;
}
@@ -300,13 +311,13 @@ unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
// FIXME: Need to take scratch memory into account.
if (!isGFX10(*STI))
return 10;
- return 20;
+ return hasGFX10_3Insts(*STI) ? 16 : 20;
}
-unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
- unsigned FlatWorkGroupSize) {
- return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
- getEUsPerCU(STI)) / getEUsPerCU(STI);
+unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
+ unsigned FlatWorkGroupSize) {
+ return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
+ getEUsPerCU(STI));
}
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
@@ -320,8 +331,7 @@ unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
- getWavefrontSize(STI);
+ return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
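
The alignTo(x, n) / n pattern removed above is exactly llvm::divideCeil(x, n) from Support/MathExtras.h, so these rewrites only drop a duplicated division. A worked example with illustrative values (wave64, gfx10 in CU mode, where getEUsPerCU now returns 2):

  // Illustrative: a 300-work-item workgroup does not divide evenly.
  unsigned WavesPerWG = divideCeil(300, 64);       // 5, same as alignTo(300, 64) / 64
  unsigned WavesPerEU = divideCeil(WavesPerWG, 2); // 3 waves on each of the 2 EUs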
@@ -431,12 +441,21 @@ unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
bool IsWave32 = EnableWavefrontSize32 ?
*EnableWavefrontSize32 :
STI->getFeatureBits().test(FeatureWavefrontSize32);
+
+ if (hasGFX10_3Insts(*STI))
+ return IsWave32 ? 16 : 8;
+
return IsWave32 ? 8 : 4;
}
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
Optional<bool> EnableWavefrontSize32) {
- return getVGPRAllocGranule(STI, EnableWavefrontSize32);
+
+ bool IsWave32 = EnableWavefrontSize32 ?
+ *EnableWavefrontSize32 :
+ STI->getFeatureBits().test(FeatureWavefrontSize32);
+
+ return IsWave32 ? 8 : 4;
}
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
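
The gfx10.3 change splits the allocation granule from the encoding granule: VGPRs are now allocated in larger blocks while the register-count field keeps its old units. A hedged sketch of the consequence, assuming the usual blocks-minus-one granulated-count encoding used by the kernel descriptor:

  // Illustrative: wave32 kernel using 70 VGPRs on a gfx10.3 target.
  unsigned NumAllocated = alignTo(70, 16); // 80 VGPRs (alloc granule 16)
  unsigned EncodedCount = 80 / 8 - 1;      // 9 (encoding granule still 8)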
@@ -559,7 +578,7 @@ bool isReadOnlySegment(const GlobalValue *GV) {
}
bool shouldEmitConstantsToTextSection(const Triple &TT) {
- return TT.getOS() == Triple::AMDPAL;
+ return TT.getOS() == Triple::AMDPAL || TT.getArch() == Triple::r600;
}
int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
@@ -722,13 +741,16 @@ static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
return ID_SYMBOLIC_FIRST_GFX9_;
else if (isGFX9(STI))
return ID_SYMBOLIC_FIRST_GFX10_;
+ else if (isGFX10(STI) && !isGFX10_BEncoding(STI))
+ return ID_SYMBOLIC_FIRST_GFX1030_;
else
return ID_SYMBOLIC_LAST_;
}
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
- return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
- IdSymbolic[Id];
+ return
+ ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
+ IdSymbolic[Id] && (Id != ID_XNACK_MASK || !AMDGPU::isGFX10_BEncoding(STI));
}
bool isValidHwreg(int64_t Id) {
@@ -927,7 +949,15 @@ bool hasSRAMECC(const MCSubtargetInfo &STI) {
}
bool hasMIMG_R128(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
+ return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] &&
+        !STI.getFeatureBits()[AMDGPU::FeatureR128A16];
+}
+
+bool hasGFX10A16(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX10A16];
+}
+
+bool hasG16(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureG16];
}
bool hasPackedD16(const MCSubtargetInfo &STI) {
@@ -958,9 +988,17 @@ bool isGCN3Encoding(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}
+bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding];
+}
+
+bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX10_3Insts];
+}
+
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
- const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
+ const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
Reg == AMDGPU::SCC;
}
@@ -1082,6 +1120,11 @@ bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
switch (RCID) {
+ case AMDGPU::VGPR_LO16RegClassID:
+ case AMDGPU::VGPR_HI16RegClassID:
+ case AMDGPU::SGPR_LO16RegClassID:
+ case AMDGPU::AGPR_LO16RegClassID:
+ return 16;
case AMDGPU::SGPR_32RegClassID:
case AMDGPU::VGPR_32RegClassID:
case AMDGPU::VRegOrLds_32RegClassID:
@@ -1103,6 +1146,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::SGPR_96RegClassID:
case AMDGPU::SReg_96RegClassID:
case AMDGPU::VReg_96RegClassID:
+ case AMDGPU::AReg_96RegClassID:
return 96;
case AMDGPU::SGPR_128RegClassID:
case AMDGPU::SReg_128RegClassID:
@@ -1112,14 +1156,24 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::SGPR_160RegClassID:
case AMDGPU::SReg_160RegClassID:
case AMDGPU::VReg_160RegClassID:
+ case AMDGPU::AReg_160RegClassID:
return 160;
+ case AMDGPU::SGPR_192RegClassID:
+ case AMDGPU::SReg_192RegClassID:
+ case AMDGPU::VReg_192RegClassID:
+ case AMDGPU::AReg_192RegClassID:
+ return 192;
+ case AMDGPU::SGPR_256RegClassID:
case AMDGPU::SReg_256RegClassID:
case AMDGPU::VReg_256RegClassID:
+ case AMDGPU::AReg_256RegClassID:
return 256;
+ case AMDGPU::SGPR_512RegClassID:
case AMDGPU::SReg_512RegClassID:
case AMDGPU::VReg_512RegClassID:
case AMDGPU::AReg_512RegClassID:
return 512;
+ case AMDGPU::SGPR_1024RegClassID:
case AMDGPU::SReg_1024RegClassID:
case AMDGPU::VReg_1024RegClassID:
case AMDGPU::AReg_1024RegClassID:
@@ -1141,7 +1195,7 @@ unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
}
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
- if (Literal >= -16 && Literal <= 64)
+ if (isInlinableIntLiteral(Literal))
return true;
uint64_t Val = static_cast<uint64_t>(Literal);
@@ -1158,7 +1212,7 @@ bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
}
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
- if (Literal >= -16 && Literal <= 64)
+ if (isInlinableIntLiteral(Literal))
return true;
// The actual type of the operand does not seem to matter as long
@@ -1187,7 +1241,7 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
if (!HasInv2Pi)
return false;
- if (Literal >= -16 && Literal <= 64)
+ if (isInlinableIntLiteral(Literal))
return true;
uint16_t Val = static_cast<uint16_t>(Literal);
@@ -1217,6 +1271,17 @@ bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}
+bool isInlinableIntLiteralV216(int32_t Literal) {
+ int16_t Lo16 = static_cast<int16_t>(Literal);
+ if (isInt<16>(Literal) || isUInt<16>(Literal))
+ return isInlinableIntLiteral(Lo16);
+
+ int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
+ if (!(Literal & 0xffff))
+ return isInlinableIntLiteral(Hi16);
+ return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
+}
+
bool isArgPassedInSGPR(const Argument *A) {
const Function *F = A->getParent();
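
Worked inputs for the new packed-integer check, one per branch (illustrative values):

  isInlinableIntLiteralV216(64);         // true:  sign-extended Lo16 == 64
  isInlinableIntLiteralV216(0x00400000); // true:  low half zero, Hi16 == 64
  isInlinableIntLiteralV216(0x00400040); // true:  both halves == 64
  isInlinableIntLiteralV216(0x00410040); // false: halves differ (65 vs. 64)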
@@ -1247,16 +1312,61 @@ static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
return isGCN3Encoding(ST) || isGFX10(ST);
}
-int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
+static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
+ return isGFX9(ST) || isGFX10(ST);
+}
+
+bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
+ int64_t EncodedOffset) {
+ return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
+ : isUInt<8>(EncodedOffset);
+}
+
+bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
+ int64_t EncodedOffset,
+ bool IsBuffer) {
+ return !IsBuffer &&
+ hasSMRDSignedImmOffset(ST) &&
+ isInt<21>(EncodedOffset);
+}
+
+static bool isDwordAligned(uint64_t ByteOffset) {
+ return (ByteOffset & 3) == 0;
+}
+
+uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
+ uint64_t ByteOffset) {
if (hasSMEMByteOffset(ST))
return ByteOffset;
+
+ assert(isDwordAligned(ByteOffset));
return ByteOffset >> 2;
}
-bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
- int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
- return (hasSMEMByteOffset(ST)) ?
- isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
+Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
+ int64_t ByteOffset, bool IsBuffer) {
+ // The signed version is always a byte offset.
+ if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
+ assert(hasSMEMByteOffset(ST));
+ return isInt<20>(ByteOffset) ? Optional<int64_t>(ByteOffset) : None;
+ }
+
+ if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
+ return None;
+
+ int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
+ return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
+ ? Optional<int64_t>(EncodedOffset)
+ : None;
+}
+
+Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
+ int64_t ByteOffset) {
+ if (!isCI(ST) || !isDwordAligned(ByteOffset))
+ return None;
+
+ int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
+ return isUInt<32>(EncodedOffset) ? Optional<int64_t>(EncodedOffset) : None;
}
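
A sketch of how a caller would consume the new Optional-returning API; the wrapper function here is hypothetical:

  // Hypothetical caller: use an immediate SMRD offset when it encodes,
  // otherwise fall back (register offset, or the CI 32-bit literal form).
  static bool trySMRDImm(const MCSubtargetInfo &ST, int64_t ByteOffset,
                         bool IsBuffer, int64_t &Out) {
    if (Optional<int64_t> Enc = getSMRDEncodedOffset(ST, ByteOffset, IsBuffer)) {
      Out = *Enc;
      return true;
    }
    return false;
  }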
// Given Imm, split it into the values to put into the SOffset and ImmOffset
@@ -1267,8 +1377,8 @@ bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
// aligned if they are aligned to begin with. It also ensures that additional
// offsets within the given alignment can be added to the resulting ImmOffset.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
- const GCNSubtarget *Subtarget, uint32_t Align) {
- const uint32_t MaxImm = alignDown(4095, Align);
+ const GCNSubtarget *Subtarget, Align Alignment) {
+ const uint32_t MaxImm = alignDown(4095, Alignment.value());
uint32_t Overflow = 0;
if (Imm > MaxImm) {
@@ -1286,10 +1396,10 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
//
// Atomic operations fail to work correctly when individual address
// components are unaligned, even if their sum is aligned.
- uint32_t High = (Imm + Align) & ~4095;
- uint32_t Low = (Imm + Align) & 4095;
+ uint32_t High = (Imm + Alignment.value()) & ~4095;
+ uint32_t Low = (Imm + Alignment.value()) & 4095;
Imm = Low;
- Overflow = High - Align;
+ Overflow = High - Alignment.value();
}
}
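
Tracing the Align-based arithmetic with concrete numbers (illustrative; this skips the small-overflow case and the trailing subtarget checks not shown in the hunk):

  // Imm = 5000, Alignment = Align(4):
  //   MaxImm    = alignDown(4095, 4) = 4092          (5000 > MaxImm)
  //   High      = (5000 + 4) & ~4095 = 4096
  //   Low       = (5000 + 4) &  4095 = 908
  //   ImmOffset = 908, SOffset = 4096 - 4 = 4092     (908 + 4092 == 5000)
  // Both components remain 4-aligned, which is the point of folding the
  // alignment into the split.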
@@ -1305,8 +1415,7 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
return true;
}
-SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
- const GCNSubtarget &ST) {
+SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
*this = getDefaultForCallingConv(F.getCallingConv());
StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
@@ -1318,8 +1427,25 @@ SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
if (!DX10ClampAttr.empty())
DX10Clamp = DX10ClampAttr == "true";
- FP32Denormals = ST.hasFP32Denormals(F);
- FP64FP16Denormals = ST.hasFP64FP16Denormals(F);
+ StringRef DenormF32Attr = F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
+ if (!DenormF32Attr.empty()) {
+ DenormalMode DenormMode = parseDenormalFPAttribute(DenormF32Attr);
+ FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+ FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+ }
+
+ StringRef DenormAttr = F.getFnAttribute("denormal-fp-math").getValueAsString();
+ if (!DenormAttr.empty()) {
+ DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);
+
+ if (DenormF32Attr.empty()) {
+ FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+ FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+ }
+
+ FP64FP16InputDenormals = DenormMode.Input == DenormalMode::IEEE;
+ FP64FP16OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
+ }
}
namespace {
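
With the subtarget parameter gone, the denormal state is now derived purely from LLVM's standard denormal attributes, whose string value is "<output>,<input>" (or a single mode applied to both). A hedged sketch of the resulting flags for one attribute combination:

  // For a function F carrying the (hypothetical) IR attributes
  //   "denormal-fp-math"="ieee,ieee"
  //   "denormal-fp-math-f32"="preserve-sign,preserve-sign"
  SIModeRegisterDefaults Mode(F);
  // f32 flushes in both directions; f64/f16 keeps denormals:
  assert(!Mode.FP32InputDenormals && !Mode.FP32OutputDenormals);
  assert(Mode.FP64FP16InputDenormals && Mode.FP64FP16OutputDenormals);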
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index a5bada2890d2c..e71554575f6af 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -12,10 +12,10 @@
#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "SIDefines.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
@@ -26,17 +26,13 @@
namespace llvm {
class Argument;
-class AMDGPUSubtarget;
-class FeatureBitset;
class Function;
class GCNSubtarget;
class GlobalValue;
-class MCContext;
class MCRegisterClass;
class MCRegisterInfo;
-class MCSection;
class MCSubtargetInfo;
-class MachineMemOperand;
+class StringRef;
class Triple;
namespace AMDGPU {
@@ -87,15 +83,6 @@ unsigned getEUsPerCU(const MCSubtargetInfo *STI);
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
-/// \returns Maximum number of waves per compute unit for given subtarget \p
-/// STI without any kind of limitation.
-unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
-
-/// \returns Maximum number of waves per compute unit for given subtarget \p
-/// STI and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
- unsigned FlatWorkGroupSize);
-
/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
@@ -104,10 +91,10 @@ unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
-/// \returns Maximum number of waves per execution unit for given subtarget \p
-/// STI and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
- unsigned FlatWorkGroupSize);
+/// \returns Number of waves per execution unit required to support the given \p
+/// FlatWorkGroupSize.
+unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
+ unsigned FlatWorkGroupSize);
/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
@@ -116,7 +103,7 @@ unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
/// \returns Number of waves per work group for given subtarget \p STI and
-/// limited by given \p FlatWorkGroupSize.
+/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
@@ -211,6 +198,7 @@ struct MIMGBaseOpcodeInfo {
uint8_t NumExtraArgs;
bool Gradients;
+ bool G16;
bool Coordinates;
bool LodOrClampOrMip;
bool HasD16;
@@ -247,11 +235,19 @@ struct MIMGMIPMappingInfo {
MIMGBaseOpcode NONMIP;
};
+struct MIMGG16MappingInfo {
+ MIMGBaseOpcode G;
+ MIMGBaseOpcode G16;
+};
+
LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
LLVM_READONLY
-const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned L);
+const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
+
+LLVM_READONLY
+const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
@@ -308,6 +304,9 @@ LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);
LLVM_READONLY
+bool getSMEMIsBuffer(unsigned Opc);
+
+LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
uint8_t NumComponents,
uint8_t NumFormat,
@@ -551,6 +550,8 @@ inline bool isKernel(CallingConv::ID CC) {
bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
+bool hasGFX10A16(const MCSubtargetInfo &STI);
+bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);
bool isSI(const MCSubtargetInfo &STI);
@@ -558,6 +559,9 @@ bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
+bool isGCN3Encoding(const MCSubtargetInfo &STI);
+bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
+bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
/// Is \p Reg a scalar register
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
@@ -633,6 +637,13 @@ inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
return getOperandSize(Desc.OpInfo[OpNo]);
}
+/// Is this literal inlinable, and not one of the values intended for floating
+/// point operands.
+LLVM_READNONE
+inline bool isInlinableIntLiteral(int64_t Literal) {
+ return Literal >= -16 && Literal <= 64;
+}
+
/// Is this literal inlinable
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
@@ -646,11 +657,35 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
+LLVM_READNONE
+bool isInlinableIntLiteralV216(int32_t Literal);
+
bool isArgPassedInSGPR(const Argument *Arg);
-/// \returns The encoding that will be used for \p ByteOffset in the SMRD
-/// offset field.
-int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+LLVM_READONLY
+bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
+ int64_t EncodedOffset);
+
+LLVM_READONLY
+bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
+ int64_t EncodedOffset,
+ bool IsBuffer);
+
+/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
+/// offsets.
+uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
+
+/// \returns The encoding that will be used for \p ByteOffset in the
+/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
+/// S_LOAD instructions have a signed offset; on other subtargets it is
+/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
+Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
+ int64_t ByteOffset, bool IsBuffer);
+
+/// \returns The encoding that can be used for a 32-bit literal offset in an
+/// SMRD instruction. This is only useful on CI.
+Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
+ int64_t ByteOffset);
/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
@@ -658,7 +693,8 @@ int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
- const GCNSubtarget *Subtarget, uint32_t Align = 4);
+ const GCNSubtarget *Subtarget,
+ Align Alignment = Align(4));
/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
@@ -677,45 +713,76 @@ struct SIModeRegisterDefaults {
/// If this is set, neither input nor output denormals are flushed for most f32
/// instructions.
- ///
- /// TODO: Split into separate input and output fields if necessary like the
- /// control bits really provide?
- bool FP32Denormals : 1;
+ bool FP32InputDenormals : 1;
+ bool FP32OutputDenormals : 1;
/// If this is set, neither input nor output denormals are flushed for both f64
/// and f16/v2f16 instructions.
- bool FP64FP16Denormals : 1;
+ bool FP64FP16InputDenormals : 1;
+ bool FP64FP16OutputDenormals : 1;
SIModeRegisterDefaults() :
IEEE(true),
DX10Clamp(true),
- FP32Denormals(true),
- FP64FP16Denormals(true) {}
+ FP32InputDenormals(true),
+ FP32OutputDenormals(true),
+ FP64FP16InputDenormals(true),
+ FP64FP16OutputDenormals(true) {}
- // FIXME: Should not depend on the subtarget
- SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);
+ SIModeRegisterDefaults(const Function &F);
static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
const bool IsCompute = AMDGPU::isCompute(CC);
SIModeRegisterDefaults Mode;
- Mode.DX10Clamp = true;
Mode.IEEE = IsCompute;
- Mode.FP32Denormals = false; // FIXME: Should be on by default.
- Mode.FP64FP16Denormals = true;
return Mode;
}
bool operator ==(const SIModeRegisterDefaults Other) const {
return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
- FP32Denormals == Other.FP32Denormals &&
- FP64FP16Denormals == Other.FP64FP16Denormals;
+ FP32InputDenormals == Other.FP32InputDenormals &&
+ FP32OutputDenormals == Other.FP32OutputDenormals &&
+ FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
+ FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
+ }
+
+ bool allFP32Denormals() const {
+ return FP32InputDenormals && FP32OutputDenormals;
+ }
+
+ bool allFP64FP16Denormals() const {
+ return FP64FP16InputDenormals && FP64FP16OutputDenormals;
+ }
+
+ /// Get the encoding value for the FP_DENORM bits of the mode register for the
+ /// FP32 denormal mode.
+ uint32_t fpDenormModeSPValue() const {
+ if (FP32InputDenormals && FP32OutputDenormals)
+ return FP_DENORM_FLUSH_NONE;
+ if (FP32InputDenormals)
+ return FP_DENORM_FLUSH_OUT;
+ if (FP32OutputDenormals)
+ return FP_DENORM_FLUSH_IN;
+ return FP_DENORM_FLUSH_IN_FLUSH_OUT;
+ }
+
+ /// Get the encoding value for the FP_DENORM bits of the mode register for the
+ /// FP64/FP16 denormal mode.
+ uint32_t fpDenormModeDPValue() const {
+ if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
+ return FP_DENORM_FLUSH_NONE;
+ if (FP64FP16InputDenormals)
+ return FP_DENORM_FLUSH_OUT;
+ if (FP64FP16OutputDenormals)
+ return FP_DENORM_FLUSH_IN;
+ return FP_DENORM_FLUSH_IN_FLUSH_OUT;
}
/// Returns true if the two modes match, or if the flag is enabled in the
/// callee but disabled in the caller.
static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
- return CallerMode == CalleeMode || (CallerMode && !CalleeMode);
+ return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
}
// FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
@@ -727,8 +794,10 @@ struct SIModeRegisterDefaults {
return false;
// Allow inlining denormals enabled into denormals flushed functions.
- return oneWayCompatible(FP64FP16Denormals, CalleeMode.FP64FP16Denormals) &&
- oneWayCompatible(FP32Denormals, CalleeMode.FP32Denormals);
+ return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) &&
+ oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) &&
+ oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) &&
+ oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals);
}
};
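
The flipped operand test makes oneWayCompatible match its own doc comment: a flag may be enabled in the callee while disabled in the caller, not the reverse. A hedged sketch, assuming the surrounding (unshown) member function is the usual isInlineCompatible check:

  SIModeRegisterDefaults Caller, Callee; // defaults: everything enabled
  Caller.FP32OutputDenormals = false;    // caller flushes f32 outputs
  // Inlining a denormal-preserving callee into a flushing caller is OK:
  //   Caller.isInlineCompatible(Callee) -> true
  // The reverse direction is rejected:
  //   Callee.isInlineCompatible(Caller) -> false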
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index 207e4232e8298..ef010a7ac1576 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -397,6 +397,39 @@ static const char *getRegisterName(unsigned RegNum) {
{0x2c6a, "SPI_SHADER_USER_DATA_VS_30"},
{0x2c6b, "SPI_SHADER_USER_DATA_VS_31"},
+ {0x2c8c, "SPI_SHADER_USER_DATA_GS_0"},
+ {0x2c8d, "SPI_SHADER_USER_DATA_GS_1"},
+ {0x2c8e, "SPI_SHADER_USER_DATA_GS_2"},
+ {0x2c8f, "SPI_SHADER_USER_DATA_GS_3"},
+ {0x2c90, "SPI_SHADER_USER_DATA_GS_4"},
+ {0x2c91, "SPI_SHADER_USER_DATA_GS_5"},
+ {0x2c92, "SPI_SHADER_USER_DATA_GS_6"},
+ {0x2c93, "SPI_SHADER_USER_DATA_GS_7"},
+ {0x2c94, "SPI_SHADER_USER_DATA_GS_8"},
+ {0x2c95, "SPI_SHADER_USER_DATA_GS_9"},
+ {0x2c96, "SPI_SHADER_USER_DATA_GS_10"},
+ {0x2c97, "SPI_SHADER_USER_DATA_GS_11"},
+ {0x2c98, "SPI_SHADER_USER_DATA_GS_12"},
+ {0x2c99, "SPI_SHADER_USER_DATA_GS_13"},
+ {0x2c9a, "SPI_SHADER_USER_DATA_GS_14"},
+ {0x2c9b, "SPI_SHADER_USER_DATA_GS_15"},
+ {0x2c9c, "SPI_SHADER_USER_DATA_GS_16"},
+ {0x2c9d, "SPI_SHADER_USER_DATA_GS_17"},
+ {0x2c9e, "SPI_SHADER_USER_DATA_GS_18"},
+ {0x2c9f, "SPI_SHADER_USER_DATA_GS_19"},
+ {0x2ca0, "SPI_SHADER_USER_DATA_GS_20"},
+ {0x2ca1, "SPI_SHADER_USER_DATA_GS_21"},
+ {0x2ca2, "SPI_SHADER_USER_DATA_GS_22"},
+ {0x2ca3, "SPI_SHADER_USER_DATA_GS_23"},
+ {0x2ca4, "SPI_SHADER_USER_DATA_GS_24"},
+ {0x2ca5, "SPI_SHADER_USER_DATA_GS_25"},
+ {0x2ca6, "SPI_SHADER_USER_DATA_GS_26"},
+ {0x2ca7, "SPI_SHADER_USER_DATA_GS_27"},
+ {0x2ca8, "SPI_SHADER_USER_DATA_GS_28"},
+ {0x2ca9, "SPI_SHADER_USER_DATA_GS_29"},
+ {0x2caa, "SPI_SHADER_USER_DATA_GS_30"},
+ {0x2cab, "SPI_SHADER_USER_DATA_GS_31"},
+
{0x2ccc, "SPI_SHADER_USER_DATA_ES_0"},
{0x2ccd, "SPI_SHADER_USER_DATA_ES_1"},
{0x2cce, "SPI_SHADER_USER_DATA_ES_2"},
@@ -491,38 +524,55 @@ static const char *getRegisterName(unsigned RegNum) {
{0xa310, "PA_SC_SHADER_CONTROL"},
{0xa313, "PA_SC_CONSERVATIVE_RASTERIZATION_CNTL"},
- {0x2d0c, "SPI_SHADER_USER_DATA_LS_0"},
- {0x2d0d, "SPI_SHADER_USER_DATA_LS_1"},
- {0x2d0e, "SPI_SHADER_USER_DATA_LS_2"},
- {0x2d0f, "SPI_SHADER_USER_DATA_LS_3"},
- {0x2d10, "SPI_SHADER_USER_DATA_LS_4"},
- {0x2d11, "SPI_SHADER_USER_DATA_LS_5"},
- {0x2d12, "SPI_SHADER_USER_DATA_LS_6"},
- {0x2d13, "SPI_SHADER_USER_DATA_LS_7"},
- {0x2d14, "SPI_SHADER_USER_DATA_LS_8"},
- {0x2d15, "SPI_SHADER_USER_DATA_LS_9"},
- {0x2d16, "SPI_SHADER_USER_DATA_LS_10"},
- {0x2d17, "SPI_SHADER_USER_DATA_LS_11"},
- {0x2d18, "SPI_SHADER_USER_DATA_LS_12"},
- {0x2d19, "SPI_SHADER_USER_DATA_LS_13"},
- {0x2d1a, "SPI_SHADER_USER_DATA_LS_14"},
- {0x2d1b, "SPI_SHADER_USER_DATA_LS_15"},
- {0x2d1c, "SPI_SHADER_USER_DATA_LS_16"},
- {0x2d1d, "SPI_SHADER_USER_DATA_LS_17"},
- {0x2d1e, "SPI_SHADER_USER_DATA_LS_18"},
- {0x2d1f, "SPI_SHADER_USER_DATA_LS_19"},
- {0x2d20, "SPI_SHADER_USER_DATA_LS_20"},
- {0x2d21, "SPI_SHADER_USER_DATA_LS_21"},
- {0x2d22, "SPI_SHADER_USER_DATA_LS_22"},
- {0x2d23, "SPI_SHADER_USER_DATA_LS_23"},
- {0x2d24, "SPI_SHADER_USER_DATA_LS_24"},
- {0x2d25, "SPI_SHADER_USER_DATA_LS_25"},
- {0x2d26, "SPI_SHADER_USER_DATA_LS_26"},
- {0x2d27, "SPI_SHADER_USER_DATA_LS_27"},
- {0x2d28, "SPI_SHADER_USER_DATA_LS_28"},
- {0x2d29, "SPI_SHADER_USER_DATA_LS_29"},
- {0x2d2a, "SPI_SHADER_USER_DATA_LS_30"},
- {0x2d2b, "SPI_SHADER_USER_DATA_LS_31"},
+ {0x2d0c, "SPI_SHADER_USER_DATA_HS_0"},
+ {0x2d0d, "SPI_SHADER_USER_DATA_HS_1"},
+ {0x2d0e, "SPI_SHADER_USER_DATA_HS_2"},
+ {0x2d0f, "SPI_SHADER_USER_DATA_HS_3"},
+ {0x2d10, "SPI_SHADER_USER_DATA_HS_4"},
+ {0x2d11, "SPI_SHADER_USER_DATA_HS_5"},
+ {0x2d12, "SPI_SHADER_USER_DATA_HS_6"},
+ {0x2d13, "SPI_SHADER_USER_DATA_HS_7"},
+ {0x2d14, "SPI_SHADER_USER_DATA_HS_8"},
+ {0x2d15, "SPI_SHADER_USER_DATA_HS_9"},
+ {0x2d16, "SPI_SHADER_USER_DATA_HS_10"},
+ {0x2d17, "SPI_SHADER_USER_DATA_HS_11"},
+ {0x2d18, "SPI_SHADER_USER_DATA_HS_12"},
+ {0x2d19, "SPI_SHADER_USER_DATA_HS_13"},
+ {0x2d1a, "SPI_SHADER_USER_DATA_HS_14"},
+ {0x2d1b, "SPI_SHADER_USER_DATA_HS_15"},
+ {0x2d1c, "SPI_SHADER_USER_DATA_HS_16"},
+ {0x2d1d, "SPI_SHADER_USER_DATA_HS_17"},
+ {0x2d1e, "SPI_SHADER_USER_DATA_HS_18"},
+ {0x2d1f, "SPI_SHADER_USER_DATA_HS_19"},
+ {0x2d20, "SPI_SHADER_USER_DATA_HS_20"},
+ {0x2d21, "SPI_SHADER_USER_DATA_HS_21"},
+ {0x2d22, "SPI_SHADER_USER_DATA_HS_22"},
+ {0x2d23, "SPI_SHADER_USER_DATA_HS_23"},
+ {0x2d24, "SPI_SHADER_USER_DATA_HS_24"},
+ {0x2d25, "SPI_SHADER_USER_DATA_HS_25"},
+ {0x2d26, "SPI_SHADER_USER_DATA_HS_26"},
+ {0x2d27, "SPI_SHADER_USER_DATA_HS_27"},
+ {0x2d28, "SPI_SHADER_USER_DATA_HS_28"},
+ {0x2d29, "SPI_SHADER_USER_DATA_HS_29"},
+ {0x2d2a, "SPI_SHADER_USER_DATA_HS_30"},
+ {0x2d2b, "SPI_SHADER_USER_DATA_HS_31"},
+
+ {0x2d4c, "SPI_SHADER_USER_DATA_LS_0"},
+ {0x2d4d, "SPI_SHADER_USER_DATA_LS_1"},
+ {0x2d4e, "SPI_SHADER_USER_DATA_LS_2"},
+ {0x2d4f, "SPI_SHADER_USER_DATA_LS_3"},
+ {0x2d50, "SPI_SHADER_USER_DATA_LS_4"},
+ {0x2d51, "SPI_SHADER_USER_DATA_LS_5"},
+ {0x2d52, "SPI_SHADER_USER_DATA_LS_6"},
+ {0x2d53, "SPI_SHADER_USER_DATA_LS_7"},
+ {0x2d54, "SPI_SHADER_USER_DATA_LS_8"},
+ {0x2d55, "SPI_SHADER_USER_DATA_LS_9"},
+ {0x2d56, "SPI_SHADER_USER_DATA_LS_10"},
+ {0x2d57, "SPI_SHADER_USER_DATA_LS_11"},
+ {0x2d58, "SPI_SHADER_USER_DATA_LS_12"},
+ {0x2d59, "SPI_SHADER_USER_DATA_LS_13"},
+ {0x2d5a, "SPI_SHADER_USER_DATA_LS_14"},
+ {0x2d5b, "SPI_SHADER_USER_DATA_LS_15"},
{0xa2aa, "IA_MULTI_VGT_PARAM"},
{0xa2a5, "VGT_GS_MAX_PRIMS_PER_SUBGROUP"},
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
index 0f17c157b2062..544ab669d9ae2 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -14,16 +14,12 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
-#include <map>
namespace llvm {
-class AMDGPUTargetStreamer;
-class formatted_raw_ostream;
-class MCStreamer;
class Module;
+class StringRef;
class AMDGPUPALMetadata {
unsigned BlobType = 0;