Diffstat (limited to 'lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp')
-rw-r--r--  lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 249
1 file changed, 145 insertions(+), 104 deletions(-)
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 125a3b22d0cf..3fd3c75874a3 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUBaseInfo.h"
+#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
@@ -52,7 +53,7 @@ unsigned getBitMask(unsigned Shift, unsigned Width) {
return ((1 << Width) - 1) << Shift;
}
-/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
+/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
@@ -61,7 +62,7 @@ unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
return Dst;
}
-/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
+/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
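The \brief removals here are mechanical (LLVM enabled Doxygen autobrief). Since the helper bodies are elided by the diff context, here is a minimal standalone sketch of the documented semantics — assumed, not the committed bodies; only getBitMask is quoted verbatim from the hunk above:

    #include <cassert>

    unsigned getBitMask(unsigned Shift, unsigned Width) {
      return ((1 << Width) - 1) << Shift;
    }

    // Packs the low Width bits of Src into Dst at bit position Shift.
    unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
      unsigned Mask = getBitMask(Shift, Width);
      return (Dst & ~Mask) | ((Src << Shift) & Mask);
    }

    // Unpacks Width bits from Src starting at bit position Shift.
    unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
      return (Src & getBitMask(Shift, Width)) >> Shift;
    }

    int main() {
      unsigned Packed = packBits(/*Src=*/3, /*Dst=*/0, /*Shift=*/4, /*Width=*/3);
      assert(Packed == 0x30);
      assert(unpackBits(Packed, 4, 3) == 3);
    }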
@@ -96,64 +97,34 @@ unsigned getVmcntBitWidthHi() { return 2; }
namespace llvm {
-static cl::opt<bool> EnablePackedInlinableLiterals(
- "enable-packed-inlinable-literals",
- cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
- cl::init(false));
-
namespace AMDGPU {
-LLVM_READNONE
-static inline Channels indexToChannel(unsigned Channel) {
- switch (Channel) {
- case 1:
- return AMDGPU::Channels_1;
- case 2:
- return AMDGPU::Channels_2;
- case 3:
- return AMDGPU::Channels_3;
- case 4:
- return AMDGPU::Channels_4;
- default:
- llvm_unreachable("invalid MIMG channel");
- }
-}
+struct MIMGInfo {
+ uint16_t Opcode;
+ uint16_t BaseOpcode;
+ uint8_t MIMGEncoding;
+ uint8_t VDataDwords;
+ uint8_t VAddrDwords;
+};
+#define GET_MIMGBaseOpcodesTable_IMPL
+#define GET_MIMGDimInfoTable_IMPL
+#define GET_MIMGInfoTable_IMPL
+#include "AMDGPUGenSearchableTables.inc"
-// FIXME: Need to handle d16 images correctly.
-static unsigned rcToChannels(unsigned RCID) {
- switch (RCID) {
- case AMDGPU::VGPR_32RegClassID:
- return 1;
- case AMDGPU::VReg_64RegClassID:
- return 2;
- case AMDGPU::VReg_96RegClassID:
- return 3;
- case AMDGPU::VReg_128RegClassID:
- return 4;
- default:
- llvm_unreachable("invalid MIMG register class");
- }
+int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
+ unsigned VDataDwords, unsigned VAddrDwords) {
+ const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
+ VDataDwords, VAddrDwords);
+ return Info ? Info->Opcode : -1;
}
-int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels) {
- AMDGPU::Channels Channel = AMDGPU::indexToChannel(NewChannels);
- unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass);
- if (NewChannels == OrigChannels)
- return Opc;
-
- switch (OrigChannels) {
- case 1:
- return AMDGPU::getMaskedMIMGOp1(Opc, Channel);
- case 2:
- return AMDGPU::getMaskedMIMGOp2(Opc, Channel);
- case 3:
- return AMDGPU::getMaskedMIMGOp3(Opc, Channel);
- case 4:
- return AMDGPU::getMaskedMIMGOp4(Opc, Channel);
- default:
- llvm_unreachable("invalid MIMG channel");
- }
+int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
+ const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
+ const MIMGInfo *NewInfo =
+ getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
+ NewChannels, OrigInfo->VAddrDwords);
+ return NewInfo ? NewInfo->Opcode : -1;
}
// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
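The hand-rolled channel switches give way to TableGen-generated searchable tables: getMIMGInfo and getMIMGOpcodeHelper are emitted into AMDGPUGenSearchableTables.inc, keyed on the MIMGInfo fields. A minimal standalone analogue of the lookup pattern — hypothetical table entries, with a linear scan standing in for the generated binary search:

    #include <cstdint>

    struct MIMGInfo {
      uint16_t Opcode;
      uint16_t BaseOpcode;
      uint8_t MIMGEncoding;
      uint8_t VDataDwords;
      uint8_t VAddrDwords;
    };

    // Hypothetical entries, sorted on the lookup key.
    static const MIMGInfo Table[] = {
        {100, 7, 0, 1, 4}, {101, 7, 0, 2, 4}, {102, 7, 0, 4, 4},
    };

    static const MIMGInfo *getMIMGOpcodeHelper(unsigned BaseOpcode, unsigned Enc,
                                               unsigned VData, unsigned VAddr) {
      for (const MIMGInfo &I : Table)
        if (I.BaseOpcode == BaseOpcode && I.MIMGEncoding == Enc &&
            I.VDataDwords == VData && I.VAddrDwords == VAddr)
          return &I;
      return nullptr;
    }

    int getMIMGOpcode(unsigned BaseOpcode, unsigned Enc, unsigned VData,
                      unsigned VAddr) {
      const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, Enc, VData, VAddr);
      return Info ? Info->Opcode : -1; // -1: no variant with that shape exists
    }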
@@ -183,10 +154,10 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {
return {7, 0, 3};
if (Features.test(FeatureISAVersion7_0_4))
return {7, 0, 4};
+ if (Features.test(FeatureSeaIslands))
+ return {7, 0, 0};
// GCN GFX8 (Volcanic Islands (VI)).
- if (Features.test(FeatureISAVersion8_0_0))
- return {8, 0, 0};
if (Features.test(FeatureISAVersion8_0_1))
return {8, 0, 1};
if (Features.test(FeatureISAVersion8_0_2))
@@ -195,14 +166,22 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {
return {8, 0, 3};
if (Features.test(FeatureISAVersion8_1_0))
return {8, 1, 0};
+ if (Features.test(FeatureVolcanicIslands))
+ return {8, 0, 0};
// GCN GFX9.
if (Features.test(FeatureISAVersion9_0_0))
return {9, 0, 0};
if (Features.test(FeatureISAVersion9_0_2))
return {9, 0, 2};
+ if (Features.test(FeatureISAVersion9_0_4))
+ return {9, 0, 4};
+ if (Features.test(FeatureISAVersion9_0_6))
+ return {9, 0, 6};
+ if (Features.test(FeatureGFX9))
+ return {9, 0, 0};
- if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
+ if (Features.test(FeatureSouthernIslands))
return {0, 0, 0};
return {7, 0, 0};
}
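The new family-level fallbacks (FeatureSeaIslands, FeatureVolcanicIslands, FeatureGFX9) are deliberately placed after every specific ISAVersion check of the same generation: a subtarget that sets a specific stepping bit presumably also sets its family bit, so the generic check must not shadow it. A simplified sketch of why the ordering matters, using a plain std::bitset and an assumed feature layout:

    #include <bitset>
    #include <cassert>

    enum Feature { FeatureISAVersion9_0_6, FeatureGFX9, NumFeatures };
    struct IsaVersion { unsigned Major, Minor, Stepping; };

    IsaVersion getIsaVersion(const std::bitset<NumFeatures> &Features) {
      if (Features.test(FeatureISAVersion9_0_6))
        return {9, 0, 6};            // specific stepping first
      if (Features.test(FeatureGFX9))
        return {9, 0, 0};            // generic family fallback last
      return {0, 0, 0};
    }

    int main() {
      std::bitset<NumFeatures> GFX906;
      GFX906.set(FeatureISAVersion9_0_6).set(FeatureGFX9);
      assert(getIsaVersion(GFX906).Stepping == 6); // not masked by the fallback
    }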
@@ -219,11 +198,15 @@ void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
<< ISAVersion.Major
<< ISAVersion.Minor
<< ISAVersion.Stepping;
+
+ if (hasXNACK(*STI))
+ Stream << "+xnack";
+
Stream.flush();
}
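With the hasXNACK hook added below, the streamed target name gains a "+xnack" suffix. Assuming the usual "gfx" prefix is emitted earlier in this function (it is not visible in the hunk), a {8,0,1} subtarget with XNACK would stream as sketched here — local names, output shape only:

    #include <cassert>
    #include <sstream>
    #include <string>

    std::string isaVersionString(unsigned Major, unsigned Minor,
                                 unsigned Stepping, bool XNACK) {
      std::ostringstream OS;
      OS << "gfx" << Major << Minor << Stepping; // assumed prefix + digits
      if (XNACK)
        OS << "+xnack";
      return OS.str();
    }

    int main() { assert(isaVersionString(8, 0, 1, true) == "gfx801+xnack"); }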
-bool hasCodeObjectV3(const FeatureBitset &Features) {
- return Features.test(FeatureCodeObjectV3);
+bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
+ return STI->getFeatureBits().test(FeatureCodeObjectV3);
}
unsigned getWavefrontSize(const FeatureBitset &Features) {
@@ -260,7 +243,7 @@ unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
}
unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
- return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
+ return getMaxWavesPerEU() * getEUsPerCU(Features);
}
unsigned getMaxWavesPerCU(const FeatureBitset &Features,
@@ -272,9 +255,7 @@ unsigned getMinWavesPerEU(const FeatureBitset &Features) {
return 1;
}
-unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
- if (!Features.test(FeatureGCN))
- return 8;
+unsigned getMaxWavesPerEU() {
// FIXME: Need to take scratch memory into account.
return 10;
}
@@ -330,11 +311,13 @@ unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
- if (WavesPerEU >= getMaxWavesPerEU(Features))
+ if (WavesPerEU >= getMaxWavesPerEU())
return 0;
- unsigned MinNumSGPRs =
- alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
- getSGPRAllocGranule(Features)) + 1;
+
+ unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
+ if (Features.test(FeatureTrapHandler))
+ MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
+ MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}
@@ -343,14 +326,49 @@ unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
assert(WavesPerEU != 0);
IsaVersion Version = getIsaVersion(Features);
- unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
- getSGPRAllocGranule(Features));
unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
if (Version.Major >= 8 && !Addressable)
AddressableNumSGPRs = 112;
+ unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
+ if (Features.test(FeatureTrapHandler))
+ MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
+ MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
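Both SGPR bounds now carve out TRAP_NUM_SGPRS before aligning when the trap handler feature is set. A worked example of the max-bound arithmetic under assumed GFX9-like constants (800 total SGPRs, alloc granule 16, TRAP_NUM_SGPRS of 16; the final clamp to the addressable count is omitted for brevity):

    #include <algorithm>
    #include <cassert>

    unsigned alignDown(unsigned V, unsigned A) { return V / A * A; }

    unsigned maxNumSGPRs(unsigned WavesPerEU, bool TrapHandler) {
      const unsigned TotalSGPRs = 800, Granule = 16, TrapSGPRs = 16;
      unsigned N = TotalSGPRs / WavesPerEU;
      if (TrapHandler)
        N -= std::min(N, TrapSGPRs);  // reserve SGPRs for the trap handler
      return alignDown(N, Granule);
    }

    int main() {
      assert(maxNumSGPRs(10, false) == 80); // 800 / 10 = 80
      assert(maxNumSGPRs(10, true) == 64);  // 80 - 16 = 64
    }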
+unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+ bool FlatScrUsed, bool XNACKUsed) {
+ unsigned ExtraSGPRs = 0;
+ if (VCCUsed)
+ ExtraSGPRs = 2;
+
+ IsaVersion Version = getIsaVersion(Features);
+ if (Version.Major < 8) {
+ if (FlatScrUsed)
+ ExtraSGPRs = 4;
+ } else {
+ if (XNACKUsed)
+ ExtraSGPRs = 4;
+
+ if (FlatScrUsed)
+ ExtraSGPRs = 6;
+ }
+
+ return ExtraSGPRs;
+}
+
+unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+ bool FlatScrUsed) {
+ return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed,
+ Features[AMDGPU::FeatureXNACK]);
+}
+
+unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) {
+ NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features));
+ // SGPRBlocks is actual number of SGPR blocks minus 1.
+ return NumSGPRs / getSGPREncodingGranule(Features) - 1;
+}
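Note the getNumExtraSGPRs values are absolute, not cumulative: on GFX8+ a kernel that uses flat scratch reports 6 extra SGPRs regardless of whether VCC or XNACK is also in use. getNumSGPRBlocks then encodes the granule-aligned count as "blocks minus one", which is how the program resource registers store it; getNumVGPRBlocks below follows the same pattern with the VGPR granule. A worked sketch, assuming the pre-GFX9 encoding granule of 8:

    #include <algorithm>
    #include <cassert>

    unsigned alignTo(unsigned V, unsigned A) { return (V + A - 1) / A * A; }

    unsigned numSGPRBlocks(unsigned NumSGPRs, unsigned Granule = 8) {
      NumSGPRs = alignTo(std::max(1u, NumSGPRs), Granule);
      return NumSGPRs / Granule - 1; // field stores block count minus 1
    }

    int main() {
      assert(numSGPRBlocks(37) == 4); // alignTo(37, 8) = 40; 40/8 - 1 = 4
      assert(numSGPRBlocks(0) == 0);  // clamped to at least one block
    }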
+
unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
return 4;
}
@@ -370,7 +388,7 @@ unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
- if (WavesPerEU >= getMaxWavesPerEU(Features))
+ if (WavesPerEU >= getMaxWavesPerEU())
return 0;
unsigned MinNumVGPRs =
alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
@@ -387,6 +405,12 @@ unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
+unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) {
+ NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features));
+ // VGPRBlocks is actual number of VGPR blocks minus 1.
+ return NumVGPRs / getVGPREncodingGranule(Features) - 1;
+}
+
} // end namespace IsaInfo
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
@@ -396,7 +420,7 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
memset(&Header, 0, sizeof(Header));
Header.amd_kernel_code_version_major = 1;
- Header.amd_kernel_code_version_minor = 1;
+ Header.amd_kernel_code_version_minor = 2;
Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
Header.amd_machine_version_major = ISA.Major;
Header.amd_machine_version_minor = ISA.Minor;
@@ -416,6 +440,21 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
Header.private_segment_alignment = 4;
}
+amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
+ amdhsa::kernel_descriptor_t KD;
+ memset(&KD, 0, sizeof(KD));
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
+ amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
+ amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
+ return KD;
+}
+
bool isGroupSegment(const GlobalValue *GV) {
return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}
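getDefaultAmdhsaKernelDescriptor zero-initializes the descriptor and then switches on a few defaults via AMDHSA_BITS_SET. A standalone sketch of the clear-then-set-under-mask pattern that macro expands to — local names and an invented field; the real mask/shift pairs live in the AMDHSA kernel descriptor header:

    #include <cassert>
    #include <cstdint>

    // Example field: bits [3:2] of a 32-bit resource word.
    constexpr uint32_t FIELD_SHIFT = 2;
    constexpr uint32_t FIELD_MASK = 0x3u << FIELD_SHIFT;

    void setBits(uint32_t &Dst, uint32_t Mask, uint32_t Shift, uint32_t Val) {
      Dst &= ~Mask;                  // clear the field
      Dst |= (Val << Shift) & Mask;  // write the new value under the mask
    }

    int main() {
      uint32_t Rsrc1 = 0;
      setBits(Rsrc1, FIELD_MASK, FIELD_SHIFT, 3);
      assert((Rsrc1 & FIELD_MASK) >> FIELD_SHIFT == 3);
    }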
@@ -425,7 +464,8 @@ bool isGlobalSegment(const GlobalValue *GV) {
}
bool isReadOnlySegment(const GlobalValue *GV) {
- return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
+ return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}
bool shouldEmitConstantsToTextSection(const Triple &TT) {
@@ -598,6 +638,18 @@ bool isEntryFunctionCC(CallingConv::ID CC) {
}
}
+bool hasXNACK(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
+}
+
+bool hasMIMG_R128(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
+}
+
+bool hasPackedD16(const MCSubtargetInfo &STI) {
+ return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
+}
+
bool isSI(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}
@@ -681,6 +733,8 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
+ if (STI.getTargetTriple().getArch() == Triple::r600)
+ return Reg;
MAP_REG2REG
}
@@ -837,9 +891,6 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
assert(HasInv2Pi);
- if (!EnablePackedInlinableLiterals)
- return false;
-
int16_t Lo16 = static_cast<int16_t>(Literal);
int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
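Dropping the EnablePackedInlinableLiterals flag (which defaulted to off) makes packed literals unconditionally eligible. The rule itself is visible in the hunk: a 32-bit v2i16/v2f16 literal is inlinable iff both 16-bit halves are identical and the half is inlinable on its own. A runnable restatement, with isInlinableLiteral16 stubbed to the integer range check only (the real one also accepts fp16 constants such as 0.5 and 1/(2*pi)):

    #include <cassert>
    #include <cstdint>

    bool isInlinableLiteral16(int16_t L) { return L >= -16 && L <= 64; }

    bool isInlinableLiteralV216(int32_t Literal) {
      int16_t Lo16 = static_cast<int16_t>(Literal);
      int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
      return Lo16 == Hi16 && isInlinableLiteral16(Lo16);
    }

    int main() {
      assert(isInlinableLiteralV216(0x00010001));  // <1, 1>: halves match
      assert(!isInlinableLiteralV216(0x00010002)); // halves differ
    }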
@@ -871,24 +922,6 @@ bool isArgPassedInSGPR(const Argument *A) {
}
}
-// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
-bool isUniformMMO(const MachineMemOperand *MMO) {
- const Value *Ptr = MMO->getValue();
- // UndefValue means this is a load of a kernel input. These are uniform.
- // Sometimes LDS instructions have constant pointers.
- // If Ptr is null, then that means this mem operand contains a
- // PseudoSourceValue like GOT.
- if (!Ptr || isa<UndefValue>(Ptr) ||
- isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
- return true;
-
- if (const Argument *Arg = dyn_cast<Argument>(Ptr))
- return isArgPassedInSGPR(Arg);
-
- const Instruction *I = dyn_cast<Instruction>(Ptr);
- return I && I->getMetadata("amdgpu.uniform");
-}
-
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
if (isGCN3Encoding(ST))
return ByteOffset;
@@ -909,18 +942,10 @@ namespace llvm {
namespace AMDGPU {
AMDGPUAS getAMDGPUAS(Triple T) {
- auto Env = T.getEnvironmentName();
AMDGPUAS AS;
- if (Env == "amdgiz" || Env == "amdgizcl") {
- AS.FLAT_ADDRESS = 0;
- AS.PRIVATE_ADDRESS = 5;
- AS.REGION_ADDRESS = 4;
- }
- else {
- AS.FLAT_ADDRESS = 4;
- AS.PRIVATE_ADDRESS = 0;
- AS.REGION_ADDRESS = 5;
- }
+ AS.FLAT_ADDRESS = 0;
+ AS.PRIVATE_ADDRESS = 5;
+ AS.REGION_ADDRESS = 2;
return AS;
}
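The "amdgiz" environment switch is gone: flat and private are now fixed at 0 and 5, and REGION moves to 2. For orientation, the resulting numbering — the GLOBAL/LOCAL/CONSTANT values shown are assumptions drawn from the surrounding mapping, not part of this hunk:

    enum AMDGPUAddrSpace : unsigned {
      FLAT_ADDRESS = 0,     // flat: may alias any segment
      GLOBAL_ADDRESS = 1,
      REGION_ADDRESS = 2,   // GDS
      LOCAL_ADDRESS = 3,    // LDS
      CONSTANT_ADDRESS = 4,
      PRIVATE_ADDRESS = 5,  // scratch
    };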
@@ -931,5 +956,21 @@ AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
AMDGPUAS getAMDGPUAS(const Module &M) {
return getAMDGPUAS(Triple(M.getTargetTriple()));
}
+
+namespace {
+
+struct SourceOfDivergence {
+ unsigned Intr;
+};
+const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
+
+#define GET_SourcesOfDivergence_IMPL
+#include "AMDGPUGenSearchableTables.inc"
+
+} // end anonymous namespace
+
+bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
+ return lookupSourceOfDivergence(IntrID);
+}
} // namespace AMDGPU
} // namespace llvm
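The isUniformMMO heuristic removed above appears to be superseded on the TTI side (note the new AMDGPUTargetTransformInfo.h include at the top of the file), while divergent intrinsics are now classified by a generated searchable table: lookupSourceOfDivergence returns nullptr for uniform intrinsics, so the pointer's truth value is the whole predicate. A minimal standalone analogue with hypothetical intrinsic IDs:

    struct SourceOfDivergence { unsigned Intr; };

    // Hypothetical divergent-intrinsic IDs; the real table is TableGen'd.
    static const SourceOfDivergence Divergent[] = {{101}, {205}, {307}};

    const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr) {
      for (const SourceOfDivergence &E : Divergent)
        if (E.Intr == Intr)
          return &E;
      return nullptr;
    }

    bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
      return lookupSourceOfDivergence(IntrID) != nullptr;
    }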