summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp122
1 files changed, 64 insertions, 58 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 7cd8e53e6521..4bf1f1357b69 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -121,26 +121,13 @@ void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {
TM.getTargetTriple().getOS() != Triple::AMDPAL)
return;
- if (CodeObjectVersion >= AMDGPU::AMDHSA_COV3)
- getTargetStreamer()->EmitDirectiveAMDGCNTarget();
+ getTargetStreamer()->EmitDirectiveAMDGCNTarget();
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID());
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
getTargetStreamer()->getPALMetadata()->readFromIR(M);
-
- if (CodeObjectVersion >= AMDGPU::AMDHSA_COV3)
- return;
-
- // HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2.
- if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
- getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
-
- // HSA and PAL emit NT_AMD_HSA_ISA_VERSION for code objects v2.
- IsaVersion Version = getIsaVersion(getGlobalSTI()->getCPU());
- getTargetStreamer()->EmitDirectiveHSACodeObjectISAV2(
- Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
}
void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) {
@@ -148,8 +135,7 @@ void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) {
if (!IsTargetStreamerInitialized)
initTargetStreamer(M);
- if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
- CodeObjectVersion == AMDGPU::AMDHSA_COV2)
+ if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
getTargetStreamer()->EmitISAVersion();
// Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
@@ -162,20 +148,6 @@ void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) {
}
}
-bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
- const MachineBasicBlock *MBB) const {
- if (!AsmPrinter::isBlockOnlyReachableByFallthrough(MBB))
- return false;
-
- if (MBB->empty())
- return true;
-
- // If this is a block implementing a long branch, an expression relative to
- // the start of the block is needed. to the start of the block.
- // XXX - Is there a smarter way to check this?
- return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
-}
-
void AMDGPUAsmPrinter::emitFunctionBodyStart() {
const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
@@ -209,7 +181,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
if (!MFI.isEntryFunction())
return;
- if ((STM.isMesaKernel(F) || CodeObjectVersion == AMDGPU::AMDHSA_COV2) &&
+ if (STM.isMesaKernel(F) &&
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
amd_kernel_code_t KernelCode;
@@ -219,6 +191,11 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
if (STM.isAmdHsaOS())
HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
+
+ if (MFI.getNumKernargPreloadedSGPRs() > 0) {
+ assert(AMDGPU::hasKernargPreload(STM));
+ getTargetStreamer()->EmitKernargPreloadHeader(*getGlobalSTI());
+ }
}
void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
@@ -226,8 +203,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
if (!MFI.isEntryFunction())
return;
- if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
- CodeObjectVersion == AMDGPU::AMDHSA_COV2)
+ if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
return;
auto &Streamer = getTargetStreamer()->getStreamer();
@@ -260,9 +236,23 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
Streamer.popSection();
}
+void AMDGPUAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
+ Register RegNo = MI->getOperand(0).getReg();
+
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << "implicit-def: "
+ << printReg(RegNo, MF->getSubtarget().getRegisterInfo());
+
+ if (MI->getAsmPrinterFlags() & AMDGPU::SGPR_SPILL)
+ OS << " : SGPR spill to VGPR lane";
+
+ OutStreamer->AddComment(OS.str());
+ OutStreamer->addBlankLine();
+}
+
void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
- if (TM.getTargetTriple().getOS() == Triple::AMDHSA &&
- CodeObjectVersion >= AMDGPU::AMDHSA_COV3) {
+ if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
AsmPrinter::emitFunctionEntryLabel();
return;
}
@@ -337,12 +327,6 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) {
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
switch (CodeObjectVersion) {
- case AMDGPU::AMDHSA_COV2:
- HSAMetadataStream.reset(new HSAMD::MetadataStreamerYamlV2());
- break;
- case AMDGPU::AMDHSA_COV3:
- HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV3());
- break;
case AMDGPU::AMDHSA_COV4:
HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV4());
break;
@@ -393,28 +377,29 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
const MachineFunction &MF) const {
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
uint16_t KernelCodeProperties = 0;
+ const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI.getUserSGPRInfo();
- if (MFI.hasPrivateSegmentBuffer()) {
+ if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}
- if (MFI.hasDispatchPtr()) {
+ if (UserSGPRInfo.hasDispatchPtr()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
}
- if (MFI.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5) {
+ if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
}
- if (MFI.hasKernargSegmentPtr()) {
+ if (UserSGPRInfo.hasKernargSegmentPtr()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
}
- if (MFI.hasDispatchID()) {
+ if (UserSGPRInfo.hasDispatchID()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
}
- if (MFI.hasFlatScratchInit()) {
+ if (UserSGPRInfo.hasFlatScratchInit()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
}
@@ -435,6 +420,7 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
const SIProgramInfo &PI) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const Function &F = MF.getFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
amdhsa::kernel_descriptor_t KernelDescriptor;
memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
@@ -458,6 +444,10 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
KernelDescriptor.compute_pgm_rsrc3 =
CurrentProgramInfo.ComputePGMRSrc3GFX90A;
+ if (AMDGPU::hasKernargPreload(STM))
+ KernelDescriptor.kernarg_preload =
+ static_cast<uint16_t>(Info->getNumKernargPreloadedSGPRs());
+
return KernelDescriptor;
}
@@ -949,6 +939,17 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.Occupancy = STM.computeOccupancy(MF.getFunction(), ProgInfo.LDSSize,
ProgInfo.NumSGPRsForWavesPerEU,
ProgInfo.NumVGPRsForWavesPerEU);
+ const auto [MinWEU, MaxWEU] =
+ AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", {0, 0}, true);
+ if (ProgInfo.Occupancy < MinWEU) {
+ DiagnosticInfoOptimizationFailure Diag(
+ F, F.getSubprogram(),
+ "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
+ "'" +
+ F.getName() + "': desired occupancy was " + Twine(MinWEU) +
+ ", final occupancy is " + Twine(ProgInfo.Occupancy));
+ F.getContext().diagnose(Diag);
+ }
}
static unsigned getRsrcReg(CallingConv::ID CallConv) {
@@ -1113,7 +1114,8 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
auto *MD = getTargetStreamer()->getPALMetadata();
const MachineFrameInfo &MFI = MF.getFrameInfo();
- MD->setFunctionScratchSize(MF, MFI.getStackSize());
+ StringRef FnName = MF.getFunction().getName();
+ MD->setFunctionScratchSize(FnName, MFI.getStackSize());
// Set compute registers
MD->setRsrc1(CallingConv::AMDGPU_CS,
@@ -1121,9 +1123,9 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.getComputePGMRSrc2());
// Set optional info
- MD->setFunctionLdsSize(MF, CurrentProgramInfo.LDSSize);
- MD->setFunctionNumUsedVgprs(MF, CurrentProgramInfo.NumVGPRsForWavesPerEU);
- MD->setFunctionNumUsedSgprs(MF, CurrentProgramInfo.NumSGPRsForWavesPerEU);
+ MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize);
+ MD->setFunctionNumUsedVgprs(FnName, CurrentProgramInfo.NumVGPRsForWavesPerEU);
+ MD->setFunctionNumUsedSgprs(FnName, CurrentProgramInfo.NumSGPRsForWavesPerEU);
}
// This is supposed to be log2(Size)
@@ -1164,27 +1166,28 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
- if (MFI->hasPrivateSegmentBuffer()) {
+ const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
+ if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
Out.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}
- if (MFI->hasDispatchPtr())
+ if (UserSGPRInfo.hasDispatchPtr())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
- if (MFI->hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5)
+ if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5)
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
- if (MFI->hasKernargSegmentPtr())
+ if (UserSGPRInfo.hasKernargSegmentPtr())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
- if (MFI->hasDispatchID())
+ if (UserSGPRInfo.hasDispatchID())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
- if (MFI->hasFlatScratchInit())
+ if (UserSGPRInfo.hasFlatScratchInit())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
- if (MFI->hasDispatchPtr())
+ if (UserSGPRInfo.hasDispatchPtr())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (STM.isXNACKEnabled())
@@ -1293,6 +1296,9 @@ void AMDGPUAsmPrinter::emitResourceUsageRemarks(
EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR);
EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",
CurrentProgramInfo.ScratchSize);
+ StringRef DynamicStackStr =
+ CurrentProgramInfo.DynamicCallStack ? "True" : "False";
+ EmitResourceUsageRemark("DynamicStack", "Dynamic Stack", DynamicStackStr);
EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",
CurrentProgramInfo.Occupancy);
EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",