diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 122 |
1 files changed, 64 insertions, 58 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 7cd8e53e6521..4bf1f1357b69 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -121,26 +121,13 @@ void AMDGPUAsmPrinter::initTargetStreamer(Module &M) { TM.getTargetTriple().getOS() != Triple::AMDPAL) return; - if (CodeObjectVersion >= AMDGPU::AMDHSA_COV3) - getTargetStreamer()->EmitDirectiveAMDGCNTarget(); + getTargetStreamer()->EmitDirectiveAMDGCNTarget(); if (TM.getTargetTriple().getOS() == Triple::AMDHSA) HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID()); if (TM.getTargetTriple().getOS() == Triple::AMDPAL) getTargetStreamer()->getPALMetadata()->readFromIR(M); - - if (CodeObjectVersion >= AMDGPU::AMDHSA_COV3) - return; - - // HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2. - if (TM.getTargetTriple().getOS() == Triple::AMDHSA) - getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1); - - // HSA and PAL emit NT_AMD_HSA_ISA_VERSION for code objects v2. - IsaVersion Version = getIsaVersion(getGlobalSTI()->getCPU()); - getTargetStreamer()->EmitDirectiveHSACodeObjectISAV2( - Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU"); } void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) { @@ -148,8 +135,7 @@ void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) { if (!IsTargetStreamerInitialized) initTargetStreamer(M); - if (TM.getTargetTriple().getOS() != Triple::AMDHSA || - CodeObjectVersion == AMDGPU::AMDHSA_COV2) + if (TM.getTargetTriple().getOS() != Triple::AMDHSA) getTargetStreamer()->EmitISAVersion(); // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA). @@ -162,20 +148,6 @@ void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) { } } -bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( - const MachineBasicBlock *MBB) const { - if (!AsmPrinter::isBlockOnlyReachableByFallthrough(MBB)) - return false; - - if (MBB->empty()) - return true; - - // If this is a block implementing a long branch, an expression relative to - // the start of the block is needed. to the start of the block. - // XXX - Is there a smarter way to check this? - return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64); -} - void AMDGPUAsmPrinter::emitFunctionBodyStart() { const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>(); const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>(); @@ -209,7 +181,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() { if (!MFI.isEntryFunction()) return; - if ((STM.isMesaKernel(F) || CodeObjectVersion == AMDGPU::AMDHSA_COV2) && + if (STM.isMesaKernel(F) && (F.getCallingConv() == CallingConv::AMDGPU_KERNEL || F.getCallingConv() == CallingConv::SPIR_KERNEL)) { amd_kernel_code_t KernelCode; @@ -219,6 +191,11 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() { if (STM.isAmdHsaOS()) HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo); + + if (MFI.getNumKernargPreloadedSGPRs() > 0) { + assert(AMDGPU::hasKernargPreload(STM)); + getTargetStreamer()->EmitKernargPreloadHeader(*getGlobalSTI()); + } } void AMDGPUAsmPrinter::emitFunctionBodyEnd() { @@ -226,8 +203,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() { if (!MFI.isEntryFunction()) return; - if (TM.getTargetTriple().getOS() != Triple::AMDHSA || - CodeObjectVersion == AMDGPU::AMDHSA_COV2) + if (TM.getTargetTriple().getOS() != Triple::AMDHSA) return; auto &Streamer = getTargetStreamer()->getStreamer(); @@ -260,9 +236,23 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() { Streamer.popSection(); } +void AMDGPUAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { + Register RegNo = MI->getOperand(0).getReg(); + + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << "implicit-def: " + << printReg(RegNo, MF->getSubtarget().getRegisterInfo()); + + if (MI->getAsmPrinterFlags() & AMDGPU::SGPR_SPILL) + OS << " : SGPR spill to VGPR lane"; + + OutStreamer->AddComment(OS.str()); + OutStreamer->addBlankLine(); +} + void AMDGPUAsmPrinter::emitFunctionEntryLabel() { - if (TM.getTargetTriple().getOS() == Triple::AMDHSA && - CodeObjectVersion >= AMDGPU::AMDHSA_COV3) { + if (TM.getTargetTriple().getOS() == Triple::AMDHSA) { AsmPrinter::emitFunctionEntryLabel(); return; } @@ -337,12 +327,6 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) { if (TM.getTargetTriple().getOS() == Triple::AMDHSA) { switch (CodeObjectVersion) { - case AMDGPU::AMDHSA_COV2: - HSAMetadataStream.reset(new HSAMD::MetadataStreamerYamlV2()); - break; - case AMDGPU::AMDHSA_COV3: - HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV3()); - break; case AMDGPU::AMDHSA_COV4: HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV4()); break; @@ -393,28 +377,29 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties( const MachineFunction &MF) const { const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); uint16_t KernelCodeProperties = 0; + const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI.getUserSGPRInfo(); - if (MFI.hasPrivateSegmentBuffer()) { + if (UserSGPRInfo.hasPrivateSegmentBuffer()) { KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER; } - if (MFI.hasDispatchPtr()) { + if (UserSGPRInfo.hasDispatchPtr()) { KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; } - if (MFI.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5) { + if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5) { KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; } - if (MFI.hasKernargSegmentPtr()) { + if (UserSGPRInfo.hasKernargSegmentPtr()) { KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR; } - if (MFI.hasDispatchID()) { + if (UserSGPRInfo.hasDispatchID()) { KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID; } - if (MFI.hasFlatScratchInit()) { + if (UserSGPRInfo.hasFlatScratchInit()) { KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT; } @@ -435,6 +420,7 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor( const SIProgramInfo &PI) const { const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); const Function &F = MF.getFunction(); + const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); amdhsa::kernel_descriptor_t KernelDescriptor; memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor)); @@ -458,6 +444,10 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor( KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3GFX90A; + if (AMDGPU::hasKernargPreload(STM)) + KernelDescriptor.kernarg_preload = + static_cast<uint16_t>(Info->getNumKernargPreloadedSGPRs()); + return KernelDescriptor; } @@ -949,6 +939,17 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.Occupancy = STM.computeOccupancy(MF.getFunction(), ProgInfo.LDSSize, ProgInfo.NumSGPRsForWavesPerEU, ProgInfo.NumVGPRsForWavesPerEU); + const auto [MinWEU, MaxWEU] = + AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", {0, 0}, true); + if (ProgInfo.Occupancy < MinWEU) { + DiagnosticInfoOptimizationFailure Diag( + F, F.getSubprogram(), + "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in " + "'" + + F.getName() + "': desired occupancy was " + Twine(MinWEU) + + ", final occupancy is " + Twine(ProgInfo.Occupancy)); + F.getContext().diagnose(Diag); + } } static unsigned getRsrcReg(CallingConv::ID CallConv) { @@ -1113,7 +1114,8 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF, void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) { auto *MD = getTargetStreamer()->getPALMetadata(); const MachineFrameInfo &MFI = MF.getFrameInfo(); - MD->setFunctionScratchSize(MF, MFI.getStackSize()); + StringRef FnName = MF.getFunction().getName(); + MD->setFunctionScratchSize(FnName, MFI.getStackSize()); // Set compute registers MD->setRsrc1(CallingConv::AMDGPU_CS, @@ -1121,9 +1123,9 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) { MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.getComputePGMRSrc2()); // Set optional info - MD->setFunctionLdsSize(MF, CurrentProgramInfo.LDSSize); - MD->setFunctionNumUsedVgprs(MF, CurrentProgramInfo.NumVGPRsForWavesPerEU); - MD->setFunctionNumUsedSgprs(MF, CurrentProgramInfo.NumSGPRsForWavesPerEU); + MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize); + MD->setFunctionNumUsedVgprs(FnName, CurrentProgramInfo.NumVGPRsForWavesPerEU); + MD->setFunctionNumUsedSgprs(FnName, CurrentProgramInfo.NumSGPRsForWavesPerEU); } // This is supposed to be log2(Size) @@ -1164,27 +1166,28 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE, getElementByteSizeValue(STM.getMaxPrivateElementSize(true))); - if (MFI->hasPrivateSegmentBuffer()) { + const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo(); + if (UserSGPRInfo.hasPrivateSegmentBuffer()) { Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER; } - if (MFI->hasDispatchPtr()) + if (UserSGPRInfo.hasDispatchPtr()) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; - if (MFI->hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5) + if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; - if (MFI->hasKernargSegmentPtr()) + if (UserSGPRInfo.hasKernargSegmentPtr()) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR; - if (MFI->hasDispatchID()) + if (UserSGPRInfo.hasDispatchID()) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID; - if (MFI->hasFlatScratchInit()) + if (UserSGPRInfo.hasFlatScratchInit()) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT; - if (MFI->hasDispatchPtr()) + if (UserSGPRInfo.hasDispatchPtr()) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; if (STM.isXNACKEnabled()) @@ -1293,6 +1296,9 @@ void AMDGPUAsmPrinter::emitResourceUsageRemarks( EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR); EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]", CurrentProgramInfo.ScratchSize); + StringRef DynamicStackStr = + CurrentProgramInfo.DynamicCallStack ? "True" : "False"; + EmitResourceUsageRemark("DynamicStack", "Dynamic Stack", DynamicStackStr); EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]", CurrentProgramInfo.Occupancy); EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill", |
