diff options
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 212 |
1 files changed, 114 insertions, 98 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 974e79fff3d7..0446655830d1 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -17,11 +17,11 @@ // #include "AMDGPUAsmPrinter.h" +#include "AMDGPUTargetMachine.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "InstPrinter/AMDGPUInstPrinter.h" #include "Utils/AMDGPUBaseInfo.h" #include "AMDGPU.h" -#include "AMDKernelCodeT.h" #include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600MachineFunctionInfo.h" @@ -93,33 +93,40 @@ extern "C" void LLVMInitializeAMDGPUAsmPrinter() { AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) - : AsmPrinter(TM, std::move(Streamer)) {} + : AsmPrinter(TM, std::move(Streamer)) { + AMDGPUASI = static_cast<AMDGPUTargetMachine*>(&TM)->getAMDGPUAS(); + } StringRef AMDGPUAsmPrinter::getPassName() const { return "AMDGPU Assembly Printer"; } +const MCSubtargetInfo* AMDGPUAsmPrinter::getSTI() const { + return TM.getMCSubtargetInfo(); +} + +AMDGPUTargetStreamer& AMDGPUAsmPrinter::getTargetStreamer() const { + return static_cast<AMDGPUTargetStreamer&>(*OutStreamer->getTargetStreamer()); +} + void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { if (TM.getTargetTriple().getOS() != Triple::AMDHSA) return; - // Need to construct an MCSubtargetInfo here in case we have no functions - // in the module. - std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( - TM.getTargetTriple().str(), TM.getTargetCPU(), - TM.getTargetFeatureString())); - - AMDGPUTargetStreamer *TS = - static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer()); + AMDGPU::IsaInfo::IsaVersion ISA = + AMDGPU::IsaInfo::getIsaVersion(getSTI()->getFeatureBits()); - TS->EmitDirectiveHSACodeObjectVersion(2, 1); + getTargetStreamer().EmitDirectiveHSACodeObjectVersion(2, 1); + getTargetStreamer().EmitDirectiveHSACodeObjectISA( + ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); + getTargetStreamer().EmitStartOfCodeObjectMetadata(M); +} - AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits()); - TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping, - "AMD", "AMDGPU"); +void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { + if (TM.getTargetTriple().getOS() != Triple::AMDHSA) + return; - // Emit runtime metadata. - TS->EmitRuntimeMetadata(M); + getTargetStreamer().EmitEndOfCodeObjectMetadata(); } bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( @@ -136,25 +143,32 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64); } - void AMDGPUAsmPrinter::EmitFunctionBodyStart() { const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>(); SIProgramInfo KernelInfo; + amd_kernel_code_t KernelCode; if (STM.isAmdCodeObjectV2(*MF)) { getSIProgramInfo(KernelInfo, *MF); - EmitAmdKernelCodeT(*MF, KernelInfo); + getAmdKernelCode(KernelCode, KernelInfo, *MF); + + OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); + getTargetStreamer().EmitAMDKernelCodeT(KernelCode); } + + if (TM.getTargetTriple().getOS() != Triple::AMDHSA) + return; + getTargetStreamer().EmitKernelCodeObjectMetadata(*MF->getFunction(), + KernelCode); } void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>(); - if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) { - AMDGPUTargetStreamer *TS = - static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer()); + if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(*MF)) { SmallString<128> SymbolName; getNameWithPrefix(SymbolName, MF->getFunction()), - TS->EmitAMDGPUSymbolType(SymbolName, ELF::STT_AMDGPU_HSA_KERNEL); + getTargetStreamer().EmitAMDGPUSymbolType( + SymbolName, ELF::STT_AMDGPU_HSA_KERNEL); } AsmPrinter::EmitFunctionEntryLabel(); @@ -163,7 +177,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Group segment variables aren't emitted in HSA. - if (AMDGPU::isGroupSegment(GV)) + if (AMDGPU::isGroupSegment(GV, AMDGPUASI)) return; AsmPrinter::EmitGlobalVariable(GV); @@ -247,6 +261,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " + Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)), false); + OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " + + Twine(G_00B84C_TRAP_HANDLER(KernelInfo.ComputePGMRSrc2)), + false); OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " + Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)), false); @@ -382,6 +399,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, case AMDGPU::EXEC_HI: case AMDGPU::SCC: case AMDGPU::M0: + case AMDGPU::SRC_SHARED_BASE: + case AMDGPU::SRC_SHARED_LIMIT: + case AMDGPU::SRC_PRIVATE_BASE: + case AMDGPU::SRC_PRIVATE_LIMIT: continue; case AMDGPU::VCC: @@ -478,33 +499,20 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ExtraSGPRs = 6; } - // Record first reserved register and reserved register count fields, and - // update max register counts if "amdgpu-debugger-reserve-regs" attribute was - // requested. - ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0; - ProgInfo.ReservedVGPRCount = RI->getNumDebuggerReservedVGPRs(STM); - - // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and - // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue" - // attribute was requested. - if (STM.debuggerEmitPrologue()) { - ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR = - RI->getHWRegIndex(MFI->getScratchWaveOffsetReg()); - ProgInfo.DebuggerPrivateSegmentBufferSGPR = - RI->getHWRegIndex(MFI->getScratchRSrcReg()); - } + unsigned ExtraVGPRs = STM.getReservedNumVGPRs(MF); // Check the addressable register limit before we add ExtraSGPRs. if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && !STM.hasSGPRInitBug()) { - unsigned MaxAddressableNumSGPRs = STM.getMaxNumSGPRs(); + unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs(); if (MaxSGPR + 1 > MaxAddressableNumSGPRs) { // This can happen due to a compiler bug or when using inline asm. LLVMContext &Ctx = MF.getFunction()->getContext(); DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "addressable scalar registers", MaxSGPR + 1, DS_Error, - DK_ResourceLimit, MaxAddressableNumSGPRs); + DK_ResourceLimit, + MaxAddressableNumSGPRs); Ctx.diagnose(Diag); MaxSGPR = MaxAddressableNumSGPRs - 1; } @@ -512,41 +520,43 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, // Account for extra SGPRs and VGPRs reserved for debugger use. MaxSGPR += ExtraSGPRs; - MaxVGPR += RI->getNumDebuggerReservedVGPRs(STM); + MaxVGPR += ExtraVGPRs; // We found the maximum register index. They start at 0, so add one to get the // number of registers. - ProgInfo.NumVGPR = MaxVGPR + 1; ProgInfo.NumSGPR = MaxSGPR + 1; + ProgInfo.NumVGPR = MaxVGPR + 1; // Adjust number of registers used to meet default/requested minimum/maximum // number of waves per execution unit request. ProgInfo.NumSGPRsForWavesPerEU = std::max( - ProgInfo.NumSGPR, RI->getMinNumSGPRs(STM, MFI->getMaxWavesPerEU())); + ProgInfo.NumSGPR, STM.getMinNumSGPRs(MFI->getMaxWavesPerEU())); ProgInfo.NumVGPRsForWavesPerEU = std::max( - ProgInfo.NumVGPR, RI->getMinNumVGPRs(MFI->getMaxWavesPerEU())); + ProgInfo.NumVGPR, STM.getMinNumVGPRs(MFI->getMaxWavesPerEU())); if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS || STM.hasSGPRInitBug()) { - unsigned MaxNumSGPRs = STM.getMaxNumSGPRs(); - if (ProgInfo.NumSGPR > MaxNumSGPRs) { - // This can happen due to a compiler bug or when using inline asm to use the - // registers which are usually reserved for vcc etc. - + unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs(); + if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) { + // This can happen due to a compiler bug or when using inline asm to use + // the registers which are usually reserved for vcc etc. LLVMContext &Ctx = MF.getFunction()->getContext(); DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "scalar registers", ProgInfo.NumSGPR, DS_Error, - DK_ResourceLimit, MaxNumSGPRs); + DK_ResourceLimit, + MaxAddressableNumSGPRs); Ctx.diagnose(Diag); - ProgInfo.NumSGPR = MaxNumSGPRs; - ProgInfo.NumSGPRsForWavesPerEU = MaxNumSGPRs; + ProgInfo.NumSGPR = MaxAddressableNumSGPRs; + ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs; } } if (STM.hasSGPRInitBug()) { - ProgInfo.NumSGPR = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; - ProgInfo.NumSGPRsForWavesPerEU = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; + ProgInfo.NumSGPR = + AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; + ProgInfo.NumSGPRsForWavesPerEU = + AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; } if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) { @@ -565,13 +575,27 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, // SGPRBlocks is actual number of SGPR blocks minus 1. ProgInfo.SGPRBlocks = alignTo(ProgInfo.NumSGPRsForWavesPerEU, - RI->getSGPRAllocGranule()); - ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / RI->getSGPRAllocGranule() - 1; + STM.getSGPREncodingGranule()); + ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / STM.getSGPREncodingGranule() - 1; // VGPRBlocks is actual number of VGPR blocks minus 1. ProgInfo.VGPRBlocks = alignTo(ProgInfo.NumVGPRsForWavesPerEU, - RI->getVGPRAllocGranule()); - ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / RI->getVGPRAllocGranule() - 1; + STM.getVGPREncodingGranule()); + ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPREncodingGranule() - 1; + + // Record first reserved VGPR and number of reserved VGPRs. + ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0; + ProgInfo.ReservedVGPRCount = STM.getReservedNumVGPRs(MF); + + // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and + // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue" + // attribute was requested. + if (STM.debuggerEmitPrologue()) { + ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR = + RI->getHWRegIndex(MFI->getScratchWaveOffsetReg()); + ProgInfo.DebuggerPrivateSegmentBufferSGPR = + RI->getHWRegIndex(MFI->getScratchRSrcReg()); + } // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode // register. @@ -580,7 +604,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.IEEEMode = STM.enableIEEEBit(MF); // Make clamp modifier on NaN input returns 0. - ProgInfo.DX10Clamp = 1; + ProgInfo.DX10Clamp = STM.enableDX10Clamp(); const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); ProgInfo.ScratchSize = FrameInfo.getStackSize(); @@ -635,6 +659,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.ComputePGMRSrc2 = S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) | S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) | + S_00B84C_TRAP_HANDLER(STM.isTrapHandlerEnabled()) | S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) | S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) | S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) | @@ -688,7 +713,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4); OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4); OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); - OutStreamer->EmitIntValue(MFI->PSInputEna, 4); + OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4); OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4); OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4); } @@ -713,97 +738,88 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) { } } -void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, - const SIProgramInfo &KernelInfo) const { +void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, + const SIProgramInfo &KernelInfo, + const MachineFunction &MF) const { const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); const SISubtarget &STM = MF.getSubtarget<SISubtarget>(); - amd_kernel_code_t header; - AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits()); + AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits()); - header.compute_pgm_resource_registers = + Out.compute_pgm_resource_registers = KernelInfo.ComputePGMRSrc1 | (KernelInfo.ComputePGMRSrc2 << 32); - header.code_properties = AMD_CODE_PROPERTY_IS_PTR64; - + Out.code_properties = AMD_CODE_PROPERTY_IS_PTR64; - AMD_HSA_BITS_SET(header.code_properties, + AMD_HSA_BITS_SET(Out.code_properties, AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE, getElementByteSizeValue(STM.getMaxPrivateElementSize())); if (MFI->hasPrivateSegmentBuffer()) { - header.code_properties |= + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER; } if (MFI->hasDispatchPtr()) - header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; if (MFI->hasQueuePtr()) - header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; if (MFI->hasKernargSegmentPtr()) - header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR; + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR; if (MFI->hasDispatchID()) - header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID; + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID; if (MFI->hasFlatScratchInit()) - header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT; - - // TODO: Private segment size + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT; if (MFI->hasGridWorkgroupCountX()) { - header.code_properties |= + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X; } if (MFI->hasGridWorkgroupCountY()) { - header.code_properties |= + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y; } if (MFI->hasGridWorkgroupCountZ()) { - header.code_properties |= + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z; } if (MFI->hasDispatchPtr()) - header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; if (STM.debuggerSupported()) - header.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED; + Out.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED; if (STM.isXNACKEnabled()) - header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED; + Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED; // FIXME: Should use getKernArgSize - header.kernarg_segment_byte_size = + Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset()); - header.wavefront_sgpr_count = KernelInfo.NumSGPR; - header.workitem_vgpr_count = KernelInfo.NumVGPR; - header.workitem_private_segment_byte_size = KernelInfo.ScratchSize; - header.workgroup_group_segment_byte_size = KernelInfo.LDSSize; - header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst; - header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount; + Out.wavefront_sgpr_count = KernelInfo.NumSGPR; + Out.workitem_vgpr_count = KernelInfo.NumVGPR; + Out.workitem_private_segment_byte_size = KernelInfo.ScratchSize; + Out.workgroup_group_segment_byte_size = KernelInfo.LDSSize; + Out.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst; + Out.reserved_vgpr_count = KernelInfo.ReservedVGPRCount; // These alignment values are specified in powers of two, so alignment = // 2^n. The minimum alignment is 2^4 = 16. - header.kernarg_segment_alignment = std::max((size_t)4, + Out.kernarg_segment_alignment = std::max((size_t)4, countTrailingZeros(MFI->getMaxKernArgAlign())); if (STM.debuggerEmitPrologue()) { - header.debug_wavefront_private_segment_offset_sgpr = + Out.debug_wavefront_private_segment_offset_sgpr = KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR; - header.debug_private_segment_buffer_sgpr = + Out.debug_private_segment_buffer_sgpr = KernelInfo.DebuggerPrivateSegmentBufferSGPR; } - - AMDGPUTargetStreamer *TS = - static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer()); - - OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); - TS->EmitAMDKernelCodeT(header); } bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, |