diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 53 |
1 files changed, 41 insertions, 12 deletions
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 1bd3cdc67800..eba8e49a46f8 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -66,8 +66,8 @@ bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) { StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { AMDGPU::GPUKind AK; + // clang-format off switch (ElfMach) { - default: llvm_unreachable("Unhandled ELF::EF_AMDGPU type"); case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break; case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break; case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break; @@ -126,8 +126,12 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break; case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break; + default: AK = GK_NONE; break; } + // clang-format on StringRef GPUName = getArchNameAMDGCN(AK); if (GPUName != "") @@ -140,6 +144,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { if (AK == AMDGPU::GPUKind::GK_NONE) AK = parseArchR600(GPU); + // clang-format off switch (AK) { case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600; case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630; @@ -199,8 +204,11 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103; case GK_GFX1150: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150; case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151; + case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200; + case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201; case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE; } + // clang-format on llvm_unreachable("unknown GPU"); } @@ -368,6 +376,12 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD, kernel_code_properties, amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); + if (hasKernargPreload(STI)) { + PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_length ", KD, + kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH); + PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_preload_offset ", KD, + kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET); + } PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD, kernel_code_properties, amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); @@ -418,9 +432,6 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( switch (CodeObjectVersion) { default: break; - case AMDGPU::AMDHSA_COV2: - break; - case AMDGPU::AMDHSA_COV3: case AMDGPU::AMDHSA_COV4: case AMDGPU::AMDHSA_COV5: if (getTargetID()->isXnackSupported()) @@ -449,7 +460,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( if (IVersion.Major >= 9) PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD, compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL); + amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL); if (AMDGPU::isGFX90A(STI)) PRINT_FIELD(OS, ".amdhsa_tg_split", KD, compute_pgm_rsrc3, @@ -457,13 +468,13 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( if (IVersion.Major >= 10) { PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD, compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE); + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE); PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD, compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED); + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED); PRINT_FIELD(OS, ".amdhsa_forward_progress", KD, compute_pgm_rsrc1, - amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS); + amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS); PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3, amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT); } @@ -539,7 +550,7 @@ void AMDGPUTargetELFStreamer::EmitNote( unsigned NoteFlags = 0; // TODO Apparently, this is currently needed for OpenCL as mentioned in // https://reviews.llvm.org/D74995 - if (STI.getTargetTriple().getOS() == Triple::AMDHSA) + if (isHsaAbi(STI)) NoteFlags = ELF::SHF_ALLOC; S.pushSection(); @@ -598,11 +609,10 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() { } unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() { - assert(STI.getTargetTriple().getOS() == Triple::AMDHSA); + assert(isHsaAbi(STI)); if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(&STI)) { switch (*HsaAbiVer) { - case ELF::ELFABIVERSION_AMDGPU_HSA_V2: case ELF::ELFABIVERSION_AMDGPU_HSA_V3: return getEFlagsV3(); case ELF::ELFABIVERSION_AMDGPU_HSA_V4: @@ -827,6 +837,24 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata( return true; } +bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader( + const MCSubtargetInfo &STI) { + for (int i = 0; i < 64; ++i) { + OS << "\ts_nop 0\n"; + } + return true; +} + +bool AMDGPUTargetELFStreamer::EmitKernargPreloadHeader( + const MCSubtargetInfo &STI) { + const uint32_t Encoded_s_nop = 0xbf800000; + MCStreamer &OS = getStreamer(); + for (int i = 0; i < 64; ++i) { + OS.emitInt32(Encoded_s_nop); + } + return true; +} + bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) { const uint32_t Encoded_s_code_end = 0xbf9f0000; const uint32_t Encoded_s_nop = 0xbf800000; @@ -906,6 +934,7 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc1); Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc2); Streamer.emitInt16(KernelDescriptor.kernel_code_properties); - for (uint8_t Res : KernelDescriptor.reserved2) + Streamer.emitInt16(KernelDescriptor.kernarg_preload); + for (uint8_t Res : KernelDescriptor.reserved3) Streamer.emitInt8(Res); } |
