summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp')
-rw-r--r--lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp580
1 files changed, 267 insertions, 313 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index fda6252f46e3..e62e5d52ad74 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assebly printer --------------------===//
+//===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -21,7 +21,9 @@
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
+#include "R600AsmPrinter.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
@@ -32,7 +34,6 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/TargetLoweringObjectFile.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
@@ -40,6 +41,7 @@
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
using namespace llvm::AMDGPU;
@@ -65,7 +67,7 @@ using namespace llvm::AMDGPU;
// instructions to run at the double precision rate for the device so it's
// probably best to just report no single precision denormals.
static uint32_t getFPMode(const MachineFunction &F) {
- const SISubtarget& ST = F.getSubtarget<SISubtarget>();
+ const GCNSubtarget& ST = F.getSubtarget<GCNSubtarget>();
// TODO: Is there any real use for the flush in only / flush out only modes?
uint32_t FP32Denormals =
@@ -88,7 +90,7 @@ createAMDGPUAsmPrinterPass(TargetMachine &tm,
extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
TargetRegistry::RegisterAsmPrinter(getTheAMDGPUTarget(),
- createAMDGPUAsmPrinterPass);
+ llvm::createR600AsmPrinterPass); // This target now gets the R600 printer.
TargetRegistry::RegisterAsmPrinter(getTheGCNTarget(),
createAMDGPUAsmPrinterPass);
}
@@ -114,7 +116,8 @@ AMDGPUTargetStreamer* AMDGPUAsmPrinter::getTargetStreamer() const {
}
void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
- if (TM.getTargetTriple().getArch() != Triple::amdgcn)
+ if (IsaInfo::hasCodeObjectV3(getSTI()) &&
+ TM.getTargetTriple().getOS() == Triple::AMDHSA)
return;
if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
@@ -127,10 +130,6 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
readPALMetadata(M);
- // Deprecated notes are not emitted for code object v3.
- if (IsaInfo::hasCodeObjectV3(getSTI()->getFeatureBits()))
- return;
-
// HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
@@ -142,7 +141,9 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
}
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
- if (TM.getTargetTriple().getArch() != Triple::amdgcn)
+ // TODO: Add metadata to code object v3.
+ if (IsaInfo::hasCodeObjectV3(getSTI()) &&
+ TM.getTargetTriple().getOS() == Triple::AMDHSA)
return;
// Following code requires TargetStreamer to be present.
@@ -189,37 +190,82 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
}
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
- const AMDGPUMachineFunction *MFI = MF->getInfo<AMDGPUMachineFunction>();
- if (!MFI->isEntryFunction())
+ const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
+ if (!MFI.isEntryFunction())
+ return; // Non-entry functions emit nothing here.
+ if (IsaInfo::hasCodeObjectV3(getSTI()) && // v3 + AMDHSA: see EmitFunctionBodyEnd().
+ TM.getTargetTriple().getOS() == Triple::AMDHSA)
return;
- const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
- amd_kernel_code_t KernelCode;
- if (STM.isAmdCodeObjectV2(*MF)) {
+ const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
+ const Function &F = MF->getFunction();
+ if (STM.isAmdCodeObjectV2(F) &&
+ (F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
+ F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
+ amd_kernel_code_t KernelCode; // Filled in by getAmdKernelCode() below.
getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
-
- OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
}
if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
return;
- HSAMetadataStream.emitKernel(MF->getFunction(),
- getHSACodeProps(*MF, CurrentProgramInfo),
- getHSADebugProps(*MF, CurrentProgramInfo));
+ HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo); // Reached on AMDHSA only.
+}
+/// Emits the amdhsa kernel descriptor for a code object v3 AMDHSA entry function.
+void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
+ const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
+ if (!MFI.isEntryFunction())
+ return;
+ if (!IsaInfo::hasCodeObjectV3(getSTI()) ||
+ TM.getTargetTriple().getOS() != Triple::AMDHSA)
+ return;
+ // Look up the streamer, context and read-only section used for the descriptor.
+ auto &Streamer = getTargetStreamer()->getStreamer();
+ auto &Context = Streamer.getContext();
+ auto &ObjectFileInfo = *Context.getObjectFileInfo();
+ auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();
+ // PushSection()/PopSection() bracket the switch to the read-only section.
+ Streamer.PushSection();
+ Streamer.SwitchSection(&ReadOnlySection);
+
+ // CP microcode requires the kernel descriptor to be allocated on 64 byte
+ // alignment.
+ Streamer.EmitValueToAlignment(64, 0, 1, 0);
+ if (ReadOnlySection.getAlignment() < 64)
+ ReadOnlySection.setAlignment(64);
+ // The SGPR count passed below excludes IsaInfo::getNumExtraSGPRs().
+ SmallString<128> KernelName;
+ getNameWithPrefix(KernelName, &MF->getFunction());
+ getTargetStreamer()->EmitAmdhsaKernelDescriptor(
+ *getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
+ CurrentProgramInfo.NumVGPRsForWavesPerEU,
+ CurrentProgramInfo.NumSGPRsForWavesPerEU -
+ IsaInfo::getNumExtraSGPRs(getSTI()->getFeatureBits(),
+ CurrentProgramInfo.VCCUsed,
+ CurrentProgramInfo.FlatUsed),
+ CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
+ hasXNACK(*getSTI()));
+
+ Streamer.PopSection();
}
void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
+ if (IsaInfo::hasCodeObjectV3(getSTI()) &&
+ TM.getTargetTriple().getOS() == Triple::AMDHSA) {
+ AsmPrinter::EmitFunctionEntryLabel();
+ return;
+ }
+
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
- if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(*MF)) {
+ const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
+ if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(MF->getFunction())) {
SmallString<128> SymbolName;
getNameWithPrefix(SymbolName, &MF->getFunction()),
getTargetStreamer()->EmitAMDGPUSymbolType(
SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
}
- const AMDGPUSubtarget &STI = MF->getSubtarget<AMDGPUSubtarget>();
+ const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
if (STI.dumpCode()) {
// Disassemble function name label to text.
DisasmLines.push_back(MF->getName().str() + ":");
@@ -231,7 +277,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
}
void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
- const AMDGPUSubtarget &STI = MBB.getParent()->getSubtarget<AMDGPUSubtarget>();
+ const GCNSubtarget &STI = MBB.getParent()->getSubtarget<GCNSubtarget>();
if (STI.dumpCode() && !isBlockOnlyReachableByFallthrough(&MBB)) {
// Write a line for the basic block label if it is not only fallthrough.
DisasmLines.push_back(
@@ -283,11 +329,66 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments(
uint32_t NumVGPR,
uint32_t NumSGPR,
uint64_t ScratchSize,
- uint64_t CodeSize) {
+ uint64_t CodeSize,
+ const AMDGPUMachineFunction *MFI) {
OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false);
OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false);
OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false);
+ OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
+ false);
+}
+/// Builds the kernel code properties bitmask from MF's SIMachineFunctionInfo.
+uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
+ const MachineFunction &MF) const {
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ uint16_t KernelCodeProperties = 0;
+ // Each MFI.has*() query below sets the matching ENABLE_SGPR_* property bit.
+ if (MFI.hasPrivateSegmentBuffer()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
+ }
+ if (MFI.hasDispatchPtr()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ }
+ if (MFI.hasQueuePtr()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
+ }
+ if (MFI.hasKernargSegmentPtr()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
+ }
+ if (MFI.hasDispatchID()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
+ }
+ if (MFI.hasFlatScratchInit()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+ }
+
+ return KernelCodeProperties;
+}
+/// Builds a zeroed amdhsa::kernel_descriptor_t and fills it from PI and MF.
+amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
+ const MachineFunction &MF,
+ const SIProgramInfo &PI) const {
+ amdhsa::kernel_descriptor_t KernelDescriptor;
+ memset(&KernelDescriptor, 0x0, sizeof(KernelDescriptor));
+ // These SIProgramInfo values are asserted to fit in 32 bits before being stored.
+ assert(isUInt<32>(PI.ScratchSize));
+ assert(isUInt<32>(PI.ComputePGMRSrc1));
+ assert(isUInt<32>(PI.ComputePGMRSrc2));
+ // Fields not assigned below keep the zero written by the memset above.
+ KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
+ KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
+ KernelDescriptor.compute_pgm_rsrc1 = PI.ComputePGMRSrc1;
+ KernelDescriptor.compute_pgm_rsrc2 = PI.ComputePGMRSrc2;
+ KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
+
+ return KernelDescriptor;
}
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
@@ -301,32 +402,29 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
- const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
MCContext &Context = getObjFileLowering().getContext();
- if (!STM.isAmdHsaOS()) {
+ // FIXME: This should be an explicit check for Mesa.
+ if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
MCSectionELF *ConfigSection =
Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
OutStreamer->SwitchSection(ConfigSection);
}
- if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- if (MFI->isEntryFunction()) {
- getSIProgramInfo(CurrentProgramInfo, MF);
- } else {
- auto I = CallGraphResourceInfo.insert(
- std::make_pair(&MF.getFunction(), SIFunctionResourceInfo()));
- SIFunctionResourceInfo &Info = I.first->second;
- assert(I.second && "should only be called once per function");
- Info = analyzeResourceUsage(MF);
- }
-
- if (STM.isAmdPalOS())
- EmitPALMetadata(MF, CurrentProgramInfo);
- if (!STM.isAmdHsaOS()) {
- EmitProgramInfoSI(MF, CurrentProgramInfo);
- }
+ if (MFI->isEntryFunction()) {
+ getSIProgramInfo(CurrentProgramInfo, MF);
} else {
- EmitProgramInfoR600(MF);
+ auto I = CallGraphResourceInfo.insert(
+ std::make_pair(&MF.getFunction(), SIFunctionResourceInfo()));
+ SIFunctionResourceInfo &Info = I.first->second;
+ assert(I.second && "should only be called once per function");
+ Info = analyzeResourceUsage(MF);
+ }
+
+ if (STM.isAmdPalOS())
+ EmitPALMetadata(MF, CurrentProgramInfo);
+ else if (!STM.isAmdHsaOS()) {
+ EmitProgramInfoSI(MF, CurrentProgramInfo);
}
DisasmLines.clear();
@@ -340,84 +438,74 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
OutStreamer->SwitchSection(CommentSection);
- if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- if (!MFI->isEntryFunction()) {
- OutStreamer->emitRawComment(" Function info:", false);
- SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()];
- emitCommonFunctionComments(
- Info.NumVGPR,
- Info.getTotalNumSGPRs(MF.getSubtarget<SISubtarget>()),
- Info.PrivateSegmentSize,
- getFunctionCodeSize(MF));
- return false;
- }
-
- OutStreamer->emitRawComment(" Kernel info:", false);
- emitCommonFunctionComments(CurrentProgramInfo.NumVGPR,
- CurrentProgramInfo.NumSGPR,
- CurrentProgramInfo.ScratchSize,
- getFunctionCodeSize(MF));
-
- OutStreamer->emitRawComment(
- " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
- OutStreamer->emitRawComment(
- " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);
- OutStreamer->emitRawComment(
- " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +
- " bytes/workgroup (compile time only)", false);
-
- OutStreamer->emitRawComment(
- " SGPRBlocks: " + Twine(CurrentProgramInfo.SGPRBlocks), false);
- OutStreamer->emitRawComment(
- " VGPRBlocks: " + Twine(CurrentProgramInfo.VGPRBlocks), false);
-
- OutStreamer->emitRawComment(
- " NumSGPRsForWavesPerEU: " +
- Twine(CurrentProgramInfo.NumSGPRsForWavesPerEU), false);
- OutStreamer->emitRawComment(
- " NumVGPRsForWavesPerEU: " +
- Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
-
- OutStreamer->emitRawComment(
- " ReservedVGPRFirst: " + Twine(CurrentProgramInfo.ReservedVGPRFirst),
- false);
- OutStreamer->emitRawComment(
- " ReservedVGPRCount: " + Twine(CurrentProgramInfo.ReservedVGPRCount),
- false);
-
- if (MF.getSubtarget<SISubtarget>().debuggerEmitPrologue()) {
- OutStreamer->emitRawComment(
- " DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
- Twine(CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
- OutStreamer->emitRawComment(
- " DebuggerPrivateSegmentBufferSGPR: s" +
- Twine(CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR), false);
- }
+ if (!MFI->isEntryFunction()) {
+ OutStreamer->emitRawComment(" Function info:", false);
+ SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()];
+ emitCommonFunctionComments(
+ Info.NumVGPR,
+ Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
+ Info.PrivateSegmentSize,
+ getFunctionCodeSize(MF), MFI);
+ return false;
+ }
+ OutStreamer->emitRawComment(" Kernel info:", false);
+ emitCommonFunctionComments(CurrentProgramInfo.NumVGPR,
+ CurrentProgramInfo.NumSGPR,
+ CurrentProgramInfo.ScratchSize,
+ getFunctionCodeSize(MF), MFI);
+
+ OutStreamer->emitRawComment(
+ " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
+ OutStreamer->emitRawComment(
+ " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);
+ OutStreamer->emitRawComment(
+ " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +
+ " bytes/workgroup (compile time only)", false);
+
+ OutStreamer->emitRawComment(
+ " SGPRBlocks: " + Twine(CurrentProgramInfo.SGPRBlocks), false);
+ OutStreamer->emitRawComment(
+ " VGPRBlocks: " + Twine(CurrentProgramInfo.VGPRBlocks), false);
+
+ OutStreamer->emitRawComment(
+ " NumSGPRsForWavesPerEU: " +
+ Twine(CurrentProgramInfo.NumSGPRsForWavesPerEU), false);
+ OutStreamer->emitRawComment(
+ " NumVGPRsForWavesPerEU: " +
+ Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
+
+ OutStreamer->emitRawComment(
+ " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
+
+ if (MF.getSubtarget<GCNSubtarget>().debuggerEmitPrologue()) {
OutStreamer->emitRawComment(
- " COMPUTE_PGM_RSRC2:USER_SGPR: " +
- Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
- OutStreamer->emitRawComment(
- " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
- Twine(G_00B84C_TRAP_HANDLER(CurrentProgramInfo.ComputePGMRSrc2)), false);
+ " DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
+ Twine(CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
OutStreamer->emitRawComment(
- " COMPUTE_PGM_RSRC2:TGID_X_EN: " +
- Twine(G_00B84C_TGID_X_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
- OutStreamer->emitRawComment(
- " COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
- Twine(G_00B84C_TGID_Y_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
- OutStreamer->emitRawComment(
- " COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
- Twine(G_00B84C_TGID_Z_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
- OutStreamer->emitRawComment(
- " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
- Twine(G_00B84C_TIDIG_COMP_CNT(CurrentProgramInfo.ComputePGMRSrc2)),
- false);
- } else {
- R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
- OutStreamer->emitRawComment(
- Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->CFStackSize)));
+ " DebuggerPrivateSegmentBufferSGPR: s" +
+ Twine(CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR), false);
}
+
+ OutStreamer->emitRawComment(
+ " COMPUTE_PGM_RSRC2:USER_SGPR: " +
+ Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
+ OutStreamer->emitRawComment(
+ " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
+ Twine(G_00B84C_TRAP_HANDLER(CurrentProgramInfo.ComputePGMRSrc2)), false);
+ OutStreamer->emitRawComment(
+ " COMPUTE_PGM_RSRC2:TGID_X_EN: " +
+ Twine(G_00B84C_TGID_X_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
+ OutStreamer->emitRawComment(
+ " COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
+ Twine(G_00B84C_TGID_Y_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
+ OutStreamer->emitRawComment(
+ " COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
+ Twine(G_00B84C_TGID_Z_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
+ OutStreamer->emitRawComment(
+ " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
+ Twine(G_00B84C_TIDIG_COMP_CNT(CurrentProgramInfo.ComputePGMRSrc2)),
+ false);
}
if (STM.dumpCode()) {
@@ -440,67 +528,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
return false;
}
-void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
- unsigned MaxGPR = 0;
- bool killPixel = false;
- const R600Subtarget &STM = MF.getSubtarget<R600Subtarget>();
- const R600RegisterInfo *RI = STM.getRegisterInfo();
- const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
-
- for (const MachineBasicBlock &MBB : MF) {
- for (const MachineInstr &MI : MBB) {
- if (MI.getOpcode() == AMDGPU::KILLGT)
- killPixel = true;
- unsigned numOperands = MI.getNumOperands();
- for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
- const MachineOperand &MO = MI.getOperand(op_idx);
- if (!MO.isReg())
- continue;
- unsigned HWReg = RI->getHWRegIndex(MO.getReg());
-
- // Register with value > 127 aren't GPR
- if (HWReg > 127)
- continue;
- MaxGPR = std::max(MaxGPR, HWReg);
- }
- }
- }
-
- unsigned RsrcReg;
- if (STM.getGeneration() >= R600Subtarget::EVERGREEN) {
- // Evergreen / Northern Islands
- switch (MF.getFunction().getCallingConv()) {
- default: LLVM_FALLTHROUGH;
- case CallingConv::AMDGPU_CS: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
- case CallingConv::AMDGPU_GS: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
- case CallingConv::AMDGPU_PS: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
- case CallingConv::AMDGPU_VS: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
- }
- } else {
- // R600 / R700
- switch (MF.getFunction().getCallingConv()) {
- default: LLVM_FALLTHROUGH;
- case CallingConv::AMDGPU_GS: LLVM_FALLTHROUGH;
- case CallingConv::AMDGPU_CS: LLVM_FALLTHROUGH;
- case CallingConv::AMDGPU_VS: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
- case CallingConv::AMDGPU_PS: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
- }
- }
-
- OutStreamer->EmitIntValue(RsrcReg, 4);
- OutStreamer->EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
- S_STACK_SIZE(MFI->CFStackSize), 4);
- OutStreamer->EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
- OutStreamer->EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
-
- if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
- OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
- OutStreamer->EmitIntValue(alignTo(MFI->getLDSSize(), 4) >> 2, 4);
- }
-}
-
uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
- const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = STM.getInstrInfo();
uint64_t CodeSize = 0;
@@ -510,7 +539,7 @@ uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const
// TODO: CodeSize should account for multiple functions.
// TODO: Should we count size of debug info?
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
continue;
CodeSize += TII->getInstSizeInBytes(MI);
@@ -531,30 +560,10 @@ static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
return false;
}
-static unsigned getNumExtraSGPRs(const SISubtarget &ST,
- bool VCCUsed,
- bool FlatScrUsed) {
- unsigned ExtraSGPRs = 0;
- if (VCCUsed)
- ExtraSGPRs = 2;
-
- if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) {
- if (FlatScrUsed)
- ExtraSGPRs = 4;
- } else {
- if (ST.isXNACKEnabled())
- ExtraSGPRs = 4;
-
- if (FlatScrUsed)
- ExtraSGPRs = 6;
- }
-
- return ExtraSGPRs;
-}
-
int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
- const SISubtarget &ST) const {
- return NumExplicitSGPR + getNumExtraSGPRs(ST, UsesVCC, UsesFlatScratch);
+ const GCNSubtarget &ST) const {
+ return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(),
+ UsesVCC, UsesFlatScratch);
}
AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
@@ -562,7 +571,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
SIFunctionResourceInfo Info;
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -586,6 +595,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
Info.PrivateSegmentSize = FrameInfo.getStackSize();
+ if (MFI->isStackRealigned())
+ Info.PrivateSegmentSize += FrameInfo.getMaxAlignment();
Info.UsesVCC = MRI.isPhysRegUsed(AMDGPU::VCC_LO) ||
@@ -649,7 +660,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
continue;
case AMDGPU::NoRegister:
- assert(MI.isDebugValue());
+ assert(MI.isDebugInstr());
continue;
case AMDGPU::VCC:
@@ -663,6 +674,11 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
case AMDGPU::FLAT_SCR_HI:
continue;
+ case AMDGPU::XNACK_MASK:
+ case AMDGPU::XNACK_MASK_LO:
+ case AMDGPU::XNACK_MASK_HI:
+ llvm_unreachable("xnack_mask registers should not be used");
+
case AMDGPU::TBA:
case AMDGPU::TBA_LO:
case AMDGPU::TBA_HI:
@@ -742,8 +758,9 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
// conservative guesses.
// 48 SGPRs - vcc, - flat_scr, -xnack
- int MaxSGPRGuess = 47 - getNumExtraSGPRs(ST, true,
- ST.hasFlatAddressSpace());
+ int MaxSGPRGuess =
+ 47 - IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), true,
+ ST.hasFlatAddressSpace());
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
MaxVGPR = std::max(MaxVGPR, 23);
@@ -798,15 +815,16 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
MF.getFunction().getContext().diagnose(DiagStackSize);
}
- const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const SIInstrInfo *TII = STM.getInstrInfo();
const SIRegisterInfo *RI = &TII->getRegisterInfo();
- unsigned ExtraSGPRs = getNumExtraSGPRs(STM,
- ProgInfo.VCCUsed,
- ProgInfo.FlatUsed);
- unsigned ExtraVGPRs = STM.getReservedNumVGPRs(MF);
+ // TODO(scott.linder): The calculations related to SGPR/VGPR blocks are
+ // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
+ // unified.
+ unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
+ STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
// Check the addressable register limit before we add ExtraSGPRs.
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
@@ -827,7 +845,19 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// Account for extra SGPRs and VGPRs reserved for debugger use.
ProgInfo.NumSGPR += ExtraSGPRs;
- ProgInfo.NumVGPR += ExtraVGPRs;
+
+ // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
+ // dispatch registers are function args.
+ unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
+ for (auto &Arg : MF.getFunction().args()) {
+ unsigned NumRegs = (Arg.getType()->getPrimitiveSizeInBits() + 31) / 32;
+ if (Arg.hasAttribute(Attribute::InReg))
+ WaveDispatchNumSGPR += NumRegs;
+ else
+ WaveDispatchNumVGPR += NumRegs;
+ }
+ ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
+ ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
// Adjust number of registers used to meet default/requested minimum/maximum
// number of waves per execution unit request.
@@ -875,19 +905,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
Ctx.diagnose(Diag);
}
- // SGPRBlocks is actual number of SGPR blocks minus 1.
- ProgInfo.SGPRBlocks = alignTo(ProgInfo.NumSGPRsForWavesPerEU,
- STM.getSGPREncodingGranule());
- ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / STM.getSGPREncodingGranule() - 1;
-
- // VGPRBlocks is actual number of VGPR blocks minus 1.
- ProgInfo.VGPRBlocks = alignTo(ProgInfo.NumVGPRsForWavesPerEU,
- STM.getVGPREncodingGranule());
- ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPREncodingGranule() - 1;
-
- // Record first reserved VGPR and number of reserved VGPRs.
- ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? ProgInfo.NumVGPR : 0;
- ProgInfo.ReservedVGPRCount = STM.getReservedNumVGPRs(MF);
+ ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
+ STM.getFeatureBits(), ProgInfo.NumSGPRsForWavesPerEU);
+ ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
+ STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU);
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
@@ -909,7 +930,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.DX10Clamp = STM.enableDX10Clamp();
unsigned LDSAlignShift;
- if (STM.getGeneration() < SISubtarget::SEA_ISLANDS) {
+ if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
// LDS is allocated in 64 dword blocks.
LDSAlignShift = 8;
} else {
@@ -954,7 +975,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.ComputePGMRSrc2 =
S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
- S_00B84C_TRAP_HANDLER(STM.isTrapHandlerEnabled()) |
+ // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
+ S_00B84C_TRAP_HANDLER(STM.isAmdHsaOS() ? 0 : STM.isTrapHandlerEnabled()) |
S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
@@ -981,7 +1003,7 @@ static unsigned getRsrcReg(CallingConv::ID CallConv) {
void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
const SIProgramInfo &CurrentProgramInfo) {
- const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
@@ -1002,26 +1024,21 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer->EmitIntValue(RsrcReg, 4);
OutStreamer->EmitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
- unsigned Rsrc2Val = 0;
if (STM.isVGPRSpillingEnabled(MF.getFunction())) {
OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
- if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
- Rsrc2Val = S_00B84C_SCRATCH_EN(CurrentProgramInfo.ScratchBlocks > 0);
- }
- if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
- OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
- OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4);
- OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
- OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
- Rsrc2Val |= S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks);
- }
- if (Rsrc2Val) {
- OutStreamer->EmitIntValue(RsrcReg + 4 /*rsrc2*/, 4);
- OutStreamer->EmitIntValue(Rsrc2Val, 4);
}
}
+ if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
+ OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
+ OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks), 4);
+ OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
+ OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4);
+ OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
+ OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
+ }
+
OutStreamer->EmitIntValue(R_SPILLED_SGPRS, 4);
OutStreamer->EmitIntValue(MFI->getNumSpilledSGPRs(), 4);
OutStreamer->EmitIntValue(R_SPILLED_VGPRS, 4);
@@ -1114,8 +1131,12 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
const SIProgramInfo &CurrentProgramInfo,
const MachineFunction &MF) const {
+ const Function &F = MF.getFunction();
+ assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
+ F.getCallingConv() == CallingConv::SPIR_KERNEL);
+
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
@@ -1151,21 +1172,6 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
if (MFI->hasFlatScratchInit())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
- if (MFI->hasGridWorkgroupCountX()) {
- Out.code_properties |=
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X;
- }
-
- if (MFI->hasGridWorkgroupCountY()) {
- Out.code_properties |=
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y;
- }
-
- if (MFI->hasGridWorkgroupCountZ()) {
- Out.code_properties |=
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z;
- }
-
if (MFI->hasDispatchPtr())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
@@ -1175,20 +1181,17 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
if (STM.isXNACKEnabled())
Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
- // FIXME: Should use getKernArgSize
- Out.kernarg_segment_byte_size =
- STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());
+ unsigned MaxKernArgAlign;
+ Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
- Out.reserved_vgpr_first = CurrentProgramInfo.ReservedVGPRFirst;
- Out.reserved_vgpr_count = CurrentProgramInfo.ReservedVGPRCount;
// These alignment values are specified in powers of two, so alignment =
// 2^n. The minimum alignment is 2^4 = 16.
Out.kernarg_segment_alignment = std::max((size_t)4,
- countTrailingZeros(MFI->getMaxKernArgAlign()));
+ countTrailingZeros(MaxKernArgAlign));
if (STM.debuggerEmitPrologue()) {
Out.debug_wavefront_private_segment_offset_sgpr =
@@ -1198,55 +1201,6 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
}
}
-AMDGPU::HSAMD::Kernel::CodeProps::Metadata AMDGPUAsmPrinter::getHSACodeProps(
- const MachineFunction &MF,
- const SIProgramInfo &ProgramInfo) const {
- const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
- const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
- HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
-
- HSACodeProps.mKernargSegmentSize =
- STM.getKernArgSegmentSize(MF, MFI.getABIArgOffset());
- HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
- HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
- HSACodeProps.mKernargSegmentAlign =
- std::max(uint32_t(4), MFI.getMaxKernArgAlign());
- HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
- HSACodeProps.mNumSGPRs = CurrentProgramInfo.NumSGPR;
- HSACodeProps.mNumVGPRs = CurrentProgramInfo.NumVGPR;
- HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
- HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
- HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
- HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
- HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
-
- return HSACodeProps;
-}
-
-AMDGPU::HSAMD::Kernel::DebugProps::Metadata AMDGPUAsmPrinter::getHSADebugProps(
- const MachineFunction &MF,
- const SIProgramInfo &ProgramInfo) const {
- const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
- HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
-
- if (!STM.debuggerSupported())
- return HSADebugProps;
-
- HSADebugProps.mDebuggerABIVersion.push_back(1);
- HSADebugProps.mDebuggerABIVersion.push_back(0);
- HSADebugProps.mReservedNumVGPRs = ProgramInfo.ReservedVGPRCount;
- HSADebugProps.mReservedFirstVGPR = ProgramInfo.ReservedVGPRFirst;
-
- if (STM.debuggerEmitPrologue()) {
- HSADebugProps.mPrivateSegmentBufferSGPR =
- ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
- HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR =
- ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
- }
-
- return HSADebugProps;
-}
-
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) {