aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp')
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp339
1 files changed, 179 insertions, 160 deletions
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 2ded7cdb6489..743ac64b8f10 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,7 +19,7 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
-#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "R600AsmPrinter.h"
@@ -31,10 +30,12 @@
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
@@ -100,7 +101,7 @@ extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {
- if (IsaInfo::hasCodeObjectV3(getSTI()))
+ if (IsaInfo::hasCodeObjectV3(getGlobalSTI()))
HSAMetadataStream.reset(new MetadataStreamerV3());
else
HSAMetadataStream.reset(new MetadataStreamerV2());
@@ -110,7 +111,7 @@ StringRef AMDGPUAsmPrinter::getPassName() const {
return "AMDGPU Assembly Printer";
}
-const MCSubtargetInfo* AMDGPUAsmPrinter::getSTI() const {
+const MCSubtargetInfo *AMDGPUAsmPrinter::getGlobalSTI() const {
return TM.getMCSubtargetInfo();
}
@@ -121,10 +122,10 @@ AMDGPUTargetStreamer* AMDGPUAsmPrinter::getTargetStreamer() const {
}
void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
- if (IsaInfo::hasCodeObjectV3(getSTI())) {
+ if (IsaInfo::hasCodeObjectV3(getGlobalSTI())) {
std::string ExpectedTarget;
raw_string_ostream ExpectedTargetOS(ExpectedTarget);
- IsaInfo::streamIsaVersion(getSTI(), ExpectedTargetOS);
+ IsaInfo::streamIsaVersion(getGlobalSTI(), ExpectedTargetOS);
getTargetStreamer()->EmitDirectiveAMDGCNTarget(ExpectedTarget);
}
@@ -137,9 +138,9 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
HSAMetadataStream->begin(M);
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
- readPALMetadata(M);
+ getTargetStreamer()->getPALMetadata()->readFromIR(M);
- if (IsaInfo::hasCodeObjectV3(getSTI()))
+ if (IsaInfo::hasCodeObjectV3(getGlobalSTI()))
return;
// HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
@@ -147,7 +148,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
// HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
- IsaVersion Version = getIsaVersion(getSTI()->getCPU());
+ IsaVersion Version = getIsaVersion(getGlobalSTI()->getCPU());
getTargetStreamer()->EmitDirectiveHSACodeObjectISA(
Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
}
@@ -157,11 +158,11 @@ void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (!getTargetStreamer())
return;
- if (!IsaInfo::hasCodeObjectV3(getSTI())) {
+ if (!IsaInfo::hasCodeObjectV3(getGlobalSTI())) {
// Emit ISA Version (NT_AMD_AMDGPU_ISA).
std::string ISAVersionString;
raw_string_ostream ISAVersionStream(ISAVersionString);
- IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream);
+ IsaInfo::streamIsaVersion(getGlobalSTI(), ISAVersionStream);
getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
}
@@ -172,20 +173,6 @@ void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
(void)Success;
assert(Success && "Malformed HSA Metadata");
}
-
- if (!IsaInfo::hasCodeObjectV3(getSTI())) {
- // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA).
- if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
- // Copy the PAL metadata from the map where we collected it into a vector,
- // then write it as a .note.
- PALMD::Metadata PALMetadataVector;
- for (auto i : PALMetadataMap) {
- PALMetadataVector.push_back(i.first);
- PALMetadataVector.push_back(i.second);
- }
- getTargetStreamer()->EmitPALMetadata(PALMetadataVector);
- }
- }
}
bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
@@ -225,7 +212,8 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
if (!MFI.isEntryFunction())
return;
- if (!IsaInfo::hasCodeObjectV3(getSTI()) ||
+
+ if (!IsaInfo::hasCodeObjectV3(getGlobalSTI()) ||
TM.getTargetTriple().getOS() != Triple::AMDHSA)
return;
@@ -243,23 +231,25 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
if (ReadOnlySection.getAlignment() < 64)
ReadOnlySection.setAlignment(64);
+ const MCSubtargetInfo &STI = MF->getSubtarget();
+
SmallString<128> KernelName;
getNameWithPrefix(KernelName, &MF->getFunction());
getTargetStreamer()->EmitAmdhsaKernelDescriptor(
- *getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
+ STI, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
CurrentProgramInfo.NumVGPRsForWavesPerEU,
CurrentProgramInfo.NumSGPRsForWavesPerEU -
- IsaInfo::getNumExtraSGPRs(getSTI(),
+ IsaInfo::getNumExtraSGPRs(&STI,
CurrentProgramInfo.VCCUsed,
CurrentProgramInfo.FlatUsed),
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
- hasXNACK(*getSTI()));
+ hasXNACK(STI));
Streamer.PopSection();
}
void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
- if (IsaInfo::hasCodeObjectV3(getSTI()) &&
+ if (IsaInfo::hasCodeObjectV3(getGlobalSTI()) &&
TM.getTargetTriple().getOS() == Triple::AMDHSA) {
AsmPrinter::EmitFunctionEntryLabel();
return;
@@ -273,8 +263,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
getTargetStreamer()->EmitAMDGPUSymbolType(
SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
}
- const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
- if (STI.dumpCode()) {
+ if (DumpCodeInstEmitter) {
// Disassemble function name label to text.
DisasmLines.push_back(MF->getName().str() + ":");
DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLines.back().size());
@@ -285,8 +274,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
}
void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
- const GCNSubtarget &STI = MBB.getParent()->getSubtarget<GCNSubtarget>();
- if (STI.dumpCode() && !isBlockOnlyReachableByFallthrough(&MBB)) {
+ if (DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB)) {
// Write a line for the basic block label if it is not only fallthrough.
DisasmLines.push_back(
(Twine("BB") + Twine(getFunctionNumber())
@@ -298,38 +286,57 @@ void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
}
void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ if (GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
+ OutContext.reportError({},
+ Twine(GV->getName()) +
+ ": unsupported initializer for address space");
+ return;
+ }
+
+ // LDS variables aren't emitted in HSA or PAL yet.
+ const Triple::OSType OS = TM.getTargetTriple().getOS();
+ if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
+ return;
- // Group segment variables aren't emitted in HSA.
- if (AMDGPU::isGroupSegment(GV))
+ MCSymbol *GVSym = getSymbol(GV);
+
+ GVSym->redefineIfPossible();
+ if (GVSym->isDefined() || GVSym->isVariable())
+ report_fatal_error("symbol '" + Twine(GVSym->getName()) +
+ "' is already defined");
+
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
+ unsigned Align = GV->getAlignment();
+ if (!Align)
+ Align = 4;
+
+ EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
+ EmitLinkage(GV, GVSym);
+ if (auto TS = getTargetStreamer())
+ TS->emitAMDGPULDS(GVSym, Size, Align);
return;
+ }
AsmPrinter::EmitGlobalVariable(GV);
}
bool AMDGPUAsmPrinter::doFinalization(Module &M) {
CallGraphResourceInfo.clear();
- return AsmPrinter::doFinalization(M);
-}
-// For the amdpal OS type, read the amdgpu.pal.metadata supplied by the
-// frontend into our PALMetadataMap, ready for per-function modification. It
-// is a NamedMD containing an MDTuple containing a number of MDNodes each of
-// which is an integer value, and each two integer values forms a key=value
-// pair that we store as PALMetadataMap[key]=value in the map.
-void AMDGPUAsmPrinter::readPALMetadata(Module &M) {
- auto NamedMD = M.getNamedMetadata("amdgpu.pal.metadata");
- if (!NamedMD || !NamedMD->getNumOperands())
- return;
- auto Tuple = dyn_cast<MDTuple>(NamedMD->getOperand(0));
- if (!Tuple)
- return;
- for (unsigned I = 0, E = Tuple->getNumOperands() & -2; I != E; I += 2) {
- auto Key = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I));
- auto Val = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I + 1));
- if (!Key || !Val)
- continue;
- PALMetadataMap[Key->getZExtValue()] = Val->getZExtValue();
+ // Pad with s_code_end to help tools and guard against instruction prefetch
+ // causing stale data in caches. Arguably this should be done by the linker,
+ // which is why this isn't done for Mesa.
+ const MCSubtargetInfo &STI = *getGlobalSTI();
+ if (AMDGPU::isGFX10(STI) &&
+ (STI.getTargetTriple().getOS() == Triple::AMDHSA ||
+ STI.getTargetTriple().getOS() == Triple::AMDPAL)) {
+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
+ getTargetStreamer()->EmitCodeEnd();
}
+
+ return AsmPrinter::doFinalization(M);
}
// Print comments that apply to both callable functions and entry points.
@@ -376,6 +383,10 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
}
+ if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
+ }
return KernelCodeProperties;
}
@@ -435,6 +446,18 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
EmitProgramInfoSI(MF, CurrentProgramInfo);
}
+ DumpCodeInstEmitter = nullptr;
+ if (STM.dumpCode()) {
+ // For -dumpcode, get the assembler out of the streamer, even if it does
+ // not really want to let us have it. This only works with -filetype=obj.
+ bool SaveFlag = OutStreamer->getUseAssemblerInfoForParsing();
+ OutStreamer->setUseAssemblerInfoForParsing(true);
+ MCAssembler *Assembler = OutStreamer->getAssemblerPtr();
+ OutStreamer->setUseAssemblerInfoForParsing(SaveFlag);
+ if (Assembler)
+ DumpCodeInstEmitter = Assembler->getEmitterPtr();
+ }
+
DisasmLines.clear();
HexLines.clear();
DisasmLineMaxLen = 0;
@@ -486,15 +509,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->emitRawComment(
" WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
- if (MF.getSubtarget<GCNSubtarget>().debuggerEmitPrologue()) {
- OutStreamer->emitRawComment(
- " DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
- Twine(CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
- OutStreamer->emitRawComment(
- " DebuggerPrivateSegmentBufferSGPR: s" +
- Twine(CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR), false);
- }
-
OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:USER_SGPR: " +
Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
@@ -516,7 +530,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
false);
}
- if (STM.dumpCode()) {
+ if (DumpCodeInstEmitter) {
OutStreamer->SwitchSection(
Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0));
@@ -620,6 +634,11 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
HighestVGPRReg = Reg;
break;
}
+ MCPhysReg AReg = AMDGPU::AGPR0 + TRI.getHWRegIndex(Reg);
+ if (MRI.isPhysRegUsed(AReg)) {
+ HighestVGPRReg = AReg;
+ break;
+ }
}
MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
@@ -665,8 +684,12 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
case AMDGPU::SRC_SHARED_LIMIT:
case AMDGPU::SRC_PRIVATE_BASE:
case AMDGPU::SRC_PRIVATE_LIMIT:
+ case AMDGPU::SGPR_NULL:
continue;
+ case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
+ llvm_unreachable("src_pops_exiting_wave_id should not be used");
+
case AMDGPU::NoRegister:
assert(MI.isDebugInstr());
continue;
@@ -687,6 +710,9 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
case AMDGPU::XNACK_MASK_HI:
llvm_unreachable("xnack_mask registers should not be used");
+ case AMDGPU::LDS_DIRECT:
+ llvm_unreachable("lds_direct register should not be used");
+
case AMDGPU::TBA:
case AMDGPU::TBA_LO:
case AMDGPU::TBA_HI:
@@ -695,6 +721,15 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
case AMDGPU::TMA_HI:
llvm_unreachable("trap handler registers should not be used");
+ case AMDGPU::SRC_VCCZ:
+ llvm_unreachable("src_vccz register should not be used");
+
+ case AMDGPU::SRC_EXECZ:
+ llvm_unreachable("src_execz register should not be used");
+
+ case AMDGPU::SRC_SCC:
+ llvm_unreachable("src_scc register should not be used");
+
default:
break;
}
@@ -707,6 +742,9 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
} else if (AMDGPU::VGPR_32RegClass.contains(Reg)) {
IsSGPR = false;
Width = 1;
+ } else if (AMDGPU::AGPR_32RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 1;
} else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
"trap handler registers should not be used");
@@ -715,9 +753,14 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
} else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
IsSGPR = false;
Width = 2;
+ } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 2;
} else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
IsSGPR = false;
Width = 3;
+ } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
+ Width = 3;
} else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
"trap handler registers should not be used");
@@ -726,6 +769,9 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
} else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
IsSGPR = false;
Width = 4;
+ } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 4;
} else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
"trap handler registers should not be used");
@@ -742,6 +788,18 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
} else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
IsSGPR = false;
Width = 16;
+ } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 16;
+ } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 32;
+ } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 32;
+ } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 32;
} else {
llvm_unreachable("Unknown register class");
}
@@ -767,8 +825,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
// 48 SGPRs - vcc, - flat_scr, -xnack
int MaxSGPRGuess =
- 47 - IsaInfo::getNumExtraSGPRs(getSTI(), true,
- ST.hasFlatAddressSpace());
+ 47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace());
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
MaxVGPR = std::max(MaxVGPR, 23);
@@ -779,9 +836,19 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
} else {
// We force CodeGen to run in SCC order, so the callee's register
// usage etc. should be the cumulative usage of all callees.
+
auto I = CallGraphResourceInfo.find(Callee);
- assert(I != CallGraphResourceInfo.end() &&
- "callee should have been handled before caller");
+ if (I == CallGraphResourceInfo.end()) {
+ // Avoid crashing on undefined behavior with an illegal call to a
+ // kernel. If a callsite's calling convention doesn't match the
+ // function's, it's undefined behavior. If the callsite calling
+ // convention does match, that would have errored earlier.
+ // FIXME: The verifier shouldn't allow this.
+ if (AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
+ report_fatal_error("invalid call to entry function");
+
+ llvm_unreachable("callee should have been handled before caller");
+ }
MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
@@ -825,14 +892,12 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const SIInstrInfo *TII = STM.getInstrInfo();
- const SIRegisterInfo *RI = &TII->getRegisterInfo();
// TODO(scott.linder): The calculations related to SGPR/VGPR blocks are
// duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
// unified.
unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
- getSTI(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
+ &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed);
// Check the addressable register limit before we add ExtraSGPRs.
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
@@ -918,24 +983,15 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
&STM, ProgInfo.NumVGPRsForWavesPerEU);
- // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
- // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
- // attribute was requested.
- if (STM.debuggerEmitPrologue()) {
- ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
- RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
- ProgInfo.DebuggerPrivateSegmentBufferSGPR =
- RI->getHWRegIndex(MFI->getScratchRSrcReg());
- }
-
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
// register.
ProgInfo.FloatMode = getFPMode(MF);
- ProgInfo.IEEEMode = STM.enableIEEEBit(MF);
+ const SIModeRegisterDefaults Mode = MFI->getMode();
+ ProgInfo.IEEEMode = Mode.IEEE;
// Make clamp modifier on NaN input returns 0.
- ProgInfo.DX10Clamp = STM.enableDX10Clamp();
+ ProgInfo.DX10Clamp = Mode.DX10Clamp;
unsigned LDSAlignShift;
if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
@@ -963,6 +1019,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
1ULL << ScratchAlignShift) >>
ScratchAlignShift;
+ if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
+ ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
+ ProgInfo.MemOrdered = 1;
+ }
+
ProgInfo.ComputePGMRSrc1 =
S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
@@ -971,7 +1032,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
S_00B848_PRIV(ProgInfo.Priv) |
S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
- S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
+ S_00B848_IEEE_MODE(ProgInfo.IEEEMode) |
+ S_00B848_WGP_MODE(ProgInfo.WgpMode) |
+ S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);
// 0 = X, 1 = XY, 2 = XYZ
unsigned TIDIGCompCnt = 0;
@@ -1053,71 +1116,38 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
// This is the equivalent of EmitProgramInfoSI above, but for when the OS type
// is AMDPAL. It stores each compute/SPI register setting and other PAL
-// metadata items into the PALMetadataMap, combining with any provided by the
-// frontend as LLVM metadata. Once all functions are written, PALMetadataMap is
-// then written as a single block in the .note section.
+// metadata items into the PALMD::Metadata, combining with any provided by the
+// frontend as LLVM metadata. Once all functions are written, the PAL metadata
+// is then written as a single block in the .note section.
void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
const SIProgramInfo &CurrentProgramInfo) {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- // Given the calling convention, calculate the register number for rsrc1. In
- // principle the register number could change in future hardware, but we know
- // it is the same for gfx6-9 (except that LS and ES don't exist on gfx9), so
- // we can use the same fixed value that .AMDGPU.config has for Mesa. Note
- // that we use a register number rather than a byte offset, so we need to
- // divide by 4.
- unsigned Rsrc1Reg = getRsrcReg(MF.getFunction().getCallingConv()) / 4;
- unsigned Rsrc2Reg = Rsrc1Reg + 1;
- // Also calculate the PAL metadata key for *S_SCRATCH_SIZE. It can be used
- // with a constant offset to access any non-register shader-specific PAL
- // metadata key.
- unsigned ScratchSizeKey = PALMD::Key::CS_SCRATCH_SIZE;
- switch (MF.getFunction().getCallingConv()) {
- case CallingConv::AMDGPU_PS:
- ScratchSizeKey = PALMD::Key::PS_SCRATCH_SIZE;
- break;
- case CallingConv::AMDGPU_VS:
- ScratchSizeKey = PALMD::Key::VS_SCRATCH_SIZE;
- break;
- case CallingConv::AMDGPU_GS:
- ScratchSizeKey = PALMD::Key::GS_SCRATCH_SIZE;
- break;
- case CallingConv::AMDGPU_ES:
- ScratchSizeKey = PALMD::Key::ES_SCRATCH_SIZE;
- break;
- case CallingConv::AMDGPU_HS:
- ScratchSizeKey = PALMD::Key::HS_SCRATCH_SIZE;
- break;
- case CallingConv::AMDGPU_LS:
- ScratchSizeKey = PALMD::Key::LS_SCRATCH_SIZE;
- break;
- }
- unsigned NumUsedVgprsKey = ScratchSizeKey +
- PALMD::Key::VS_NUM_USED_VGPRS - PALMD::Key::VS_SCRATCH_SIZE;
- unsigned NumUsedSgprsKey = ScratchSizeKey +
- PALMD::Key::VS_NUM_USED_SGPRS - PALMD::Key::VS_SCRATCH_SIZE;
- PALMetadataMap[NumUsedVgprsKey] = CurrentProgramInfo.NumVGPRsForWavesPerEU;
- PALMetadataMap[NumUsedSgprsKey] = CurrentProgramInfo.NumSGPRsForWavesPerEU;
+ auto CC = MF.getFunction().getCallingConv();
+ auto MD = getTargetStreamer()->getPALMetadata();
+
+ MD->setEntryPoint(CC, MF.getFunction().getName());
+ MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU);
+ MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU);
if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
- PALMetadataMap[Rsrc1Reg] |= CurrentProgramInfo.ComputePGMRSrc1;
- PALMetadataMap[Rsrc2Reg] |= CurrentProgramInfo.ComputePGMRSrc2;
- // ScratchSize is in bytes, 16 aligned.
- PALMetadataMap[ScratchSizeKey] |=
- alignTo(CurrentProgramInfo.ScratchSize, 16);
+ MD->setRsrc1(CC, CurrentProgramInfo.ComputePGMRSrc1);
+ MD->setRsrc2(CC, CurrentProgramInfo.ComputePGMRSrc2);
} else {
- PALMetadataMap[Rsrc1Reg] |= S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
- S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks);
+ MD->setRsrc1(CC, S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
+ S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks));
if (CurrentProgramInfo.ScratchBlocks > 0)
- PALMetadataMap[Rsrc2Reg] |= S_00B84C_SCRATCH_EN(1);
- // ScratchSize is in bytes, 16 aligned.
- PALMetadataMap[ScratchSizeKey] |=
- alignTo(CurrentProgramInfo.ScratchSize, 16);
+ MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1));
}
+ // ScratchSize is in bytes, 16 aligned.
+ MD->setScratchSize(CC, alignTo(CurrentProgramInfo.ScratchSize, 16));
if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
- PALMetadataMap[Rsrc2Reg] |=
- S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks);
- PALMetadataMap[R_0286CC_SPI_PS_INPUT_ENA / 4] |= MFI->getPSInputEnable();
- PALMetadataMap[R_0286D0_SPI_PS_INPUT_ADDR / 4] |= MFI->getPSInputAddr();
+ MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks));
+ MD->setSpiPsInputEna(MFI->getPSInputEnable());
+ MD->setSpiPsInputAddr(MFI->getPSInputAddr());
}
+
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+ if (STM.isWave32())
+ MD->setWave32(MF.getFunction().getCallingConv());
}
// This is supposed to be log2(Size)
@@ -1144,12 +1174,12 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
- AMDGPU::initDefaultAMDKernelCodeT(Out, getSTI());
+ AMDGPU::initDefaultAMDKernelCodeT(Out, &STM);
Out.compute_pgm_resource_registers =
CurrentProgramInfo.ComputePGMRSrc1 |
(CurrentProgramInfo.ComputePGMRSrc2 << 32);
- Out.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
+ Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
if (CurrentProgramInfo.DynamicCallStack)
Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK;
@@ -1181,9 +1211,6 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
if (MFI->hasDispatchPtr())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
- if (STM.debuggerSupported())
- Out.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
-
if (STM.isXNACKEnabled())
Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
@@ -1196,22 +1223,14 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
// These alignment values are specified in powers of two, so alignment =
// 2^n. The minimum alignment is 2^4 = 16.
- Out.kernarg_segment_alignment = std::max((size_t)4,
+ Out.kernarg_segment_alignment = std::max<size_t>(4,
countTrailingZeros(MaxKernArgAlign));
-
- if (STM.debuggerEmitPrologue()) {
- Out.debug_wavefront_private_segment_offset_sgpr =
- CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
- Out.debug_private_segment_buffer_sgpr =
- CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR;
- }
}
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) {
// First try the generic code, which knows about modifiers like 'c' and 'n'.
- if (!AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O))
+ if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O))
return false;
if (ExtraCode && ExtraCode[0]) {