summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
-rw-r--r--lib/Target/AMDGPU/SIMachineFunctionInfo.cpp189
1 files changed, 98 insertions, 91 deletions
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index a7c8166ff6d2..6013ebc81d9f 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -1,4 +1,4 @@
-//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
+//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,13 +8,19 @@
//===----------------------------------------------------------------------===//
#include "SIMachineFunctionInfo.h"
+#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
+#include <cassert>
+#include <vector>
#define MAX_LANES 64
@@ -22,44 +28,8 @@ using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
- TIDReg(AMDGPU::NoRegister),
- ScratchRSrcReg(AMDGPU::PRIVATE_RSRC_REG),
- ScratchWaveOffsetReg(AMDGPU::SCRATCH_WAVE_OFFSET_REG),
- FrameOffsetReg(AMDGPU::FP_REG),
- StackPtrOffsetReg(AMDGPU::SP_REG),
- PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
- DispatchPtrUserSGPR(AMDGPU::NoRegister),
- QueuePtrUserSGPR(AMDGPU::NoRegister),
- KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
- DispatchIDUserSGPR(AMDGPU::NoRegister),
- FlatScratchInitUserSGPR(AMDGPU::NoRegister),
- PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
- GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
- GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
- GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
- WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
- WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
- WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
- WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
- PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
- WorkItemIDXVGPR(AMDGPU::NoRegister),
- WorkItemIDYVGPR(AMDGPU::NoRegister),
- WorkItemIDZVGPR(AMDGPU::NoRegister),
- PSInputAddr(0),
- PSInputEnable(0),
- ReturnsVoid(true),
- FlatWorkGroupSizes(0, 0),
- WavesPerEU(0, 0),
- DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
- DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
- LDSWaveSpillSize(0),
- NumUserSGPRs(0),
- NumSystemSGPRs(0),
- HasSpilledSGPRs(false),
- HasSpilledVGPRs(false),
- HasNonSpillStackObjects(false),
- NumSpilledSGPRs(0),
- NumSpilledVGPRs(0),
+ BufferPSV(*(MF.getSubtarget().getInstrInfo())),
+ ImagePSV(*(MF.getSubtarget().getInstrInfo())),
PrivateSegmentBuffer(false),
DispatchPtr(false),
QueuePtr(false),
@@ -77,11 +47,13 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkItemIDX(false),
WorkItemIDY(false),
WorkItemIDZ(false),
- ImplicitBufferPtr(false) {
+ ImplicitBufferPtr(false),
+ ImplicitArgPtr(false),
+ GITPtrHigh(0xffffffff) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- const Function *F = MF.getFunction();
- FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
- WavesPerEU = ST.getWavesPerEU(*F);
+ const Function &F = MF.getFunction();
+ FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
+ WavesPerEU = ST.getWavesPerEU(F);
if (!isEntryFunction()) {
// Non-entry functions have no special inputs for now, other registers
@@ -91,17 +63,26 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
FrameOffsetReg = AMDGPU::SGPR5;
StackPtrOffsetReg = AMDGPU::SGPR32;
- // FIXME: Not really a system SGPR.
- PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg;
+ ArgInfo.PrivateSegmentBuffer =
+ ArgDescriptor::createRegister(ScratchRSrcReg);
+ ArgInfo.PrivateSegmentWaveByteOffset =
+ ArgDescriptor::createRegister(ScratchWaveOffsetReg);
+
+ if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
+ ImplicitArgPtr = true;
+ } else {
+ if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
+ KernargSegmentPtr = true;
}
- CallingConv::ID CC = F->getCallingConv();
+ CallingConv::ID CC = F.getCallingConv();
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
- KernargSegmentPtr = !F->arg_empty();
+ if (!F.arg_empty())
+ KernargSegmentPtr = true;
WorkGroupIDX = true;
WorkItemIDX = true;
} else if (CC == CallingConv::AMDGPU_PS) {
- PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
+ PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
}
if (ST.debuggerEmitPrologue()) {
@@ -113,27 +94,27 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkItemIDY = true;
WorkItemIDZ = true;
} else {
- if (F->hasFnAttribute("amdgpu-work-group-id-x"))
+ if (F.hasFnAttribute("amdgpu-work-group-id-x"))
WorkGroupIDX = true;
- if (F->hasFnAttribute("amdgpu-work-group-id-y"))
+ if (F.hasFnAttribute("amdgpu-work-group-id-y"))
WorkGroupIDY = true;
- if (F->hasFnAttribute("amdgpu-work-group-id-z"))
+ if (F.hasFnAttribute("amdgpu-work-group-id-z"))
WorkGroupIDZ = true;
- if (F->hasFnAttribute("amdgpu-work-item-id-x"))
+ if (F.hasFnAttribute("amdgpu-work-item-id-x"))
WorkItemIDX = true;
- if (F->hasFnAttribute("amdgpu-work-item-id-y"))
+ if (F.hasFnAttribute("amdgpu-work-item-id-y"))
WorkItemIDY = true;
- if (F->hasFnAttribute("amdgpu-work-item-id-z"))
+ if (F.hasFnAttribute("amdgpu-work-item-id-z"))
WorkItemIDZ = true;
}
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
- bool MaySpill = ST.isVGPRSpillingEnabled(*F);
+ bool MaySpill = ST.isVGPRSpillingEnabled(F);
bool HasStackObjects = FrameInfo.hasStackObjects();
if (isEntryFunction()) {
@@ -145,10 +126,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (HasStackObjects || MaySpill) {
PrivateSegmentWaveByteOffset = true;
- // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
- if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
- (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
- PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
+ // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
+ if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
+ (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
+ ArgInfo.PrivateSegmentWaveByteOffset
+ = ArgDescriptor::createRegister(AMDGPU::SGPR5);
}
}
@@ -157,78 +139,94 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (HasStackObjects || MaySpill)
PrivateSegmentBuffer = true;
- if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
+ if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
DispatchPtr = true;
- if (F->hasFnAttribute("amdgpu-queue-ptr"))
+ if (F.hasFnAttribute("amdgpu-queue-ptr"))
QueuePtr = true;
- if (F->hasFnAttribute("amdgpu-dispatch-id"))
+ if (F.hasFnAttribute("amdgpu-dispatch-id"))
DispatchID = true;
} else if (ST.isMesaGfxShader(MF)) {
if (HasStackObjects || MaySpill)
ImplicitBufferPtr = true;
}
- if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
+ if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
KernargSegmentPtr = true;
if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
// TODO: This could be refined a lot. The attribute is a poor way of
// detecting calls that may require it before argument lowering.
- if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch"))
+ if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
FlatScratchInit = true;
}
+
+ Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
+ StringRef S = A.getValueAsString();
+ if (!S.empty())
+ S.consumeInteger(0, GITPtrHigh);
}
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
const SIRegisterInfo &TRI) {
- PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
- getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
+ ArgInfo.PrivateSegmentBuffer =
+ ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
NumUserSGPRs += 4;
- return PrivateSegmentBufferUserSGPR;
+ return ArgInfo.PrivateSegmentBuffer.getRegister();
}
unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
- DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
- getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
NumUserSGPRs += 2;
- return DispatchPtrUserSGPR;
+ return ArgInfo.DispatchPtr.getRegister();
}
unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
- QueuePtrUserSGPR = TRI.getMatchingSuperReg(
- getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
NumUserSGPRs += 2;
- return QueuePtrUserSGPR;
+ return ArgInfo.QueuePtr.getRegister();
}
unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
- KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
- getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ ArgInfo.KernargSegmentPtr
+ = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
NumUserSGPRs += 2;
- return KernargSegmentPtrUserSGPR;
+ return ArgInfo.KernargSegmentPtr.getRegister();
}
unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
- DispatchIDUserSGPR = TRI.getMatchingSuperReg(
- getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
NumUserSGPRs += 2;
- return DispatchIDUserSGPR;
+ return ArgInfo.DispatchID.getRegister();
}
unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
- FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
- getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
NumUserSGPRs += 2;
- return FlatScratchInitUserSGPR;
+ return ArgInfo.FlatScratchInit.getRegister();
}
unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
- ImplicitBufferPtrUserSGPR = TRI.getMatchingSuperReg(
- getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
NumUserSGPRs += 2;
- return ImplicitBufferPtrUserSGPR;
+ return ArgInfo.ImplicitBufferPtr.getRegister();
+}
+
+static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
+ for (unsigned I = 0; CSRegs[I]; ++I) {
+ if (CSRegs[I] == Reg)
+ return true;
+ }
+
+ return false;
}
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
@@ -252,6 +250,8 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
int NumLanes = Size / 4;
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+
// Make sure to handle the case where a wide SGPR spill may span between two
// VGPRs.
for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
@@ -261,21 +261,28 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
if (VGPRIndex == 0) {
LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
if (LaneVGPR == AMDGPU::NoRegister) {
- // We have no VGPRs left for spilling SGPRs. Reset because we won't
+ // We have no VGPRs left for spilling SGPRs. Reset because we will not
// partially spill the SGPR to VGPRs.
SGPRToVGPRSpills.erase(FI);
NumVGPRSpillLanes -= I;
return false;
}
- SpillVGPRs.push_back(LaneVGPR);
+ Optional<int> CSRSpillFI;
+ if (FrameInfo.hasCalls() && CSRegs && isCalleeSavedReg(CSRegs, LaneVGPR)) {
+ // TODO: Should this be a CreateSpillStackObject? This is technically a
+ // weird CSR spill.
+ CSRSpillFI = FrameInfo.CreateStackObject(4, 4, false);
+ }
+
+ SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
// Add this register as live-in to all blocks to avoid machine verifier
// complaining about use of an undefined physical register.
for (MachineBasicBlock &BB : MF)
BB.addLiveIn(LaneVGPR);
} else {
- LaneVGPR = SpillVGPRs.back();
+ LaneVGPR = SpillVGPRs.back().VGPR;
}
SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));