summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
-rw-r--r--lib/Target/AMDGPU/SIMachineFunctionInfo.cpp68
1 files changed, 44 insertions, 24 deletions
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 3203c38dae34..a7c8166ff6d2 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -23,10 +23,10 @@ using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
TIDReg(AMDGPU::NoRegister),
- ScratchRSrcReg(AMDGPU::NoRegister),
- ScratchWaveOffsetReg(AMDGPU::NoRegister),
- FrameOffsetReg(AMDGPU::NoRegister),
- StackPtrOffsetReg(AMDGPU::NoRegister),
+ ScratchRSrcReg(AMDGPU::PRIVATE_RSRC_REG),
+ ScratchWaveOffsetReg(AMDGPU::SCRATCH_WAVE_OFFSET_REG),
+ FrameOffsetReg(AMDGPU::FP_REG),
+ StackPtrOffsetReg(AMDGPU::SP_REG),
PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
DispatchPtrUserSGPR(AMDGPU::NoRegister),
QueuePtrUserSGPR(AMDGPU::NoRegister),
@@ -42,6 +42,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
+ WorkItemIDXVGPR(AMDGPU::NoRegister),
+ WorkItemIDYVGPR(AMDGPU::NoRegister),
+ WorkItemIDZVGPR(AMDGPU::NoRegister),
PSInputAddr(0),
PSInputEnable(0),
ReturnsVoid(true),
@@ -87,12 +90,14 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ScratchWaveOffsetReg = AMDGPU::SGPR4;
FrameOffsetReg = AMDGPU::SGPR5;
StackPtrOffsetReg = AMDGPU::SGPR32;
- return;
+
+ // FIXME: Not really a system SGPR.
+ PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg;
}
CallingConv::ID CC = F->getCallingConv();
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
- KernargSegmentPtr = true;
+ KernargSegmentPtr = !F->arg_empty();
WorkGroupIDX = true;
WorkItemIDX = true;
} else if (CC == CallingConv::AMDGPU_PS) {
@@ -101,17 +106,25 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (ST.debuggerEmitPrologue()) {
// Enable everything.
+ WorkGroupIDX = true;
WorkGroupIDY = true;
WorkGroupIDZ = true;
+ WorkItemIDX = true;
WorkItemIDY = true;
WorkItemIDZ = true;
} else {
+ if (F->hasFnAttribute("amdgpu-work-group-id-x"))
+ WorkGroupIDX = true;
+
if (F->hasFnAttribute("amdgpu-work-group-id-y"))
WorkGroupIDY = true;
if (F->hasFnAttribute("amdgpu-work-group-id-z"))
WorkGroupIDZ = true;
+ if (F->hasFnAttribute("amdgpu-work-item-id-x"))
+ WorkItemIDX = true;
+
if (F->hasFnAttribute("amdgpu-work-item-id-y"))
WorkItemIDY = true;
@@ -119,25 +132,28 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkItemIDZ = true;
}
- // X, XY, and XYZ are the only supported combinations, so make sure Y is
- // enabled if Z is.
- if (WorkItemIDZ)
- WorkItemIDY = true;
-
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
bool MaySpill = ST.isVGPRSpillingEnabled(*F);
- bool HasStackObjects = FrameInfo.hasStackObjects() || FrameInfo.hasCalls();
+ bool HasStackObjects = FrameInfo.hasStackObjects();
+
+ if (isEntryFunction()) {
+ // X, XY, and XYZ are the only supported combinations, so make sure Y is
+ // enabled if Z is.
+ if (WorkItemIDZ)
+ WorkItemIDY = true;
- if (HasStackObjects || MaySpill) {
- PrivateSegmentWaveByteOffset = true;
+ if (HasStackObjects || MaySpill) {
+ PrivateSegmentWaveByteOffset = true;
- // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
- if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
- (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
- PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
+ // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
+ if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
+ (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
+ PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
+ }
}
- if (ST.isAmdCodeObjectV2(MF)) {
+ bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
+ if (IsCOV2) {
if (HasStackObjects || MaySpill)
PrivateSegmentBuffer = true;
@@ -154,11 +170,15 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ImplicitBufferPtr = true;
}
- // We don't need to worry about accessing spills with flat instructions.
- // TODO: On VI where we must use flat for global, we should be able to omit
- // this if it is never used for generic access.
- if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS())
- FlatScratchInit = true;
+ if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
+ KernargSegmentPtr = true;
+
+ if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
+ // TODO: This could be refined a lot. The attribute is a poor way of
+ // detecting calls that may require it before argument lowering.
+ if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch"))
+ FlatScratchInit = true;
+ }
}
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(