Diffstat (limited to 'lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
 -rw-r--r--  lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 104
 1 file changed, 69 insertions(+), 35 deletions(-)
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 49677fc2b0a31..4d12a1ef9a93b 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -1,19 +1,17 @@
-//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
+//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-/// \file
//===----------------------------------------------------------------------===//
-
#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
@@ -22,6 +20,11 @@
using namespace llvm;
+static cl::opt<bool> EnableSpillSGPRToVGPR(
+ "amdgpu-spill-sgpr-to-vgpr",
+ cl::desc("Enable spilling SGPRs to VGPRs"),
+ cl::ReallyHidden,
+ cl::init(true));
// Pin the vtable to this file.
void SIMachineFunctionInfo::anchor() {}
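
The new flag follows the usual LLVM cl::opt pattern: a ReallyHidden boolean, enabled by default, that can be flipped on the llc command line (e.g. -amdgpu-spill-sgpr-to-vgpr=0) without rebuilding. A minimal standalone sketch of the same pattern, using a hypothetical flag name:

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    // Hypothetical flag mirroring the shape of EnableSpillSGPRToVGPR above.
    static cl::opt<bool> EnableMyFeature(
        "amdgpu-enable-my-feature",     // command-line name (made up)
        cl::desc("Enable my feature"),
        cl::ReallyHidden,               // hidden from -help and -help-hidden
        cl::init(true));                // defaults to on

    // The option converts to bool at its use sites:
    //   if (!EnableMyFeature)
    //     return;
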
@@ -48,12 +51,20 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
PSInputAddr(0),
ReturnsVoid(true),
+ MaximumWorkGroupSize(0),
+ DebuggerReservedVGPRCount(0),
+ DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
+ DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
LDSWaveSpillSize(0),
PSInputEna(0),
NumUserSGPRs(0),
NumSystemSGPRs(0),
HasSpilledSGPRs(false),
HasSpilledVGPRs(false),
+ HasNonSpillStackObjects(false),
+ HasFlatInstructions(false),
+ NumSpilledSGPRs(0),
+ NumSpilledVGPRs(0),
PrivateSegmentBuffer(false),
DispatchPtr(false),
QueuePtr(false),
@@ -63,37 +74,45 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
GridWorkgroupCountX(false),
GridWorkgroupCountY(false),
GridWorkgroupCountZ(false),
- WorkGroupIDX(true),
+ WorkGroupIDX(false),
WorkGroupIDY(false),
WorkGroupIDZ(false),
WorkGroupInfo(false),
PrivateSegmentWaveByteOffset(false),
- WorkItemIDX(true),
+ WorkItemIDX(false),
WorkItemIDY(false),
WorkItemIDZ(false) {
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const Function *F = MF.getFunction();
PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- if (getShaderType() == ShaderType::COMPUTE)
+ if (!AMDGPU::isShader(F->getCallingConv())) {
KernargSegmentPtr = true;
+ WorkGroupIDX = true;
+ WorkItemIDX = true;
+ }
- if (F->hasFnAttribute("amdgpu-work-group-id-y"))
+ if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
WorkGroupIDY = true;
- if (F->hasFnAttribute("amdgpu-work-group-id-z"))
+ if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
WorkGroupIDZ = true;
- if (F->hasFnAttribute("amdgpu-work-item-id-y"))
+ if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
WorkItemIDY = true;
- if (F->hasFnAttribute("amdgpu-work-item-id-z"))
+ if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
WorkItemIDZ = true;
- bool MaySpill = ST.isVGPRSpillingEnabled(this);
+ // X, XY, and XYZ are the only supported combinations, so make sure Y is
+ // enabled if Z is.
+ if (WorkItemIDZ)
+ WorkItemIDY = true;
+
+ bool MaySpill = ST.isVGPRSpillingEnabled(*F);
bool HasStackObjects = FrameInfo->hasStackObjects();
if (HasStackObjects || MaySpill)
@@ -105,12 +124,25 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
DispatchPtr = true;
+
+ if (F->hasFnAttribute("amdgpu-queue-ptr"))
+ QueuePtr = true;
}
- // X, XY, and XYZ are the only supported combinations, so make sure Y is
- // enabled if Z is.
- if (WorkItemIDZ)
- WorkItemIDY = true;
+ // We don't need to worry about accessing spills with flat instructions.
+ // TODO: On VI where we must use flat for global, we should be able to omit
+ // this if it is never used for generic access.
+ if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
+ ST.isAmdHsaOS())
+ FlatScratchInit = true;
+
+ if (AMDGPU::isCompute(F->getCallingConv()))
+ MaximumWorkGroupSize = AMDGPU::getMaximumWorkGroupSize(*F);
+ else
+ MaximumWorkGroupSize = ST.getWavefrontSize();
+
+ if (ST.debuggerReserveRegs())
+ DebuggerReservedVGPRCount = 4;
}
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
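
The constructor changes above encode the input-enable policy: non-shader (compute) functions always receive work-group/work-item ID X, the other IDs are opt-in via function attributes or forced when the debugger prologue is emitted, and Z implies Y because the hardware only packs work-item IDs as X, XY, or XYZ. A condensed standalone sketch of that policy, assuming an llvm::Function F (hypothetical helper, not the in-tree code):

    struct WorkItemInputs { bool X = false, Y = false, Z = false; };

    static WorkItemInputs computeWorkItemInputs(const Function &F,
                                                bool DebuggerPrologue) {
      WorkItemInputs In;
      if (!AMDGPU::isShader(F.getCallingConv()))
        In.X = true; // compute kernels always get work-item ID X
      In.Y = F.hasFnAttribute("amdgpu-work-item-id-y") || DebuggerPrologue;
      In.Z = F.hasFnAttribute("amdgpu-work-item-id-z") || DebuggerPrologue;
      if (In.Z)      // only X, XY, and XYZ are supported combinations,
        In.Y = true; // so requesting Z forces Y on as well
      return In;
    }
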
@@ -142,13 +174,24 @@ unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI)
return KernargSegmentPtrUserSGPR;
}
-SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
+unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
+ FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ NumUserSGPRs += 2;
+ return FlatScratchInitUserSGPR;
+}
+
+SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
MachineFunction *MF,
unsigned FrameIndex,
unsigned SubIdx) {
- const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
- const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
- MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
+ if (!EnableSpillSGPRToVGPR)
+ return SpilledReg();
+
+ const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+ MachineFrameInfo *FrameInfo = MF->getFrameInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
Offset += SubIdx * 4;
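
The new addFlatScratchInit reserves a 64-bit user SGPR pair: getNextUserSGPR() names the next free 32-bit SGPR, getMatchingSuperReg() finds the SReg_64 whose low half (sub0) is that register, and NumUserSGPRs advances by two. An illustration with assumed values (SReg_64 registers are aligned even/odd SGPR pairs):

    // If the next free user SGPR happens to be SGPR4 ...
    unsigned Next = getNextUserSGPR();                 // SGPR4 (assumed)
    unsigned Pair = TRI.getMatchingSuperReg(
        Next, AMDGPU::sub0, &AMDGPU::SReg_64RegClass); // -> SGPR4_SGPR5
    NumUserSGPRs += 2;                                 // the pair uses two slots
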
@@ -157,19 +200,14 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
unsigned Lane = (Offset / 4) % 64;
struct SpilledReg Spill;
+ Spill.Lane = Lane;
if (!LaneVGPRs.count(LaneVGPRIdx)) {
unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
- if (LaneVGPR == AMDGPU::NoRegister) {
- LLVMContext &Ctx = MF->getFunction()->getContext();
- Ctx.emitError("Ran out of VGPRs for spilling SGPR");
-
- // When compiling from inside Mesa, the compilation continues.
- // Select an arbitrary register to avoid triggering assertions
- // during subsequent passes.
- LaneVGPR = AMDGPU::VGPR0;
- }
+ if (LaneVGPR == AMDGPU::NoRegister)
+ // We have no VGPRs left for spilling SGPRs.
+ return Spill;
LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
@@ -182,14 +220,10 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
}
Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
- Spill.Lane = Lane;
return Spill;
}
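
The spill-slot arithmetic in getSpilledReg assumes a 64-lane wavefront with 32-bit lanes: one VGPR can hold 64 spilled SGPR dwords (256 bytes of spill space), so a frame-index byte offset decomposes into a lane-VGPR index and a lane within it. A self-contained sketch of the same math with a worked example (not the in-tree code):

    #include <cstdint>
    #include <cstdio>

    struct SpillSlot { unsigned VGPRIdx, Lane; };

    // One VGPR = 64 lanes x 4 bytes = 256 bytes of SGPR spill space.
    static SpillSlot locateSpill(int64_t Offset, unsigned SubIdx) {
      Offset += SubIdx * 4;                    // each sub-register is one dword
      return { unsigned(Offset / (64 * 4)),    // which lane-VGPR holds it
               unsigned((Offset / 4) % 64) };  // which lane inside that VGPR
    }

    int main() {
      SpillSlot S = locateSpill(260, 0);       // byte offset 260
      std::printf("VGPR %u, lane %u\n", S.VGPRIdx, S.Lane); // VGPR 1, lane 1
    }
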
unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
const MachineFunction &MF) const {
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
- // FIXME: We should get this information from kernel attributes if it
- // is available.
- return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
+ return MaximumWorkGroupSize;
}