summary refs log tree commit diff
path: root/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
-rw-r--r-- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp 90
1 files changed, 65 insertions, 25 deletions
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 6013ebc81d9f..0d5ff75e37ed 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -11,6 +11,7 @@
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -28,17 +29,12 @@ using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
- BufferPSV(*(MF.getSubtarget().getInstrInfo())),
- ImagePSV(*(MF.getSubtarget().getInstrInfo())),
PrivateSegmentBuffer(false),
DispatchPtr(false),
QueuePtr(false),
KernargSegmentPtr(false),
DispatchID(false),
FlatScratchInit(false),
- GridWorkgroupCountX(false),
- GridWorkgroupCountY(false),
- GridWorkgroupCountZ(false),
WorkGroupIDX(false),
WorkGroupIDY(false),
WorkGroupIDZ(false),
@@ -49,12 +45,26 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkItemIDZ(false),
ImplicitBufferPtr(false),
ImplicitArgPtr(false),
- GITPtrHigh(0xffffffff) {
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ GITPtrHigh(0xffffffff),
+ HighBitsOf32BitAddress(0) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const Function &F = MF.getFunction();
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
WavesPerEU = ST.getWavesPerEU(F);
+ Occupancy = getMaxWavesPerEU();
+ limitOccupancy(MF);
+ CallingConv::ID CC = F.getCallingConv();
+
+ if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
+ if (!F.arg_empty())
+ KernargSegmentPtr = true;
+ WorkGroupIDX = true;
+ WorkItemIDX = true;
+ } else if (CC == CallingConv::AMDGPU_PS) {
+ PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
+ }
+
if (!isEntryFunction()) {
// Non-entry functions have no special inputs for now, other than the
// registers required for scratch access.
@@ -71,18 +81,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
ImplicitArgPtr = true;
} else {
- if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
- KernargSegmentPtr = true;
- }
-
- CallingConv::ID CC = F.getCallingConv();
- if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
- if (!F.arg_empty())
+ if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
KernargSegmentPtr = true;
- WorkGroupIDX = true;
- WorkItemIDX = true;
- } else if (CC == CallingConv::AMDGPU_PS) {
- PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
+ MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
+ MaxKernArgAlign);
+ }
}
if (ST.debuggerEmitPrologue()) {
@@ -134,7 +137,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
}
}
- bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
+ bool IsCOV2 = ST.isAmdCodeObjectV2(F);
if (IsCOV2) {
if (HasStackObjects || MaySpill)
PrivateSegmentBuffer = true;
@@ -147,7 +150,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (F.hasFnAttribute("amdgpu-dispatch-id"))
DispatchID = true;
- } else if (ST.isMesaGfxShader(MF)) {
+ } else if (ST.isMesaGfxShader(F)) {
if (HasStackObjects || MaySpill)
ImplicitBufferPtr = true;
}
@@ -166,6 +169,18 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
StringRef S = A.getValueAsString();
if (!S.empty())
S.consumeInteger(0, GITPtrHigh);
+
+ A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
+ S = A.getValueAsString();
+ if (!S.empty())
+ S.consumeInteger(0, HighBitsOf32BitAddress);
+}
+
+void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) { // Applies two occupancy bounds for MF via the limitOccupancy(unsigned) overload.
+ limitOccupancy(getMaxWavesPerEU()); // Bound 1: this function's maximum waves per EU.
+ const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
+ limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(), // Bound 2: occupancy the subtarget reports for the current LDS size.
+ MF.getFunction())); // NOTE(review): presumably each call can only lower Occupancy; confirm in the overload (not shown here).
}
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
@@ -238,7 +253,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
if (!SpillLanes.empty())
return true;
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -269,10 +284,9 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
}
Optional<int> CSRSpillFI;
- if (FrameInfo.hasCalls() && CSRegs && isCalleeSavedReg(CSRegs, LaneVGPR)) {
- // TODO: Should this be a CreateSpillStackObject? This is technically a
- // weird CSR spill.
- CSRSpillFI = FrameInfo.CreateStackObject(4, 4, false);
+ if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
+ isCalleeSavedReg(CSRegs, LaneVGPR)) {
+ CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
}
SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
@@ -295,3 +309,29 @@ void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI)
for (auto &R : SGPRToVGPRSpills)
MFI.RemoveStackObject(R.first);
}
+
+
+/// \returns the VGPR that holds the work item ID for dimension \p Dim (0 = X, 1 = Y, 2 = Z); any other \p Dim is treated as unreachable.
+unsigned SIMachineFunctionInfo::getWorkItemIDVGPR(unsigned Dim) const {
+ switch (Dim) {
+ case 0: // X dimension
+ assert(hasWorkItemIDX()); // The caller must only ask for an ID this function actually has.
+ return AMDGPU::VGPR0;
+ case 1: // Y dimension
+ assert(hasWorkItemIDY());
+ return AMDGPU::VGPR1;
+ case 2: // Z dimension
+ assert(hasWorkItemIDZ());
+ return AMDGPU::VGPR2;
+ }
+ llvm_unreachable("unexpected dimension"); // Reached only when Dim > 2.
+}
+
+MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const { // Returns the register at offset NumUserSGPRs from SGPR0.
+ assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); // Only valid while no system SGPR has been added yet.
+ return AMDGPU::SGPR0 + NumUserSGPRs; // NOTE(review): assumes SGPR register numbers are consecutive starting at SGPR0; confirm.
+}
+
+MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const { // Returns the register at offset NumUserSGPRs + NumSystemSGPRs from SGPR0.
+ return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs; // NOTE(review): assumes SGPR register numbers are consecutive starting at SGPR0; confirm.
+}