summary refs log tree commit diff
path: root/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 152
1 file changed, 94 insertions(+), 58 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 85cfe36df16a..c4007f56f350 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -67,9 +67,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
CC != CallingConv::AMDGPU_Gfx &&
(!isEntryFunction() || HasCalls);
+ const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
+ CC == CallingConv::SPIR_KERNEL;
- if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
- if (!F.arg_empty())
+ if (IsKernel) {
+ if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
KernargSegmentPtr = true;
WorkGroupIDX = true;
WorkItemIDX = true;
@@ -94,45 +96,76 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ArgDescriptor::createRegister(ScratchRSrcReg);
}
- if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
+ if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
ImplicitArgPtr = true;
} else {
- if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
- KernargSegmentPtr = true;
- MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
- MaxKernArgAlign);
- }
+ ImplicitArgPtr = false;
+ MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
+ MaxKernArgAlign);
}
+ bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
+ if (isAmdHsaOrMesa && !ST.enableFlatScratch())
+ PrivateSegmentBuffer = true;
+ else if (ST.isMesaGfxShader(F))
+ ImplicitBufferPtr = true;
+
if (UseFixedABI) {
+ DispatchPtr = true;
+ QueuePtr = true;
+ ImplicitArgPtr = true;
WorkGroupIDX = true;
WorkGroupIDY = true;
WorkGroupIDZ = true;
WorkItemIDX = true;
WorkItemIDY = true;
WorkItemIDZ = true;
- ImplicitArgPtr = true;
- } else {
- if (F.hasFnAttribute("amdgpu-work-group-id-x"))
+
+ // FIXME: We don't need this?
+ DispatchID = true;
+ } else if (!AMDGPU::isGraphics(CC)) {
+ if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
WorkGroupIDX = true;
- if (F.hasFnAttribute("amdgpu-work-group-id-y"))
+ if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
WorkGroupIDY = true;
- if (F.hasFnAttribute("amdgpu-work-group-id-z"))
+ if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
WorkGroupIDZ = true;
- if (F.hasFnAttribute("amdgpu-work-item-id-x"))
+ if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
WorkItemIDX = true;
- if (F.hasFnAttribute("amdgpu-work-item-id-y"))
+ if (!F.hasFnAttribute("amdgpu-no-workitem-id-y"))
WorkItemIDY = true;
- if (F.hasFnAttribute("amdgpu-work-item-id-z"))
+ if (!F.hasFnAttribute("amdgpu-no-workitem-id-z"))
WorkItemIDZ = true;
+
+ if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
+ DispatchPtr = true;
+
+ if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
+ QueuePtr = true;
+
+ if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
+ DispatchID = true;
}
+ // FIXME: This attribute is a hack, we just need an analysis on the function
+ // to look for allocas.
bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
+
+ // TODO: This could be refined a lot. The attribute is a poor way of
+ // detecting calls or stack objects that may require it before argument
+ // lowering.
+ if (ST.hasFlatAddressSpace() && isEntryFunction() &&
+ (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
+ (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
+ !ST.flatScratchIsArchitected()) {
+ FlatScratchInit = true;
+ }
+
if (isEntryFunction()) {
// X, XY, and XYZ are the only supported combinations, so make sure Y is
// enabled if Z is.
@@ -150,44 +183,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
}
}
- bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
- if (isAmdHsaOrMesa) {
- if (!ST.enableFlatScratch())
- PrivateSegmentBuffer = true;
-
- if (UseFixedABI) {
- DispatchPtr = true;
- QueuePtr = true;
-
- // FIXME: We don't need this?
- DispatchID = true;
- } else {
- if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
- DispatchPtr = true;
-
- if (F.hasFnAttribute("amdgpu-queue-ptr"))
- QueuePtr = true;
-
- if (F.hasFnAttribute("amdgpu-dispatch-id"))
- DispatchID = true;
- }
- } else if (ST.isMesaGfxShader(F)) {
- ImplicitBufferPtr = true;
- }
-
- if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
- KernargSegmentPtr = true;
-
- // TODO: This could be refined a lot. The attribute is a poor way of
- // detecting calls or stack objects that may require it before argument
- // lowering.
- if (ST.hasFlatAddressSpace() && isEntryFunction() &&
- (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
- (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
- !ST.flatScratchIsArchitected()) {
- FlatScratchInit = true;
- }
-
Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
StringRef S = A.getValueAsString();
if (!S.empty())
@@ -426,7 +421,7 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
OtherUsedRegs.set(Reg);
SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
- for (unsigned I = 0; I < NumLanes; ++I) {
+ for (int I = NumLanes - 1; I >= 0; --I) {
NextSpillReg = std::find_if(
NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
@@ -447,10 +442,16 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
}
void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
- // The FP & BP spills haven't been inserted yet, so keep them around.
- for (auto &R : SGPRToVGPRSpills) {
- if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex)
+ // Remove dead frame indices from function frame, however keep FP & BP since
+ // spills for them haven't been inserted yet. And also make sure to remove the
+ // frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could
+ // result in an unexpected side effect and bug, in case of any re-mapping of
+ // freed frame indices by later pass(es) like "stack slot coloring".
+ for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) {
+ if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) {
MFI.RemoveStackObject(R.first);
+ SGPRToVGPRSpills.erase(R.first);
+ }
}
// All other SPGRs must be allocated on the default stack, so reset the stack
@@ -650,3 +651,38 @@ bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
}
return false;
}
+
+// Determine whether this function may use AGPRs (accumulator VGPRs), caching
+// the answer in the mutable Optional `UsesAGPRs` once it is final.
+bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
+  // Return the previously computed answer if we have one.
+  if (UsesAGPRs)
+    return *UsesAGPRs;
+
+  // Conservatively report AGPR use for non-entry functions and for functions
+  // that make calls — presumably because callees may use AGPRs; confirm
+  // against the ABI before relying on a tighter answer here.
+  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
+      MF.getFrameInfo().hasCalls()) {
+    UsesAGPRs = true;
+    return true;
+  }
+
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  // Scan all virtual registers for any assigned an AGPR register class.
+  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+    const Register Reg = Register::index2VirtReg(I);
+    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
+    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
+      UsesAGPRs = true;
+      return true;
+    } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
+      // A used generic virtual register (valid LLT, no register class yet)
+      // could still be assigned to an AGPR class later, so answer true but
+      // defer caching UsesAGPRs; the function might not have been regbank
+      // selected yet.
+      return true;
+    }
+  }
+
+  // Also check for direct uses of physical AGPRs (e.g. from inline asm or
+  // pre-assigned operands).
+  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
+    if (MRI.isPhysRegUsed(Reg)) {
+      UsesAGPRs = true;
+      return true;
+    }
+  }
+
+  // No AGPR use found; cache the negative result.
+  UsesAGPRs = false;
+  return false;
+}