diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 152 |
1 files changed, 94 insertions, 58 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 85cfe36df16a..c4007f56f350 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -67,9 +67,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI && CC != CallingConv::AMDGPU_Gfx && (!isEntryFunction() || HasCalls); + const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL || + CC == CallingConv::SPIR_KERNEL; - if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) { - if (!F.arg_empty()) + if (IsKernel) { + if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0) KernargSegmentPtr = true; WorkGroupIDX = true; WorkItemIDX = true; @@ -94,45 +96,76 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) ArgDescriptor::createRegister(ScratchRSrcReg); } - if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) + if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr")) ImplicitArgPtr = true; } else { - if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) { - KernargSegmentPtr = true; - MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(), - MaxKernArgAlign); - } + ImplicitArgPtr = false; + MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(), + MaxKernArgAlign); } + bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F); + if (isAmdHsaOrMesa && !ST.enableFlatScratch()) + PrivateSegmentBuffer = true; + else if (ST.isMesaGfxShader(F)) + ImplicitBufferPtr = true; + if (UseFixedABI) { + DispatchPtr = true; + QueuePtr = true; + ImplicitArgPtr = true; WorkGroupIDX = true; WorkGroupIDY = true; WorkGroupIDZ = true; WorkItemIDX = true; WorkItemIDY = true; WorkItemIDZ = true; - ImplicitArgPtr = true; - } else { - if (F.hasFnAttribute("amdgpu-work-group-id-x")) + + // FIXME: We don't need this?
+ DispatchID = true; + } else if (!AMDGPU::isGraphics(CC)) { + if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x")) WorkGroupIDX = true; - if (F.hasFnAttribute("amdgpu-work-group-id-y")) + if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y")) WorkGroupIDY = true; - if (F.hasFnAttribute("amdgpu-work-group-id-z")) + if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z")) WorkGroupIDZ = true; - if (F.hasFnAttribute("amdgpu-work-item-id-x")) + if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x")) WorkItemIDX = true; - if (F.hasFnAttribute("amdgpu-work-item-id-y")) + if (!F.hasFnAttribute("amdgpu-no-workitem-id-y")) WorkItemIDY = true; - if (F.hasFnAttribute("amdgpu-work-item-id-z")) + if (!F.hasFnAttribute("amdgpu-no-workitem-id-z")) WorkItemIDZ = true; + + if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr")) + DispatchPtr = true; + + if (!F.hasFnAttribute("amdgpu-no-queue-ptr")) + QueuePtr = true; + + if (!F.hasFnAttribute("amdgpu-no-dispatch-id")) + DispatchID = true; } + // FIXME: This attribute is a hack, we just need an analysis on the function + // to look for allocas. bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects"); + + // TODO: This could be refined a lot. The attribute is a poor way of + // detecting calls or stack objects that may require it before argument + // lowering. + if (ST.hasFlatAddressSpace() && isEntryFunction() && + (isAmdHsaOrMesa || ST.enableFlatScratch()) && + (HasCalls || HasStackObjects || ST.enableFlatScratch()) && + !ST.flatScratchIsArchitected()) { + FlatScratchInit = true; + } + if (isEntryFunction()) { // X, XY, and XYZ are the only supported combinations, so make sure Y is // enabled if Z is. @@ -150,44 +183,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) } } - bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F); - if (isAmdHsaOrMesa) { - if (!ST.enableFlatScratch()) - PrivateSegmentBuffer = true; - - if (UseFixedABI) { - DispatchPtr = true; - QueuePtr = true; - - // FIXME: We don't need this?
- DispatchID = true; - } else { - if (F.hasFnAttribute("amdgpu-dispatch-ptr")) - DispatchPtr = true; - - if (F.hasFnAttribute("amdgpu-queue-ptr")) - QueuePtr = true; - - if (F.hasFnAttribute("amdgpu-dispatch-id")) - DispatchID = true; - } - } else if (ST.isMesaGfxShader(F)) { - ImplicitBufferPtr = true; - } - - if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr")) - KernargSegmentPtr = true; - - // TODO: This could be refined a lot. The attribute is a poor way of - // detecting calls or stack objects that may require it before argument - // lowering. - if (ST.hasFlatAddressSpace() && isEntryFunction() && - (isAmdHsaOrMesa || ST.enableFlatScratch()) && - (HasCalls || HasStackObjects || ST.enableFlatScratch()) && - !ST.flatScratchIsArchitected()) { - FlatScratchInit = true; - } - Attribute A = F.getFnAttribute("amdgpu-git-ptr-high"); StringRef S = A.getValueAsString(); if (!S.empty()) @@ -426,7 +421,7 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF, OtherUsedRegs.set(Reg); SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin(); - for (unsigned I = 0; I < NumLanes; ++I) { + for (int I = NumLanes - 1; I >= 0; --I) { NextSpillReg = std::find_if( NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) { return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) && @@ -447,10 +442,16 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF, } void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) { - // The FP & BP spills haven't been inserted yet, so keep them around. - for (auto &R : SGPRToVGPRSpills) { - if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) + // Remove dead frame indices from function frame, however keep FP & BP since + // spills for them haven't been inserted yet.
And also make sure to remove the + // frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could + // result in an unexpected side effect and bug, in case of any re-mapping of + // freed frame indices by later pass(es) like "stack slot coloring". + for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) { + if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) { MFI.RemoveStackObject(R.first); + SGPRToVGPRSpills.erase(R.first); + } } // All other SPGRs must be allocated on the default stack, so reset the stack @@ -650,3 +651,38 @@ bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR, } return false; } + +bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const { + if (UsesAGPRs) + return *UsesAGPRs; + + if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) || + MF.getFrameInfo().hasCalls()) { + UsesAGPRs = true; + return true; + } + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + const Register Reg = Register::index2VirtReg(I); + const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg); + if (RC && SIRegisterInfo::isAGPRClass(RC)) { + UsesAGPRs = true; + return true; + } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) { + // Defer caching UsesAGPRs, function might not yet been regbank selected. + return true; + } + } + + for (MCRegister Reg : AMDGPU::AGPR_32RegClass) { + if (MRI.isPhysRegUsed(Reg)) { + UsesAGPRs = true; + return true; + } + } + + UsesAGPRs = false; + return false; +} |
