diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 190
1 files changed, 134 insertions, 56 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 0c67b1467a5d..788e9873f780 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -8,6 +8,7 @@
 
 #include "SIMachineFunctionInfo.h"
 #include "AMDGPUArgumentUsageInfo.h"
+#include "AMDGPUTargetMachine.h"
 #include "AMDGPUSubtarget.h"
 #include "SIRegisterInfo.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -52,9 +53,18 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
   WavesPerEU = ST.getWavesPerEU(F);
 
-  Occupancy = ST.computeOccupancy(MF, getLDSSize());
+  Occupancy = ST.computeOccupancy(F, getLDSSize());
   CallingConv::ID CC = F.getCallingConv();
 
+  // FIXME: Should have analysis or something rather than attribute to detect
+  // calls.
+  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");
+
+  // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
+  // have any calls.
+  const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
+                           (!isEntryFunction() || HasCalls);
+
   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
     if (!F.arg_empty())
       KernargSegmentPtr = true;
@@ -68,16 +78,13 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     // Non-entry functions have no special inputs for now, other registers
     // required for scratch access.
     ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
-    ScratchWaveOffsetReg = AMDGPU::SGPR33;
 
     // TODO: Pick a high register, and shift down, similar to a kernel.
-    FrameOffsetReg = AMDGPU::SGPR34;
+    FrameOffsetReg = AMDGPU::SGPR33;
     StackPtrOffsetReg = AMDGPU::SGPR32;
 
     ArgInfo.PrivateSegmentBuffer =
       ArgDescriptor::createRegister(ScratchRSrcReg);
-    ArgInfo.PrivateSegmentWaveByteOffset =
-      ArgDescriptor::createRegister(ScratchWaveOffsetReg);
 
     if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
       ImplicitArgPtr = true;
@@ -89,27 +96,35 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     }
   }
 
-  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
+  if (UseFixedABI) {
     WorkGroupIDX = true;
-
-  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
     WorkGroupIDY = true;
-
-  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
     WorkGroupIDZ = true;
-
-  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
     WorkItemIDX = true;
-
-  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
     WorkItemIDY = true;
-
-  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
     WorkItemIDZ = true;
+    ImplicitArgPtr = true;
+  } else {
+    if (F.hasFnAttribute("amdgpu-work-group-id-x"))
+      WorkGroupIDX = true;
+
+    if (F.hasFnAttribute("amdgpu-work-group-id-y"))
+      WorkGroupIDY = true;
+
+    if (F.hasFnAttribute("amdgpu-work-group-id-z"))
+      WorkGroupIDZ = true;
+
+    if (F.hasFnAttribute("amdgpu-work-item-id-x"))
+      WorkItemIDX = true;
 
-  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
-  bool HasStackObjects = FrameInfo.hasStackObjects();
+    if (F.hasFnAttribute("amdgpu-work-item-id-y"))
+      WorkItemIDY = true;
+
+    if (F.hasFnAttribute("amdgpu-work-item-id-z"))
+      WorkItemIDZ = true;
+  }
 
+  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
   if (isEntryFunction()) {
     // X, XY, and XYZ are the only supported combinations, so make sure Y is
     // enabled if Z is.
@@ -129,36 +144,34 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   if (isAmdHsaOrMesa) {
     PrivateSegmentBuffer = true;
 
-    if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
+    if (UseFixedABI) {
       DispatchPtr = true;
-
-    if (F.hasFnAttribute("amdgpu-queue-ptr"))
       QueuePtr = true;
 
-    if (F.hasFnAttribute("amdgpu-dispatch-id"))
+      // FIXME: We don't need this?
       DispatchID = true;
+    } else {
+      if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
+        DispatchPtr = true;
+
+      if (F.hasFnAttribute("amdgpu-queue-ptr"))
+        QueuePtr = true;
+
+      if (F.hasFnAttribute("amdgpu-dispatch-id"))
+        DispatchID = true;
+    }
   } else if (ST.isMesaGfxShader(F)) {
     ImplicitBufferPtr = true;
   }
 
-  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
+  if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
     KernargSegmentPtr = true;
 
   if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
-    auto hasNonSpillStackObjects = [&]() {
-      // Avoid expensive checking if there's no stack objects.
-      if (!HasStackObjects)
-        return false;
-      for (auto OI = FrameInfo.getObjectIndexBegin(),
-           OE = FrameInfo.getObjectIndexEnd(); OI != OE; ++OI)
-        if (!FrameInfo.isSpillSlotObjectIndex(OI))
-          return true;
-      // All stack objects are spill slots.
-      return false;
-    };
     // TODO: This could be refined a lot. The attribute is a poor way of
-    // detecting calls that may require it before argument lowering.
-    if (hasNonSpillStackObjects() || F.hasFnAttribute("amdgpu-flat-scratch"))
+    // detecting calls or stack objects that may require it before argument
+    // lowering.
+    if (HasCalls || HasStackObjects)
       FlatScratchInit = true;
   }
 
@@ -184,7 +197,7 @@ void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
                  MF.getFunction()));
 }
 
-unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
+Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
   const SIRegisterInfo &TRI) {
   ArgInfo.PrivateSegmentBuffer =
     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
@@ -193,21 +206,21 @@ unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
   return ArgInfo.PrivateSegmentBuffer.getRegister();
 }
 
-unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
+Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
   NumUserSGPRs += 2;
   return ArgInfo.DispatchPtr.getRegister();
 }
 
-unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
+Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
   NumUserSGPRs += 2;
   return ArgInfo.QueuePtr.getRegister();
 }
 
-unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
+Register SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
   ArgInfo.KernargSegmentPtr =
     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
@@ -215,28 +228,29 @@ unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI)
   return ArgInfo.KernargSegmentPtr.getRegister();
 }
 
-unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
+Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
   NumUserSGPRs += 2;
   return ArgInfo.DispatchID.getRegister();
 }
 
-unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
+Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
   NumUserSGPRs += 2;
   return ArgInfo.FlatScratchInit.getRegister();
 }
 
-unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
+Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
   NumUserSGPRs += 2;
   return ArgInfo.ImplicitBufferPtr.getRegister();
 }
 
-static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
+bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
+                                             MCPhysReg Reg) {
   for (unsigned I = 0; CSRegs[I]; ++I) {
     if (CSRegs[I] == Reg)
       return true;
@@ -270,22 +284,35 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   unsigned WaveSize = ST.getWavefrontSize();
+  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
 
   unsigned Size = FrameInfo.getObjectSize(FI);
-  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
-  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
+  unsigned NumLanes = Size / 4;
+
+  if (NumLanes > WaveSize)
+    return false;
 
-  int NumLanes = Size / 4;
+  assert(Size >= 4 && "invalid sgpr spill size");
+  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
 
   const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
 
   // Make sure to handle the case where a wide SGPR spill may span between two
   // VGPRs.
-  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
-    unsigned LaneVGPR;
+  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
+    Register LaneVGPR;
     unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
 
-    if (VGPRIndex == 0) {
+    // Reserve a VGPR (when NumVGPRSpillLanes = 0, WaveSize, 2*WaveSize, ..) and
+    // when one of the two conditions is true:
+    // 1. One reserved VGPR being tracked by VGPRReservedForSGPRSpill is not yet
+    // reserved.
+    // 2. All spill lanes of reserved VGPR(s) are full and another spill lane is
+    // required.
+    if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
+      assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
+      LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
+    } else if (VGPRIndex == 0) {
       LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
       if (LaneVGPR == AMDGPU::NoRegister) {
         // We have no VGPRs left for spilling SGPRs. Reset because we will not
@@ -298,7 +325,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
       Optional<int> CSRSpillFI;
       if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
           isCalleeSavedReg(CSRegs, LaneVGPR)) {
-        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
+        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
       }
 
       SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
@@ -317,6 +344,19 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
   return true;
 }
 
+/// Reserve a VGPR for spilling of SGPRs
+bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+
+  Register LaneVGPR = TRI->findUnusedRegister(
+      MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
+  SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, None));
+  FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
+  return true;
+}
+
 /// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
 /// Either AGPR is spilled to VGPR to vice versa.
 /// Returns true if a \p FI can be eliminated completely.
@@ -386,9 +426,9 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
 }
 
 void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
-  // The FP spill hasn't been inserted yet, so keep it around.
+  // The FP & BP spills haven't been inserted yet, so keep them around.
   for (auto &R : SGPRToVGPRSpills) {
-    if (R.first != FramePointerSaveIndex)
+    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex)
       MFI.RemoveStackObject(R.first);
   }
 
@@ -396,7 +436,7 @@ void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
   // ID.
   for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
        ++i)
-    if (i != FramePointerSaveIndex)
+    if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
      MFI.setStackID(i, TargetStackID::Default);
 
   for (auto &R : VGPRToAGPRSpills) {
@@ -414,7 +454,28 @@ MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
   return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
 }
 
-static yaml::StringValue regToString(unsigned Reg,
+Register
+SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  if (!ST.isAmdPalOS())
+    return Register();
+  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
+  if (ST.hasMergedShaders()) {
+    switch (MF.getFunction().getCallingConv()) {
+    case CallingConv::AMDGPU_HS:
+    case CallingConv::AMDGPU_GS:
+      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
+      // ES+GS merged shader on gfx9+.
+      GitPtrLo = AMDGPU::SGPR8;
+      return GitPtrLo;
+    default:
+      return GitPtrLo;
+    }
+  }
+  return GitPtrLo;
+}
+
+static yaml::StringValue regToString(Register Reg,
                                      const TargetRegisterInfo &TRI) {
   yaml::StringValue Dest;
   {
@@ -487,7 +548,6 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
       WaveLimiter(MFI.needsWaveLimiter()),
       HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
       ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
-      ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
       StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
       ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
@@ -509,3 +569,21 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
   WaveLimiter = YamlMFI.WaveLimiter;
   return false;
 }
+
+// Remove VGPR which was reserved for SGPR spills if there are no spilled SGPRs
+bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
+                                                   MachineFunction &MF) {
+  for (auto *i = SpillVGPRs.begin(); i < SpillVGPRs.end(); i++) {
+    if (i->VGPR == ReservedVGPR) {
+      SpillVGPRs.erase(i);
+
+      for (MachineBasicBlock &MBB : MF) {
+        MBB.removeLiveIn(ReservedVGPR);
+        MBB.sortUniqueLiveIns();
+      }
+      this->VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
+      return true;
+    }
+  }
+  return false;
+}