diff options
Diffstat (limited to 'lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
| -rw-r--r-- | lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 189 |
1 file changed, 98 insertions, 91 deletions
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index a7c8166ff6d2..6013ebc81d9f 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -1,4 +1,4 @@ -//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===// +//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===// // // The LLVM Compiler Infrastructure // @@ -8,13 +8,19 @@ //===----------------------------------------------------------------------===// #include "SIMachineFunctionInfo.h" +#include "AMDGPUArgumentUsageInfo.h" #include "AMDGPUSubtarget.h" -#include "SIInstrInfo.h" +#include "SIRegisterInfo.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/Optional.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" +#include <cassert> +#include <vector> #define MAX_LANES 64 @@ -22,44 +28,8 @@ using namespace llvm; SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : AMDGPUMachineFunction(MF), - TIDReg(AMDGPU::NoRegister), - ScratchRSrcReg(AMDGPU::PRIVATE_RSRC_REG), - ScratchWaveOffsetReg(AMDGPU::SCRATCH_WAVE_OFFSET_REG), - FrameOffsetReg(AMDGPU::FP_REG), - StackPtrOffsetReg(AMDGPU::SP_REG), - PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister), - DispatchPtrUserSGPR(AMDGPU::NoRegister), - QueuePtrUserSGPR(AMDGPU::NoRegister), - KernargSegmentPtrUserSGPR(AMDGPU::NoRegister), - DispatchIDUserSGPR(AMDGPU::NoRegister), - FlatScratchInitUserSGPR(AMDGPU::NoRegister), - PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister), - GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister), - GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister), - GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister), - 
WorkGroupIDXSystemSGPR(AMDGPU::NoRegister), - WorkGroupIDYSystemSGPR(AMDGPU::NoRegister), - WorkGroupIDZSystemSGPR(AMDGPU::NoRegister), - WorkGroupInfoSystemSGPR(AMDGPU::NoRegister), - PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister), - WorkItemIDXVGPR(AMDGPU::NoRegister), - WorkItemIDYVGPR(AMDGPU::NoRegister), - WorkItemIDZVGPR(AMDGPU::NoRegister), - PSInputAddr(0), - PSInputEnable(0), - ReturnsVoid(true), - FlatWorkGroupSizes(0, 0), - WavesPerEU(0, 0), - DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}), - DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}), - LDSWaveSpillSize(0), - NumUserSGPRs(0), - NumSystemSGPRs(0), - HasSpilledSGPRs(false), - HasSpilledVGPRs(false), - HasNonSpillStackObjects(false), - NumSpilledSGPRs(0), - NumSpilledVGPRs(0), + BufferPSV(*(MF.getSubtarget().getInstrInfo())), + ImagePSV(*(MF.getSubtarget().getInstrInfo())), PrivateSegmentBuffer(false), DispatchPtr(false), QueuePtr(false), @@ -77,11 +47,13 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkItemIDX(false), WorkItemIDY(false), WorkItemIDZ(false), - ImplicitBufferPtr(false) { + ImplicitBufferPtr(false), + ImplicitArgPtr(false), + GITPtrHigh(0xffffffff) { const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); - const Function *F = MF.getFunction(); - FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F); - WavesPerEU = ST.getWavesPerEU(*F); + const Function &F = MF.getFunction(); + FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); + WavesPerEU = ST.getWavesPerEU(F); if (!isEntryFunction()) { // Non-entry functions have no special inputs for now, other registers @@ -91,17 +63,26 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) FrameOffsetReg = AMDGPU::SGPR5; StackPtrOffsetReg = AMDGPU::SGPR32; - // FIXME: Not really a system SGPR. 
- PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg; + ArgInfo.PrivateSegmentBuffer = + ArgDescriptor::createRegister(ScratchRSrcReg); + ArgInfo.PrivateSegmentWaveByteOffset = + ArgDescriptor::createRegister(ScratchWaveOffsetReg); + + if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) + ImplicitArgPtr = true; + } else { + if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) + KernargSegmentPtr = true; } - CallingConv::ID CC = F->getCallingConv(); + CallingConv::ID CC = F.getCallingConv(); if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) { - KernargSegmentPtr = !F->arg_empty(); + if (!F.arg_empty()) + KernargSegmentPtr = true; WorkGroupIDX = true; WorkItemIDX = true; } else if (CC == CallingConv::AMDGPU_PS) { - PSInputAddr = AMDGPU::getInitialPSInputAddr(*F); + PSInputAddr = AMDGPU::getInitialPSInputAddr(F); } if (ST.debuggerEmitPrologue()) { @@ -113,27 +94,27 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkItemIDY = true; WorkItemIDZ = true; } else { - if (F->hasFnAttribute("amdgpu-work-group-id-x")) + if (F.hasFnAttribute("amdgpu-work-group-id-x")) WorkGroupIDX = true; - if (F->hasFnAttribute("amdgpu-work-group-id-y")) + if (F.hasFnAttribute("amdgpu-work-group-id-y")) WorkGroupIDY = true; - if (F->hasFnAttribute("amdgpu-work-group-id-z")) + if (F.hasFnAttribute("amdgpu-work-group-id-z")) WorkGroupIDZ = true; - if (F->hasFnAttribute("amdgpu-work-item-id-x")) + if (F.hasFnAttribute("amdgpu-work-item-id-x")) WorkItemIDX = true; - if (F->hasFnAttribute("amdgpu-work-item-id-y")) + if (F.hasFnAttribute("amdgpu-work-item-id-y")) WorkItemIDY = true; - if (F->hasFnAttribute("amdgpu-work-item-id-z")) + if (F.hasFnAttribute("amdgpu-work-item-id-z")) WorkItemIDZ = true; } const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); - bool MaySpill = ST.isVGPRSpillingEnabled(*F); + bool MaySpill = ST.isVGPRSpillingEnabled(F); bool HasStackObjects = FrameInfo.hasStackObjects(); if (isEntryFunction()) { @@ -145,10 +126,11 @@ 
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (HasStackObjects || MaySpill) { PrivateSegmentWaveByteOffset = true; - // HS and GS always have the scratch wave offset in SGPR5 on GFX9. - if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 && - (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS)) - PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5; + // HS and GS always have the scratch wave offset in SGPR5 on GFX9. + if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 && + (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS)) + ArgInfo.PrivateSegmentWaveByteOffset + = ArgDescriptor::createRegister(AMDGPU::SGPR5); } } @@ -157,78 +139,94 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (HasStackObjects || MaySpill) PrivateSegmentBuffer = true; - if (F->hasFnAttribute("amdgpu-dispatch-ptr")) + if (F.hasFnAttribute("amdgpu-dispatch-ptr")) DispatchPtr = true; - if (F->hasFnAttribute("amdgpu-queue-ptr")) + if (F.hasFnAttribute("amdgpu-queue-ptr")) QueuePtr = true; - if (F->hasFnAttribute("amdgpu-dispatch-id")) + if (F.hasFnAttribute("amdgpu-dispatch-id")) DispatchID = true; } else if (ST.isMesaGfxShader(MF)) { if (HasStackObjects || MaySpill) ImplicitBufferPtr = true; } - if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr")) + if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr")) KernargSegmentPtr = true; if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) { // TODO: This could be refined a lot. The attribute is a poor way of // detecting calls that may require it before argument lowering. 
- if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch")) + if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch")) FlatScratchInit = true; } + + Attribute A = F.getFnAttribute("amdgpu-git-ptr-high"); + StringRef S = A.getValueAsString(); + if (!S.empty()) + S.consumeInteger(0, GITPtrHigh); } unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( const SIRegisterInfo &TRI) { - PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg( - getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass); + ArgInfo.PrivateSegmentBuffer = + ArgDescriptor::createRegister(TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass)); NumUserSGPRs += 4; - return PrivateSegmentBufferUserSGPR; + return ArgInfo.PrivateSegmentBuffer.getRegister(); } unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) { - DispatchPtrUserSGPR = TRI.getMatchingSuperReg( - getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); NumUserSGPRs += 2; - return DispatchPtrUserSGPR; + return ArgInfo.DispatchPtr.getRegister(); } unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) { - QueuePtrUserSGPR = TRI.getMatchingSuperReg( - getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); NumUserSGPRs += 2; - return QueuePtrUserSGPR; + return ArgInfo.QueuePtr.getRegister(); } unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) { - KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg( - getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + ArgInfo.KernargSegmentPtr + = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); NumUserSGPRs += 2; - return 
KernargSegmentPtrUserSGPR; + return ArgInfo.KernargSegmentPtr.getRegister(); } unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) { - DispatchIDUserSGPR = TRI.getMatchingSuperReg( - getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); NumUserSGPRs += 2; - return DispatchIDUserSGPR; + return ArgInfo.DispatchID.getRegister(); } unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) { - FlatScratchInitUserSGPR = TRI.getMatchingSuperReg( - getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); NumUserSGPRs += 2; - return FlatScratchInitUserSGPR; + return ArgInfo.FlatScratchInit.getRegister(); } unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) { - ImplicitBufferPtrUserSGPR = TRI.getMatchingSuperReg( - getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); NumUserSGPRs += 2; - return ImplicitBufferPtrUserSGPR; + return ArgInfo.ImplicitBufferPtr.getRegister(); +} + +static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) { + for (unsigned I = 0; CSRegs[I]; ++I) { + if (CSRegs[I] == Reg) + return true; + } + + return false; } /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI. @@ -252,6 +250,8 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, int NumLanes = Size / 4; + const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + // Make sure to handle the case where a wide SGPR spill may span between two // VGPRs. 
for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) { @@ -261,21 +261,28 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, if (VGPRIndex == 0) { LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF); if (LaneVGPR == AMDGPU::NoRegister) { - // We have no VGPRs left for spilling SGPRs. Reset because we won't + // We have no VGPRs left for spilling SGPRs. Reset because we will not // partially spill the SGPR to VGPRs. SGPRToVGPRSpills.erase(FI); NumVGPRSpillLanes -= I; return false; } - SpillVGPRs.push_back(LaneVGPR); + Optional<int> CSRSpillFI; + if (FrameInfo.hasCalls() && CSRegs && isCalleeSavedReg(CSRegs, LaneVGPR)) { + // TODO: Should this be a CreateSpillStackObject? This is technically a + // weird CSR spill. + CSRSpillFI = FrameInfo.CreateStackObject(4, 4, false); + } + + SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI)); // Add this register as live-in to all blocks to avoid machine verifer // complaining about use of an undefined physical register. for (MachineBasicBlock &BB : MF) BB.addLiveIn(LaneVGPR); } else { - LaneVGPR = SpillVGPRs.back(); + LaneVGPR = SpillVGPRs.back().VGPR; } SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex)); |
