diff options
Diffstat (limited to 'lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
-rw-r--r-- | lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 90 |
1 files changed, 65 insertions, 25 deletions
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 6013ebc81d9f..0d5ff75e37ed 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -11,6 +11,7 @@ #include "AMDGPUArgumentUsageInfo.h" #include "AMDGPUSubtarget.h" #include "SIRegisterInfo.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -28,17 +29,12 @@ using namespace llvm; SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : AMDGPUMachineFunction(MF), - BufferPSV(*(MF.getSubtarget().getInstrInfo())), - ImagePSV(*(MF.getSubtarget().getInstrInfo())), PrivateSegmentBuffer(false), DispatchPtr(false), QueuePtr(false), KernargSegmentPtr(false), DispatchID(false), FlatScratchInit(false), - GridWorkgroupCountX(false), - GridWorkgroupCountY(false), - GridWorkgroupCountZ(false), WorkGroupIDX(false), WorkGroupIDY(false), WorkGroupIDZ(false), @@ -49,12 +45,26 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkItemIDZ(false), ImplicitBufferPtr(false), ImplicitArgPtr(false), - GITPtrHigh(0xffffffff) { - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + GITPtrHigh(0xffffffff), + HighBitsOf32BitAddress(0) { + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const Function &F = MF.getFunction(); FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); WavesPerEU = ST.getWavesPerEU(F); + Occupancy = getMaxWavesPerEU(); + limitOccupancy(MF); + CallingConv::ID CC = F.getCallingConv(); + + if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) { + if (!F.arg_empty()) + KernargSegmentPtr = true; + WorkGroupIDX = true; + WorkItemIDX = true; + } else if (CC == CallingConv::AMDGPU_PS) { + PSInputAddr = AMDGPU::getInitialPSInputAddr(F); + } + if (!isEntryFunction()) { // Non-entry functions have no special inputs for now, other registers // required for scratch access. @@ -71,18 +81,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) ImplicitArgPtr = true; } else { - if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) - KernargSegmentPtr = true; - } - - CallingConv::ID CC = F.getCallingConv(); - if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) { - if (!F.arg_empty()) + if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) { KernargSegmentPtr = true; - WorkGroupIDX = true; - WorkItemIDX = true; - } else if (CC == CallingConv::AMDGPU_PS) { - PSInputAddr = AMDGPU::getInitialPSInputAddr(F); + MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(), + MaxKernArgAlign); + } } if (ST.debuggerEmitPrologue()) { @@ -134,7 +137,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) } } - bool IsCOV2 = ST.isAmdCodeObjectV2(MF); + bool IsCOV2 = ST.isAmdCodeObjectV2(F); if (IsCOV2) { if (HasStackObjects || MaySpill) PrivateSegmentBuffer = true; @@ -147,7 +150,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (F.hasFnAttribute("amdgpu-dispatch-id")) DispatchID = true; - } else if (ST.isMesaGfxShader(MF)) { + } else if (ST.isMesaGfxShader(F)) { if (HasStackObjects || MaySpill) ImplicitBufferPtr = true; } @@ -166,6 +169,18 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) StringRef S = A.getValueAsString(); if (!S.empty()) S.consumeInteger(0, GITPtrHigh); + + A = F.getFnAttribute("amdgpu-32bit-address-high-bits"); + S = A.getValueAsString(); + if (!S.empty()) + S.consumeInteger(0, HighBitsOf32BitAddress); +} + +void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) { + limitOccupancy(getMaxWavesPerEU()); + const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>(); + limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(), + MF.getFunction())); } unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( @@ -238,7 +253,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, if (!SpillLanes.empty()) return true; - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); MachineFrameInfo &FrameInfo = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -269,10 +284,9 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, } Optional<int> CSRSpillFI; - if (FrameInfo.hasCalls() && CSRegs && isCalleeSavedReg(CSRegs, LaneVGPR)) { - // TODO: Should this be a CreateSpillStackObject? This is technically a - // weird CSR spill. - CSRSpillFI = FrameInfo.CreateStackObject(4, 4, false); + if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs && + isCalleeSavedReg(CSRegs, LaneVGPR)) { + CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4); } SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI)); @@ -295,3 +309,29 @@ void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) for (auto &R : SGPRToVGPRSpills) MFI.RemoveStackObject(R.first); } + + +/// \returns VGPR used for \p Dim' work item ID. +unsigned SIMachineFunctionInfo::getWorkItemIDVGPR(unsigned Dim) const { + switch (Dim) { + case 0: + assert(hasWorkItemIDX()); + return AMDGPU::VGPR0; + case 1: + assert(hasWorkItemIDY()); + return AMDGPU::VGPR1; + case 2: + assert(hasWorkItemIDZ()); + return AMDGPU::VGPR2; + } + llvm_unreachable("unexpected dimension"); +} + +MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const { + assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); + return AMDGPU::SGPR0 + NumUserSGPRs; +} + +MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const { + return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs; +} |