diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2023-04-14 21:41:27 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2023-06-22 18:20:56 +0000 |
commit | bdd1243df58e60e85101c09001d9812a789b6bc4 (patch) | |
tree | a1ce621c7301dd47ba2ddc3b8eaa63b441389481 /contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | |
parent | 781624ca2d054430052c828ba8d2c2eaf2d733e7 (diff) | |
parent | e3b557809604d036af6e00c60f012c2025b59a5e (diff) |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 235 |
1 files changed, 132 insertions, 103 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 9176e85568ee..b2a433dd3db9 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -12,7 +12,6 @@ #include "SIRegisterInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "Utils/AMDGPUBaseInfo.h" -#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -23,17 +22,23 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include <cassert> +#include <optional> #include <vector> #define MAX_LANES 64 using namespace llvm; -SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) - : AMDGPUMachineFunction(MF), - BufferPSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())), - ImagePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())), - GWSResourcePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())), +const GCNTargetMachine &getTM(const GCNSubtarget *STI) { + const SITargetLowering *TLI = STI->getTargetLowering(); + return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine()); +} + +SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F, + const GCNSubtarget *STI) + : AMDGPUMachineFunction(F, *STI), + Mode(F), + GWSResourcePSV(getTM(STI)), PrivateSegmentBuffer(false), DispatchPtr(false), QueuePtr(false), @@ -53,8 +58,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) ImplicitArgPtr(false), GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) { - const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); - const Function &F = MF.getFunction(); + const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI); FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); WavesPerEU = ST.getWavesPerEU(F); @@ -105,7 +109,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (ST.hasGFX90AInsts() && ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() && - !mayUseAGPRs(MF)) + !mayUseAGPRs(F)) MayNeedAGPRs = false; // We will select all MAI with VGPR operands. } @@ -271,8 +275,32 @@ Register SIMachineFunctionInfo::addLDSKernelId() { return ArgInfo.LDSKernelId.getRegister(); } +void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR, + uint64_t Size, Align Alignment) { + // Skip if it is an entry function or the register is already added. + if (isEntryFunction() || WWMSpills.count(VGPR)) + return; + + WWMSpills.insert(std::make_pair( + VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment))); +} + +// Separate out the callee-saved and scratch registers. +void SIMachineFunctionInfo::splitWWMSpillRegisters( + MachineFunction &MF, + SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs, + SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const { + const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); + for (auto &Reg : WWMSpills) { + if (isCalleeSavedReg(CSRegs, Reg.first)) + CalleeSavedRegs.push_back(Reg); + else + ScratchRegs.push_back(Reg); + } +} + bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs, - MCPhysReg Reg) { + MCPhysReg Reg) const { for (unsigned I = 0; CSRegs[I]; ++I) { if (CSRegs[I] == Reg) return true; @@ -281,30 +309,74 @@ bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs, return false; } -/// \p returns true if \p NumLanes slots are available in VGPRs already used for -/// SGPR spilling. -// -// FIXME: This only works after processFunctionBeforeFrameFinalized -bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF, - unsigned NumNeed) const { +bool SIMachineFunctionInfo::allocateVGPRForSGPRSpills(MachineFunction &MF, + int FI, + unsigned LaneIndex) { const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); - unsigned WaveSize = ST.getWavefrontSize(); - return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + Register LaneVGPR; + if (!LaneIndex) { + LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF); + if (LaneVGPR == AMDGPU::NoRegister) { + // We have no VGPRs left for spilling SGPRs. Reset because we will not + // partially spill the SGPR to VGPRs. + SGPRSpillToVGPRLanes.erase(FI); + return false; + } + + SpillVGPRs.push_back(LaneVGPR); + // Add this register as live-in to all blocks to avoid machine verifier + // complaining about use of an undefined physical register. + for (MachineBasicBlock &BB : MF) + BB.addLiveIn(LaneVGPR); + } else { + LaneVGPR = SpillVGPRs.back(); + } + + SGPRSpillToVGPRLanes[FI].push_back( + SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex)); + return true; } -/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI. -bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, - int FI) { - std::vector<SIRegisterInfo::SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI]; +bool SIMachineFunctionInfo::allocateVGPRForPrologEpilogSGPRSpills( + MachineFunction &MF, int FI, unsigned LaneIndex) { + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + Register LaneVGPR; + if (!LaneIndex) { + LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF); + if (LaneVGPR == AMDGPU::NoRegister) { + // We have no VGPRs left for spilling SGPRs. Reset because we will not + // partially spill the SGPR to VGPRs. + PrologEpilogSGPRSpillToVGPRLanes.erase(FI); + return false; + } + + allocateWWMSpill(MF, LaneVGPR); + } else { + LaneVGPR = WWMSpills.back().first; + } + + PrologEpilogSGPRSpillToVGPRLanes[FI].push_back( + SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex)); + return true; +} + +bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF, + int FI, + bool IsPrologEpilog) { + std::vector<SIRegisterInfo::SpilledReg> &SpillLanes = + IsPrologEpilog ? PrologEpilogSGPRSpillToVGPRLanes[FI] + : SGPRSpillToVGPRLanes[FI]; // This has already been allocated. if (!SpillLanes.empty()) return true; const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); - const SIRegisterInfo *TRI = ST.getRegisterInfo(); MachineFrameInfo &FrameInfo = MF.getFrameInfo(); - MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned WaveSize = ST.getWavefrontSize(); unsigned Size = FrameInfo.getObjectSize(FI); @@ -314,51 +386,23 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, return false; assert(Size >= 4 && "invalid sgpr spill size"); - assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs"); - - // Make sure to handle the case where a wide SGPR spill may span between two - // VGPRs. - for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) { - Register LaneVGPR; - unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize); - - if (VGPRIndex == 0) { - LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF); - if (LaneVGPR == AMDGPU::NoRegister) { - // We have no VGPRs left for spilling SGPRs. Reset because we will not - // partially spill the SGPR to VGPRs. - SGPRToVGPRSpills.erase(FI); - NumVGPRSpillLanes -= I; - - // FIXME: We can run out of free registers with split allocation if - // IPRA is enabled and a called function already uses every VGPR. -#if 0 - DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(), - "VGPRs for SGPR spilling", - 0, DS_Error); - MF.getFunction().getContext().diagnose(DiagOutOfRegs); -#endif - return false; - } + assert(ST.getRegisterInfo()->spillSGPRToVGPR() && + "not spilling SGPRs to VGPRs"); - Optional<int> SpillFI; - // We need to preserve inactive lanes, so always save, even caller-save - // registers. - if (!isEntryFunction()) { - SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4)); - } + unsigned &NumSpillLanes = + IsPrologEpilog ? NumVGPRPrologEpilogSpillLanes : NumVGPRSpillLanes; - SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI)); + for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) { + unsigned LaneIndex = (NumSpillLanes % WaveSize); - // Add this register as live-in to all blocks to avoid machine verifier - // complaining about use of an undefined physical register. - for (MachineBasicBlock &BB : MF) - BB.addLiveIn(LaneVGPR); - } else { - LaneVGPR = SpillVGPRs.back().VGPR; + bool Allocated = + IsPrologEpilog + ? allocateVGPRForPrologEpilogSGPRSpills(MF, FI, LaneIndex) + : allocateVGPRForSGPRSpills(MF, FI, LaneIndex); + if (!Allocated) { + NumSpillLanes -= I; + return false; } - - SpillLanes.push_back(SIRegisterInfo::SpilledReg(LaneVGPR, VGPRIndex)); } return true; @@ -426,6 +470,7 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF, OtherUsedRegs.set(*NextSpillReg); SpillRegs.push_back(*NextSpillReg); + MRI.reserveReg(*NextSpillReg, TRI); Spill.Lanes[I] = *NextSpillReg++; } @@ -434,28 +479,26 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF, bool SIMachineFunctionInfo::removeDeadFrameIndices( MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) { - // Remove dead frame indices from function frame, however keep FP & BP since - // spills for them haven't been inserted yet. And also make sure to remove the - // frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could - // result in an unexpected side effect and bug, in case of any re-mapping of - // freed frame indices by later pass(es) like "stack slot coloring". - for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) { - if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) { - MFI.RemoveStackObject(R.first); - SGPRToVGPRSpills.erase(R.first); - } + // Remove dead frame indices from function frame. And also make sure to remove + // the frame indices from `SGPRSpillToVGPRLanes` data structure, otherwise, it + // could result in an unexpected side effect and bug, in case of any + // re-mapping of freed frame indices by later pass(es) like "stack slot + // coloring". + for (auto &R : make_early_inc_range(SGPRSpillToVGPRLanes)) { + MFI.RemoveStackObject(R.first); + SGPRSpillToVGPRLanes.erase(R.first); } bool HaveSGPRToMemory = false; if (ResetSGPRSpillStackIDs) { - // All other SPGRs must be allocated on the default stack, so reset the + // All other SGPRs must be allocated on the default stack, so reset the // stack ID. - for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e; - ++i) { - if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) { - if (MFI.getStackID(i) == TargetStackID::SGPRSpill) { - MFI.setStackID(i, TargetStackID::Default); + for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E; + ++I) { + if (!checkIndexInPrologEpilogSGPRSpills(I)) { + if (MFI.getStackID(I) == TargetStackID::SGPRSpill) { + MFI.setStackID(I, TargetStackID::Default); HaveSGPRToMemory = true; } } @@ -470,20 +513,6 @@ bool SIMachineFunctionInfo::removeDeadFrameIndices( return HaveSGPRToMemory; } -void SIMachineFunctionInfo::allocateWWMReservedSpillSlots( - MachineFrameInfo &MFI, const SIRegisterInfo &TRI) { - assert(WWMReservedFrameIndexes.empty()); - - WWMReservedFrameIndexes.resize(WWMReservedRegs.size()); - - int I = 0; - for (Register VGPR : WWMReservedRegs) { - const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR); - WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject( - TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC)); - } -} - int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI) { if (ScavengeFI) @@ -539,12 +568,12 @@ static yaml::StringValue regToString(Register Reg, return Dest; } -static Optional<yaml::SIArgumentInfo> +static std::optional<yaml::SIArgumentInfo> convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, const TargetRegisterInfo &TRI) { yaml::SIArgumentInfo AI; - auto convertArg = [&](Optional<yaml::SIArgument> &A, + auto convertArg = [&](std::optional<yaml::SIArgument> &A, const ArgDescriptor &Arg) { if (!Arg) return false; @@ -588,7 +617,7 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, if (Any) return AI; - return None; + return std::nullopt; } yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( @@ -610,7 +639,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( BytesInStackArgArea(MFI.getBytesInStackArgArea()), ReturnsVoid(MFI.returnsVoid()), ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) { - for (Register Reg : MFI.WWMReservedRegs) + for (Register Reg : MFI.getWWMReservedRegs()) WWMReservedRegs.push_back(regToString(Reg, TRI)); if (MFI.getVGPRForAGPRCopy()) @@ -652,19 +681,19 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields( Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1, SourceMgr::DK_Error, toString(FIOrErr.takeError()), - "", None, None); + "", std::nullopt, std::nullopt); SourceRange = YamlMFI.ScavengeFI->SourceRange; return true; } ScavengeFI = *FIOrErr; } else { - ScavengeFI = None; + ScavengeFI = std::nullopt; } return false; } -bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const { - for (const BasicBlock &BB : MF.getFunction()) { +bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const { + for (const BasicBlock &BB : F) { for (const Instruction &I : BB) { const auto *CB = dyn_cast<CallBase>(&I); if (!CB) |