path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
author     Dimitry Andric <dim@FreeBSD.org>    2023-04-14 21:41:27 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2023-06-22 18:20:56 +0000
commit     bdd1243df58e60e85101c09001d9812a789b6bc4 (patch)
tree       a1ce621c7301dd47ba2ddc3b8eaa63b441389481 /contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
parent     781624ca2d054430052c828ba8d2c2eaf2d733e7 (diff)
parent     e3b557809604d036af6e00c60f012c2025b59a5e (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp  235
1 file changed, 132 insertions, 103 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 9176e85568ee..b2a433dd3db9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -12,7 +12,6 @@
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -23,17 +22,23 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include <cassert>
+#include <optional>
#include <vector>
#define MAX_LANES 64
using namespace llvm;
-SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
- : AMDGPUMachineFunction(MF),
- BufferPSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
- ImagePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
- GWSResourcePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
+const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
+ const SITargetLowering *TLI = STI->getTargetLowering();
+ return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine());
+}
+
+SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
+ const GCNSubtarget *STI)
+ : AMDGPUMachineFunction(F, *STI),
+ Mode(F),
+ GWSResourcePSV(getTM(STI)),
PrivateSegmentBuffer(false),
DispatchPtr(false),
QueuePtr(false),
@@ -53,8 +58,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ImplicitArgPtr(false),
GITPtrHigh(0xffffffff),
HighBitsOf32BitAddress(0) {
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const Function &F = MF.getFunction();
+ const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI);
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
WavesPerEU = ST.getWavesPerEU(F);
@@ -105,7 +109,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (ST.hasGFX90AInsts() &&
ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
- !mayUseAGPRs(MF))
+ !mayUseAGPRs(F))
MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
}
@@ -271,8 +275,32 @@ Register SIMachineFunctionInfo::addLDSKernelId() {
return ArgInfo.LDSKernelId.getRegister();
}
+void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR,
+ uint64_t Size, Align Alignment) {
+ // Skip if it is an entry function or the register is already added.
+ if (isEntryFunction() || WWMSpills.count(VGPR))
+ return;
+
+ WWMSpills.insert(std::make_pair(
+ VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment)));
+}
+
+// Separate out the callee-saved and scratch registers.
+void SIMachineFunctionInfo::splitWWMSpillRegisters(
+ MachineFunction &MF,
+ SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
+ SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const {
+ const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
+ for (auto &Reg : WWMSpills) {
+ if (isCalleeSavedReg(CSRegs, Reg.first))
+ CalleeSavedRegs.push_back(Reg);
+ else
+ ScratchRegs.push_back(Reg);
+ }
+}
+
bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
- MCPhysReg Reg) {
+ MCPhysReg Reg) const {
for (unsigned I = 0; CSRegs[I]; ++I) {
if (CSRegs[I] == Reg)
return true;
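
A minimal standalone sketch of the partitioning that splitWWMSpillRegisters() performs in the hunk above: WWM spill entries are walked once and sorted into callee-saved and scratch buckets by membership in a zero-terminated callee-saved list, the same check isCalleeSavedReg() makes. The register numbers, frame indices, and main() driver below are illustrative only and are not LLVM API code.

#include <cstdint>
#include <iostream>
#include <map>
#include <utility>
#include <vector>

using Register = uint16_t;

// Zero-terminated callee-saved register list, as isCalleeSavedReg() expects.
static bool isCalleeSaved(const Register *CSRegs, Register Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I)
    if (CSRegs[I] == Reg)
      return true;
  return false;
}

int main() {
  // Hypothetical register numbers and spill frame indices.
  const Register CSRegs[] = {40, 41, 42, 0};
  std::map<Register, int> WWMSpills = {{40, 3}, {7, 4}, {41, 5}};

  std::vector<std::pair<Register, int>> CalleeSavedRegs, ScratchRegs;
  for (const auto &Reg : WWMSpills) {
    if (isCalleeSaved(CSRegs, Reg.first))
      CalleeSavedRegs.push_back(Reg);
    else
      ScratchRegs.push_back(Reg);
  }

  std::cout << CalleeSavedRegs.size() << " callee-saved, "
            << ScratchRegs.size() << " scratch\n"; // 2 callee-saved, 1 scratch
  return 0;
}
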
@@ -281,30 +309,74 @@ bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
return false;
}
-/// \p returns true if \p NumLanes slots are available in VGPRs already used for
-/// SGPR spilling.
-//
-// FIXME: This only works after processFunctionBeforeFrameFinalized
-bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
- unsigned NumNeed) const {
+bool SIMachineFunctionInfo::allocateVGPRForSGPRSpills(MachineFunction &MF,
+ int FI,
+ unsigned LaneIndex) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- unsigned WaveSize = ST.getWavefrontSize();
- return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ Register LaneVGPR;
+ if (!LaneIndex) {
+ LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
+ if (LaneVGPR == AMDGPU::NoRegister) {
+ // We have no VGPRs left for spilling SGPRs. Reset because we will not
+ // partially spill the SGPR to VGPRs.
+ SGPRSpillToVGPRLanes.erase(FI);
+ return false;
+ }
+
+ SpillVGPRs.push_back(LaneVGPR);
+ // Add this register as live-in to all blocks to avoid machine verifier
+ // complaining about use of an undefined physical register.
+ for (MachineBasicBlock &BB : MF)
+ BB.addLiveIn(LaneVGPR);
+ } else {
+ LaneVGPR = SpillVGPRs.back();
+ }
+
+ SGPRSpillToVGPRLanes[FI].push_back(
+ SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
+ return true;
}
-/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
-bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
- int FI) {
- std::vector<SIRegisterInfo::SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
+bool SIMachineFunctionInfo::allocateVGPRForPrologEpilogSGPRSpills(
+ MachineFunction &MF, int FI, unsigned LaneIndex) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ Register LaneVGPR;
+ if (!LaneIndex) {
+ LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
+ if (LaneVGPR == AMDGPU::NoRegister) {
+ // We have no VGPRs left for spilling SGPRs. Reset because we will not
+ // partially spill the SGPR to VGPRs.
+ PrologEpilogSGPRSpillToVGPRLanes.erase(FI);
+ return false;
+ }
+
+ allocateWWMSpill(MF, LaneVGPR);
+ } else {
+ LaneVGPR = WWMSpills.back().first;
+ }
+
+ PrologEpilogSGPRSpillToVGPRLanes[FI].push_back(
+ SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
+ return true;
+}
+
+bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF,
+ int FI,
+ bool IsPrologEpilog) {
+ std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
+ IsPrologEpilog ? PrologEpilogSGPRSpillToVGPRLanes[FI]
+ : SGPRSpillToVGPRLanes[FI];
// This has already been allocated.
if (!SpillLanes.empty())
return true;
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
- MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned WaveSize = ST.getWavefrontSize();
unsigned Size = FrameInfo.getObjectSize(FI);
@@ -314,51 +386,23 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
return false;
assert(Size >= 4 && "invalid sgpr spill size");
- assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
-
- // Make sure to handle the case where a wide SGPR spill may span between two
- // VGPRs.
- for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
- Register LaneVGPR;
- unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
-
- if (VGPRIndex == 0) {
- LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
- if (LaneVGPR == AMDGPU::NoRegister) {
- // We have no VGPRs left for spilling SGPRs. Reset because we will not
- // partially spill the SGPR to VGPRs.
- SGPRToVGPRSpills.erase(FI);
- NumVGPRSpillLanes -= I;
-
- // FIXME: We can run out of free registers with split allocation if
- // IPRA is enabled and a called function already uses every VGPR.
-#if 0
- DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),
- "VGPRs for SGPR spilling",
- 0, DS_Error);
- MF.getFunction().getContext().diagnose(DiagOutOfRegs);
-#endif
- return false;
- }
+ assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
+ "not spilling SGPRs to VGPRs");
- Optional<int> SpillFI;
- // We need to preserve inactive lanes, so always save, even caller-save
- // registers.
- if (!isEntryFunction()) {
- SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
- }
+ unsigned &NumSpillLanes =
+ IsPrologEpilog ? NumVGPRPrologEpilogSpillLanes : NumVGPRSpillLanes;
- SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));
+ for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
+ unsigned LaneIndex = (NumSpillLanes % WaveSize);
- // Add this register as live-in to all blocks to avoid machine verifier
- // complaining about use of an undefined physical register.
- for (MachineBasicBlock &BB : MF)
- BB.addLiveIn(LaneVGPR);
- } else {
- LaneVGPR = SpillVGPRs.back().VGPR;
+ bool Allocated =
+ IsPrologEpilog
+ ? allocateVGPRForPrologEpilogSGPRSpills(MF, FI, LaneIndex)
+ : allocateVGPRForSGPRSpills(MF, FI, LaneIndex);
+ if (!Allocated) {
+ NumSpillLanes -= I;
+ return false;
}
-
- SpillLanes.push_back(SIRegisterInfo::SpilledReg(LaneVGPR, VGPRIndex));
}
return true;
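
A minimal standalone sketch of the lane-packing arithmetic used by allocateSGPRSpillToVGPRLane() above, assuming a wave64 target and 4-byte SGPR slots: each spilled frame object consumes Size/4 lanes, the running lane counter is taken modulo the wavefront size, and a fresh VGPR is requested only when the lane index wraps back to 0. The spill sizes and the main() driver are illustrative, not part of the patch.

#include <cstdio>

int main() {
  const unsigned WaveSize = 64; // lanes per VGPR on a wave64 target
  unsigned NumSpillLanes = 0;   // running lane counter, as in the pass

  // Spill two hypothetical SGPR frame objects of 16 and 8 bytes.
  const unsigned SpillSizes[] = {16, 8};
  for (unsigned Size : SpillSizes) {
    unsigned NumLanes = Size / 4;
    for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
      unsigned LaneIndex = NumSpillLanes % WaveSize;
      // A new VGPR is needed only when the lane index wraps to 0; otherwise
      // the most recently allocated VGPR is reused.
      std::printf("lane %u -> VGPR #%u, lane index %u%s\n", NumSpillLanes,
                  NumSpillLanes / WaveSize, LaneIndex,
                  LaneIndex == 0 ? " (new VGPR)" : "");
    }
  }
  return 0;
}
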
@@ -426,6 +470,7 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
OtherUsedRegs.set(*NextSpillReg);
SpillRegs.push_back(*NextSpillReg);
+ MRI.reserveReg(*NextSpillReg, TRI);
Spill.Lanes[I] = *NextSpillReg++;
}
@@ -434,28 +479,26 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
bool SIMachineFunctionInfo::removeDeadFrameIndices(
MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
- // Remove dead frame indices from function frame, however keep FP & BP since
- // spills for them haven't been inserted yet. And also make sure to remove the
- // frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could
- // result in an unexpected side effect and bug, in case of any re-mapping of
- // freed frame indices by later pass(es) like "stack slot coloring".
- for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) {
- if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) {
- MFI.RemoveStackObject(R.first);
- SGPRToVGPRSpills.erase(R.first);
- }
+ // Remove dead frame indices from function frame. And also make sure to remove
+ // the frame indices from `SGPRSpillToVGPRLanes` data structure, otherwise, it
+ // could result in an unexpected side effect and bug, in case of any
+ // re-mapping of freed frame indices by later pass(es) like "stack slot
+ // coloring".
+ for (auto &R : make_early_inc_range(SGPRSpillToVGPRLanes)) {
+ MFI.RemoveStackObject(R.first);
+ SGPRSpillToVGPRLanes.erase(R.first);
}
bool HaveSGPRToMemory = false;
if (ResetSGPRSpillStackIDs) {
- // All other SPGRs must be allocated on the default stack, so reset the
+ // All other SGPRs must be allocated on the default stack, so reset the
// stack ID.
- for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
- ++i) {
- if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {
- if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {
- MFI.setStackID(i, TargetStackID::Default);
+ for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E;
+ ++I) {
+ if (!checkIndexInPrologEpilogSGPRSpills(I)) {
+ if (MFI.getStackID(I) == TargetStackID::SGPRSpill) {
+ MFI.setStackID(I, TargetStackID::Default);
HaveSGPRToMemory = true;
}
}
@@ -470,20 +513,6 @@ bool SIMachineFunctionInfo::removeDeadFrameIndices(
return HaveSGPRToMemory;
}
-void SIMachineFunctionInfo::allocateWWMReservedSpillSlots(
- MachineFrameInfo &MFI, const SIRegisterInfo &TRI) {
- assert(WWMReservedFrameIndexes.empty());
-
- WWMReservedFrameIndexes.resize(WWMReservedRegs.size());
-
- int I = 0;
- for (Register VGPR : WWMReservedRegs) {
- const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR);
- WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject(
- TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC));
- }
-}
-
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
const SIRegisterInfo &TRI) {
if (ScavengeFI)
@@ -539,12 +568,12 @@ static yaml::StringValue regToString(Register Reg,
return Dest;
}
-static Optional<yaml::SIArgumentInfo>
+static std::optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
const TargetRegisterInfo &TRI) {
yaml::SIArgumentInfo AI;
- auto convertArg = [&](Optional<yaml::SIArgument> &A,
+ auto convertArg = [&](std::optional<yaml::SIArgument> &A,
const ArgDescriptor &Arg) {
if (!Arg)
return false;
@@ -588,7 +617,7 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
if (Any)
return AI;
- return None;
+ return std::nullopt;
}
yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
@@ -610,7 +639,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
BytesInStackArgArea(MFI.getBytesInStackArgArea()),
ReturnsVoid(MFI.returnsVoid()),
ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
- for (Register Reg : MFI.WWMReservedRegs)
+ for (Register Reg : MFI.getWWMReservedRegs())
WWMReservedRegs.push_back(regToString(Reg, TRI));
if (MFI.getVGPRForAGPRCopy())
@@ -652,19 +681,19 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
SourceMgr::DK_Error, toString(FIOrErr.takeError()),
- "", None, None);
+ "", std::nullopt, std::nullopt);
SourceRange = YamlMFI.ScavengeFI->SourceRange;
return true;
}
ScavengeFI = *FIOrErr;
} else {
- ScavengeFI = None;
+ ScavengeFI = std::nullopt;
}
return false;
}
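
Much of the mechanical churn in this hunk and the ones above comes from LLVM's migration from llvm::Optional/None to std::optional/std::nullopt. A minimal sketch of the target idiom follows, using a hypothetical helper in the spirit of convertArgumentInfo(); it is not code from this patch.

#include <iostream>
#include <optional>
#include <string>

// Return a value only when something was actually converted.
std::optional<std::string> convertIfNonEmpty(const std::string &In) {
  if (In.empty())
    return std::nullopt; // previously: return None;
  return In;
}

int main() {
  // std::optional converts contextually to bool, so callers can test the
  // result directly before dereferencing it.
  if (auto S = convertIfNonEmpty("sgpr_spill"))
    std::cout << *S << '\n';
  if (!convertIfNonEmpty(""))
    std::cout << "nothing to convert\n";
  return 0;
}
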
-bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const {
- for (const BasicBlock &BB : MF.getFunction()) {
+bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
+ for (const BasicBlock &BB : F) {
for (const Instruction &I : BB) {
const auto *CB = dyn_cast<CallBase>(&I);
if (!CB)