src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2023-04-14 21:41:27 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2023-06-22 18:20:56 +0000
commit	bdd1243df58e60e85101c09001d9812a789b6bc4 (patch)
tree	a1ce621c7301dd47ba2ddc3b8eaa63b441389481 /contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
parent	781624ca2d054430052c828ba8d2c2eaf2d733e7 (diff)
parent	e3b557809604d036af6e00c60f012c2025b59a5e (diff)

Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')

-rw-r--r--

contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

235

1 files changed, 132 insertions, 103 deletions

diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 9176e85568ee..b2a433dd3db9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

@@ -12,7 +12,6 @@

#include "SIRegisterInfo.h"

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "Utils/AMDGPUBaseInfo.h"

-#include "llvm/ADT/Optional.h"

#include "llvm/CodeGen/LiveIntervals.h"

#include "llvm/CodeGen/MachineBasicBlock.h"

#include "llvm/CodeGen/MachineFrameInfo.h"

@@ -23,17 +22,23 @@

#include "llvm/IR/DiagnosticInfo.h"

#include "llvm/IR/Function.h"

#include <cassert>

+#include <optional>

#include <vector>

#define MAX_LANES 64

using namespace llvm;

-SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)

- : AMDGPUMachineFunction(MF),

- BufferPSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),

- ImagePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),

- GWSResourcePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),

+const GCNTargetMachine &getTM(const GCNSubtarget *STI) {

+ const SITargetLowering *TLI = STI->getTargetLowering();

+ return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine());

+SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,

+ const GCNSubtarget *STI)

+ : AMDGPUMachineFunction(F, *STI),

+ Mode(F),

+ GWSResourcePSV(getTM(STI)),

PrivateSegmentBuffer(false),

DispatchPtr(false),

QueuePtr(false),

@@ -53,8 +58,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)

ImplicitArgPtr(false),

GITPtrHigh(0xffffffff),

HighBitsOf32BitAddress(0) {

- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

- const Function &F = MF.getFunction();

+ const GCNSubtarget &ST = *static_cast<const GCNSubtarget *>(STI);

FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);

WavesPerEU = ST.getWavesPerEU(F);

@@ -105,7 +109,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)

if (ST.hasGFX90AInsts() &&

ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&

- !mayUseAGPRs(MF))

+ !mayUseAGPRs(F))

MayNeedAGPRs = false; // We will select all MAI with VGPR operands.

}

@@ -271,8 +275,32 @@ Register SIMachineFunctionInfo::addLDSKernelId() {

return ArgInfo.LDSKernelId.getRegister();

}

+void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR,

+ uint64_t Size, Align Alignment) {

+ // Skip if it is an entry function or the register is already added.

+ if (isEntryFunction() || WWMSpills.count(VGPR))

+ return;

+ WWMSpills.insert(std::make_pair(

+ VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment)));

+// Separate out the callee-saved and scratch registers.

+void SIMachineFunctionInfo::splitWWMSpillRegisters(

+ MachineFunction &MF,

+ SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,

+ SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const {

+ const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();

+ for (auto &Reg : WWMSpills) {

+ if (isCalleeSavedReg(CSRegs, Reg.first))

+ CalleeSavedRegs.push_back(Reg);

+ else

+ ScratchRegs.push_back(Reg);

+ }

bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,

- MCPhysReg Reg) {

+ MCPhysReg Reg) const {

for (unsigned I = 0; CSRegs[I]; ++I) {

if (CSRegs[I] == Reg)

return true;

@@ -281,30 +309,74 @@ bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,

return false;

}

-/// \p returns true if \p NumLanes slots are available in VGPRs already used for

-/// SGPR spilling.

-//

-// FIXME: This only works after processFunctionBeforeFrameFinalized

-bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,

- unsigned NumNeed) const {

+bool SIMachineFunctionInfo::allocateVGPRForSGPRSpills(MachineFunction &MF,

+ int FI,

+ unsigned LaneIndex) {

const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

- unsigned WaveSize = ST.getWavefrontSize();

- return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();

+ const SIRegisterInfo *TRI = ST.getRegisterInfo();

+ MachineRegisterInfo &MRI = MF.getRegInfo();

+ Register LaneVGPR;

+ if (!LaneIndex) {

+ LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);

+ if (LaneVGPR == AMDGPU::NoRegister) {

+ // We have no VGPRs left for spilling SGPRs. Reset because we will not

+ // partially spill the SGPR to VGPRs.

+ SGPRSpillToVGPRLanes.erase(FI);

+ return false;

+ }

+ SpillVGPRs.push_back(LaneVGPR);

+ // Add this register as live-in to all blocks to avoid machine verifier

+ // complaining about use of an undefined physical register.

+ for (MachineBasicBlock &BB : MF)

+ BB.addLiveIn(LaneVGPR);

+ } else {

+ LaneVGPR = SpillVGPRs.back();

+ }

+ SGPRSpillToVGPRLanes[FI].push_back(

+ SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));

+ return true;

}

-/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.

-bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,

- int FI) {

- std::vector<SIRegisterInfo::SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

+bool SIMachineFunctionInfo::allocateVGPRForPrologEpilogSGPRSpills(

+ MachineFunction &MF, int FI, unsigned LaneIndex) {

+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

+ const SIRegisterInfo *TRI = ST.getRegisterInfo();

+ MachineRegisterInfo &MRI = MF.getRegInfo();

+ Register LaneVGPR;

+ if (!LaneIndex) {

+ LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);

+ if (LaneVGPR == AMDGPU::NoRegister) {

+ // We have no VGPRs left for spilling SGPRs. Reset because we will not

+ // partially spill the SGPR to VGPRs.

+ PrologEpilogSGPRSpillToVGPRLanes.erase(FI);

+ return false;

+ }

+ allocateWWMSpill(MF, LaneVGPR);

+ } else {

+ LaneVGPR = WWMSpills.back().first;

+ }

+ PrologEpilogSGPRSpillToVGPRLanes[FI].push_back(

+ SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));

+ return true;

+bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF,

+ int FI,

+ bool IsPrologEpilog) {

+ std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =

+ IsPrologEpilog ? PrologEpilogSGPRSpillToVGPRLanes[FI]

+ : SGPRSpillToVGPRLanes[FI];

// This has already been allocated.

if (!SpillLanes.empty())

return true;

const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

- const SIRegisterInfo *TRI = ST.getRegisterInfo();

MachineFrameInfo &FrameInfo = MF.getFrameInfo();

- MachineRegisterInfo &MRI = MF.getRegInfo();

unsigned WaveSize = ST.getWavefrontSize();

unsigned Size = FrameInfo.getObjectSize(FI);

@@ -314,51 +386,23 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,

return false;

assert(Size >= 4 && "invalid sgpr spill size");

- assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

- // Make sure to handle the case where a wide SGPR spill may span between two

- // VGPRs.

- for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {

- Register LaneVGPR;

- unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

- if (VGPRIndex == 0) {

- LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);

- if (LaneVGPR == AMDGPU::NoRegister) {

- // We have no VGPRs left for spilling SGPRs. Reset because we will not

- // partially spill the SGPR to VGPRs.

- SGPRToVGPRSpills.erase(FI);

- NumVGPRSpillLanes -= I;

- // FIXME: We can run out of free registers with split allocation if

- // IPRA is enabled and a called function already uses every VGPR.

-#if 0

- DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),

- "VGPRs for SGPR spilling",

- 0, DS_Error);

- MF.getFunction().getContext().diagnose(DiagOutOfRegs);

-#endif

- return false;

- }

+ assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&

+ "not spilling SGPRs to VGPRs");

- Optional<int> SpillFI;

- // We need to preserve inactive lanes, so always save, even caller-save

- // registers.

- if (!isEntryFunction()) {

- SpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));

- }

+ unsigned &NumSpillLanes =

+ IsPrologEpilog ? NumVGPRPrologEpilogSpillLanes : NumVGPRSpillLanes;

- SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));

+ for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {

+ unsigned LaneIndex = (NumSpillLanes % WaveSize);

- // Add this register as live-in to all blocks to avoid machine verifier

- // complaining about use of an undefined physical register.

- for (MachineBasicBlock &BB : MF)

- BB.addLiveIn(LaneVGPR);

- } else {

- LaneVGPR = SpillVGPRs.back().VGPR;

+ bool Allocated =

+ IsPrologEpilog

+ ? allocateVGPRForPrologEpilogSGPRSpills(MF, FI, LaneIndex)

+ : allocateVGPRForSGPRSpills(MF, FI, LaneIndex);

+ if (!Allocated) {

+ NumSpillLanes -= I;

+ return false;

}

- SpillLanes.push_back(SIRegisterInfo::SpilledReg(LaneVGPR, VGPRIndex));

}

return true;

@@ -426,6 +470,7 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,

OtherUsedRegs.set(*NextSpillReg);

SpillRegs.push_back(*NextSpillReg);

+ MRI.reserveReg(*NextSpillReg, TRI);

Spill.Lanes[I] = *NextSpillReg++;

}

@@ -434,28 +479,26 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,

bool SIMachineFunctionInfo::removeDeadFrameIndices(

MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {

- // Remove dead frame indices from function frame, however keep FP & BP since

- // spills for them haven't been inserted yet. And also make sure to remove the

- // frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could

- // result in an unexpected side effect and bug, in case of any re-mapping of

- // freed frame indices by later pass(es) like "stack slot coloring".

- for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) {

- if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) {

- MFI.RemoveStackObject(R.first);

- SGPRToVGPRSpills.erase(R.first);

- }

+ // Remove dead frame indices from function frame. And also make sure to remove

+ // the frame indices from `SGPRSpillToVGPRLanes` data structure, otherwise, it

+ // could result in an unexpected side effect and bug, in case of any

+ // re-mapping of freed frame indices by later pass(es) like "stack slot

+ // coloring".

+ for (auto &R : make_early_inc_range(SGPRSpillToVGPRLanes)) {

+ MFI.RemoveStackObject(R.first);

+ SGPRSpillToVGPRLanes.erase(R.first);

}

bool HaveSGPRToMemory = false;

if (ResetSGPRSpillStackIDs) {

- // All other SPGRs must be allocated on the default stack, so reset the

+ // All other SGPRs must be allocated on the default stack, so reset the

// stack ID.

- for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;

- ++i) {

- if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {

- if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {

- MFI.setStackID(i, TargetStackID::Default);

+ for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E;

+ ++I) {

+ if (!checkIndexInPrologEpilogSGPRSpills(I)) {

+ if (MFI.getStackID(I) == TargetStackID::SGPRSpill) {

+ MFI.setStackID(I, TargetStackID::Default);

HaveSGPRToMemory = true;

}

@@ -470,20 +513,6 @@ bool SIMachineFunctionInfo::removeDeadFrameIndices(

return HaveSGPRToMemory;

}

-void SIMachineFunctionInfo::allocateWWMReservedSpillSlots(

- MachineFrameInfo &MFI, const SIRegisterInfo &TRI) {

- assert(WWMReservedFrameIndexes.empty());

- WWMReservedFrameIndexes.resize(WWMReservedRegs.size());

- int I = 0;

- for (Register VGPR : WWMReservedRegs) {

- const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR);

- WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject(

- TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC));

- }

int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,

const SIRegisterInfo &TRI) {

if (ScavengeFI)

@@ -539,12 +568,12 @@ static yaml::StringValue regToString(Register Reg,

return Dest;

}

-static Optional<yaml::SIArgumentInfo>

+static std::optional<yaml::SIArgumentInfo>

convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,

const TargetRegisterInfo &TRI) {

yaml::SIArgumentInfo AI;

- auto convertArg = [&](Optional<yaml::SIArgument> &A,

+ auto convertArg = [&](std::optional<yaml::SIArgument> &A,

const ArgDescriptor &Arg) {

if (!Arg)

return false;

@@ -588,7 +617,7 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,

if (Any)

return AI;

- return None;

+ return std::nullopt;

}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(

@@ -610,7 +639,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(

BytesInStackArgArea(MFI.getBytesInStackArgArea()),

ReturnsVoid(MFI.returnsVoid()),

ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {

- for (Register Reg : MFI.WWMReservedRegs)

+ for (Register Reg : MFI.getWWMReservedRegs())

WWMReservedRegs.push_back(regToString(Reg, TRI));

if (MFI.getVGPRForAGPRCopy())

@@ -652,19 +681,19 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(

Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,

SourceMgr::DK_Error, toString(FIOrErr.takeError()),

- "", None, None);

+ "", std::nullopt, std::nullopt);

SourceRange = YamlMFI.ScavengeFI->SourceRange;

return true;

}

ScavengeFI = *FIOrErr;

} else {

- ScavengeFI = None;

+ ScavengeFI = std::nullopt;

}

return false;

}

-bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const {

- for (const BasicBlock &BB : MF.getFunction()) {

+bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {

+ for (const BasicBlock &BB : F) {

for (const Instruction &I : BB) {

const auto *CB = dyn_cast<CallBase>(&I);

if (!CB)