aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp126
1 files changed, 110 insertions, 16 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index cca8565c9ff9..0504c59ebd9e 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -31,6 +31,9 @@ using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
+ BufferPSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
+ ImagePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
+ GWSResourcePSV(static_cast<const AMDGPUTargetMachine &>(MF.getTarget())),
PrivateSegmentBuffer(false),
DispatchPtr(false),
QueuePtr(false),
@@ -48,8 +51,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ImplicitBufferPtr(false),
ImplicitArgPtr(false),
GITPtrHigh(0xffffffff),
- HighBitsOf32BitAddress(0),
- GDSSize(0) {
+ HighBitsOf32BitAddress(0) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const Function &F = MF.getFunction();
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
@@ -74,6 +76,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
}
+ MayNeedAGPRs = ST.hasMAIInsts();
+
if (!isEntryFunction()) {
if (CC != CallingConv::AMDGPU_Gfx)
ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
@@ -97,6 +101,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ImplicitArgPtr = false;
MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
MaxKernArgAlign);
+
+ if (ST.hasGFX90AInsts() &&
+ ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
+ !mayUseAGPRs(MF))
+ MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
}
bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
@@ -177,9 +186,20 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (!S.empty())
S.consumeInteger(0, HighBitsOf32BitAddress);
- S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
- if (!S.empty())
- S.consumeInteger(0, GDSSize);
+ // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
+ // VGPR available at all times. For now, reserve the highest available VGPR.
+ // After RA, shift it to the lowest available unused VGPR if one exists.
+ if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
+ VGPRForAGPRCopy =
+ AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
+ }
+}
+
+// Produce a copy of this function info owned by \p DestMF, used when an
+// entire MachineFunction is cloned. Delegates to MachineFunction::cloneInfo,
+// which copy-constructs *this into DestMF's storage. \p Allocator and
+// \p Src2DstMBB are unused here; presumably no per-block state needs
+// remapping for this target — TODO(review): confirm.
+MachineFunctionInfo *SIMachineFunctionInfo::clone(
+ BumpPtrAllocator &Allocator, MachineFunction &DestMF,
+ const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
+ const {
+ return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);
}
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
@@ -265,7 +285,7 @@ bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
int FI) {
- std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
+ std::vector<SIRegisterInfo::SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
// This has already been allocated.
if (!SpillLanes.empty())
@@ -320,7 +340,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, SpillFI));
- // Add this register as live-in to all blocks to avoid machine verifer
+ // Add this register as live-in to all blocks to avoid machine verifier
// complaining about use of an undefined physical register.
for (MachineBasicBlock &BB : MF)
BB.addLiveIn(LaneVGPR);
@@ -328,7 +348,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
LaneVGPR = SpillVGPRs.back().VGPR;
}
- SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
+ SpillLanes.push_back(SIRegisterInfo::SpilledReg(LaneVGPR, VGPRIndex));
}
return true;
@@ -402,7 +422,8 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
return Spill.FullyAllocated;
}
-void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
+bool SIMachineFunctionInfo::removeDeadFrameIndices(
+ MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
// Remove dead frame indices from function frame, however keep FP & BP since
// spills for them haven't been inserted yet. And also make sure to remove the
// frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could
@@ -415,17 +436,42 @@ void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
}
}
- // All other SPGRs must be allocated on the default stack, so reset the stack
- // ID.
- for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
- ++i)
- if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
- MFI.setStackID(i, TargetStackID::Default);
+ bool HaveSGPRToMemory = false;
+
+ if (ResetSGPRSpillStackIDs) {
+ // All other SGPRs must be allocated on the default stack, so reset the
+ // stack ID.
+ for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
+ ++i) {
+ if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {
+ if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {
+ MFI.setStackID(i, TargetStackID::Default);
+ HaveSGPRToMemory = true;
+ }
+ }
+ }
+ }
for (auto &R : VGPRToAGPRSpills) {
if (R.second.IsDead)
MFI.RemoveStackObject(R.first);
}
+
+ return HaveSGPRToMemory;
+}
+
+// Create one spill stack slot for each WWM-reserved register, recording the
+// resulting frame indexes in WWMReservedFrameIndexes in the same order as
+// WWMReservedRegs.
+void SIMachineFunctionInfo::allocateWWMReservedSpillSlots(
+ MachineFrameInfo &MFI, const SIRegisterInfo &TRI) {
+ // Slots must not have been allocated already; this is expected to run once.
+ assert(WWMReservedFrameIndexes.empty());
+
+ WWMReservedFrameIndexes.resize(WWMReservedRegs.size());
+
+ int I = 0;
+ for (Register VGPR : WWMReservedRegs) {
+ // Size/align the slot from the register's class so any reserved WWM
+ // register (not just 32-bit VGPRs) gets a correctly sized spill slot.
+ const TargetRegisterClass *RC = TRI.getPhysRegClass(VGPR);
+ WWMReservedFrameIndexes[I++] = MFI.CreateSpillStackObject(
+ TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC));
+ }
}
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
@@ -539,6 +585,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
const llvm::MachineFunction &MF)
: ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
+ GDSSize(MFI.getGDSSize()),
DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
@@ -549,7 +596,14 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
+ BytesInStackArgArea(MFI.getBytesInStackArgArea()),
+ ReturnsVoid(MFI.returnsVoid()),
ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
+ for (Register Reg : MFI.WWMReservedRegs)
+ WWMReservedRegs.push_back(regToString(Reg, TRI));
+
+ if (MFI.getVGPRForAGPRCopy())
+ VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);
auto SFI = MFI.getOptionalScavengeFI();
if (SFI)
ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
@@ -563,8 +617,9 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
- MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
+ MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
LDSSize = YamlMFI.LDSSize;
+ GDSSize = YamlMFI.GDSSize;
DynLDSAlign = YamlMFI.DynLDSAlign;
HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
Occupancy = YamlMFI.Occupancy;
@@ -574,6 +629,8 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
WaveLimiter = YamlMFI.WaveLimiter;
HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
+ BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
+ ReturnsVoid = YamlMFI.ReturnsVoid;
if (YamlMFI.ScavengeFI) {
auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
@@ -595,10 +652,47 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
return false;
}
+// Conservatively scan the function's IR for anything that could require
+// AGPRs. Returns true (i.e. "may use AGPRs") for:
+//   - inline asm with an 'a'-prefixed register constraint,
+//   - indirect calls (called operand is not a Function), and
+//   - calls to any non-intrinsic function,
+// since callees outside this function cannot be analyzed here. Returns false
+// only when every call site is provably AGPR-free from this scan.
+bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const {
+ for (const BasicBlock &BB : MF.getFunction()) {
+ for (const Instruction &I : BB) {
+ const auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
+ continue;
+
+ if (CB->isInlineAsm()) {
+ // NOTE(review): IA is dereferenced without a null check; relies on
+ // isInlineAsm() guaranteeing the called operand is an InlineAsm —
+ // cast<> would express that invariant better than dyn_cast<>.
+ const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
+ for (const auto &CI : IA->ParseConstraints()) {
+ for (StringRef Code : CI.Codes) {
+ // Strip a leading '{' so explicit register names ("{a0}") and
+ // bare constraint codes ("a") are both matched by the prefix test.
+ Code.consume_front("{");
+ if (Code.startswith("a"))
+ return true;
+ }
+ }
+ continue;
+ }
+
+ // Direct vs. indirect call: look through pointer casts to find the
+ // callee. No callee means an indirect call, which we cannot analyze.
+ const Function *Callee =
+ dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
+ if (!Callee)
+ return true;
+
+ // Any real (non-intrinsic) callee may itself use AGPRs.
+ if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
if (UsesAGPRs)
return *UsesAGPRs;
+ if (!mayNeedAGPRs()) {
+ UsesAGPRs = false;
+ return false;
+ }
+
if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
MF.getFrameInfo().hasCalls()) {
UsesAGPRs = true;