summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SIFrameLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIFrameLowering.cpp')
-rw-r--r--lib/Target/AMDGPU/SIFrameLowering.cpp120
1 files changed, 82 insertions, 38 deletions
diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp
index abe6af9a6d3fc..86e3b37b09e94 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -101,10 +101,12 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
const SIRegisterInfo *TRI,
SIMachineFunctionInfo *MFI,
MachineFunction &MF) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
// We need to insert initialization of the scratch resource descriptor.
unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
- if (ScratchRsrcReg == AMDGPU::NoRegister)
+ if (ScratchRsrcReg == AMDGPU::NoRegister ||
+ !MRI.isPhysRegUsed(ScratchRsrcReg))
return AMDGPU::NoRegister;
if (ST.hasSGPRInitBug() ||
@@ -122,8 +124,6 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
// We find the resource first because it has an alignment requirement.
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
@@ -143,24 +143,34 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
return ScratchRsrcReg;
}
-unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
+// Shift down registers reserved for the scratch wave offset and stack pointer
+// SGPRs.
+std::pair<unsigned, unsigned>
+SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
const SISubtarget &ST,
const SIInstrInfo *TII,
const SIRegisterInfo *TRI,
SIMachineFunctionInfo *MFI,
MachineFunction &MF) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
- if (ST.hasSGPRInitBug() ||
- ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF))
- return ScratchWaveOffsetReg;
- unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
- MachineRegisterInfo &MRI = MF.getRegInfo();
+ // No replacement necessary.
+ if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
+ !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) {
+ assert(MFI->getStackPtrOffsetReg() == AMDGPU::NoRegister);
+ return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister);
+ }
+
+ unsigned SPReg = MFI->getStackPtrOffsetReg();
+ if (ST.hasSGPRInitBug())
+ return std::make_pair(ScratchWaveOffsetReg, SPReg);
+
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
if (NumPreloaded > AllSGPRs.size())
- return ScratchWaveOffsetReg;
+ return std::make_pair(ScratchWaveOffsetReg, SPReg);
AllSGPRs = AllSGPRs.slice(NumPreloaded);
@@ -175,26 +185,42 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
// register from the list to consider, it means that when this
// register is being used for the scratch wave offset and there
// are no other free SGPRs, then the value will stay in this register.
+ // + 1 if stack pointer is used.
// ----
- // 13
- if (AllSGPRs.size() < 13)
- return ScratchWaveOffsetReg;
+ // 13 (+1)
+ unsigned ReservedRegCount = 13;
+ if (SPReg != AMDGPU::NoRegister)
+ ++ReservedRegCount;
- for (MCPhysReg Reg : AllSGPRs.drop_back(13)) {
+ if (AllSGPRs.size() < ReservedRegCount)
+ return std::make_pair(ScratchWaveOffsetReg, SPReg);
+
+ bool HandledScratchWaveOffsetReg =
+ ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
+
+ for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
// Pick the first unallocated SGPR. Be careful not to pick an alias of the
// scratch descriptor, since we haven’t added its uses yet.
- if (!MRI.isPhysRegUsed(Reg)) {
- if (!MRI.isAllocatable(Reg) ||
- TRI->isSubRegisterEq(ScratchRsrcReg, Reg))
- continue;
+ if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
+ if (!HandledScratchWaveOffsetReg) {
+ HandledScratchWaveOffsetReg = true;
- MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
- MFI->setScratchWaveOffsetReg(Reg);
- return Reg;
+ MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
+ MFI->setScratchWaveOffsetReg(Reg);
+ ScratchWaveOffsetReg = Reg;
+ } else {
+ if (SPReg == AMDGPU::NoRegister)
+ break;
+
+ MRI.replaceRegWith(SPReg, Reg);
+ MFI->setStackPtrOffsetReg(Reg);
+ SPReg = Reg;
+ break;
+ }
}
}
- return ScratchWaveOffsetReg;
+ return std::make_pair(ScratchWaveOffsetReg, SPReg);
}
void SIFrameLowering::emitPrologue(MachineFunction &MF,
@@ -220,18 +246,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
- unsigned ScratchRsrcReg
- = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
- unsigned ScratchWaveOffsetReg
- = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
-
- if (ScratchRsrcReg == AMDGPU::NoRegister) {
- assert(ScratchWaveOffsetReg == AMDGPU::NoRegister);
- return;
- }
-
- assert(!TRI->isSubRegister(ScratchRsrcReg, ScratchWaveOffsetReg));
-
// We need to do the replacement of the private segment buffer and wave offset
// register even if there are no stack objects. There could be stores to undef
// or a constant without an associated object.
@@ -244,19 +258,49 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit())
emitFlatScratchInit(ST, MF, MBB);
+ unsigned SPReg = MFI->getStackPtrOffsetReg();
+ if (SPReg != AMDGPU::NoRegister) {
+ DebugLoc DL;
+ int64_t StackSize = MF.getFrameInfo().getStackSize();
+
+ if (StackSize == 0) {
+ BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg)
+ .addReg(MFI->getScratchWaveOffsetReg());
+ } else {
+ BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
+ .addReg(MFI->getScratchWaveOffsetReg())
+ .addImm(StackSize * ST.getWavefrontSize());
+ }
+ }
+
+ unsigned ScratchRsrcReg
+ = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
+
+ unsigned ScratchWaveOffsetReg;
+ std::tie(ScratchWaveOffsetReg, SPReg)
+ = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
+
+ // It's possible to have uses of only ScratchWaveOffsetReg without
+ // ScratchRsrcReg if it's only used for the initialization of flat_scratch,
+ // but the inverse is not true.
+ if (ScratchWaveOffsetReg == AMDGPU::NoRegister) {
+ assert(ScratchRsrcReg == AMDGPU::NoRegister);
+ return;
+ }
+
// We need to insert initialization of the scratch resource descriptor.
unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
-
unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) {
PreloadedPrivateBufferReg = TRI->getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
}
- bool OffsetRegUsed = !MRI.use_empty(ScratchWaveOffsetReg);
- bool ResourceRegUsed = !MRI.use_empty(ScratchRsrcReg);
+ bool OffsetRegUsed = MRI.isPhysRegUsed(ScratchWaveOffsetReg);
+ bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&
+ MRI.isPhysRegUsed(ScratchRsrcReg);
// We added live-ins during argument lowering, but since they were not used
// they were deleted. We're adding the uses now, so add them back.
@@ -469,7 +513,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
// this also ensures we shouldn't need a register for the offset when
// emergency scavenging.
int ScavengeFI = MFI.CreateFixedObject(
- AMDGPU::SGPR_32RegClass.getSize(), 0, false);
+ TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
RS->addScavengingFrameIndex(ScavengeFI);
}
}