Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp  230
1 file changed, 119 insertions(+), 111 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 6078f4a0577a..a57e81eb4e4a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -749,7 +749,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
return;
}
- const MachineFrameInfo &MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -789,19 +789,13 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
*Reg.FI);
}
- // VGPRs used for Whole Wave Mode
- for (const auto &Reg : FuncInfo->WWMReservedRegs) {
- auto VGPR = Reg.first;
- auto FI = Reg.second;
- if (!FI)
- continue;
-
+ for (auto ReservedWWM : FuncInfo->wwmAllocation()) {
if (!ScratchExecCopy)
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true);
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR,
- *FI);
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
+ std::get<0>(ReservedWWM), std::get<1>(ReservedWWM));
}
if (ScratchExecCopy) {
@@ -813,27 +807,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
LiveRegs.addReg(ScratchExecCopy);
}
- if (FPSaveIndex && spilledToMemory(MF, *FPSaveIndex)) {
- const int FramePtrFI = *FPSaveIndex;
- assert(!MFI.isDeadObjectIndex(FramePtrFI));
-
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
-
- MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
- if (!TmpVGPR)
- report_fatal_error("failed to find free scratch register");
-
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
- .addReg(FramePtrReg);
-
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
- FramePtrFI);
- }
-
- if (BPSaveIndex && spilledToMemory(MF, *BPSaveIndex)) {
- const int BasePtrFI = *BPSaveIndex;
- assert(!MFI.isDeadObjectIndex(BasePtrFI));
+ auto SaveSGPRToMemory = [&](Register Reg, const int FI) {
+ assert(!MFI.isDeadObjectIndex(FI));
initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
@@ -843,44 +818,31 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
report_fatal_error("failed to find free scratch register");
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
- .addReg(BasePtrReg);
+ .addReg(Reg);
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
- BasePtrFI);
- }
+ FI);
+ };
- // In this case, spill the FP to a reserved VGPR.
- if (FPSaveIndex && !spilledToMemory(MF, *FPSaveIndex)) {
- const int FramePtrFI = *FPSaveIndex;
- assert(!MFI.isDeadObjectIndex(FramePtrFI));
+ auto SaveSGPRToVGPRLane = [&](Register Reg, const int FI) {
+ assert(!MFI.isDeadObjectIndex(FI));
- assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
- ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
- FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
+ assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+ ArrayRef<SIRegisterInfo::SpilledReg> Spill =
+ FuncInfo->getSGPRToVGPRSpills(FI);
assert(Spill.size() == 1);
- // Save FP before setting it up.
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
- .addReg(FramePtrReg)
+ .addReg(Reg)
.addImm(Spill[0].Lane)
.addReg(Spill[0].VGPR, RegState::Undef);
- }
+ };
- // In this case, spill the BP to a reserved VGPR.
- if (BPSaveIndex && !spilledToMemory(MF, *BPSaveIndex)) {
- const int BasePtrFI = *BPSaveIndex;
- assert(!MFI.isDeadObjectIndex(BasePtrFI));
-
- assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
- ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
- FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
- assert(Spill.size() == 1);
-
- // Save BP before setting it up.
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
- .addReg(BasePtrReg)
- .addImm(Spill[0].Lane)
- .addReg(Spill[0].VGPR, RegState::Undef);
+ if (FPSaveIndex) {
+ if (spilledToMemory(MF, *FPSaveIndex))
+ SaveSGPRToMemory(FramePtrReg, *FPSaveIndex);
+ else
+ SaveSGPRToVGPRLane(FramePtrReg, *FPSaveIndex);
}
// Emit the copy if we need an FP, and are using a free SGPR to save it.
@@ -891,6 +853,13 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
}
+ if (BPSaveIndex) {
+ if (spilledToMemory(MF, *BPSaveIndex))
+ SaveSGPRToMemory(BasePtrReg, *BPSaveIndex);
+ else
+ SaveSGPRToVGPRLane(BasePtrReg, *BPSaveIndex);
+ }
+
// Emit the copy if we need a BP, and are using a free SGPR to save it.
if (FuncInfo->SGPRForBPSaveRestoreCopy) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
@@ -1034,56 +1003,44 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameDestroy);
}
+ auto RestoreSGPRFromMemory = [&](Register Reg, const int FI) {
+ initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
+ MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
+ MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+ if (!TmpVGPR)
+ report_fatal_error("failed to find free scratch register");
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
+ FI);
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), Reg)
+ .addReg(TmpVGPR, RegState::Kill);
+ };
+
+ auto RestoreSGPRFromVGPRLane = [&](Register Reg, const int FI) {
+ assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+ ArrayRef<SIRegisterInfo::SpilledReg> Spill =
+ FuncInfo->getSGPRToVGPRSpills(FI);
+ assert(Spill.size() == 1);
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), Reg)
+ .addReg(Spill[0].VGPR)
+ .addImm(Spill[0].Lane);
+ };
+
if (FPSaveIndex) {
const int FramePtrFI = *FPSaveIndex;
assert(!MFI.isDeadObjectIndex(FramePtrFI));
- if (spilledToMemory(MF, FramePtrFI)) {
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
-
- MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
- if (!TmpVGPR)
- report_fatal_error("failed to find free scratch register");
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
- TmpVGPR, FramePtrFI);
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
- .addReg(TmpVGPR, RegState::Kill);
- } else {
- // Reload from VGPR spill.
- assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
- ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
- FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
- assert(Spill.size() == 1);
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg)
- .addReg(Spill[0].VGPR)
- .addImm(Spill[0].Lane);
- }
+ if (spilledToMemory(MF, FramePtrFI))
+ RestoreSGPRFromMemory(FramePtrReg, FramePtrFI);
+ else
+ RestoreSGPRFromVGPRLane(FramePtrReg, FramePtrFI);
}
if (BPSaveIndex) {
const int BasePtrFI = *BPSaveIndex;
assert(!MFI.isDeadObjectIndex(BasePtrFI));
- if (spilledToMemory(MF, BasePtrFI)) {
- initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
-
- MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
- MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
- if (!TmpVGPR)
- report_fatal_error("failed to find free scratch register");
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
- TmpVGPR, BasePtrFI);
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
- .addReg(TmpVGPR, RegState::Kill);
- } else {
- // Reload from VGPR spill.
- assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
- ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
- FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
- assert(Spill.size() == 1);
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg)
- .addReg(Spill[0].VGPR)
- .addImm(Spill[0].Lane);
- }
+ if (spilledToMemory(MF, BasePtrFI))
+ RestoreSGPRFromMemory(BasePtrReg, BasePtrFI);
+ else
+ RestoreSGPRFromVGPRLane(BasePtrReg, BasePtrFI);
}
Register ScratchExecCopy;
@@ -1100,18 +1057,13 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
Reg.VGPR, *Reg.FI);
}
- for (const auto &Reg : FuncInfo->WWMReservedRegs) {
- auto VGPR = Reg.first;
- auto FI = Reg.second;
- if (!FI)
- continue;
-
+ for (auto ReservedWWM : FuncInfo->wwmAllocation()) {
if (!ScratchExecCopy)
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR,
- *FI);
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
+ std::get<0>(ReservedWWM), std::get<1>(ReservedWWM));
}
if (ScratchExecCopy) {
@@ -1161,6 +1113,11 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ if (!FuncInfo->isEntryFunction()) {
+ // Spill VGPRs used for Whole Wave Mode
+ FuncInfo->allocateWWMReservedSpillSlots(MFI, *TRI);
+ }
+
const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
&& EnableSpillVGPRToAGPR;
@@ -1200,7 +1157,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
}
}
- // Stack slot coloring may assign different objets to the same stack slot.
+ // Stack slot coloring may assign different objects to the same stack slot.
// If not, then the VGPR to AGPR spill slot is dead.
for (unsigned FI : SpillFIs.set_bits())
if (!NonVGPRSpillFIs.test(FI))
@@ -1229,7 +1186,11 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
}
}
- FuncInfo->removeDeadFrameIndices(MFI);
+ // At this point we've already allocated all spilled SGPRs to VGPRs if we
+ // can. Any remaining SGPR spills will go to memory, so move them back to the
+ // default stack.
+ bool HaveSGPRToVMemSpill =
+ FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
assert(allSGPRSpillsAreDead(MF) &&
"SGPR spill should have been removed in SILowerSGPRSpills");
@@ -1241,6 +1202,39 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
// Add an emergency spill slot
RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
+
+ // If we are spilling SGPRs to memory with a large frame, we may need a
+ // second VGPR emergency frame index.
+ if (HaveSGPRToVMemSpill &&
+ allocateScavengingFrameIndexesNearIncomingSP(MF)) {
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false));
+ }
+ }
+}
+
+void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced(
+ MachineFunction &MF, RegScavenger *RS) const {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+
+ if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
+ // On gfx908, we had initially reserved the highest available VGPR for AGPR
+ // copy. Now that we are done with RA, check if there exists an unused VGPR
+ // lower than the one reserved before RA. If one exists, use it for the AGPR
+ // copy instead of the one reserved before RA.
+ Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
+ Register UnusedLowVGPR =
+ TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
+ if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
+ TRI->getHWRegIndex(VGPRForAGPRCopy))) {
+ // The call to setVGPRForAGPRCopy() should happen before calling
+ // freezeReservedRegs() so that getReservedRegs() can reserve this newly
+ // identified VGPR (for AGPR copy).
+ FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
+ MRI.freezeReservedRegs(MF);
+ }
}
}
@@ -1333,6 +1327,20 @@ void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
// FP will be specially managed like SP.
if (WillHaveFP || hasFP(MF))
SavedRegs.reset(MFI->getFrameOffsetReg());
+
+ // Return address use with the return instruction is hidden through the
+ // SI_RETURN pseudo. Because of that, and because IPRA computes actual
+ // register usage rather than using the CSR list, the clobbering of the
+ // return address by function calls (D117243) or otherwise (D120922) is not
+ // seen by IPRA's register usage collection. This ensures the return address
+ // is saved and restored in those scenarios.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ Register RetAddrReg = TRI->getReturnAddressReg(MF);
+ if (!MFI->isEntryFunction() &&
+ (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
+ SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
+ SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
+ }
}
bool SIFrameLowering::assignCalleeSavedSpillSlots(