diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 230 |
1 files changed, 119 insertions, 111 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 6078f4a0577a..a57e81eb4e4a 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -749,7 +749,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, return; } - const MachineFrameInfo &MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); @@ -789,19 +789,13 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, *Reg.FI); } - // VGPRs used for Whole Wave Mode - for (const auto &Reg : FuncInfo->WWMReservedRegs) { - auto VGPR = Reg.first; - auto FI = Reg.second; - if (!FI) - continue; - + for (auto ReservedWWM : FuncInfo->wwmAllocation()) { if (!ScratchExecCopy) ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true); - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR, - *FI); + buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, + std::get<0>(ReservedWWM), std::get<1>(ReservedWWM)); } if (ScratchExecCopy) { @@ -813,27 +807,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, LiveRegs.addReg(ScratchExecCopy); } - if (FPSaveIndex && spilledToMemory(MF, *FPSaveIndex)) { - const int FramePtrFI = *FPSaveIndex; - assert(!MFI.isDeadObjectIndex(FramePtrFI)); - - initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true); - - MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( - MRI, LiveRegs, AMDGPU::VGPR_32RegClass); - if (!TmpVGPR) - report_fatal_error("failed to find free scratch register"); - - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) - .addReg(FramePtrReg); - - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR, - FramePtrFI); - } 
- - if (BPSaveIndex && spilledToMemory(MF, *BPSaveIndex)) { - const int BasePtrFI = *BPSaveIndex; - assert(!MFI.isDeadObjectIndex(BasePtrFI)); + auto SaveSGPRToMemory = [&](Register Reg, const int FI) { + assert(!MFI.isDeadObjectIndex(FI)); initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true); @@ -843,44 +818,31 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, report_fatal_error("failed to find free scratch register"); BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) - .addReg(BasePtrReg); + .addReg(Reg); buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR, - BasePtrFI); - } + FI); + }; - // In this case, spill the FP to a reserved VGPR. - if (FPSaveIndex && !spilledToMemory(MF, *FPSaveIndex)) { - const int FramePtrFI = *FPSaveIndex; - assert(!MFI.isDeadObjectIndex(FramePtrFI)); + auto SaveSGPRToVGPRLane = [&](Register Reg, const int FI) { + assert(!MFI.isDeadObjectIndex(FI)); - assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill); - ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = - FuncInfo->getSGPRToVGPRSpills(FramePtrFI); + assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); + ArrayRef<SIRegisterInfo::SpilledReg> Spill = + FuncInfo->getSGPRToVGPRSpills(FI); assert(Spill.size() == 1); - // Save FP before setting it up. BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) - .addReg(FramePtrReg) + .addReg(Reg) .addImm(Spill[0].Lane) .addReg(Spill[0].VGPR, RegState::Undef); - } + }; - // In this case, spill the BP to a reserved VGPR. - if (BPSaveIndex && !spilledToMemory(MF, *BPSaveIndex)) { - const int BasePtrFI = *BPSaveIndex; - assert(!MFI.isDeadObjectIndex(BasePtrFI)); - - assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill); - ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = - FuncInfo->getSGPRToVGPRSpills(BasePtrFI); - assert(Spill.size() == 1); - - // Save BP before setting it up. 
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) - .addReg(BasePtrReg) - .addImm(Spill[0].Lane) - .addReg(Spill[0].VGPR, RegState::Undef); + if (FPSaveIndex) { + if (spilledToMemory(MF, *FPSaveIndex)) + SaveSGPRToMemory(FramePtrReg, *FPSaveIndex); + else + SaveSGPRToVGPRLane(FramePtrReg, *FPSaveIndex); } // Emit the copy if we need an FP, and are using a free SGPR to save it. @@ -891,6 +853,13 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } + if (BPSaveIndex) { + if (spilledToMemory(MF, *BPSaveIndex)) + SaveSGPRToMemory(BasePtrReg, *BPSaveIndex); + else + SaveSGPRToVGPRLane(BasePtrReg, *BPSaveIndex); + } + // Emit the copy if we need a BP, and are using a free SGPR to save it. if (FuncInfo->SGPRForBPSaveRestoreCopy) { BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), @@ -1034,56 +1003,44 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameDestroy); } + auto RestoreSGPRFromMemory = [&](Register Reg, const int FI) { + initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false); + MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( + MRI, LiveRegs, AMDGPU::VGPR_32RegClass); + if (!TmpVGPR) + report_fatal_error("failed to find free scratch register"); + buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR, + FI); + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), Reg) + .addReg(TmpVGPR, RegState::Kill); + }; + + auto RestoreSGPRFromVGPRLane = [&](Register Reg, const int FI) { + assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); + ArrayRef<SIRegisterInfo::SpilledReg> Spill = + FuncInfo->getSGPRToVGPRSpills(FI); + assert(Spill.size() == 1); + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), Reg) + .addReg(Spill[0].VGPR) + .addImm(Spill[0].Lane); + }; + if (FPSaveIndex) { const int FramePtrFI = *FPSaveIndex; assert(!MFI.isDeadObjectIndex(FramePtrFI)); - if (spilledToMemory(MF, FramePtrFI)) { - 
initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false); - - MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( - MRI, LiveRegs, AMDGPU::VGPR_32RegClass); - if (!TmpVGPR) - report_fatal_error("failed to find free scratch register"); - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, - TmpVGPR, FramePtrFI); - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg) - .addReg(TmpVGPR, RegState::Kill); - } else { - // Reload from VGPR spill. - assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill); - ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = - FuncInfo->getSGPRToVGPRSpills(FramePtrFI); - assert(Spill.size() == 1); - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg) - .addReg(Spill[0].VGPR) - .addImm(Spill[0].Lane); - } + if (spilledToMemory(MF, FramePtrFI)) + RestoreSGPRFromMemory(FramePtrReg, FramePtrFI); + else + RestoreSGPRFromVGPRLane(FramePtrReg, FramePtrFI); } if (BPSaveIndex) { const int BasePtrFI = *BPSaveIndex; assert(!MFI.isDeadObjectIndex(BasePtrFI)); - if (spilledToMemory(MF, BasePtrFI)) { - initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false); - - MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( - MRI, LiveRegs, AMDGPU::VGPR_32RegClass); - if (!TmpVGPR) - report_fatal_error("failed to find free scratch register"); - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, - TmpVGPR, BasePtrFI); - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg) - .addReg(TmpVGPR, RegState::Kill); - } else { - // Reload from VGPR spill. 
- assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill); - ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = - FuncInfo->getSGPRToVGPRSpills(BasePtrFI); - assert(Spill.size() == 1); - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg) - .addReg(Spill[0].VGPR) - .addImm(Spill[0].Lane); - } + if (spilledToMemory(MF, BasePtrFI)) + RestoreSGPRFromMemory(BasePtrReg, BasePtrFI); + else + RestoreSGPRFromVGPRLane(BasePtrReg, BasePtrFI); } Register ScratchExecCopy; @@ -1100,18 +1057,13 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, Reg.VGPR, *Reg.FI); } - for (const auto &Reg : FuncInfo->WWMReservedRegs) { - auto VGPR = Reg.first; - auto FI = Reg.second; - if (!FI) - continue; - + for (auto ReservedWWM : FuncInfo->wwmAllocation()) { if (!ScratchExecCopy) ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false); - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR, - *FI); + buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, + std::get<0>(ReservedWWM), std::get<1>(ReservedWWM)); } if (ScratchExecCopy) { @@ -1161,6 +1113,11 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); + if (!FuncInfo->isEntryFunction()) { + // Spill VGPRs used for Whole Wave Mode + FuncInfo->allocateWWMReservedSpillSlots(MFI, *TRI); + } + const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() && EnableSpillVGPRToAGPR; @@ -1200,7 +1157,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( } } - // Stack slot coloring may assign different objets to the same stack slot. + // Stack slot coloring may assign different objects to the same stack slot. // If not, then the VGPR to AGPR spill slot is dead. 
for (unsigned FI : SpillFIs.set_bits()) if (!NonVGPRSpillFIs.test(FI)) @@ -1229,7 +1186,11 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( } } - FuncInfo->removeDeadFrameIndices(MFI); + // At this point we've already allocated all spilled SGPRs to VGPRs if we + // can. Any remaining SGPR spills will go to memory, so move them back to the + // default stack. + bool HaveSGPRToVMemSpill = + FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true); assert(allSGPRSpillsAreDead(MF) && "SGPR spill should have been removed in SILowerSGPRSpills"); @@ -1241,6 +1202,39 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( // Add an emergency spill slot RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI)); + + // If we are spilling SGPRs to memory with a large frame, we may need a + // second VGPR emergency frame index. + if (HaveSGPRToVMemSpill && + allocateScavengingFrameIndexesNearIncomingSP(MF)) { + RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false)); + } + } +} + +void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced( + MachineFunction &MF, RegScavenger *RS) const { + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); + + if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) { + // On gfx908, we had initially reserved highest available VGPR for AGPR + // copy. Now since we are done with RA, check if there exists an unused VGPR + // which is lower than the earlier reserved VGPR before RA. If one exists, + // use it for AGPR copy instead of one reserved before RA. 
+ Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy(); + Register UnusedLowVGPR = + TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF); + if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) < + TRI->getHWRegIndex(VGPRForAGPRCopy))) { + // Call to setVGPRForAGPRCopy() should happen first before calling + // freezeReservedRegs() so that getReservedRegs() can reserve this newly + // identified VGPR (for AGPR copy). + FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR); + MRI.freezeReservedRegs(MF); + } } } @@ -1333,6 +1327,20 @@ void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF, // FP will be specially managed like SP. if (WillHaveFP || hasFP(MF)) SavedRegs.reset(MFI->getFrameOffsetReg()); + + // Return address use with return instruction is hidden through the SI_RETURN + // pseudo. Given that and since the IPRA computes actual register usage and + // does not use CSR list, the clobbering of return address by function calls + // (D117243) or otherwise (D120922) is ignored/not seen by the IPRA's register + // usage collection. This will ensure save/restore of return address happens + // in those scenarios. + const MachineRegisterInfo &MRI = MF.getRegInfo(); + Register RetAddrReg = TRI->getReturnAddressReg(MF); + if (!MFI->isEntryFunction() && + (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) { + SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0)); + SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1)); + } } bool SIFrameLowering::assignCalleeSavedSpillSlots( |