aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp323
1 files changed, 323 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
new file mode 100644
index 000000000000..a82047473370
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -0,0 +1,323 @@
+//===-- SILowerSGPRSPills.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
+// SGPR spills, so must insert CSR SGPR spills as well as expand them.
+//
+// This pass must never create new SGPR virtual registers.
+//
+// FIXME: Must stop RegScavenger spills in later passes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-lower-sgpr-spills"
+
+using MBBVector = SmallVector<MachineBasicBlock *, 4>;
+
+namespace {
+
+static cl::opt<bool> EnableSpillVGPRToAGPR(
+ "amdgpu-spill-vgpr-to-agpr",
+ cl::desc("Enable spilling VGPRs to AGPRs"),
+ cl::ReallyHidden,
+ cl::init(true));
+
+class SILowerSGPRSpills : public MachineFunctionPass {
+private:
+ const SIRegisterInfo *TRI = nullptr;
+ const SIInstrInfo *TII = nullptr;
+ VirtRegMap *VRM = nullptr;
+ LiveIntervals *LIS = nullptr;
+
+ // Save and Restore blocks of the current function. Typically there is a
+ // single save block, unless Windows EH funclets are involved.
+ MBBVector SaveBlocks;
+ MBBVector RestoreBlocks;
+
+public:
+ static char ID;
+
+ SILowerSGPRSpills() : MachineFunctionPass(ID) {}
+
+ void calculateSaveRestoreBlocks(MachineFunction &MF);
+ bool spillCalleeSavedRegs(MachineFunction &MF);
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // end anonymous namespace
+
+char SILowerSGPRSpills::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE,
+ "SI lower SGPR spill instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
+ "SI lower SGPR spill instructions", false, false)
+
+char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID;
+
+/// Insert restore code for the callee-saved registers used in the function.
+static void insertCSRSaves(MachineBasicBlock &SaveBlock,
+ ArrayRef<CalleeSavedInfo> CSI,
+ LiveIntervals *LIS) {
+ MachineFunction &MF = *SaveBlock.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ MachineBasicBlock::iterator I = SaveBlock.begin();
+ if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
+ for (const CalleeSavedInfo &CS : CSI) {
+ // Insert the spill to the stack frame.
+ unsigned Reg = CS.getReg();
+
+ MachineInstrSpan MIS(I, &SaveBlock);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+
+ TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
+ TRI);
+
+ if (LIS) {
+ assert(std::distance(MIS.begin(), I) == 1);
+ MachineInstr &Inst = *std::prev(I);
+
+ LIS->InsertMachineInstrInMaps(Inst);
+ LIS->removeAllRegUnitsForPhysReg(Reg);
+ }
+ }
+ }
+}
+
+/// Insert restore code for the callee-saved registers used in the function.
+static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
+ std::vector<CalleeSavedInfo> &CSI,
+ LiveIntervals *LIS) {
+ MachineFunction &MF = *RestoreBlock.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
+
+ // FIXME: Just emit the readlane/writelane directly
+ if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
+ for (const CalleeSavedInfo &CI : reverse(CSI)) {
+ unsigned Reg = CI.getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+
+ TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
+ assert(I != RestoreBlock.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+
+ if (LIS) {
+ MachineInstr &Inst = *std::prev(I);
+ LIS->InsertMachineInstrInMaps(Inst);
+ LIS->removeAllRegUnitsForPhysReg(Reg);
+ }
+ }
+ }
+}
+
+/// Compute the sets of entry and return blocks for saving and restoring
+/// callee-saved registers, and placing prolog and epilog code.
+void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ // Even when we do not change any CSR, we still want to insert the
+ // prologue and epilogue of the function.
+ // So set the save points for those.
+
+ // Use the points found by shrink-wrapping, if any.
+ if (MFI.getSavePoint()) {
+ SaveBlocks.push_back(MFI.getSavePoint());
+ assert(MFI.getRestorePoint() && "Both restore and save must be set");
+ MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
+ // If RestoreBlock does not have any successor and is not a return block
+ // then the end point is unreachable and we do not need to insert any
+ // epilogue.
+ if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
+ RestoreBlocks.push_back(RestoreBlock);
+ return;
+ }
+
+ // Save refs to entry and return blocks.
+ SaveBlocks.push_back(&MF.front());
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHFuncletEntry())
+ SaveBlocks.push_back(&MBB);
+ if (MBB.isReturnBlock())
+ RestoreBlocks.push_back(&MBB);
+ }
+}
+
+bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const Function &F = MF.getFunction();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIFrameLowering *TFI = ST.getFrameLowering();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ RegScavenger *RS = nullptr;
+
+ // Determine which of the registers in the callee save list should be saved.
+ BitVector SavedRegs;
+ TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
+
+ // Add the code to save and restore the callee saved registers.
+ if (!F.hasFnAttribute(Attribute::Naked)) {
+ // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
+ // necessary for verifier liveness checks.
+ MFI.setCalleeSavedInfoValid(true);
+
+ std::vector<CalleeSavedInfo> CSI;
+ const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
+
+ for (unsigned I = 0; CSRegs[I]; ++I) {
+ unsigned Reg = CSRegs[I];
+ if (SavedRegs.test(Reg)) {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
+ TRI->getSpillAlignment(*RC),
+ true);
+
+ CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
+ }
+ }
+
+ if (!CSI.empty()) {
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ insertCSRSaves(*SaveBlock, CSI, LIS);
+
+ for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
+ insertCSRRestores(*RestoreBlock, CSI, LIS);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ TII = ST.getInstrInfo();
+ TRI = &TII->getRegisterInfo();
+
+ VRM = getAnalysisIfAvailable<VirtRegMap>();
+
+ assert(SaveBlocks.empty() && RestoreBlocks.empty());
+
+ // First, expose any CSR SGPR spills. This is mostly the same as what PEI
+ // does, but somewhat simpler.
+ calculateSaveRestoreBlocks(MF);
+ bool HasCSRs = spillCalleeSavedRegs(MF);
+
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.hasStackObjects() && !HasCSRs) {
+ SaveBlocks.clear();
+ RestoreBlocks.clear();
+ return false;
+ }
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
+ && EnableSpillVGPRToAGPR;
+
+ bool MadeChange = false;
+
+ const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts();
+
+ // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
+ // handled as SpilledToReg in regular PrologEpilogInserter.
+ if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) ||
+ SpillVGPRToAGPR) {
+ // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
+ // are spilled to VGPRs, in which case we can eliminate the stack usage.
+ //
+ // This operates under the assumption that only other SGPR spills are users
+ // of the frame index.
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock::iterator Next;
+ for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
+ MachineInstr &MI = *I;
+ Next = std::next(I);
+
+ if (SpillToAGPR && TII->isVGPRSpill(MI)) {
+ // Try to eliminate stack used by VGPR spills before frame
+ // finalization.
+ unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::vaddr);
+ int FI = MI.getOperand(FIOp).getIndex();
+ unsigned VReg = TII->getNamedOperand(MI, AMDGPU::OpName::vdata)
+ ->getReg();
+ if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
+ TRI->isAGPR(MRI, VReg))) {
+ TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr);
+ continue;
+ }
+ }
+
+ if (!TII->isSGPRSpill(MI))
+ continue;
+
+ int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
+ assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+ if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
+ bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
+ (void)Spilled;
+ assert(Spilled && "failed to spill SGPR to VGPR when allocated");
+ }
+ }
+ }
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (auto SSpill : FuncInfo->getSGPRSpillVGPRs())
+ MBB.addLiveIn(SSpill.VGPR);
+
+ for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
+ MBB.addLiveIn(Reg);
+
+ for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
+ MBB.addLiveIn(Reg);
+
+ MBB.sortUniqueLiveIns();
+ }
+
+ MadeChange = true;
+ }
+
+ SaveBlocks.clear();
+ RestoreBlocks.clear();
+
+ return MadeChange;
+}