Diffstat (limited to 'lib/Target/AMDGPU/SIWholeQuadMode.cpp')
-rw-r--r-- | lib/Target/AMDGPU/SIWholeQuadMode.cpp | 82
1 file changed, 54 insertions, 28 deletions
diff --git a/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 879726b1528c..4e07efff55d8 100644
--- a/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -1,9 +1,8 @@
 //===-- SIWholeQuadMode.cpp - enter and suspend whole quad mode -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -149,6 +148,7 @@ private:
   CallingConv::ID CallingConv;
   const SIInstrInfo *TII;
   const SIRegisterInfo *TRI;
+  const GCNSubtarget *ST;
   MachineRegisterInfo *MRI;
   LiveIntervals *LIS;
 
@@ -201,6 +201,8 @@ public:
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<LiveIntervals>();
+    AU.addPreserved<SlotIndexes>();
+    AU.addPreserved<LiveIntervals>();
     AU.setPreservesCFG();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
@@ -277,7 +279,7 @@ void SIWholeQuadMode::markInstructionUses(const MachineInstr &MI, char Flag,
     // for VCC, which can appear as the (implicit) input of a uniform branch,
     // e.g. when a loop counter is stored in a VGPR.
     if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
-      if (Reg == AMDGPU::EXEC)
+      if (Reg == AMDGPU::EXEC || Reg == AMDGPU::EXEC_LO)
        continue;
 
      for (MCRegUnitIterator RegUnit(Reg, TRI); RegUnit.isValid(); ++RegUnit) {
@@ -386,7 +388,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
          unsigned Reg = MO.getReg();
 
          if (!TRI->isVirtualRegister(Reg) &&
-              TRI->hasVGPRs(TRI->getPhysRegClass(Reg))) {
+              TRI->hasVectorRegisters(TRI->getPhysRegClass(Reg))) {
            Flags = StateWQM;
            break;
          }
@@ -619,13 +621,16 @@ void SIWholeQuadMode::toExact(MachineBasicBlock &MBB,
   MachineInstr *MI;
 
   if (SaveWQM) {
-    MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_SAVEEXEC_B64),
+    MI = BuildMI(MBB, Before, DebugLoc(), TII->get(ST->isWave32() ?
+                   AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64),
                 SaveWQM)
            .addReg(LiveMaskReg);
   } else {
-    MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_B64),
-                 AMDGPU::EXEC)
-             .addReg(AMDGPU::EXEC)
+    unsigned Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+    MI = BuildMI(MBB, Before, DebugLoc(), TII->get(ST->isWave32() ?
+                   AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64),
+                 Exec)
+             .addReg(Exec)
            .addReg(LiveMaskReg);
   }
 
@@ -637,13 +642,15 @@ void SIWholeQuadMode::toWQM(MachineBasicBlock &MBB,
                             unsigned SavedWQM) {
   MachineInstr *MI;
 
+  unsigned Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
   if (SavedWQM) {
-    MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), AMDGPU::EXEC)
+    MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), Exec)
            .addReg(SavedWQM);
   } else {
-    MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
-                 AMDGPU::EXEC)
-             .addReg(AMDGPU::EXEC);
+    MI = BuildMI(MBB, Before, DebugLoc(), TII->get(ST->isWave32() ?
+                   AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64),
+                 Exec)
+             .addReg(Exec);
   }
 
   LIS->InsertMachineInstrInMaps(*MI);
@@ -655,8 +662,7 @@ void SIWholeQuadMode::toWWM(MachineBasicBlock &MBB,
   MachineInstr *MI;
 
   assert(SaveOrig);
-  MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_OR_SAVEEXEC_B64),
-               SaveOrig)
+  MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::ENTER_WWM), SaveOrig)
           .addImm(-1);
   LIS->InsertMachineInstrInMaps(*MI);
 }
@@ -667,7 +673,8 @@ void SIWholeQuadMode::fromWWM(MachineBasicBlock &MBB,
   MachineInstr *MI;
 
   assert(SavedOrig);
-  MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::EXIT_WWM), AMDGPU::EXEC)
+  MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::EXIT_WWM),
+               ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC)
           .addReg(SavedOrig);
   LIS->InsertMachineInstrInMaps(*MI);
 }
@@ -693,6 +700,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
   bool WQMFromExec = isEntry;
   char State = (isEntry || !(BI.InNeeds & StateWQM)) ? StateExact : StateWQM;
   char NonWWMState = 0;
+  const TargetRegisterClass *BoolRC = TRI->getBoolRC();
 
   auto II = MBB.getFirstNonPHI(), IE = MBB.end();
   if (isEntry)
@@ -780,13 +788,13 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
 
     if (Needs == StateWWM) {
       NonWWMState = State;
-      SavedNonWWMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+      SavedNonWWMReg = MRI->createVirtualRegister(BoolRC);
       toWWM(MBB, Before, SavedNonWWMReg);
       State = StateWWM;
     } else {
       if (State == StateWQM && (Needs & StateExact) && !(Needs & StateWQM)) {
         if (!WQMFromExec && (OutNeeds & StateWQM))
-          SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+          SavedWQMReg = MRI->createVirtualRegister(BoolRC);
 
         toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
         State = StateExact;
@@ -838,7 +846,23 @@ void SIWholeQuadMode::lowerCopyInstrs() {
   for (MachineInstr *MI : LowerToCopyInstrs) {
     for (unsigned i = MI->getNumExplicitOperands() - 1; i > 1; i--)
       MI->RemoveOperand(i);
-    MI->setDesc(TII->get(AMDGPU::COPY));
+
+    const unsigned Reg = MI->getOperand(0).getReg();
+
+    if (TRI->isVGPR(*MRI, Reg)) {
+      const TargetRegisterClass *regClass =
+          TargetRegisterInfo::isVirtualRegister(Reg)
+              ? MRI->getRegClass(Reg)
+              : TRI->getPhysRegClass(Reg);
+
+      const unsigned MovOp = TII->getMovOpcode(regClass);
+      MI->setDesc(TII->get(MovOp));
+
+      // And make it implicitly depend on exec (like all VALU movs should do).
+      MI->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
+    } else {
+      MI->setDesc(TII->get(AMDGPU::COPY));
+    }
   }
 }
 
@@ -849,17 +873,18 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
   LowerToCopyInstrs.clear();
   CallingConv = MF.getFunction().getCallingConv();
 
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  ST = &MF.getSubtarget<GCNSubtarget>();
 
-  TII = ST.getInstrInfo();
+  TII = ST->getInstrInfo();
   TRI = &TII->getRegisterInfo();
   MRI = &MF.getRegInfo();
   LIS = &getAnalysis<LiveIntervals>();
 
   char GlobalFlags = analyzeFunction(MF);
   unsigned LiveMaskReg = 0;
+  unsigned Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
   if (!(GlobalFlags & StateWQM)) {
-    lowerLiveMaskQueries(AMDGPU::EXEC);
+    lowerLiveMaskQueries(Exec);
     if (!(GlobalFlags & StateWWM))
       return !LiveMaskQueries.empty();
   } else {
@@ -868,10 +893,10 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
     MachineBasicBlock::iterator EntryMI = Entry.getFirstNonPHI();
 
     if (GlobalFlags & StateExact || !LiveMaskQueries.empty()) {
-      LiveMaskReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+      LiveMaskReg = MRI->createVirtualRegister(TRI->getBoolRC());
      MachineInstr *MI = BuildMI(Entry, EntryMI, DebugLoc(),
                                 TII->get(AMDGPU::COPY), LiveMaskReg)
-                             .addReg(AMDGPU::EXEC);
+                             .addReg(Exec);
      LIS->InsertMachineInstrInMaps(*MI);
    }
 
@@ -879,9 +904,10 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
 
    if (GlobalFlags == StateWQM) {
      // For a shader that needs only WQM, we can just set it once.
-      BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
-              AMDGPU::EXEC)
-          .addReg(AMDGPU::EXEC);
+      BuildMI(Entry, EntryMI, DebugLoc(), TII->get(ST->isWave32() ?
+                AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64),
+              Exec)
+          .addReg(Exec);
 
      lowerCopyInstrs();
      // EntryMI may become invalid here