aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SIWholeQuadMode.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIWholeQuadMode.cpp')
-rw-r--r--lib/Target/AMDGPU/SIWholeQuadMode.cpp82
1 files changed, 54 insertions, 28 deletions
diff --git a/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 879726b1528c..4e07efff55d8 100644
--- a/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -1,9 +1,8 @@
//===-- SIWholeQuadMode.cpp - enter and suspend whole quad mode -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -149,6 +148,7 @@ private:
CallingConv::ID CallingConv;
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
+ const GCNSubtarget *ST;
MachineRegisterInfo *MRI;
LiveIntervals *LIS;
@@ -201,6 +201,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -277,7 +279,7 @@ void SIWholeQuadMode::markInstructionUses(const MachineInstr &MI, char Flag,
// for VCC, which can appear as the (implicit) input of a uniform branch,
// e.g. when a loop counter is stored in a VGPR.
if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
- if (Reg == AMDGPU::EXEC)
+ if (Reg == AMDGPU::EXEC || Reg == AMDGPU::EXEC_LO)
continue;
for (MCRegUnitIterator RegUnit(Reg, TRI); RegUnit.isValid(); ++RegUnit) {
@@ -386,7 +388,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
unsigned Reg = MO.getReg();
if (!TRI->isVirtualRegister(Reg) &&
- TRI->hasVGPRs(TRI->getPhysRegClass(Reg))) {
+ TRI->hasVectorRegisters(TRI->getPhysRegClass(Reg))) {
Flags = StateWQM;
break;
}
@@ -619,13 +621,16 @@ void SIWholeQuadMode::toExact(MachineBasicBlock &MBB,
MachineInstr *MI;
if (SaveWQM) {
- MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_SAVEEXEC_B64),
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(ST->isWave32() ?
+ AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64),
SaveWQM)
.addReg(LiveMaskReg);
} else {
- MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_B64),
- AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
+ unsigned Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(ST->isWave32() ?
+ AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64),
+ Exec)
+ .addReg(Exec)
.addReg(LiveMaskReg);
}
@@ -637,13 +642,15 @@ void SIWholeQuadMode::toWQM(MachineBasicBlock &MBB,
unsigned SavedWQM) {
MachineInstr *MI;
+ unsigned Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
if (SavedWQM) {
- MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), AMDGPU::EXEC)
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), Exec)
.addReg(SavedWQM);
} else {
- MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
- AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC);
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(ST->isWave32() ?
+ AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64),
+ Exec)
+ .addReg(Exec);
}
LIS->InsertMachineInstrInMaps(*MI);
@@ -655,8 +662,7 @@ void SIWholeQuadMode::toWWM(MachineBasicBlock &MBB,
MachineInstr *MI;
assert(SaveOrig);
- MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_OR_SAVEEXEC_B64),
- SaveOrig)
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::ENTER_WWM), SaveOrig)
.addImm(-1);
LIS->InsertMachineInstrInMaps(*MI);
}
@@ -667,7 +673,8 @@ void SIWholeQuadMode::fromWWM(MachineBasicBlock &MBB,
MachineInstr *MI;
assert(SavedOrig);
- MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::EXIT_WWM), AMDGPU::EXEC)
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::EXIT_WWM),
+ ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC)
.addReg(SavedOrig);
LIS->InsertMachineInstrInMaps(*MI);
}
@@ -693,6 +700,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
bool WQMFromExec = isEntry;
char State = (isEntry || !(BI.InNeeds & StateWQM)) ? StateExact : StateWQM;
char NonWWMState = 0;
+ const TargetRegisterClass *BoolRC = TRI->getBoolRC();
auto II = MBB.getFirstNonPHI(), IE = MBB.end();
if (isEntry)
@@ -780,13 +788,13 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
if (Needs == StateWWM) {
NonWWMState = State;
- SavedNonWWMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ SavedNonWWMReg = MRI->createVirtualRegister(BoolRC);
toWWM(MBB, Before, SavedNonWWMReg);
State = StateWWM;
} else {
if (State == StateWQM && (Needs & StateExact) && !(Needs & StateWQM)) {
if (!WQMFromExec && (OutNeeds & StateWQM))
- SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ SavedWQMReg = MRI->createVirtualRegister(BoolRC);
toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
State = StateExact;
@@ -838,7 +846,23 @@ void SIWholeQuadMode::lowerCopyInstrs() {
for (MachineInstr *MI : LowerToCopyInstrs) {
for (unsigned i = MI->getNumExplicitOperands() - 1; i > 1; i--)
MI->RemoveOperand(i);
- MI->setDesc(TII->get(AMDGPU::COPY));
+
+ const unsigned Reg = MI->getOperand(0).getReg();
+
+ if (TRI->isVGPR(*MRI, Reg)) {
+ const TargetRegisterClass *regClass =
+ TargetRegisterInfo::isVirtualRegister(Reg)
+ ? MRI->getRegClass(Reg)
+ : TRI->getPhysRegClass(Reg);
+
+ const unsigned MovOp = TII->getMovOpcode(regClass);
+ MI->setDesc(TII->get(MovOp));
+
+ // And make it implicitly depend on exec (like all VALU movs should do).
+ MI->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
+ } else {
+ MI->setDesc(TII->get(AMDGPU::COPY));
+ }
}
}
@@ -849,17 +873,18 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
LowerToCopyInstrs.clear();
CallingConv = MF.getFunction().getCallingConv();
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ ST = &MF.getSubtarget<GCNSubtarget>();
- TII = ST.getInstrInfo();
+ TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
char GlobalFlags = analyzeFunction(MF);
unsigned LiveMaskReg = 0;
+ unsigned Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
if (!(GlobalFlags & StateWQM)) {
- lowerLiveMaskQueries(AMDGPU::EXEC);
+ lowerLiveMaskQueries(Exec);
if (!(GlobalFlags & StateWWM))
return !LiveMaskQueries.empty();
} else {
@@ -868,10 +893,10 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock::iterator EntryMI = Entry.getFirstNonPHI();
if (GlobalFlags & StateExact || !LiveMaskQueries.empty()) {
- LiveMaskReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ LiveMaskReg = MRI->createVirtualRegister(TRI->getBoolRC());
MachineInstr *MI = BuildMI(Entry, EntryMI, DebugLoc(),
TII->get(AMDGPU::COPY), LiveMaskReg)
- .addReg(AMDGPU::EXEC);
+ .addReg(Exec);
LIS->InsertMachineInstrInMaps(*MI);
}
@@ -879,9 +904,10 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
if (GlobalFlags == StateWQM) {
// For a shader that needs only WQM, we can just set it once.
- BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
- AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC);
+ BuildMI(Entry, EntryMI, DebugLoc(), TII->get(ST->isWave32() ?
+ AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64),
+ Exec)
+ .addReg(Exec);
lowerCopyInstrs();
// EntryMI may become invalid here