summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp 59
1 files changed, 20 insertions, 39 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 39f5df767977e..b1c73df269fb2 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -61,6 +61,7 @@
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@@ -154,7 +155,7 @@ private:
LiveIntervals *LIS;
DenseMap<const MachineInstr *, InstrInfo> Instructions;
- DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
+ MapVector<MachineBasicBlock *, BlockInfo> Blocks;
SmallVector<MachineInstr *, 1> LiveMaskQueries;
SmallVector<MachineInstr *, 4> LowerToMovInstrs;
SmallVector<MachineInstr *, 4> LowerToCopyInstrs;
@@ -170,8 +171,6 @@ private:
void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
char analyzeFunction(MachineFunction &MF);
- bool requiresCorrectState(const MachineInstr &MI) const;
-
MachineBasicBlock::iterator saveSCC(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Before);
MachineBasicBlock::iterator
@@ -525,36 +524,6 @@ char SIWholeQuadMode::analyzeFunction(MachineFunction &MF) {
return GlobalFlags;
}
-/// Whether \p MI really requires the exec state computed during analysis.
-///
-/// Scalar instructions must occasionally be marked WQM for correct propagation
-/// (e.g. thread masks leading up to branches), but when it comes to actual
-/// execution, they don't care about EXEC.
-bool SIWholeQuadMode::requiresCorrectState(const MachineInstr &MI) const {
- if (MI.isTerminator())
- return true;
-
- // Skip instructions that are not affected by EXEC
- if (TII->isScalarUnit(MI))
- return false;
-
- // Generic instructions such as COPY will either disappear by register
- // coalescing or be lowered to SALU or VALU instructions.
- if (MI.isTransient()) {
- if (MI.getNumExplicitOperands() >= 1) {
- const MachineOperand &Op = MI.getOperand(0);
- if (Op.isReg()) {
- if (TRI->isSGPRReg(*MRI, Op.getReg())) {
- // SGPR instructions are not affected by EXEC
- return false;
- }
- }
- }
- }
-
- return true;
-}
-
MachineBasicBlock::iterator
SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Before) {
@@ -741,7 +710,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
if (II != IE) {
MachineInstr &MI = *II;
- if (requiresCorrectState(MI)) {
+ if (MI.isTerminator() || TII->mayReadEXEC(*MRI, MI)) {
auto III = Instructions.find(&MI);
if (III != Instructions.end()) {
if (III->second.Needs & StateWWM)
@@ -793,18 +762,23 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
if (State == StateWWM) {
assert(SavedNonWWMReg);
fromWWM(MBB, Before, SavedNonWWMReg);
+ LIS->createAndComputeVirtRegInterval(SavedNonWWMReg);
+ SavedNonWWMReg = 0;
State = NonWWMState;
}
if (Needs == StateWWM) {
NonWWMState = State;
+ assert(!SavedNonWWMReg);
SavedNonWWMReg = MRI->createVirtualRegister(BoolRC);
toWWM(MBB, Before, SavedNonWWMReg);
State = StateWWM;
} else {
if (State == StateWQM && (Needs & StateExact) && !(Needs & StateWQM)) {
- if (!WQMFromExec && (OutNeeds & StateWQM))
+ if (!WQMFromExec && (OutNeeds & StateWQM)) {
+ assert(!SavedWQMReg);
SavedWQMReg = MRI->createVirtualRegister(BoolRC);
+ }
toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
State = StateExact;
@@ -837,6 +811,8 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
break;
II = Next;
}
+ assert(!SavedWQMReg);
+ assert(!SavedNonWWMReg);
}
void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
@@ -929,10 +905,12 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
if (GlobalFlags == StateWQM) {
// For a shader that needs only WQM, we can just set it once.
- BuildMI(Entry, EntryMI, DebugLoc(), TII->get(ST->isWave32() ?
- AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64),
- Exec)
- .addReg(Exec);
+ auto MI = BuildMI(Entry, EntryMI, DebugLoc(),
+ TII->get(ST->isWave32() ? AMDGPU::S_WQM_B32
+ : AMDGPU::S_WQM_B64),
+ Exec)
+ .addReg(Exec);
+ LIS->InsertMachineInstrInMaps(*MI);
lowerCopyInstrs();
// EntryMI may become invalid here
@@ -948,6 +926,9 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
for (auto BII : Blocks)
processBlock(*BII.first, LiveMaskReg, BII.first == &*MF.begin());
+ if (LiveMaskReg)
+ LIS->createAndComputeVirtRegInterval(LiveMaskReg);
+
// Physical registers like SCC aren't tracked by default anyway, so just
// removing the ranges we computed is the simplest option for maintaining
// the analysis results.