diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 59 |
1 files changed, 20 insertions, 39 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 39f5df767977e..b1c73df269fb2 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -61,6 +61,7 @@ #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -154,7 +155,7 @@ private: LiveIntervals *LIS; DenseMap<const MachineInstr *, InstrInfo> Instructions; - DenseMap<MachineBasicBlock *, BlockInfo> Blocks; + MapVector<MachineBasicBlock *, BlockInfo> Blocks; SmallVector<MachineInstr *, 1> LiveMaskQueries; SmallVector<MachineInstr *, 4> LowerToMovInstrs; SmallVector<MachineInstr *, 4> LowerToCopyInstrs; @@ -170,8 +171,6 @@ private: void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist); char analyzeFunction(MachineFunction &MF); - bool requiresCorrectState(const MachineInstr &MI) const; - MachineBasicBlock::iterator saveSCC(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before); MachineBasicBlock::iterator @@ -525,36 +524,6 @@ char SIWholeQuadMode::analyzeFunction(MachineFunction &MF) { return GlobalFlags; } -/// Whether \p MI really requires the exec state computed during analysis. -/// -/// Scalar instructions must occasionally be marked WQM for correct propagation -/// (e.g. thread masks leading up to branches), but when it comes to actual -/// execution, they don't care about EXEC. -bool SIWholeQuadMode::requiresCorrectState(const MachineInstr &MI) const { - if (MI.isTerminator()) - return true; - - // Skip instructions that are not affected by EXEC - if (TII->isScalarUnit(MI)) - return false; - - // Generic instructions such as COPY will either disappear by register - // coalescing or be lowered to SALU or VALU instructions. - if (MI.isTransient()) { - if (MI.getNumExplicitOperands() >= 1) { - const MachineOperand &Op = MI.getOperand(0); - if (Op.isReg()) { - if (TRI->isSGPRReg(*MRI, Op.getReg())) { - // SGPR instructions are not affected by EXEC - return false; - } - } - } - } - - return true; -} - MachineBasicBlock::iterator SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before) { @@ -741,7 +710,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, if (II != IE) { MachineInstr &MI = *II; - if (requiresCorrectState(MI)) { + if (MI.isTerminator() || TII->mayReadEXEC(*MRI, MI)) { auto III = Instructions.find(&MI); if (III != Instructions.end()) { if (III->second.Needs & StateWWM) @@ -793,18 +762,23 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, if (State == StateWWM) { assert(SavedNonWWMReg); fromWWM(MBB, Before, SavedNonWWMReg); + LIS->createAndComputeVirtRegInterval(SavedNonWWMReg); + SavedNonWWMReg = 0; State = NonWWMState; } if (Needs == StateWWM) { NonWWMState = State; + assert(!SavedNonWWMReg); SavedNonWWMReg = MRI->createVirtualRegister(BoolRC); toWWM(MBB, Before, SavedNonWWMReg); State = StateWWM; } else { if (State == StateWQM && (Needs & StateExact) && !(Needs & StateWQM)) { - if (!WQMFromExec && (OutNeeds & StateWQM)) + if (!WQMFromExec && (OutNeeds & StateWQM)) { + assert(!SavedWQMReg); SavedWQMReg = MRI->createVirtualRegister(BoolRC); + } toExact(MBB, Before, SavedWQMReg, LiveMaskReg); State = StateExact; @@ -837,6 +811,8 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, break; II = Next; } + assert(!SavedWQMReg); + assert(!SavedNonWWMReg); } void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) { @@ -929,10 +905,12 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) { if (GlobalFlags == StateWQM) { // For a shader that needs only WQM, we can just set it once. - BuildMI(Entry, EntryMI, DebugLoc(), TII->get(ST->isWave32() ? - AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64), - Exec) - .addReg(Exec); + auto MI = BuildMI(Entry, EntryMI, DebugLoc(), + TII->get(ST->isWave32() ? AMDGPU::S_WQM_B32 + : AMDGPU::S_WQM_B64), + Exec) + .addReg(Exec); + LIS->InsertMachineInstrInMaps(*MI); lowerCopyInstrs(); // EntryMI may become invalid here @@ -948,6 +926,9 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) { for (auto BII : Blocks) processBlock(*BII.first, LiveMaskReg, BII.first == &*MF.begin()); + if (LiveMaskReg) + LIS->createAndComputeVirtRegInterval(LiveMaskReg); + // Physical registers like SCC aren't tracked by default anyway, so just // removing the ranges we computed is the simplest option for maintaining // the analysis results. |