diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 96 |
1 files changed, 38 insertions, 58 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 5839e59b4d7f..0f2836e1e7fb 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -72,10 +72,9 @@ private: MachineRegisterInfo *MRI = nullptr; SetVector<MachineInstr*> LoweredEndCf; DenseSet<Register> LoweredIf; - SmallSet<MachineInstr *, 16> NeedsKillCleanup; + SmallSet<MachineBasicBlock *, 4> KillBlocks; const TargetRegisterClass *BoolRC = nullptr; - bool InsertKillCleanups; unsigned AndOpc; unsigned OrOpc; unsigned XorOpc; @@ -86,6 +85,8 @@ private: unsigned OrSaveExecOpc; unsigned Exec; + bool hasKill(const MachineBasicBlock *Begin, const MachineBasicBlock *End); + void emitIf(MachineInstr &MI); void emitElse(MachineInstr &MI); void emitIfBreak(MachineInstr &MI); @@ -163,8 +164,8 @@ static void setImpSCCDefDead(MachineInstr &MI, bool IsDead) { char &llvm::SILowerControlFlowID = SILowerControlFlow::ID; -static bool hasKill(const MachineBasicBlock *Begin, - const MachineBasicBlock *End, const SIInstrInfo *TII) { +bool SILowerControlFlow::hasKill(const MachineBasicBlock *Begin, + const MachineBasicBlock *End) { DenseSet<const MachineBasicBlock*> Visited; SmallVector<MachineBasicBlock *, 4> Worklist(Begin->successors()); @@ -173,9 +174,8 @@ static bool hasKill(const MachineBasicBlock *Begin, if (MBB == End || !Visited.insert(MBB).second) continue; - for (auto &Term : MBB->terminators()) - if (TII->isKillTerminator(Term.getOpcode())) - return true; + if (KillBlocks.contains(MBB)) + return true; Worklist.append(MBB->succ_begin(), MBB->succ_end()); } @@ -211,32 +211,11 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) { // just cleared bits. bool SimpleIf = isSimpleIf(MI, MRI); - if (InsertKillCleanups) { - // Check for SI_KILL_*_TERMINATOR on full path of control flow and - // flag the associated SI_END_CF for insertion of a kill cleanup. - auto UseMI = MRI->use_instr_nodbg_begin(SaveExecReg); - while (UseMI->getOpcode() != AMDGPU::SI_END_CF) { - assert(std::next(UseMI) == MRI->use_instr_nodbg_end()); - assert(UseMI->getOpcode() == AMDGPU::SI_ELSE); - MachineOperand &NextExec = UseMI->getOperand(0); - Register NextExecReg = NextExec.getReg(); - if (NextExec.isDead()) { - assert(!SimpleIf); - break; - } - UseMI = MRI->use_instr_nodbg_begin(NextExecReg); - } - if (UseMI->getOpcode() == AMDGPU::SI_END_CF) { - if (hasKill(MI.getParent(), UseMI->getParent(), TII)) { - NeedsKillCleanup.insert(&*UseMI); - SimpleIf = false; - } - } - } else if (SimpleIf) { + if (SimpleIf) { // Check for SI_KILL_*_TERMINATOR on path from if to endif. // if there is any such terminator simplifications are not safe. auto UseMI = MRI->use_instr_nodbg_begin(SaveExecReg); - SimpleIf = !hasKill(MI.getParent(), UseMI->getParent(), TII); + SimpleIf = !hasKill(MI.getParent(), UseMI->getParent()); } // Add an implicit def of exec to discourage scheduling VALU after this which @@ -451,8 +430,6 @@ SILowerControlFlow::skipIgnoreExecInstsTrivialSucc( auto E = B->end(); for ( ; It != E; ++It) { - if (It->getOpcode() == AMDGPU::SI_KILL_CLEANUP) - continue; if (TII->mayReadEXEC(*MRI, *It)) break; } @@ -505,18 +482,8 @@ MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) { LoweredEndCf.insert(NewMI); - // If this ends control flow which contains kills (as flagged in emitIf) - // then insert an SI_KILL_CLEANUP immediately following the exec mask - // manipulation. This can be lowered to early termination if appropriate. - MachineInstr *CleanUpMI = nullptr; - if (NeedsKillCleanup.count(&MI)) - CleanUpMI = BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::SI_KILL_CLEANUP)); - - if (LIS) { + if (LIS) LIS->ReplaceMachineInstrInMaps(MI, *NewMI); - if (CleanUpMI) - LIS->InsertMachineInstrInMaps(*CleanUpMI); - } MI.eraseFromParent(); @@ -633,6 +600,10 @@ MachineBasicBlock *SILowerControlFlow::process(MachineInstr &MI) { emitLoop(MI); break; + case AMDGPU::SI_WATERFALL_LOOP: + MI.setDesc(TII->get(AMDGPU::S_CBRANCH_EXECNZ)); + break; + case AMDGPU::SI_END_CF: SplitBB = emitEndCf(MI); break; @@ -811,8 +782,6 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { LIS = getAnalysisIfAvailable<LiveIntervals>(); MRI = &MF.getRegInfo(); BoolRC = TRI->getBoolRC(); - InsertKillCleanups = - MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS; if (ST.isWave32()) { AndOpc = AMDGPU::S_AND_B32; @@ -836,7 +805,27 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { Exec = AMDGPU::EXEC; } - SmallVector<MachineInstr *, 32> Worklist; + // Compute set of blocks with kills + const bool CanDemote = + MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS; + for (auto &MBB : MF) { + bool IsKillBlock = false; + for (auto &Term : MBB.terminators()) { + if (TII->isKillTerminator(Term.getOpcode())) { + KillBlocks.insert(&MBB); + IsKillBlock = true; + break; + } + } + if (CanDemote && !IsKillBlock) { + for (auto &MI : MBB) { + if (MI.getOpcode() == AMDGPU::SI_DEMOTE_I1) { + KillBlocks.insert(&MBB); + break; + } + } + } + } MachineFunction::iterator NextBB; for (MachineFunction::iterator BI = MF.begin(); @@ -853,18 +842,12 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { switch (MI.getOpcode()) { case AMDGPU::SI_IF: - SplitMBB = process(MI); - break; - case AMDGPU::SI_ELSE: case AMDGPU::SI_IF_BREAK: + case AMDGPU::SI_WATERFALL_LOOP: case AMDGPU::SI_LOOP: case AMDGPU::SI_END_CF: - // Only build worklist if SI_IF instructions must be processed first. - if (InsertKillCleanups) - Worklist.push_back(&MI); - else - SplitMBB = process(MI); + SplitMBB = process(MI); break; // FIXME: find a better place for this @@ -886,14 +869,11 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { } } - for (MachineInstr *MI : Worklist) - process(*MI); - optimizeEndCf(); LoweredEndCf.clear(); LoweredIf.clear(); - NeedsKillCleanup.clear(); + KillBlocks.clear(); return true; } |
