diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp | 10 |
1 file changed, 6 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
index 51779e97ac620..64fca0b467977 100644
--- a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
@@ -88,15 +88,17 @@ bool SIRemoveShortExecBranches::mustRetainExeczBranch(
     for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
          I != E; ++I) {
       // When a uniform loop is inside non-uniform control flow, the branch
-      // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
-      // when EXEC = 0. We should skip the loop lest it becomes infinite.
-      if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
-          I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
+      // leaving the loop might never be taken when EXEC = 0.
+      // Hence we should retain cbranch out of the loop lest it become infinite.
+      if (I->isConditionalBranch())
         return true;
 
       if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
         return true;
 
+      if (TII->isKillTerminator(I->getOpcode()))
+        return true;
+
       // These instructions are potentially expensive even if EXEC = 0.
       if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
           I->getOpcode() == AMDGPU::S_WAITCNT)