summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp10
1 files changed, 6 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
index 51779e97ac620..64fca0b467977 100644
--- a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
@@ -88,15 +88,17 @@ bool SIRemoveShortExecBranches::mustRetainExeczBranch(
for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
// When a uniform loop is inside non-uniform control flow, the branch
- // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
- // when EXEC = 0. We should skip the loop lest it becomes infinite.
- if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
- I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
+ // leaving the loop might never be taken when EXEC = 0.
+ // Hence we should retain cbranch out of the loop lest it become infinite.
+ if (I->isConditionalBranch())
return true;
if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
return true;
+ if (TII->isKillTerminator(I->getOpcode()))
+ return true;
+
// These instructions are potentially expensive even if EXEC = 0.
if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
I->getOpcode() == AMDGPU::S_WAITCNT)