about summary refs log tree commit diff
path: root/lib/Target/AMDGPU/SIInsertSkips.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIInsertSkips.cpp')
-rw-r--r-- lib/Target/AMDGPU/SIInsertSkips.cpp 76
1 files changed, 45 insertions, 31 deletions
diff --git a/lib/Target/AMDGPU/SIInsertSkips.cpp b/lib/Target/AMDGPU/SIInsertSkips.cpp
index ba21a5ce1293..87e63fcc4a04 100644
--- a/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -1,9 +1,8 @@
//===-- SIInsertSkips.cpp - Use predicates for control flow ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -93,15 +92,13 @@ INITIALIZE_PASS(SIInsertSkips, DEBUG_TYPE,
char &llvm::SIInsertSkipsPassID = SIInsertSkips::ID;
-static bool opcodeEmitsNoInsts(unsigned Opc) {
- switch (Opc) {
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case TargetOpcode::BUNDLE:
- case TargetOpcode::CFI_INSTRUCTION:
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::GC_LABEL:
- case TargetOpcode::DBG_VALUE:
+static bool opcodeEmitsNoInsts(const MachineInstr &MI) {
+ if (MI.isMetaInstruction())
+ return true;
+
+ // Handle target specific opcodes.
+ switch (MI.getOpcode()) {
+ case AMDGPU::SI_MASK_BRANCH:
return true;
default:
return false;
@@ -110,9 +107,6 @@ static bool opcodeEmitsNoInsts(unsigned Opc) {
bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
const MachineBasicBlock &To) const {
- if (From.succ_empty())
- return false;
-
unsigned NumInstr = 0;
const MachineFunction *MF = From.getParent();
@@ -122,7 +116,7 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
NumInstr < SkipThreshold && I != E; ++I) {
- if (opcodeEmitsNoInsts(I->getOpcode()))
+ if (opcodeEmitsNoInsts(*I))
continue;
// FIXME: Since this is required for correctness, this should be inserted
@@ -138,6 +132,11 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
return true;
+ // These instructions are potentially expensive even if EXEC = 0.
+ if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
+ I->getOpcode() == AMDGPU::S_WAITCNT)
+ return true;
+
++NumInstr;
if (NumInstr >= SkipThreshold)
return true;
@@ -177,7 +176,7 @@ bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
.addImm(0); // en
// ... and terminate wavefront.
- BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
+ BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
return true;
}
@@ -245,6 +244,10 @@ void SIInsertSkips::kill(MachineInstr &MI) {
llvm_unreachable("invalid ISD:SET cond code");
}
+ const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
+ if (ST.hasNoSdstCMPX())
+ Opcode = AMDGPU::getVCMPXNoSDstOp(Opcode);
+
assert(MI.getOperand(0).isReg());
if (TRI->isVGPR(MBB.getParent()->getRegInfo(),
@@ -254,17 +257,23 @@ void SIInsertSkips::kill(MachineInstr &MI) {
.add(MI.getOperand(1))
.add(MI.getOperand(0));
} else {
- BuildMI(MBB, &MI, DL, TII->get(Opcode))
- .addReg(AMDGPU::VCC, RegState::Define)
- .addImm(0) // src0 modifiers
- .add(MI.getOperand(1))
- .addImm(0) // src1 modifiers
- .add(MI.getOperand(0))
- .addImm(0); // omod
+ auto I = BuildMI(MBB, &MI, DL, TII->get(Opcode));
+ if (!ST.hasNoSdstCMPX())
+ I.addReg(AMDGPU::VCC, RegState::Define);
+
+ I.addImm(0) // src0 modifiers
+ .add(MI.getOperand(1))
+ .addImm(0) // src1 modifiers
+ .add(MI.getOperand(0));
+
+ I.addImm(0); // omod
}
break;
}
case AMDGPU::SI_KILL_I1_TERMINATOR: {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
const MachineOperand &Op = MI.getOperand(0);
int64_t KillVal = MI.getOperand(1).getImm();
assert(KillVal == 0 || KillVal == -1);
@@ -275,14 +284,17 @@ void SIInsertSkips::kill(MachineInstr &MI) {
assert(Imm == 0 || Imm == -1);
if (Imm == KillVal)
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ BuildMI(MBB, &MI, DL, TII->get(ST.isWave32() ? AMDGPU::S_MOV_B32
+ : AMDGPU::S_MOV_B64), Exec)
.addImm(0);
break;
}
unsigned Opcode = KillVal ? AMDGPU::S_ANDN2_B64 : AMDGPU::S_AND_B64;
- BuildMI(MBB, &MI, DL, TII->get(Opcode), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
+ if (ST.isWave32())
+ Opcode = KillVal ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_AND_B32;
+ BuildMI(MBB, &MI, DL, TII->get(Opcode), Exec)
+ .addReg(Exec)
.add(Op);
break;
}
@@ -331,9 +343,11 @@ bool SIInsertSkips::optimizeVccBranch(MachineInstr &MI) const {
// S_CBRANCH_EXEC[N]Z
bool Changed = false;
MachineBasicBlock &MBB = *MI.getParent();
- const unsigned CondReg = AMDGPU::VCC;
- const unsigned ExecReg = AMDGPU::EXEC;
- const unsigned And = AMDGPU::S_AND_B64;
+ const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
+ const bool IsWave32 = ST.isWave32();
+ const unsigned CondReg = TRI->getVCC();
+ const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
E = MBB.rend();