diff options
Diffstat (limited to 'lib/Target/AMDGPU/SIInsertSkips.cpp')
-rw-r--r-- | lib/Target/AMDGPU/SIInsertSkips.cpp | 76 |
1 files changed, 45 insertions, 31 deletions
diff --git a/lib/Target/AMDGPU/SIInsertSkips.cpp b/lib/Target/AMDGPU/SIInsertSkips.cpp index ba21a5ce1293..87e63fcc4a04 100644 --- a/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -1,9 +1,8 @@ //===-- SIInsertSkips.cpp - Use predicates for control flow ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -93,15 +92,13 @@ INITIALIZE_PASS(SIInsertSkips, DEBUG_TYPE, char &llvm::SIInsertSkipsPassID = SIInsertSkips::ID; -static bool opcodeEmitsNoInsts(unsigned Opc) { - switch (Opc) { - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - case TargetOpcode::BUNDLE: - case TargetOpcode::CFI_INSTRUCTION: - case TargetOpcode::EH_LABEL: - case TargetOpcode::GC_LABEL: - case TargetOpcode::DBG_VALUE: +static bool opcodeEmitsNoInsts(const MachineInstr &MI) { + if (MI.isMetaInstruction()) + return true; + + // Handle target specific opcodes. + switch (MI.getOpcode()) { + case AMDGPU::SI_MASK_BRANCH: return true; default: return false; @@ -110,9 +107,6 @@ static bool opcodeEmitsNoInsts(unsigned Opc) { bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From, const MachineBasicBlock &To) const { - if (From.succ_empty()) - return false; - unsigned NumInstr = 0; const MachineFunction *MF = From.getParent(); @@ -122,7 +116,7 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From, for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end(); NumInstr < SkipThreshold && I != E; ++I) { - if (opcodeEmitsNoInsts(I->getOpcode())) + if (opcodeEmitsNoInsts(*I)) continue; // FIXME: Since this is required for correctness, this should be inserted @@ -138,6 +132,11 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From, if (TII->hasUnwantedEffectsWhenEXECEmpty(*I)) return true; + // These instructions are potentially expensive even if EXEC = 0. + if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) || + I->getOpcode() == AMDGPU::S_WAITCNT) + return true; + ++NumInstr; if (NumInstr >= SkipThreshold) return true; @@ -177,7 +176,7 @@ bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) { .addImm(0); // en // ... and terminate wavefront. - BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)); + BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0); return true; } @@ -245,6 +244,10 @@ void SIInsertSkips::kill(MachineInstr &MI) { llvm_unreachable("invalid ISD:SET cond code"); } + const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>(); + if (ST.hasNoSdstCMPX()) + Opcode = AMDGPU::getVCMPXNoSDstOp(Opcode); + assert(MI.getOperand(0).isReg()); if (TRI->isVGPR(MBB.getParent()->getRegInfo(), @@ -254,17 +257,23 @@ void SIInsertSkips::kill(MachineInstr &MI) { .add(MI.getOperand(1)) .add(MI.getOperand(0)); } else { - BuildMI(MBB, &MI, DL, TII->get(Opcode)) - .addReg(AMDGPU::VCC, RegState::Define) - .addImm(0) // src0 modifiers - .add(MI.getOperand(1)) - .addImm(0) // src1 modifiers - .add(MI.getOperand(0)) - .addImm(0); // omod + auto I = BuildMI(MBB, &MI, DL, TII->get(Opcode)); + if (!ST.hasNoSdstCMPX()) + I.addReg(AMDGPU::VCC, RegState::Define); + + I.addImm(0) // src0 modifiers + .add(MI.getOperand(1)) + .addImm(0) // src1 modifiers + .add(MI.getOperand(0)); + + I.addImm(0); // omod } break; } case AMDGPU::SI_KILL_I1_TERMINATOR: { + const MachineFunction *MF = MI.getParent()->getParent(); + const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); + unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; const MachineOperand &Op = MI.getOperand(0); int64_t KillVal = MI.getOperand(1).getImm(); assert(KillVal == 0 || KillVal == -1); @@ -275,14 +284,17 @@ void SIInsertSkips::kill(MachineInstr &MI) { assert(Imm == 0 || Imm == -1); if (Imm == KillVal) - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) + BuildMI(MBB, &MI, DL, TII->get(ST.isWave32() ? AMDGPU::S_MOV_B32 + : AMDGPU::S_MOV_B64), Exec) .addImm(0); break; } unsigned Opcode = KillVal ? AMDGPU::S_ANDN2_B64 : AMDGPU::S_AND_B64; - BuildMI(MBB, &MI, DL, TII->get(Opcode), AMDGPU::EXEC) - .addReg(AMDGPU::EXEC) + if (ST.isWave32()) + Opcode = KillVal ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_AND_B32; + BuildMI(MBB, &MI, DL, TII->get(Opcode), Exec) + .addReg(Exec) .add(Op); break; } @@ -331,9 +343,11 @@ bool SIInsertSkips::optimizeVccBranch(MachineInstr &MI) const { // S_CBRANCH_EXEC[N]Z bool Changed = false; MachineBasicBlock &MBB = *MI.getParent(); - const unsigned CondReg = AMDGPU::VCC; - const unsigned ExecReg = AMDGPU::EXEC; - const unsigned And = AMDGPU::S_AND_B64; + const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>(); + const bool IsWave32 = ST.isWave32(); + const unsigned CondReg = TRI->getVCC(); + const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC; + const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64; MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(), E = MBB.rend(); |