diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 28 |
1 files changed, 25 insertions, 3 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp index b0e45dd3e3e3..8d33b8a1fd4b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp @@ -74,6 +74,15 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const { // We end up with this pattern sometimes after basic block placement. // It happens while combining a block which assigns -1 or 0 to a saved mask // and another block which consumes that saved mask and then a branch. + // + // While searching this also performs the following substitution: + // vcc = V_CMP + // vcc = S_AND exec, vcc + // S_CBRANCH_VCC[N]Z + // => + // vcc = V_CMP + // S_CBRANCH_VCC[N]Z + bool Changed = false; MachineBasicBlock &MBB = *MI.getParent(); const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>(); @@ -121,19 +130,32 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const { SReg = Op2.getReg(); auto M = std::next(A); bool ReadsSreg = false; + bool ModifiesExec = false; for (; M != E; ++M) { if (M->definesRegister(SReg, TRI)) break; if (M->modifiesRegister(SReg, TRI)) return Changed; ReadsSreg |= M->readsRegister(SReg, TRI); + ModifiesExec |= M->modifiesRegister(ExecReg, TRI); + } + if (M == E) + return Changed; + // If SReg is VCC and SReg definition is a VALU comparison. + // This means S_AND with EXEC is not required. + // Erase the S_AND and return. + // Note: isVOPC is used instead of isCompare to catch V_CMP_CLASS + if (A->getOpcode() == And && SReg == CondReg && !ModifiesExec && + TII->isVOPC(*M)) { + A->eraseFromParent(); + return true; } - if (M == E || !M->isMoveImmediate() || !M->getOperand(1).isImm() || + if (!M->isMoveImmediate() || !M->getOperand(1).isImm() || (M->getOperand(1).getImm() != -1 && M->getOperand(1).getImm() != 0)) return Changed; MaskValue = M->getOperand(1).getImm(); // First if sreg is only used in the AND instruction fold the immediate - // into into the AND. + // into the AND. if (!ReadsSreg && Op2.isKill()) { A->getOperand(2).ChangeToImmediate(MaskValue); M->eraseFromParent(); @@ -213,7 +235,7 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const { TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ : AMDGPU::S_CBRANCH_EXECNZ)); } - MI.RemoveOperand(MI.findRegisterUseOperandIdx(CondReg, false /*Kill*/, TRI)); + MI.removeOperand(MI.findRegisterUseOperandIdx(CondReg, false /*Kill*/, TRI)); MI.addImplicitDefUseOperands(*MBB.getParent()); return true; |