diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000 |
commit | e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (patch) | |
tree | 599ab169a01f1c86eda9adc774edaedde2f2db5b /lib/Target/AMDGPU/SIPeepholeSDWA.cpp | |
parent | 1a56a5ead7a2e84bee8240f5f6b033b5f1707154 (diff) |
Diffstat (limited to 'lib/Target/AMDGPU/SIPeepholeSDWA.cpp')
-rw-r--r-- | lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 36 |
1 files changed, 24 insertions, 12 deletions
```diff
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 2d43d5d05ef6..2d71abc0612a 100644
--- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -1,9 +1,8 @@
 //===- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions ---===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -348,8 +347,8 @@ uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
   if (Abs || Neg) {
     assert(!Sext &&
            "Float and integer src modifiers can't be set simulteniously");
-    Mods |= Abs ? SISrcMods::ABS : 0;
-    Mods ^= Neg ? SISrcMods::NEG : 0;
+    Mods |= Abs ? SISrcMods::ABS : 0u;
+    Mods ^= Neg ? SISrcMods::NEG : 0u;
   } else if (Sext) {
     Mods |= SISrcMods::SEXT;
   }
@@ -419,7 +418,9 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
   }
   assert(Src && Src->isReg());
 
-  if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
+  if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
+       MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
+       MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
        MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
       !isSameReg(*Src, *getReplacedOperand())) {
     // In case of v_mac_f16/32_sdwa this pass can try to apply src operand to
@@ -461,7 +462,9 @@ MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) {
 bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
   // Replace vdst operand in MI with target operand. Set dst_sel and dst_unused
 
-  if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
+  if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
+       MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
+       MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
        MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
       getDstSel() != AMDGPU::SDWA::DWORD) {
     // v_mac_f16/32_sdwa allow dst_sel to be equal only to DWORD
@@ -951,7 +954,8 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
   if (TII->isVOPC(Opc)) {
     if (!ST.hasSDWASdst()) {
       const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
-      if (SDst && SDst->getReg() != AMDGPU::VCC)
+      if (SDst && (SDst->getReg() != AMDGPU::VCC &&
+                   SDst->getReg() != AMDGPU::VCC_LO))
         return false;
     }
 
@@ -965,10 +969,16 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
       return false;
   }
 
-  if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_MAC_F16_e32 ||
+  if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_FMAC_F16_e32 ||
+                           Opc == AMDGPU::V_FMAC_F32_e32 ||
+                           Opc == AMDGPU::V_MAC_F16_e32 ||
                            Opc == AMDGPU::V_MAC_F32_e32))
     return false;
 
+  // Check if target supports this SDWA opcode
+  if (TII->pseudoToMCOpcode(Opc) == -1)
+    return false;
+
   // FIXME: has SDWA but require handling of implicit VCC use
   if (Opc == AMDGPU::V_CNDMASK_B32_e32)
     return false;
@@ -1010,7 +1020,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
     SDWAInst.add(*Dst);
   } else {
     assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
-    SDWAInst.addReg(AMDGPU::VCC, RegState::Define);
+    SDWAInst.addReg(TRI->getVCC(), RegState::Define);
   }
 
   // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
@@ -1039,7 +1049,9 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
     SDWAInst.add(*Src1);
   }
 
-  if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
+  if (SDWAOpcode == AMDGPU::V_FMAC_F16_sdwa ||
+      SDWAOpcode == AMDGPU::V_FMAC_F32_sdwa ||
+      SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
       SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
     // v_mac_f16/32 has additional src2 operand tied to vdst
     MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
```