aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIPeepholeSDWA.cpp')
-rw-r--r--lib/Target/AMDGPU/SIPeepholeSDWA.cpp36
1 files changed, 24 insertions, 12 deletions
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 2d43d5d05ef6..2d71abc0612a 100644
--- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -1,9 +1,8 @@
//===- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -348,8 +347,8 @@ uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
if (Abs || Neg) {
assert(!Sext &&
"Float and integer src modifiers can't be set simulteniously");
- Mods |= Abs ? SISrcMods::ABS : 0;
- Mods ^= Neg ? SISrcMods::NEG : 0;
+ Mods |= Abs ? SISrcMods::ABS : 0u;
+ Mods ^= Neg ? SISrcMods::NEG : 0u;
} else if (Sext) {
Mods |= SISrcMods::SEXT;
}
@@ -419,7 +418,9 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
}
assert(Src && Src->isReg());
- if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
+ if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
+ MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
+ MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
!isSameReg(*Src, *getReplacedOperand())) {
// In case of v_mac_f16/32_sdwa this pass can try to apply src operand to
@@ -461,7 +462,9 @@ MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) {
bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
// Replace vdst operand in MI with target operand. Set dst_sel and dst_unused
- if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
+ if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
+ MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
+ MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
getDstSel() != AMDGPU::SDWA::DWORD) {
// v_mac_f16/32_sdwa allow dst_sel to be equal only to DWORD
@@ -951,7 +954,8 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
if (TII->isVOPC(Opc)) {
if (!ST.hasSDWASdst()) {
const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
- if (SDst && SDst->getReg() != AMDGPU::VCC)
+ if (SDst && (SDst->getReg() != AMDGPU::VCC &&
+ SDst->getReg() != AMDGPU::VCC_LO))
return false;
}
@@ -965,10 +969,16 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
return false;
}
- if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_MAC_F16_e32 ||
+ if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_FMAC_F16_e32 ||
+ Opc == AMDGPU::V_FMAC_F32_e32 ||
+ Opc == AMDGPU::V_MAC_F16_e32 ||
Opc == AMDGPU::V_MAC_F32_e32))
return false;
+ // Check if target supports this SDWA opcode
+ if (TII->pseudoToMCOpcode(Opc) == -1)
+ return false;
+
// FIXME: has SDWA but require handling of implicit VCC use
if (Opc == AMDGPU::V_CNDMASK_B32_e32)
return false;
@@ -1010,7 +1020,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
SDWAInst.add(*Dst);
} else {
assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
- SDWAInst.addReg(AMDGPU::VCC, RegState::Define);
+ SDWAInst.addReg(TRI->getVCC(), RegState::Define);
}
// Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
@@ -1039,7 +1049,9 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
SDWAInst.add(*Src1);
}
- if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
+ if (SDWAOpcode == AMDGPU::V_FMAC_F16_sdwa ||
+ SDWAOpcode == AMDGPU::V_FMAC_F32_sdwa ||
+ SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
// v_mac_f16/32 has additional src2 operand tied to vdst
MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);