summary | refs | log | tree | commit | diff
path: root/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIPeepholeSDWA.cpp')
-rw-r--r-- lib/Target/AMDGPU/SIPeepholeSDWA.cpp 100
1 files changed, 74 insertions, 26 deletions
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index f4ddf1891683b..4ac23ef03cb32 100644
--- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -67,9 +67,9 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
void matchSDWAOperands(MachineFunction &MF);
- bool isConvertibleToSDWA(const MachineInstr &MI) const;
+ bool isConvertibleToSDWA(const MachineInstr &MI, const SISubtarget &ST) const;
bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
- void legalizeScalarOperands(MachineInstr &MI) const;
+ void legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const;
StringRef getPassName() const override { return "SI Peephole SDWA"; }
@@ -224,7 +224,7 @@ static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) {
static bool isSubregOf(const MachineOperand &SubReg,
const MachineOperand &SuperReg,
const TargetRegisterInfo *TRI) {
-
+
if (!SuperReg.isReg() || !SubReg.isReg())
return false;
@@ -557,7 +557,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
-
+
if (TRI->isPhysicalRegister(Src0->getReg()) ||
TRI->isPhysicalRegister(Dst->getReg()))
break;
@@ -590,7 +590,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
break;
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
-
+
if (TRI->isPhysicalRegister(Src1->getReg()) ||
TRI->isPhysicalRegister(Dst->getReg()))
break;
@@ -607,16 +607,38 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
}
}
-bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI) const {
+bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI,
+ const SISubtarget &ST) const {
// Check if this instruction has opcode that supports SDWA
- unsigned Opc = MI.getOpcode();
- if (AMDGPU::getSDWAOp(Opc) != -1)
- return true;
- int Opc32 = AMDGPU::getVOPe32(Opc);
- if (Opc32 != -1 && AMDGPU::getSDWAOp(Opc32) != -1)
- return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) &&
- !TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
- return false;
+ int Opc = MI.getOpcode();
+ if (AMDGPU::getSDWAOp(Opc) == -1)
+ Opc = AMDGPU::getVOPe32(Opc);
+
+ if (Opc == -1 || AMDGPU::getSDWAOp(Opc) == -1)
+ return false;
+
+ if (!ST.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
+ return false;
+
+ if (TII->isVOPC(Opc)) {
+ if (!ST.hasSDWASdst()) {
+ const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+ if (SDst && SDst->getReg() != AMDGPU::VCC)
+ return false;
+ }
+
+ if (!ST.hasSDWAClampVOPC() && TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
+ return false;
+
+ } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) {
+ return false;
+ }
+
+ if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_MAC_F16_e32 ||
+ Opc == AMDGPU::V_MAC_F32_e32))
+ return false;
+
+ return true;
}
bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
@@ -641,6 +663,11 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
if (Dst) {
assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
SDWAInst.add(*Dst);
+ } else {
+ Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+ assert(Dst &&
+ AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
+ SDWAInst.add(*Dst);
}
// Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
@@ -677,9 +704,23 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
SDWAInst.add(*Src2);
}
- // Initialize clamp.
+ // Copy clamp if present, initialize otherwise
assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1);
- SDWAInst.addImm(0);
+ MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
+ if (Clamp) {
+ SDWAInst.add(*Clamp);
+ } else {
+ SDWAInst.addImm(0);
+ }
+
+ // Copy omod if present, initialize otherwise if needed
+ MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
+ if (OMod) {
+ assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1);
+ SDWAInst.add(*OMod);
+ } else if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
+ SDWAInst.addImm(0);
+ }
// Initialize dst_sel and dst_unused if present
if (Dst) {
@@ -733,16 +774,25 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
}
// If an instruction was converted to SDWA it should not have immediates or SGPR
-// operands. Copy its scalar operands into VGPRs.
-void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI) const {
+// operands (allowed one SGPR on GFX9). Copy its scalar operands into VGPRs.
+void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const {
const MCInstrDesc &Desc = TII->get(MI.getOpcode());
- for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
- MachineOperand &Op = MI.getOperand(I);
+ unsigned ConstantBusCount = 0;
+ for (MachineOperand &Op: MI.explicit_uses()) {
if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg())))
continue;
+
+ unsigned I = MI.getOperandNo(&Op);
if (Desc.OpInfo[I].RegClass == -1 ||
!TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
continue;
+
+ if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() &&
+ TRI->isSGPRReg(*MRI, Op.getReg())) {
+ ++ConstantBusCount;
+ continue;
+ }
+
unsigned VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
TII->get(AMDGPU::V_MOV_B32_e32), VGPR);
@@ -758,22 +808,20 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI) const {
bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- if (!ST.hasSDWA() ||
- !AMDGPU::isVI(ST)) { // TODO: Add support for SDWA on gfx9
+ if (!ST.hasSDWA())
return false;
- }
MRI = &MF.getRegInfo();
TRI = ST.getRegisterInfo();
TII = ST.getInstrInfo();
-
+
// Find all SDWA operands in MF.
matchSDWAOperands(MF);
for (const auto &OperandPair : SDWAOperands) {
const auto &Operand = OperandPair.second;
MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
- if (PotentialMI && isConvertibleToSDWA(*PotentialMI)) {
+ if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST)) {
PotentialMatches[PotentialMI].push_back(Operand.get());
}
}
@@ -788,7 +836,7 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
bool Ret = !ConvertedInstructions.empty();
while (!ConvertedInstructions.empty())
- legalizeScalarOperands(*ConvertedInstructions.pop_back_val());
+ legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST);
return Ret;
}