Diffstat (limited to 'lib/Target/AMDGPU/SIShrinkInstructions.cpp')
-rw-r--r--  lib/Target/AMDGPU/SIShrinkInstructions.cpp  | 140
1 file changed, 125 insertions, 15 deletions
diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 6ad7dd0e3a7c..7ee178149c7a 100644
--- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -1,9 +1,8 @@
//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// The pass tries to use the 32-bit encoding for instructions when possible.
//===----------------------------------------------------------------------===//
@@ -39,6 +38,8 @@ class SIShrinkInstructions : public MachineFunctionPass {
public:
static char ID;
+ void shrinkMIMG(MachineInstr &MI);
+
public:
SIShrinkInstructions() : MachineFunctionPass(ID) {
}
@@ -94,6 +95,10 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
Src0.setSubReg(0);
Src0.ChangeToFrameIndex(MovSrc.getIndex());
ConstantFolded = true;
+ } else if (MovSrc.isGlobal()) {
+ Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
+ MovSrc.getTargetFlags());
+ ConstantFolded = true;
}
if (ConstantFolded) {
@@ -212,6 +217,96 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
}
}
+// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
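+// The GFX10 NSA (non-sequential address) form uses extra encoding dwords to
+// name each address VGPR individually; when the addresses already occupy
+// consecutive registers, the shorter contiguous encoding can be used instead.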
+void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) {
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
+ if (Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
+ return;
+
+ MachineFunction *MF = MI.getParent()->getParent();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ int VAddr0Idx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
+ unsigned NewAddrDwords = Info->VAddrDwords;
+ const TargetRegisterClass *RC;
+
+ if (Info->VAddrDwords == 2) {
+ RC = &AMDGPU::VReg_64RegClass;
+ } else if (Info->VAddrDwords == 3) {
+ RC = &AMDGPU::VReg_96RegClass;
+ } else if (Info->VAddrDwords == 4) {
+ RC = &AMDGPU::VReg_128RegClass;
+ } else if (Info->VAddrDwords <= 8) {
+ RC = &AMDGPU::VReg_256RegClass;
+ NewAddrDwords = 8;
+ } else {
+ RC = &AMDGPU::VReg_512RegClass;
+ NewAddrDwords = 16;
+ }
+
+ unsigned VgprBase = 0;
+ bool IsUndef = true;
+ bool IsKill = NewAddrDwords == Info->VAddrDwords;
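+  // The contiguous encoding takes a single register tuple, so every address
+  // operand must already live in consecutive VGPRs starting at vaddr0.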
+ for (unsigned i = 0; i < Info->VAddrDwords; ++i) {
+ const MachineOperand &Op = MI.getOperand(VAddr0Idx + i);
+ unsigned Vgpr = TRI.getHWRegIndex(Op.getReg());
+
+ if (i == 0) {
+ VgprBase = Vgpr;
+ } else if (VgprBase + i != Vgpr)
+ return;
+
+ if (!Op.isUndef())
+ IsUndef = false;
+ if (!Op.isKill())
+ IsKill = false;
+ }
+
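+  // The widened register tuple must not extend past the last VGPR (v255).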
+ if (VgprBase + NewAddrDwords > 256)
+ return;
+
+  // Further check for an implicit tied operand - one may be present if TFE
+  // or LWE is enabled.
+ int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
+ int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
+ unsigned TFEVal = MI.getOperand(TFEIdx).getImm();
+ unsigned LWEVal = MI.getOperand(LWEIdx).getImm();
+ int ToUntie = -1;
+ if (TFEVal || LWEVal) {
+ // TFE/LWE is enabled so we need to deal with an implicit tied operand
+ for (unsigned i = LWEIdx + 1, e = MI.getNumOperands(); i != e; ++i) {
+ if (MI.getOperand(i).isReg() && MI.getOperand(i).isTied() &&
+ MI.getOperand(i).isImplicit()) {
+ // This is the tied operand
+ assert(
+ ToUntie == -1 &&
+ "found more than one tied implicit operand when expecting only 1");
+ ToUntie = i;
+ MI.untieRegOperand(ToUntie);
+ }
+ }
+ }
+
+ unsigned NewOpcode =
+ AMDGPU::getMIMGOpcode(Info->BaseOpcode, AMDGPU::MIMGEncGfx10Default,
+ Info->VDataDwords, NewAddrDwords);
+ MI.setDesc(TII->get(NewOpcode));
+ MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase));
+ MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
+ MI.getOperand(VAddr0Idx).setIsKill(IsKill);
+
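+  // Drop the now-redundant vaddr operands; each removal shifts the remaining
+  // operands down, so the operand at VAddr0Idx + 1 is erased repeatedly.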
+ for (unsigned i = 1; i < Info->VAddrDwords; ++i)
+ MI.RemoveOperand(VAddr0Idx + 1);
+
+ if (ToUntie >= 0) {
+ MI.tieOperands(
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
+ ToUntie - (Info->VAddrDwords - 1));
+ }
+}
+
/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
@@ -277,7 +372,9 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
if (Opc == AMDGPU::S_BITSET0_B32 ||
Opc == AMDGPU::S_BITSET1_B32) {
Src0->ChangeToImmediate(NewImm);
- MI.RemoveOperand(2);
+ // Remove the immediate and add the tied input.
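+      // S_BITSET{0,1} only updates a single bit, so the previous value of the
+      // destination is also an input and has to be tied to the result.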
+ MI.getOperand(2).ChangeToRegister(Dest->getReg(), false);
+ MI.tieOperands(0, 2);
} else {
SrcImm->setImm(NewImm);
}
@@ -458,6 +555,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
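+  // In wave32 mode the condition/carry register is the 32-bit VCC_LO rather
+  // than the full 64-bit VCC register pair.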
+ unsigned VCCReg = ST.isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
std::vector<unsigned> I1Defs;
@@ -596,6 +694,14 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
continue;
}
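+      // NSA shrinking rewrites physical register operands, so only attempt it
+      // after register allocation (NoVRegs), and only on GFX10+ where the NSA
+      // encoding exists.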
+ if (TII->isMIMG(MI.getOpcode()) &&
+ ST.getGeneration() >= AMDGPUSubtarget::GFX10 &&
+ MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoVRegs)) {
+ shrinkMIMG(MI);
+ continue;
+ }
+
if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
continue;
@@ -625,10 +731,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// So, instead of forcing the instruction to write to VCC, we provide
// a hint to the register allocator to use VCC and then we will run
// this pass again after RA and shrink it if it outputs to VCC.
- MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
+ MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, VCCReg);
continue;
}
- if (DstReg != AMDGPU::VCC)
+ if (DstReg != VCCReg)
continue;
}
@@ -641,10 +747,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
continue;
unsigned SReg = Src2->getReg();
if (TargetRegisterInfo::isVirtualRegister(SReg)) {
- MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
+ MRI.setRegAllocationHint(SReg, 0, VCCReg);
continue;
}
- if (SReg != AMDGPU::VCC)
+ if (SReg != VCCReg)
continue;
}
@@ -657,20 +763,24 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
AMDGPU::OpName::src2);
if (SDst) {
- if (SDst->getReg() != AMDGPU::VCC) {
+ bool Next = false;
+
+ if (SDst->getReg() != VCCReg) {
if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
- MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
- continue;
+ MRI.setRegAllocationHint(SDst->getReg(), 0, VCCReg);
+ Next = true;
}
// All of the instructions with carry outs also have an SGPR input in
// src2.
- if (Src2 && Src2->getReg() != AMDGPU::VCC) {
+ if (Src2 && Src2->getReg() != VCCReg) {
if (TargetRegisterInfo::isVirtualRegister(Src2->getReg()))
- MRI.setRegAllocationHint(Src2->getReg(), 0, AMDGPU::VCC);
+ MRI.setRegAllocationHint(Src2->getReg(), 0, VCCReg);
+ Next = true;
+ }
+ if (Next)
continue;
- }
}
// We can shrink this instruction