Diffstat (limited to 'lib/Target/AMDGPU/SIShrinkInstructions.cpp')
 lib/Target/AMDGPU/SIShrinkInstructions.cpp | 140
 1 file changed, 125 insertions(+), 15 deletions(-)
diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 6ad7dd0e3a7c..7ee178149c7a 100644
--- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -1,9 +1,8 @@
 //===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 /// The pass tries to use the 32-bit encoding for instructions when possible.
 //===----------------------------------------------------------------------===//
@@ -39,6 +38,8 @@ class SIShrinkInstructions : public MachineFunctionPass {
 public:
   static char ID;
 
+  void shrinkMIMG(MachineInstr &MI);
+
 public:
   SIShrinkInstructions() : MachineFunctionPass(ID) {
   }
@@ -94,6 +95,10 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
       Src0.setSubReg(0);
       Src0.ChangeToFrameIndex(MovSrc.getIndex());
       ConstantFolded = true;
+    } else if (MovSrc.isGlobal()) {
+      Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
+                      MovSrc.getTargetFlags());
+      ConstantFolded = true;
     }
 
     if (ConstantFolded) {
@@ -212,6 +217,96 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
   }
 }
 
+// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
+void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) {
+  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
+  if (Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
+    return;
+
+  MachineFunction *MF = MI.getParent()->getParent();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  int VAddr0Idx =
+      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
+  unsigned NewAddrDwords = Info->VAddrDwords;
+  const TargetRegisterClass *RC;
+
+  if (Info->VAddrDwords == 2) {
+    RC = &AMDGPU::VReg_64RegClass;
+  } else if (Info->VAddrDwords == 3) {
+    RC = &AMDGPU::VReg_96RegClass;
+  } else if (Info->VAddrDwords == 4) {
+    RC = &AMDGPU::VReg_128RegClass;
+  } else if (Info->VAddrDwords <= 8) {
+    RC = &AMDGPU::VReg_256RegClass;
+    NewAddrDwords = 8;
+  } else {
+    RC = &AMDGPU::VReg_512RegClass;
+    NewAddrDwords = 16;
+  }
+
+  unsigned VgprBase = 0;
+  bool IsUndef = true;
+  bool IsKill = NewAddrDwords == Info->VAddrDwords;
+  for (unsigned i = 0; i < Info->VAddrDwords; ++i) {
+    const MachineOperand &Op = MI.getOperand(VAddr0Idx + i);
+    unsigned Vgpr = TRI.getHWRegIndex(Op.getReg());
+
+    if (i == 0) {
+      VgprBase = Vgpr;
+    } else if (VgprBase + i != Vgpr)
+      return;
+
+    if (!Op.isUndef())
+      IsUndef = false;
+    if (!Op.isKill())
+      IsKill = false;
+  }
+
+  if (VgprBase + NewAddrDwords > 256)
+    return;
+
+  // Further check for implicit tied operands - this may be present if TFE is
+  // enabled
+  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
+  int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
+  unsigned TFEVal = MI.getOperand(TFEIdx).getImm();
+  unsigned LWEVal = MI.getOperand(LWEIdx).getImm();
+  int ToUntie = -1;
+  if (TFEVal || LWEVal) {
+    // TFE/LWE is enabled so we need to deal with an implicit tied operand
+    for (unsigned i = LWEIdx + 1, e = MI.getNumOperands(); i != e; ++i) {
+      if (MI.getOperand(i).isReg() && MI.getOperand(i).isTied() &&
+          MI.getOperand(i).isImplicit()) {
+        // This is the tied operand
+        assert(
+            ToUntie == -1 &&
+            "found more than one tied implicit operand when expecting only 1");
+        ToUntie = i;
+        MI.untieRegOperand(ToUntie);
+      }
+    }
+  }
+
+  unsigned NewOpcode =
+      AMDGPU::getMIMGOpcode(Info->BaseOpcode, AMDGPU::MIMGEncGfx10Default,
+                            Info->VDataDwords, NewAddrDwords);
+  MI.setDesc(TII->get(NewOpcode));
+  MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase));
+  MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
+  MI.getOperand(VAddr0Idx).setIsKill(IsKill);
+
+  for (unsigned i = 1; i < Info->VAddrDwords; ++i)
+    MI.RemoveOperand(VAddr0Idx + 1);
+
+  if (ToUntie >= 0) {
+    MI.tieOperands(
+        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
+        ToUntie - (Info->VAddrDwords - 1));
+  }
+}
+
 /// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
 /// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
 /// If the inverse of the immediate is legal, use ANDN2, ORN2 or
@@ -277,7 +372,9 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
     if (Opc == AMDGPU::S_BITSET0_B32 ||
         Opc == AMDGPU::S_BITSET1_B32) {
       Src0->ChangeToImmediate(NewImm);
-      MI.RemoveOperand(2);
+      // Remove the immediate and add the tied input.
+      MI.getOperand(2).ChangeToRegister(Dest->getReg(), false);
+      MI.tieOperands(0, 2);
     } else {
       SrcImm->setImm(NewImm);
     }
@@ -458,6 +555,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
+  unsigned VCCReg = ST.isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
 
   std::vector<unsigned> I1Defs;
 
@@ -596,6 +694,14 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
         continue;
       }
 
+      if (TII->isMIMG(MI.getOpcode()) &&
+          ST.getGeneration() >= AMDGPUSubtarget::GFX10 &&
+          MF.getProperties().hasProperty(
+              MachineFunctionProperties::Property::NoVRegs)) {
+        shrinkMIMG(MI);
+        continue;
+      }
+
       if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
         continue;
 
@@ -625,10 +731,10 @@
           // So, instead of forcing the instruction to write to VCC, we provide
           // a hint to the register allocator to use VCC and then we will run
           // this pass again after RA and shrink it if it outputs to VCC.
-          MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
+          MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, VCCReg);
           continue;
         }
-        if (DstReg != AMDGPU::VCC)
+        if (DstReg != VCCReg)
           continue;
       }
 
@@ -641,10 +747,10 @@
           continue;
         unsigned SReg = Src2->getReg();
         if (TargetRegisterInfo::isVirtualRegister(SReg)) {
-          MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
+          MRI.setRegAllocationHint(SReg, 0, VCCReg);
           continue;
         }
-        if (SReg != AMDGPU::VCC)
+        if (SReg != VCCReg)
          continue;
       }
 
@@ -657,20 +763,24 @@
       const MachineOperand *Src2 = TII->getNamedOperand(MI,
                                                         AMDGPU::OpName::src2);
 
       if (SDst) {
-        if (SDst->getReg() != AMDGPU::VCC) {
+        bool Next = false;
+
+        if (SDst->getReg() != VCCReg) {
           if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
-            MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
-          continue;
+            MRI.setRegAllocationHint(SDst->getReg(), 0, VCCReg);
+          Next = true;
         }
 
         // All of the instructions with carry outs also have an SGPR input in
         // src2.
-        if (Src2 && Src2->getReg() != AMDGPU::VCC) {
+        if (Src2 && Src2->getReg() != VCCReg) {
           if (TargetRegisterInfo::isVirtualRegister(Src2->getReg()))
-            MRI.setRegAllocationHint(Src2->getReg(), 0, AMDGPU::VCC);
+            MRI.setRegAllocationHint(Src2->getReg(), 0, VCCReg);
+          Next = true;
+        }
+        if (Next)
           continue;
-        }
       }
 
       // We can shrink this instruction
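
A note on the shrinkMIMG hunk above: GFX10's NSA (non-sequential address) encoding lets a MIMG instruction name each address VGPR individually, at the cost of a longer instruction encoding. The pass falls back to the compact non-NSA encoding only when the allocated address registers happen to form one contiguous run and the padded address operand still fits in the 256-register VGPR file. The eligibility test can be sketched outside the LLVM API as follows; this is a minimal standalone illustration, and paddedAddrDwords/canUseNonNSA are invented names, not part of the patch.

#include <cstdio>
#include <vector>

// Non-NSA MIMG addresses must occupy one of the fixed register classes the
// patch uses: VReg_64/96/128 for 2-4 dwords, then VReg_256 (8 dwords) and
// VReg_512 (16 dwords). This mirrors the NewAddrDwords padding in shrinkMIMG.
static unsigned paddedAddrDwords(unsigned VAddrDwords) {
  if (VAddrDwords <= 4)
    return VAddrDwords;
  return VAddrDwords <= 8 ? 8 : 16;
}

// True if v[Regs[0]], v[Regs[1]], ... form one contiguous run (any hole
// forces the NSA encoding, so the patch bails out) and the padded run still
// fits in v0..v255 - the same "VgprBase + NewAddrDwords > 256" bound that
// shrinkMIMG checks.
static bool canUseNonNSA(const std::vector<unsigned> &Regs) {
  if (Regs.empty())
    return false;
  unsigned Base = Regs[0];
  for (unsigned i = 1; i < Regs.size(); ++i)
    if (Regs[i] != Base + i)
      return false;
  return Base + paddedAddrDwords(Regs.size()) <= 256;
}

int main() {
  printf("%d\n", canUseNonNSA({4, 5, 6}));                 // 1: contiguous v[4:6]
  printf("%d\n", canUseNonNSA({4, 6, 7}));                 // 0: hole after v4
  printf("%d\n", canUseNonNSA({250, 251, 252, 253, 254})); // 0: pads past v255
}

The last case shows why padding matters: a contiguous 5-dword address must be rewritten as an 8-dword VReg_256 operand, so a run starting at v250 would spill past the register file and the instruction stays in NSA form. The patch's register-class chain starts at two dwords because a single-dword address needs no NSA encoding in the first place.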