Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp | 29 |
1 file changed, 20 insertions, 9 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
index 9f98f9ada802..6f82148854c4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -1,4 +1,4 @@
-//===-- GCNNSAReassign.cpp - Reassign registers in NSA unstructions -------===//
+//===-- GCNNSAReassign.cpp - Reassign registers in NSA instructions -------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -8,7 +8,7 @@
 //
 /// \file
 /// \brief Try to reassign registers on GFX10+ from non-sequential to sequential
-/// in NSA image instructions. Later SIShrinkInstructions pass will relace NSA
+/// in NSA image instructions. Later SIShrinkInstructions pass will replace NSA
 /// with sequential versions where possible.
 ///
 //===----------------------------------------------------------------------===//
@@ -16,10 +16,12 @@
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/InitializePasses.h"
 
 using namespace llvm;
@@ -159,15 +161,23 @@ GCNNSAReassign::scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const {
 GCNNSAReassign::NSA_Status
 GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
-  if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
+  if (!Info)
     return NSA_Status::NOT_NSA;
 
+  switch (Info->MIMGEncoding) {
+  case AMDGPU::MIMGEncGfx10NSA:
+  case AMDGPU::MIMGEncGfx11NSA:
+    break;
+  default:
+    return NSA_Status::NOT_NSA;
+  }
+
   int VAddr0Idx =
       AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
 
   unsigned VgprBase = 0;
   bool NSA = false;
-  for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
+  for (unsigned I = 0; I < Info->VAddrOperands; ++I) {
     const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
     Register Reg = Op.getReg();
     if (Reg.isPhysical() || !VRM->isAssignedReg(Reg))
@@ -179,15 +189,16 @@ GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
     if (!PhysReg)
       return NSA_Status::FIXED;
 
+    // TODO: address the below limitation to handle GFX11 BVH instructions
     // Bail if address is not a VGPR32. That should be possible to extend the
     // optimization to work with subregs of a wider register tuples, but the
     // logic to find free registers will be much more complicated with much
     // less chances for success. That seems reasonable to assume that in most
     // cases a tuple is used because a vector variable contains different
-    // parts of an address and it is either already consequitive or cannot
+    // parts of an address and it is either already consecutive or cannot
     // be reassigned if not. If needed it is better to rely on register
     // coalescer to process such address tuples.
-    if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg())
+    if (TRI->getRegSizeInBits(*MRI->getRegClass(Reg)) != 32 || Op.getSubReg())
       return NSA_Status::FIXED;
 
     // InlineSpiller does not call LRM::assign() after an LI split leaving
@@ -278,7 +289,7 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
     SmallVector<LiveInterval *, 16> Intervals;
     SmallVector<MCRegister, 16> OrigRegs;
     SlotIndex MinInd, MaxInd;
-    for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
+    for (unsigned I = 0; I < Info->VAddrOperands; ++I) {
       const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
       Register Reg = Op.getReg();
       LiveInterval *LI = &LIS->getInterval(Reg);
@@ -331,11 +342,11 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
     }
 
     if (!Success) {
-      for (unsigned I = 0; I < Info->VAddrDwords; ++I)
+      for (unsigned I = 0; I < Info->VAddrOperands; ++I)
        if (VRM->hasPhys(Intervals[I]->reg()))
          LRM->unassign(*Intervals[I]);
 
-      for (unsigned I = 0; I < Info->VAddrDwords; ++I)
+      for (unsigned I = 0; I < Info->VAddrOperands; ++I)
        LRM->assign(*Intervals[I], OrigRegs[I]);
 
      continue;
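
Rough reading of the change (a minimal standalone sketch with hypothetical stand-in names, not the in-tree LLVM API): CheckNSA() now treats both the GFX10 and GFX11 NSA encodings as candidates, walks VAddrOperands rather than VAddrDwords, and accepts any 32-bit address register class instead of requiring AMDGPU::VGPR_32RegClass by pointer identity.

// sketch.cpp -- illustrative only; MIMGEncoding and the helpers below are
// invented stand-ins for the real AMDGPU enums and checks.
enum class MIMGEncoding { Gfx10Default, Gfx10NSA, Gfx11Default, Gfx11NSA };

// Mirrors the new encoding switch: both GFX10 and GFX11 NSA forms are handled.
bool isHandledNSAEncoding(MIMGEncoding Enc) {
  switch (Enc) {
  case MIMGEncoding::Gfx10NSA:
  case MIMGEncoding::Gfx11NSA:
    return true;
  default:
    return false;
  }
}

// Mirrors the relaxed per-operand test: any 32-bit register class without a
// subregister index is acceptable, not only VGPR_32 compared by identity.
bool isReassignableAddrOperand(unsigned RegSizeInBits, bool HasSubReg) {
  return RegSizeInBits == 32 && !HasSubReg;
}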