aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp29
1 files changed, 20 insertions, 9 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
index 9f98f9ada802..6f82148854c4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -1,4 +1,4 @@
-//===-- GCNNSAReassign.cpp - Reassign registers in NSA unstructions -------===//
+//===-- GCNNSAReassign.cpp - Reassign registers in NSA instructions -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -8,7 +8,7 @@
//
/// \file
/// \brief Try to reassign registers on GFX10+ from non-sequential to sequential
-/// in NSA image instructions. Later SIShrinkInstructions pass will relace NSA
+/// in NSA image instructions. Later SIShrinkInstructions pass will replace NSA
/// with sequential versions where possible.
///
//===----------------------------------------------------------------------===//
@@ -16,10 +16,12 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
@@ -159,15 +161,23 @@ GCNNSAReassign::scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const {
GCNNSAReassign::NSA_Status
GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
- if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
+ if (!Info)
return NSA_Status::NOT_NSA;
+ switch (Info->MIMGEncoding) {
+ case AMDGPU::MIMGEncGfx10NSA:
+ case AMDGPU::MIMGEncGfx11NSA:
+ break;
+ default:
+ return NSA_Status::NOT_NSA;
+ }
+
int VAddr0Idx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
unsigned VgprBase = 0;
bool NSA = false;
- for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
+ for (unsigned I = 0; I < Info->VAddrOperands; ++I) {
const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
Register Reg = Op.getReg();
if (Reg.isPhysical() || !VRM->isAssignedReg(Reg))
@@ -179,15 +189,16 @@ GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
if (!PhysReg)
return NSA_Status::FIXED;
+ // TODO: address the below limitation to handle GFX11 BVH instructions
// Bail if address is not a VGPR32. That should be possible to extend the
// optimization to work with subregs of a wider register tuples, but the
// logic to find free registers will be much more complicated with much
// less chances for success. That seems reasonable to assume that in most
// cases a tuple is used because a vector variable contains different
- // parts of an address and it is either already consequitive or cannot
+ // parts of an address and it is either already consecutive or cannot
// be reassigned if not. If needed it is better to rely on register
// coalescer to process such address tuples.
- if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg())
+ if (TRI->getRegSizeInBits(*MRI->getRegClass(Reg)) != 32 || Op.getSubReg())
return NSA_Status::FIXED;
// InlineSpiller does not call LRM::assign() after an LI split leaving
@@ -278,7 +289,7 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
SmallVector<LiveInterval *, 16> Intervals;
SmallVector<MCRegister, 16> OrigRegs;
SlotIndex MinInd, MaxInd;
- for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
+ for (unsigned I = 0; I < Info->VAddrOperands; ++I) {
const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
Register Reg = Op.getReg();
LiveInterval *LI = &LIS->getInterval(Reg);
@@ -331,11 +342,11 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
}
if (!Success) {
- for (unsigned I = 0; I < Info->VAddrDwords; ++I)
+ for (unsigned I = 0; I < Info->VAddrOperands; ++I)
if (VRM->hasPhys(Intervals[I]->reg()))
LRM->unassign(*Intervals[I]);
- for (unsigned I = 0; I < Info->VAddrDwords; ++I)
+ for (unsigned I = 0; I < Info->VAddrOperands; ++I)
LRM->assign(*Intervals[I], OrigRegs[I]);
continue;