summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIFixSGPRCopies.cpp')
-rw-r--r--lib/Target/AMDGPU/SIFixSGPRCopies.cpp83
1 files changed, 57 insertions, 26 deletions
diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 809f5bab4693..624953963cf4 100644
--- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -1,9 +1,8 @@
//===- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -104,7 +103,7 @@ using namespace llvm;
static cl::opt<bool> EnableM0Merge(
"amdgpu-enable-merge-m0",
cl::desc("Merge and hoist M0 initializations"),
- cl::init(false));
+ cl::init(true));
namespace {
@@ -144,14 +143,15 @@ FunctionPass *llvm::createSIFixSGPRCopiesPass() {
return new SIFixSGPRCopies();
}
-static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
+static bool hasVectorOperands(const MachineInstr &MI,
+ const SIRegisterInfo *TRI) {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
if (!MI.getOperand(i).isReg() ||
!TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
continue;
- if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
+ if (TRI->hasVectorRegisters(MRI.getRegClass(MI.getOperand(i).getReg())))
return true;
}
return false;
@@ -184,14 +184,14 @@ static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
const TargetRegisterClass *DstRC,
const SIRegisterInfo &TRI) {
return SrcRC != &AMDGPU::VReg_1RegClass && TRI.isSGPRClass(DstRC) &&
- TRI.hasVGPRs(SrcRC);
+ TRI.hasVectorRegisters(SrcRC);
}
static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
const TargetRegisterClass *DstRC,
const SIRegisterInfo &TRI) {
return DstRC != &AMDGPU::VReg_1RegClass && TRI.isSGPRClass(SrcRC) &&
- TRI.hasVGPRs(DstRC);
+ TRI.hasVectorRegisters(DstRC);
}
static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
@@ -278,6 +278,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
// VGPRz = REG_SEQUENCE VGPRx, sub0
MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
+ bool IsAGPR = TRI->hasAGPRs(DstRC);
for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
unsigned SrcReg = MI.getOperand(I).getReg();
@@ -296,6 +297,17 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
TmpReg)
.add(MI.getOperand(I));
+ if (IsAGPR) {
+ const TargetRegisterClass *NewSrcRC = TRI->getEquivalentAGPRClass(SrcRC);
+ unsigned TmpAReg = MRI.createVirtualRegister(NewSrcRC);
+ unsigned Opc = NewSrcRC == &AMDGPU::AGPR_32RegClass ?
+ AMDGPU::V_ACCVGPR_WRITE_B32 : AMDGPU::COPY;
+ BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(Opc),
+ TmpAReg)
+ .addReg(TmpReg, RegState::Kill);
+ TmpReg = TmpAReg;
+ }
+
MI.getOperand(I).setReg(TmpReg);
}
@@ -440,18 +452,32 @@ static bool isReachable(const MachineInstr *From,
(const MachineBasicBlock *MBB) { return MBB == MBBFrom; });
}
+// Return the first non-prologue instruction in the block.
+static MachineBasicBlock::iterator
+getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII) {
+ MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
+ while (I != MBB->end() && TII->isBasicBlockPrologue(*I))
+ ++I;
+
+ return I;
+}
+
// Hoist and merge identical SGPR initializations into a common predecessor.
// This is intended to combine M0 initializations, but can work with any
// SGPR. A VGPR cannot be processed since we cannot guarantee vector
// executioon.
static bool hoistAndMergeSGPRInits(unsigned Reg,
const MachineRegisterInfo &MRI,
- MachineDominatorTree &MDT) {
+ MachineDominatorTree &MDT,
+ const TargetInstrInfo *TII) {
// List of inits by immediate value.
using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
InitListMap Inits;
// List of clobbering instructions.
SmallVector<MachineInstr*, 8> Clobbers;
+ // List of instructions marked for deletion.
+ SmallSet<MachineInstr*, 8> MergedInstrs;
+
bool Changed = false;
for (auto &MI : MRI.def_instructions(Reg)) {
@@ -480,8 +506,8 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
MachineInstr *MI2 = *I2;
// Check any possible interference
- auto intereferes = [&](MachineBasicBlock::iterator From,
- MachineBasicBlock::iterator To) -> bool {
+ auto interferes = [&](MachineBasicBlock::iterator From,
+ MachineBasicBlock::iterator To) -> bool {
assert(MDT.dominates(&*To, &*From));
@@ -513,23 +539,23 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
};
if (MDT.dominates(MI1, MI2)) {
- if (!intereferes(MI2, MI1)) {
+ if (!interferes(MI2, MI1)) {
LLVM_DEBUG(dbgs()
<< "Erasing from "
<< printMBBReference(*MI2->getParent()) << " " << *MI2);
- MI2->eraseFromParent();
- Defs.erase(I2++);
+ MergedInstrs.insert(MI2);
Changed = true;
+ ++I2;
continue;
}
} else if (MDT.dominates(MI2, MI1)) {
- if (!intereferes(MI1, MI2)) {
+ if (!interferes(MI1, MI2)) {
LLVM_DEBUG(dbgs()
<< "Erasing from "
<< printMBBReference(*MI1->getParent()) << " " << *MI1);
- MI1->eraseFromParent();
- Defs.erase(I1++);
+ MergedInstrs.insert(MI1);
Changed = true;
+ ++I1;
break;
}
} else {
@@ -540,8 +566,8 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
continue;
}
- MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
- if (!intereferes(MI1, I) && !intereferes(MI2, I)) {
+ MachineBasicBlock::iterator I = getFirstNonPrologue(MBB, TII);
+ if (!interferes(MI1, I) && !interferes(MI2, I)) {
LLVM_DEBUG(dbgs()
<< "Erasing from "
<< printMBBReference(*MI1->getParent()) << " " << *MI1
@@ -549,9 +575,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
<< printMBBReference(*MI2->getParent()) << " to "
<< printMBBReference(*I->getParent()) << " " << *MI2);
I->getParent()->splice(I, MI2->getParent(), MI2);
- MI1->eraseFromParent();
- Defs.erase(I1++);
+ MergedInstrs.insert(MI1);
Changed = true;
+ ++I1;
break;
}
}
@@ -561,6 +587,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
}
}
+ for (auto MI : MergedInstrs)
+ MI->removeFromParent();
+
if (Changed)
MRI.clearKillFlags(Reg);
@@ -679,11 +708,12 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
TII->moveToVALU(MI, MDT);
}
+
break;
}
case AMDGPU::REG_SEQUENCE:
- if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
- !hasVGPROperands(MI, TRI)) {
+ if (TRI->hasVectorRegisters(TII->getOpRegClass(MI, 0)) ||
+ !hasVectorOperands(MI, TRI)) {
foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
continue;
}
@@ -698,7 +728,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
if (TRI->isSGPRClass(DstRC) &&
- (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
+ (TRI->hasVectorRegisters(Src0RC) ||
+ TRI->hasVectorRegisters(Src1RC))) {
LLVM_DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
TII->moveToVALU(MI, MDT);
}
@@ -709,7 +740,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
}
if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
- hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT);
+ hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT, TII);
return true;
}