summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-12-18 20:10:56 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-12-18 20:10:56 +0000
commit044eb2f6afba375a914ac9d8024f8f5142bb912e (patch)
tree1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/Target/AMDGPU/SIFixSGPRCopies.cpp
parenteb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff)
Notes
Diffstat (limited to 'lib/Target/AMDGPU/SIFixSGPRCopies.cpp')
-rw-r--r--lib/Target/AMDGPU/SIFixSGPRCopies.cpp192
1 files changed, 131 insertions, 61 deletions
diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 0a795c99f94e5..8b155c2d27800 100644
--- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -1,4 +1,4 @@
-//===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
+//===- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,46 +14,46 @@
/// Register Class <vsrc> is the union of <vgpr> and <sgpr>
///
/// BB0:
-/// %vreg0 <sgpr> = SCALAR_INST
-/// %vreg1 <vsrc> = COPY %vreg0 <sgpr>
+/// %0 <sgpr> = SCALAR_INST
+/// %1 <vsrc> = COPY %0 <sgpr>
/// ...
/// BRANCH %cond BB1, BB2
/// BB1:
-/// %vreg2 <vgpr> = VECTOR_INST
-/// %vreg3 <vsrc> = COPY %vreg2 <vgpr>
+/// %2 <vgpr> = VECTOR_INST
+/// %3 <vsrc> = COPY %2 <vgpr>
/// BB2:
-/// %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vrsc>, <BB#1>
-/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
+/// %4 <vsrc> = PHI %1 <vsrc>, <%bb.0>, %3 <vsrc>, <%bb.1>
+/// %5 <vgpr> = VECTOR_INST %4 <vsrc>
///
///
/// The coalescer will begin at BB0 and eliminate its copy, then the resulting
/// code will look like this:
///
/// BB0:
-/// %vreg0 <sgpr> = SCALAR_INST
+/// %0 <sgpr> = SCALAR_INST
/// ...
/// BRANCH %cond BB1, BB2
/// BB1:
-/// %vreg2 <vgpr> = VECTOR_INST
-/// %vreg3 <vsrc> = COPY %vreg2 <vgpr>
+/// %2 <vgpr> = VECTOR_INST
+/// %3 <vsrc> = COPY %2 <vgpr>
/// BB2:
-/// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
-/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
+/// %4 <sgpr> = PHI %0 <sgpr>, <%bb.0>, %3 <vsrc>, <%bb.1>
+/// %5 <vgpr> = VECTOR_INST %4 <sgpr>
///
/// Now that the result of the PHI instruction is an SGPR, the register
-/// allocator is now forced to constrain the register class of %vreg3 to
+/// allocator is now forced to constrain the register class of %3 to
/// <sgpr> so we end up with final code like this:
///
/// BB0:
-/// %vreg0 <sgpr> = SCALAR_INST
+/// %0 <sgpr> = SCALAR_INST
/// ...
/// BRANCH %cond BB1, BB2
/// BB1:
-/// %vreg2 <vgpr> = VECTOR_INST
-/// %vreg3 <sgpr> = COPY %vreg2 <vgpr>
+/// %2 <vgpr> = VECTOR_INST
+/// %3 <sgpr> = COPY %2 <vgpr>
/// BB2:
-/// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
-/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
+/// %4 <sgpr> = PHI %0 <sgpr>, <%bb.0>, %3 <sgpr>, <%bb.1>
+/// %5 <vgpr> = VECTOR_INST %4 <sgpr>
///
/// Now this code contains an illegal copy from a VGPR to an SGPR.
///
@@ -68,14 +68,34 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <list>
+#include <map>
+#include <tuple>
+#include <utility>
using namespace llvm;
@@ -89,13 +109,17 @@ static cl::opt<bool> EnableM0Merge(
namespace {
class SIFixSGPRCopies : public MachineFunctionPass {
-
MachineDominatorTree *MDT;
-
+ MachinePostDominatorTree *MPDT;
+ DenseMap<MachineBasicBlock *, SetVector<MachineBasicBlock*>> PDF;
+ void computePDF(MachineFunction * MF);
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void printPDF();
+#endif
public:
static char ID;
- SIFixSGPRCopies() : MachineFunctionPass(ID) { }
+ SIFixSGPRCopies() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -104,12 +128,14 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addPreserved<MachinePostDominatorTree>();
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-} // End anonymous namespace
+} // end anonymous namespace
INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE,
"SI Fix SGPR copies", false, false)
@@ -117,7 +143,6 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE,
"SI Fix SGPR copies", false, false)
-
char SIFixSGPRCopies::ID = 0;
char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;
@@ -287,7 +312,6 @@ static bool phiHasVGPROperands(const MachineInstr &PHI,
const MachineRegisterInfo &MRI,
const SIRegisterInfo *TRI,
const SIInstrInfo *TII) {
-
for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
unsigned Reg = PHI.getOperand(i).getReg();
if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
@@ -295,10 +319,10 @@ static bool phiHasVGPROperands(const MachineInstr &PHI,
}
return false;
}
+
static bool phiHasBreakDef(const MachineInstr &PHI,
const MachineRegisterInfo &MRI,
SmallSet<unsigned, 8> &Visited) {
-
for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
unsigned Reg = PHI.getOperand(i).getReg();
if (Visited.count(Reg))
@@ -337,6 +361,8 @@ static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
const SIInstrInfo *TII,
unsigned &SMovOp,
int64_t &Imm) {
+ if (Copy->getOpcode() != AMDGPU::COPY)
+ return false;
if (!MoveImm->isMoveImmediate())
return false;
@@ -368,13 +394,12 @@ template <class UnaryPredicate>
bool searchPredecessors(const MachineBasicBlock *MBB,
const MachineBasicBlock *CutOff,
UnaryPredicate Predicate) {
-
if (MBB == CutOff)
return false;
- DenseSet<const MachineBasicBlock*> Visited;
- SmallVector<MachineBasicBlock*, 4> Worklist(MBB->pred_begin(),
- MBB->pred_end());
+ DenseSet<const MachineBasicBlock *> Visited;
+ SmallVector<MachineBasicBlock *, 4> Worklist(MBB->pred_begin(),
+ MBB->pred_end());
while (!Worklist.empty()) {
MachineBasicBlock *MBB = Worklist.pop_back_val();
@@ -392,12 +417,6 @@ bool searchPredecessors(const MachineBasicBlock *MBB,
return false;
}
-static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
- const TargetRegisterInfo *TRI) {
- return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
- return hasTerminatorThatModifiesExec(*MBB, *TRI); });
-}
-
// Checks if there is potential path From instruction To instruction.
// If CutOff is specified and it sits in between of that path we ignore
// a higher portion of the path and report it is not reachable.
@@ -430,7 +449,7 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
const MachineRegisterInfo &MRI,
MachineDominatorTree &MDT) {
// List of inits by immediate value.
- typedef std::map<unsigned, std::list<MachineInstr*>> InitListMap;
+ using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
InitListMap Inits;
// List of clobbering instructions.
SmallVector<MachineInstr*, 8> Clobbers;
@@ -487,16 +506,18 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
MDT.properlyDominates(Clobber->getParent(), MBBTo));
};
- return (any_of(Clobbers, interferes)) ||
- (any_of(Inits, [&](InitListMap::value_type &C) {
- return C.first != Init.first && any_of(C.second, interferes);
+ return (llvm::any_of(Clobbers, interferes)) ||
+ (llvm::any_of(Inits, [&](InitListMap::value_type &C) {
+ return C.first != Init.first &&
+ llvm::any_of(C.second, interferes);
}));
};
if (MDT.dominates(MI1, MI2)) {
if (!intereferes(MI2, MI1)) {
- DEBUG(dbgs() << "Erasing from BB#" << MI2->getParent()->getNumber()
- << " " << *MI2);
+ DEBUG(dbgs() << "Erasing from "
+ << printMBBReference(*MI2->getParent()) << " "
+ << *MI2);
MI2->eraseFromParent();
Defs.erase(I2++);
Changed = true;
@@ -504,8 +525,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
}
} else if (MDT.dominates(MI2, MI1)) {
if (!intereferes(MI1, MI2)) {
- DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber()
- << " " << *MI1);
+ DEBUG(dbgs() << "Erasing from "
+ << printMBBReference(*MI1->getParent()) << " "
+ << *MI1);
MI1->eraseFromParent();
Defs.erase(I1++);
Changed = true;
@@ -521,10 +543,11 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
if (!intereferes(MI1, I) && !intereferes(MI2, I)) {
- DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber()
- << " " << *MI1 << "and moving from BB#"
- << MI2->getParent()->getNumber() << " to BB#"
- << I->getParent()->getNumber() << " " << *MI2);
+ DEBUG(dbgs() << "Erasing from "
+ << printMBBReference(*MI1->getParent()) << " " << *MI1
+ << "and moving from "
+ << printMBBReference(*MI2->getParent()) << " to "
+ << printMBBReference(*I->getParent()) << " " << *MI2);
I->getParent()->splice(I, MI2->getParent(), MI2);
MI1->eraseFromParent();
Defs.erase(I1++);
@@ -544,18 +567,52 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
return Changed;
}
+void SIFixSGPRCopies::computePDF(MachineFunction *MF) { // Fills PDF: for each block reached below, the set of branching blocks B recorded against it (named a post-dominance frontier by printPDF).
+ MachineFunction::iterator B = MF->begin();
+ MachineFunction::iterator E = MF->end();
+ for (; B != E; ++B) { // Visit every basic block of MF.
+ if (B->succ_size() > 1) { // Only blocks with more than one successor contribute entries.
+ for (auto S : B->successors()) {
+ MachineDomTreeNode *runner = MPDT->getNode(&*S); // Start the walk at the successor's node in the post-dominator tree.
+ MachineDomTreeNode *sentinel = MPDT->getNode(&*B)->getIDom(); // Stop at B's immediate (post-)dominator. NOTE(review): getNode(B) is dereferenced unchecked - confirm every block has a tree node.
+ while (runner && runner != sentinel) { // Climb the IDom chain until the sentinel or the tree root (null) is reached.
+ PDF[runner->getBlock()].insert(&*B); // Record B in the set of each block passed on the way up.
+ runner = runner->getIDom();
+ }
+ }
+ }
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void SIFixSGPRCopies::printPDF() { // Debug helper: writes the contents of PDF to dbgs(), one line per map entry.
+ dbgs() << "\n######## PostDominanceFrontiers set #########\n";
+ for (auto &I : PDF) { // I.first is the keyed block, I.second the set of blocks stored for it.
+ dbgs() << "PDF[ " << I.first->getNumber() << "] : "; // Blocks are identified by their getNumber() value.
+ for (auto &J : I.second) {
+ dbgs() << J->getNumber() << ' ';
+ }
+ dbgs() << '\n';
+ }
+ dbgs() << "\n##############################################\n";
+}
+#endif
+
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
MDT = &getAnalysis<MachineDominatorTree>();
+ MPDT = &getAnalysis<MachinePostDominatorTree>();
+ PDF.clear();
+ computePDF(&MF);
+ DEBUG(printPDF());
SmallVector<MachineInstr *, 16> Worklist;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
-
MachineBasicBlock &MBB = *BI;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
@@ -564,7 +621,9 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
switch (MI.getOpcode()) {
default:
continue;
- case AMDGPU::COPY: {
+ case AMDGPU::COPY:
+ case AMDGPU::WQM:
+ case AMDGPU::WWM: {
// If the destination register is a physical register there isn't really
// much we can do to fix this.
if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
@@ -602,14 +661,27 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
break;
- // We don't need to fix the PHI if the common dominator of the
- // two incoming blocks terminates with a uniform branch.
- if (MI.getNumExplicitOperands() == 5) {
- MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
- MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
-
- if (!predsHasDivergentTerminator(MBB0, TRI) &&
- !predsHasDivergentTerminator(MBB1, TRI)) {
+ // We don't need to fix the PHI if all the source blocks
+ // have no divergent control dependencies
+ bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
+ if (!HasVGPROperand) {
+ bool Uniform = true;
+ MachineBasicBlock * Join = MI.getParent();
+ for (auto &O : MI.explicit_operands()) {
+ if (O.isMBB()) {
+ MachineBasicBlock * Source = O.getMBB();
+ SetVector<MachineBasicBlock*> &SourcePDF = PDF[Source];
+ SetVector<MachineBasicBlock*> &JoinPDF = PDF[Join];
+ SetVector<MachineBasicBlock*> CDList;
+ for (auto &I : SourcePDF) {
+ if (!JoinPDF.count(I) || /* back edge */MDT->dominates(Join, I)) {
+ if (hasTerminatorThatModifiesExec(*I, *TRI))
+ Uniform = false;
+ }
+ }
+ }
+ }
+ if (Uniform) {
DEBUG(dbgs() << "Not fixing PHI for uniform branch: " << MI << '\n');
break;
}
@@ -649,14 +721,13 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
// is no chance for values to be over-written.
SmallSet<unsigned, 8> Visited;
- if (phiHasVGPROperands(MI, MRI, TRI, TII) ||
- !phiHasBreakDef(MI, MRI, Visited)) {
+ if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
DEBUG(dbgs() << "Fixing PHI: " << MI);
TII->moveToVALU(MI);
}
break;
}
- case AMDGPU::REG_SEQUENCE: {
+ case AMDGPU::REG_SEQUENCE:
if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
!hasVGPROperands(MI, TRI)) {
foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
@@ -667,7 +738,6 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
TII->moveToVALU(MI);
break;
- }
case AMDGPU::INSERT_SUBREG: {
const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
DstRC = MRI.getRegClass(MI.getOperand(0).getReg());