summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIFixSGPRCopies.cpp')
-rw-r--r--lib/Target/AMDGPU/SIFixSGPRCopies.cpp185
1 files changed, 176 insertions, 9 deletions
diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index f9d258f44a622..b0f0bf04a8910 100644
--- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -81,6 +81,11 @@ using namespace llvm;
#define DEBUG_TYPE "si-fix-sgpr-copies"
+static cl::opt<bool> EnableM0Merge(
+ "amdgpu-enable-merge-m0",
+ cl::desc("Merge and hoist M0 initializations"),
+ cl::init(false));
+
namespace {
class SIFixSGPRCopies : public MachineFunctionPass {
@@ -108,7 +113,7 @@ public:
INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE,
"SI Fix SGPR copies", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE,
"SI Fix SGPR copies", false, false)
@@ -332,27 +337,186 @@ static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
return true;
}
-static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
- const TargetRegisterInfo *TRI) {
- DenseSet<MachineBasicBlock*> Visited;
+template <class UnaryPredicate>
+bool searchPredecessors(const MachineBasicBlock *MBB,
+ const MachineBasicBlock *CutOff,
+ UnaryPredicate Predicate) {
+
+ if (MBB == CutOff)
+ return false;
+
+ DenseSet<const MachineBasicBlock*> Visited;
SmallVector<MachineBasicBlock*, 4> Worklist(MBB->pred_begin(),
MBB->pred_end());
while (!Worklist.empty()) {
- MachineBasicBlock *mbb = Worklist.back();
- Worklist.pop_back();
+ MachineBasicBlock *MBB = Worklist.pop_back_val();
- if (!Visited.insert(mbb).second)
+ if (!Visited.insert(MBB).second)
continue;
- if (hasTerminatorThatModifiesExec(*mbb, *TRI))
+ if (MBB == CutOff)
+ continue;
+ if (Predicate(MBB))
return true;
- Worklist.insert(Worklist.end(), mbb->pred_begin(), mbb->pred_end());
+ Worklist.append(MBB->pred_begin(), MBB->pred_end());
}
return false;
}
+static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
+ const TargetRegisterInfo *TRI) {
+ return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
+ return hasTerminatorThatModifiesExec(*MBB, *TRI); });
+}
+
+// Checks if there is potential path From instruction To instruction.
+// If CutOff is specified and it sits in between of that path we ignore
+// a higher portion of the path and report it is not reachable.
+static bool isReachable(const MachineInstr *From,
+ const MachineInstr *To,
+ const MachineBasicBlock *CutOff,
+ MachineDominatorTree &MDT) {
+ // If either From block dominates To block or instructions are in the same
+ // block and From is higher.
+ if (MDT.dominates(From, To))
+ return true;
+
+ const MachineBasicBlock *MBBFrom = From->getParent();
+ const MachineBasicBlock *MBBTo = To->getParent();
+ if (MBBFrom == MBBTo)
+ return false;
+
+ // Instructions are in different blocks, do predecessor search.
+ // We should almost never get here since we do not usually produce M0 stores
+ // other than -1.
+ return searchPredecessors(MBBTo, CutOff, [MBBFrom]
+ (const MachineBasicBlock *MBB) { return MBB == MBBFrom; });
+}
+
+// Hoist and merge identical SGPR initializations into a common predecessor.
+// This is intended to combine M0 initializations, but can work with any
+// SGPR. A VGPR cannot be processed since we cannot guarantee vector
+// executioon.
+static bool hoistAndMergeSGPRInits(unsigned Reg,
+ const MachineRegisterInfo &MRI,
+ MachineDominatorTree &MDT) {
+ // List of inits by immediate value.
+ typedef std::map<unsigned, std::list<MachineInstr*>> InitListMap;
+ InitListMap Inits;
+ // List of clobbering instructions.
+ SmallVector<MachineInstr*, 8> Clobbers;
+ bool Changed = false;
+
+ for (auto &MI : MRI.def_instructions(Reg)) {
+ MachineOperand *Imm = nullptr;
+ for (auto &MO: MI.operands()) {
+ if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) ||
+ (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
+ Imm = nullptr;
+ break;
+ } else if (MO.isImm())
+ Imm = &MO;
+ }
+ if (Imm)
+ Inits[Imm->getImm()].push_front(&MI);
+ else
+ Clobbers.push_back(&MI);
+ }
+
+ for (auto &Init : Inits) {
+ auto &Defs = Init.second;
+
+ for (auto I1 = Defs.begin(), E = Defs.end(); I1 != E; ) {
+ MachineInstr *MI1 = *I1;
+
+ for (auto I2 = std::next(I1); I2 != E; ) {
+ MachineInstr *MI2 = *I2;
+
+ // Check any possible interference
+ auto intereferes = [&](MachineBasicBlock::iterator From,
+ MachineBasicBlock::iterator To) -> bool {
+
+ assert(MDT.dominates(&*To, &*From));
+
+ auto interferes = [&MDT, From, To](MachineInstr* &Clobber) -> bool {
+ const MachineBasicBlock *MBBFrom = From->getParent();
+ const MachineBasicBlock *MBBTo = To->getParent();
+ bool MayClobberFrom = isReachable(Clobber, &*From, MBBTo, MDT);
+ bool MayClobberTo = isReachable(Clobber, &*To, MBBTo, MDT);
+ if (!MayClobberFrom && !MayClobberTo)
+ return false;
+ if ((MayClobberFrom && !MayClobberTo) ||
+ (!MayClobberFrom && MayClobberTo))
+ return true;
+ // Both can clobber, this is not an interference only if both are
+ // dominated by Clobber and belong to the same block or if Clobber
+ // properly dominates To, given that To >> From, so it dominates
+ // both and located in a common dominator.
+ return !((MBBFrom == MBBTo &&
+ MDT.dominates(Clobber, &*From) &&
+ MDT.dominates(Clobber, &*To)) ||
+ MDT.properlyDominates(Clobber->getParent(), MBBTo));
+ };
+
+ return (any_of(Clobbers, interferes)) ||
+ (any_of(Inits, [&](InitListMap::value_type &C) {
+ return C.first != Init.first && any_of(C.second, interferes);
+ }));
+ };
+
+ if (MDT.dominates(MI1, MI2)) {
+ if (!intereferes(MI2, MI1)) {
+ DEBUG(dbgs() << "Erasing from BB#" << MI2->getParent()->getNumber()
+ << " " << *MI2);
+ MI2->eraseFromParent();
+ Defs.erase(I2++);
+ Changed = true;
+ continue;
+ }
+ } else if (MDT.dominates(MI2, MI1)) {
+ if (!intereferes(MI1, MI2)) {
+ DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber()
+ << " " << *MI1);
+ MI1->eraseFromParent();
+ Defs.erase(I1++);
+ Changed = true;
+ break;
+ }
+ } else {
+ auto *MBB = MDT.findNearestCommonDominator(MI1->getParent(),
+ MI2->getParent());
+ if (!MBB) {
+ ++I2;
+ continue;
+ }
+
+ MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
+ if (!intereferes(MI1, I) && !intereferes(MI2, I)) {
+ DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber()
+ << " " << *MI1 << "and moving from BB#"
+ << MI2->getParent()->getNumber() << " to BB#"
+ << I->getParent()->getNumber() << " " << *MI2);
+ I->getParent()->splice(I, MI2->getParent(), MI2);
+ MI1->eraseFromParent();
+ Defs.erase(I1++);
+ Changed = true;
+ break;
+ }
+ }
+ ++I2;
+ }
+ ++I1;
+ }
+ }
+
+ if (Changed)
+ MRI.clearKillFlags(Reg);
+
+ return Changed;
+}
+
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -485,5 +649,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
}
}
+ if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
+ hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT);
+
return true;
}