summaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/CodeGen/MIRCanonicalizerPass.cpp')
-rw-r--r--llvm/lib/CodeGen/MIRCanonicalizerPass.cpp485
1 files changed, 485 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
new file mode 100644
index 000000000000..c9bb5461aa3c
--- /dev/null
+++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -0,0 +1,485 @@
+//===------------ MIRCanonicalizerPass.cpp - MIR Canonicalizer ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The purpose of this pass is to employ a canonical code transformation so
+// that code compiled with slightly different IR passes can be diffed more
+// effectively than otherwise. This is done by renaming vregs in a given
+// LiveRange in a canonical way. This pass also does a pseudo-scheduling to
+// move defs closer to their use in order to reduce diffs caused by slightly
+// different schedules.
+//
+// Basic Usage:
+//
+// llc -o - -run-pass mir-canonicalizer example.mir
+//
+// Reorders instructions canonically.
+// Renames virtual register operands canonically.
+// Strips certain MIR artifacts (optionally).
+//
+//===----------------------------------------------------------------------===//
+
+#include "MIRVRegNamerUtils.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <queue>
+
+using namespace llvm;
+
+namespace llvm {
+// Reference to the pass ID of the MIR canonicalizer; the definition later in
+// this file binds it to MIRCanonicalizer::ID.
+extern char &MIRCanonicalizerID;
+} // namespace llvm
+
+// Debug category name used by the LLVM_DEBUG statements in this file.
+#define DEBUG_TYPE "mir-canonicalizer"
+
+// Hidden option -canon-nth-function=N: canonicalize only the function whose
+// running index equals N. The default of ~0u leaves the filter switched off.
+static cl::opt<unsigned>
+ CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
+ cl::value_desc("N"),
+ cl::desc("Function number to canonicalize."));
+
+// Hidden option -canon-nth-basicblock=N: canonicalize only the basic block
+// whose running index equals N. The default of ~0u leaves the filter switched
+// off.
+static cl::opt<unsigned> CanonicalizeBasicBlockNumber(
+ "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"),
+ cl::desc("BasicBlock number to canonicalize."));
+
+namespace {
+
+/// Machine function pass that canonicalizes MIR: see the file header for the
+/// transformations applied. The work itself is done in runOnMachineFunction,
+/// which is defined out of line further down.
+class MIRCanonicalizer : public MachineFunctionPass {
+public:
+ /// Pass identification; its address is handed to the base class.
+ static char ID;
+ MIRCanonicalizer() : MachineFunctionPass(ID) {}
+
+ /// Human-readable description of the pass.
+ StringRef getPassName() const override {
+ return "Rename register operands in a canonical ordering.";
+ }
+
+ /// Declares that the pass preserves the CFG, then defers to the base class
+ /// for the remaining analysis usage.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// Canonicalizes \p MF; returns true if the function was modified.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end anonymous namespace
+
+// Storage for the static pass ID declared inside MIRCanonicalizer.
+char MIRCanonicalizer::ID;
+
+// Publishes the pass ID through the llvm namespace, matching the extern
+// declaration near the top of this file.
+char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
+
+// Pass registration under the name "mir-canonicalizer". Nothing is listed
+// between the BEGIN and END macros.
+INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
+ "Rename Register Operands Canonically", false, false)
+
+INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
+ "Rename Register Operands Canonically", false, false)
+
+/// Returns the basic blocks of \p MF in the order produced by a reverse
+/// post-order traversal that starts at the function's first block. The result
+/// is empty when the function contains no blocks.
+static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
+  std::vector<MachineBasicBlock *> Ordered;
+
+  if (!MF.empty()) {
+    ReversePostOrderTraversal<MachineBasicBlock *> Traversal(&*MF.begin());
+    Ordered.assign(Traversal.begin(), Traversal.end());
+  }
+
+  return Ordered;
+}
+
+/// Moves every instruction in \p instructions to the position reported by
+/// \p getPos, visiting the instructions in lexicographic order of their
+/// printed form.
+///
+/// The sort key of an instruction is the text produced by its print() method,
+/// starting at the first '=' when there is one, so the text in front of the
+/// assignment does not take part in the comparison. When the text contains no
+/// '=' the whole string is the key. Instructions with equal keys keep the
+/// relative order they had in \p instructions, so the outcome does not depend
+/// on how the sort implementation happens to treat ties.
+///
+/// \p getPos is evaluated again for every splice. Returns true if at least one
+/// instruction was spliced.
+static bool
+rescheduleLexographically(std::vector<MachineInstr *> instructions,
+                          MachineBasicBlock *MBB,
+                          std::function<MachineBasicBlock::iterator()> getPos) {
+
+  using StringInstrPair = std::pair<std::string, MachineInstr *>;
+  std::vector<StringInstrPair> StringInstrMap;
+  StringInstrMap.reserve(instructions.size());
+
+  for (auto *II : instructions) {
+    std::string S;
+    raw_string_ostream OS(S);
+    II->print(OS);
+    OS.flush();
+
+    // Trim the assignment, or start from the beginning in the case of a store.
+    const size_t i = S.find('=');
+    StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
+  }
+
+  // Only the text takes part in the comparison, so textually identical
+  // instructions compare equal. A stable sort keeps them in their incoming
+  // order instead of leaving their relative placement unspecified.
+  llvm::stable_sort(StringInstrMap,
+                    [](const StringInstrPair &a, const StringInstrPair &b) {
+                      return a.first < b.first;
+                    });
+
+  bool Changed = false;
+  for (auto &II : StringInstrMap) {
+
+    LLVM_DEBUG({
+      dbgs() << "Splicing ";
+      II.second->dump();
+      dbgs() << " right before: ";
+      getPos()->dump();
+    });
+
+    Changed = true;
+    MBB->splice(getPos(), MBB, II.second);
+  }
+
+  return Changed;
+}
+
+// Reorders the instructions of MBB into a canonical schedule and returns true
+// if anything was moved.
+//
+// Instructions that define a virtual register in operand 0 and do not load or
+// store are handled in one of two ways:
+//  * "pseudo-idempotent" instructions (every remaining operand is an immediate
+//    or a physical register that is not recorded in PhysRegDefs) are collected
+//    and finally moved to the top of the block in lexicographic order;
+//  * every other such instruction is spliced right before the closest
+//    following use of its result inside the same block, if there is one.
+// Afterwards the defs that were placed in front of a recorded use are
+// re-sorted lexicographically.
+//
+// PseudoIdempotentInstCount is an output: it receives the number of
+// pseudo-idempotent instructions found.
+static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
+ MachineBasicBlock *MBB) {
+
+ bool Changed = false;
+
+ // Calculates the distance of MI from the beginning of its parent BB.
+ // Yields ~0U if MI is not encountered while walking its parent.
+ auto getInstrIdx = [](const MachineInstr &MI) {
+ unsigned i = 0;
+ for (auto &CurMI : *MI.getParent()) {
+ if (&CurMI == &MI)
+ return i;
+ i++;
+ }
+ return ~0U;
+ };
+
+ // Pre-Populate vector of instructions to reschedule so that we don't
+ // clobber the iterator.
+ std::vector<MachineInstr *> Instructions;
+ for (auto &MI : *MBB) {
+ Instructions.push_back(&MI);
+ }
+
+ // MultiUsers maps a use instruction to the defs that were spliced in front
+ // of it. MultiUserLookup lists the candidate use instructions under a
+ // running index so that they can be revisited in the order they were found.
+ std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
+ std::map<unsigned, MachineInstr *> MultiUserLookup;
+ unsigned UseToBringDefCloserToCount = 0;
+ std::vector<MachineInstr *> PseudoIdempotentInstructions;
+ std::vector<unsigned> PhysRegDefs;
+ // Record the non-virtual registers that appear as a def operand. Note that
+ // the scan starts at operand index 1, so operand 0 is not inspected here.
+ for (auto *II : Instructions) {
+ for (unsigned i = 1; i < II->getNumOperands(); i++) {
+ MachineOperand &MO = II->getOperand(i);
+ if (!MO.isReg())
+ continue;
+
+ if (Register::isVirtualRegister(MO.getReg()))
+ continue;
+
+ if (!MO.isDef())
+ continue;
+
+ PhysRegDefs.push_back(MO.getReg());
+ }
+ }
+
+ for (auto *II : Instructions) {
+ // Only instructions that have operands, do not touch memory and define a
+ // virtual register in operand 0 are candidates for rescheduling.
+ if (II->getNumOperands() == 0)
+ continue;
+ if (II->mayLoadOrStore())
+ continue;
+
+ MachineOperand &MO = II->getOperand(0);
+ if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
+ continue;
+ if (!MO.isDef())
+ continue;
+
+ // The instruction stays pseudo-idempotent as long as each operand after
+ // the first is an immediate or a non-virtual register that is absent from
+ // PhysRegDefs; any other operand disqualifies it.
+ bool IsPseudoIdempotent = true;
+ for (unsigned i = 1; i < II->getNumOperands(); i++) {
+
+ if (II->getOperand(i).isImm()) {
+ continue;
+ }
+
+ if (II->getOperand(i).isReg()) {
+ if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
+ if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) ==
+ PhysRegDefs.end()) {
+ continue;
+ }
+ }
+
+ IsPseudoIdempotent = false;
+ break;
+ }
+
+ if (IsPseudoIdempotent) {
+ PseudoIdempotentInstructions.push_back(II);
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
+
+ // Look for the non-debug use of the defined register that follows the def
+ // most closely within the same block.
+ MachineInstr *Def = II;
+ unsigned Distance = ~0U;
+ MachineInstr *UseToBringDefCloserTo = nullptr;
+ MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
+ for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
+ MachineInstr *UseInst = UO.getParent();
+
+ // Delta is computed before the checks below; it is only consulted for
+ // uses in the same block that come after the def.
+ const unsigned DefLoc = getInstrIdx(*Def);
+ const unsigned UseLoc = getInstrIdx(*UseInst);
+ const unsigned Delta = (UseLoc - DefLoc);
+
+ if (UseInst->getParent() != Def->getParent())
+ continue;
+ if (DefLoc >= UseLoc)
+ continue;
+
+ // Each time a closer use is found it is also appended to
+ // MultiUserLookup, so earlier, farther candidates remain recorded.
+ if (Delta < Distance) {
+ Distance = Delta;
+ UseToBringDefCloserTo = UseInst;
+ MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
+ }
+ }
+
+ // Locate the def and the chosen use as iterators into the block.
+ const auto BBE = MBB->instr_end();
+ MachineBasicBlock::iterator DefI = BBE;
+ MachineBasicBlock::iterator UseI = BBE;
+
+ for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
+
+ if (DefI != BBE && UseI != BBE)
+ break;
+
+ if (&*BBI == Def) {
+ DefI = BBI;
+ continue;
+ }
+
+ if (&*BBI == UseToBringDefCloserTo) {
+ UseI = BBI;
+ continue;
+ }
+ }
+
+ // Nothing to move when no suitable use was found.
+ if (DefI == BBE || UseI == BBE)
+ continue;
+
+ LLVM_DEBUG({
+ dbgs() << "Splicing ";
+ DefI->dump();
+ dbgs() << " right before: ";
+ UseI->dump();
+ });
+
+ MultiUsers[UseToBringDefCloserTo].push_back(Def);
+ Changed = true;
+ MBB->splice(UseI, MBB, DefI);
+ }
+
+ // Sort the defs for users of multiple defs lexicographically.
+ for (const auto &E : MultiUserLookup) {
+
+ auto UseI =
+ std::find_if(MBB->instr_begin(), MBB->instr_end(),
+ [&](MachineInstr &MI) -> bool { return &MI == E.second; });
+
+ if (UseI == MBB->instr_end())
+ continue;
+
+ // The insertion point is fixed at the use for the whole batch. Indexing
+ // MultiUsers with a use that received no defs yields an empty list.
+ LLVM_DEBUG(
+ dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
+ Changed |= rescheduleLexographically(
+ MultiUsers[E.second], MBB,
+ [&]() -> MachineBasicBlock::iterator { return UseI; });
+ }
+
+ // Report the number of pseudo-idempotent instructions to the caller and
+ // move them to the start of the block; the insertion point is re-read as
+ // MBB->begin() for each of them.
+ PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
+ LLVM_DEBUG(
+ dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
+ Changed |= rescheduleLexographically(
+ PseudoIdempotentInstructions, MBB,
+ [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
+
+ return Changed;
+}
+
+/// Folds COPY instructions of \p MBB whose source and destination are both
+/// virtual registers: every use of the destination is rewritten to the source
+/// and the COPY is erased.
+///
+/// A COPY is folded only when both registers already have a register class
+/// and the two classes are the same. Returns true if at least one COPY was
+/// removed.
+static bool propagateLocalCopies(MachineBasicBlock *MBB) {
+  bool Changed = false;
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+  // Collect the copies up front; they are erased from the block below.
+  std::vector<MachineInstr *> Copies;
+  for (MachineInstr &MI : MBB->instrs()) {
+    if (MI.isCopy())
+      Copies.push_back(&MI);
+  }
+
+  for (MachineInstr *MI : Copies) {
+
+    if (!MI->getOperand(0).isReg())
+      continue;
+    if (!MI->getOperand(1).isReg())
+      continue;
+
+    const Register Dst = MI->getOperand(0).getReg();
+    const Register Src = MI->getOperand(1).getReg();
+
+    if (!Register::isVirtualRegister(Dst))
+      continue;
+    if (!Register::isVirtualRegister(Src))
+      continue;
+
+    // Not folding COPY instructions if regbankselect has not set the RCs.
+    // Why are we only considering Register Classes? Because the verifier
+    // sometimes gets upset if the register classes don't match even if the
+    // types do. A future patch might add COPY folding for matching types in
+    // pre-registerbankselect code.
+    //
+    // Both registers are queried with getRegClassOrNull so that a source
+    // without an assigned register class is simply skipped instead of being
+    // passed to getRegClass, which expects a class to be present.
+    const auto *DstRC = MRI.getRegClassOrNull(Dst);
+    if (!DstRC)
+      continue;
+    if (DstRC != MRI.getRegClassOrNull(Src))
+      continue;
+
+    // Gather the use operands before rewriting any of them.
+    std::vector<MachineOperand *> Uses;
+    for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI)
+      Uses.push_back(&*UI);
+    for (auto *MO : Uses)
+      MO->setReg(Src);
+
+    Changed = true;
+    MI->eraseFromParent();
+  }
+
+  return Changed;
+}
+
+/// Clears the liveness flags on the register operands of every instruction in
+/// \p MBB: the kill flag on operands that are not defs and the dead flag on
+/// operands that are defs. Returns true if any flag was actually cleared.
+static bool doDefKillClear(MachineBasicBlock *MBB) {
+  bool ClearedAny = false;
+
+  for (MachineInstr &Inst : *MBB) {
+    for (MachineOperand &Op : Inst.operands()) {
+      if (!Op.isReg())
+        continue;
+
+      if (Op.isDef()) {
+        if (Op.isDead()) {
+          ClearedAny = true;
+          Op.setIsDead(false);
+        }
+      } else if (Op.isKill()) {
+        ClearedAny = true;
+        Op.setIsKill(false);
+      }
+    }
+  }
+
+  return ClearedAny;
+}
+
+/// Canonicalizes a single basic block and returns true if it was changed.
+///
+/// While the basic-block filter option is set, \p basicBlockNum is incremented
+/// on every call and only the block whose index matches the option is
+/// processed. A block whose name is already present in \p bbNames is skipped
+/// as a potential duplicate; otherwise the name is appended to \p bbNames.
+///
+/// The block then goes through copy propagation, canonical rescheduling and
+/// vreg renaming via \p NVC. The defs of the instructions that rescheduling
+/// counted as idempotent are renumbered from the top of the block, and finally
+/// the kill/dead flags are cleared.
+static bool runOnBasicBlock(MachineBasicBlock *MBB,
+                            std::vector<StringRef> &bbNames,
+                            unsigned &basicBlockNum, NamedVRegCursor &NVC) {
+
+  if (CanonicalizeBasicBlockNumber != ~0U) {
+    const unsigned ThisBlockIdx = basicBlockNum++;
+    if (CanonicalizeBasicBlockNumber != ThisBlockIdx)
+      return false;
+    LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName()
+                      << "\n";);
+  }
+
+  const bool AlreadySeen = llvm::is_contained(bbNames, MBB->getName());
+  if (AlreadySeen) {
+    LLVM_DEBUG({
+      dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName()
+             << "\n";
+    });
+    return false;
+  }
+
+  LLVM_DEBUG({
+    dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
+    dbgs() << "\n\n================================================\n\n";
+  });
+
+  MachineRegisterInfo &RegInfo = MBB->getParent()->getRegInfo();
+
+  bbNames.push_back(MBB->getName());
+  LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
+
+  // Step 1: fold copies between virtual registers.
+  LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
+             MBB->dump(););
+  bool Changed = propagateLocalCopies(MBB);
+  LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
+
+  // Step 2: reschedule; this also reports how many idempotent instructions
+  // were counted.
+  LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
+  unsigned HoistedCount = 0;
+  Changed |= rescheduleCanonically(HoistedCount, MBB);
+  LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
+
+  // Step 3: rename the virtual registers of the block.
+  Changed |= NVC.renameVRegs(MBB);
+
+  // Step 4: give the defs of the idempotent instructions fresh numbers,
+  // walking from the top of the block so that they are named in the order in
+  // which they were sorted. SkipVRegs becomes unnecessary once named vregs are
+  // used instead.
+  if (HoistedCount != 0)
+    NVC.skipVRegs();
+
+  unsigned Remaining = HoistedCount;
+  for (auto It = MBB->begin(), End = MBB->end(); Remaining != 0 && It != End;
+       ++It, --Remaining) {
+    Changed = true;
+    const Register OldReg = It->getOperand(0).getReg();
+    const auto NewReg = NVC.createVirtualRegister(OldReg);
+
+    // Gather the operands first and rewrite them in a second pass.
+    std::vector<MachineOperand *> Operands;
+    for (auto &Op : RegInfo.reg_operands(OldReg))
+      Operands.push_back(&Op);
+
+    for (MachineOperand *Op : Operands)
+      Op->setReg(NewReg);
+  }
+
+  // Step 5: drop kill/dead flags.
+  Changed |= doDefKillClear(MBB);
+
+  LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
+             dbgs() << "\n";);
+  LLVM_DEBUG(
+      dbgs() << "\n\n================================================\n\n");
+  return Changed;
+}
+
+/// Canonicalizes the basic blocks of \p MF one after another, in the order
+/// given by GetRPOList, sharing one NamedVRegCursor and one list of already
+/// seen block names across all of them.
+///
+/// While the function filter option is set, a counter that persists across
+/// invocations is incremented on every call and only the function whose index
+/// matches the option is processed. Returns true if any block was changed.
+bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
+
+  // Persists across calls; only advanced while the filter option is set.
+  static unsigned SeenFunctions = 0;
+  if (CanonicalizeFunctionNumber != ~0U) {
+    const unsigned ThisFunctionIdx = SeenFunctions++;
+    if (CanonicalizeFunctionNumber != ThisFunctionIdx)
+      return false;
+    LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
+                      << "\n";);
+  }
+
+  const std::vector<MachineBasicBlock *> Blocks = GetRPOList(MF);
+
+  LLVM_DEBUG({
+    dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n";
+    dbgs() << "\n\n================================================\n\n";
+    dbgs() << "Total Basic Blocks: " << Blocks.size() << "\n";
+    for (MachineBasicBlock *MBB : Blocks)
+      dbgs() << MBB->getName() << "\n";
+    dbgs() << "\n\n================================================\n\n";
+  });
+
+  // State shared by all blocks of this function.
+  std::vector<StringRef> SeenBlockNames;
+  unsigned BlockCounter = 0;
+  NamedVRegCursor Cursor(MF.getRegInfo());
+
+  bool Changed = false;
+  for (MachineBasicBlock *MBB : Blocks)
+    Changed |= runOnBasicBlock(MBB, SeenBlockNames, BlockCounter, Cursor);
+
+  return Changed;
+}