diff options
Diffstat (limited to 'llvm/lib/CodeGen/MIRCanonicalizerPass.cpp')
-rw-r--r-- | llvm/lib/CodeGen/MIRCanonicalizerPass.cpp | 485 |
1 files changed, 485 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp new file mode 100644 index 000000000000..c9bb5461aa3c --- /dev/null +++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -0,0 +1,485 @@ +//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The purpose of this pass is to employ a canonical code transformation so +// that code compiled with slightly different IR passes can be diffed more +// effectively than otherwise. This is done by renaming vregs in a given +// LiveRange in a canonical way. This pass also does a pseudo-scheduling to +// move defs closer to their use in order to reduce diffs caused by slightly +// different schedules. +// +// Basic Usage: +// +// llc -o - -run-pass mir-canonicalizer example.mir +// +// Reorders instructions canonically. +// Renames virtual register operands canonically. +// Strips certain MIR artifacts (optionally). 
+// +//===----------------------------------------------------------------------===// + +#include "MIRVRegNamerUtils.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include <queue> + +using namespace llvm; + +namespace llvm { +extern char &MIRCanonicalizerID; +} // namespace llvm + +#define DEBUG_TYPE "mir-canonicalizer" + +static cl::opt<unsigned> + CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), + cl::value_desc("N"), + cl::desc("Function number to canonicalize.")); + +static cl::opt<unsigned> CanonicalizeBasicBlockNumber( + "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"), + cl::desc("BasicBlock number to canonicalize.")); + +namespace { + +class MIRCanonicalizer : public MachineFunctionPass { +public: + static char ID; + MIRCanonicalizer() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "Rename register operands in a canonical ordering."; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // end anonymous namespace + +char MIRCanonicalizer::ID; + +char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; + +INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", + "Rename Register Operands Canonically", false, false) + +INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer", + "Rename Register Operands Canonically", false, false) + +static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) { + if (MF.empty()) + return {}; + ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); + std::vector<MachineBasicBlock *> 
RPOList; + for (auto MBB : RPOT) { + RPOList.push_back(MBB); + } + + return RPOList; +} + +static bool +rescheduleLexographically(std::vector<MachineInstr *> instructions, + MachineBasicBlock *MBB, + std::function<MachineBasicBlock::iterator()> getPos) { + + bool Changed = false; + using StringInstrPair = std::pair<std::string, MachineInstr *>; + std::vector<StringInstrPair> StringInstrMap; + + for (auto *II : instructions) { + std::string S; + raw_string_ostream OS(S); + II->print(OS); + OS.flush(); + + // Trim the assignment, or start from the begining in the case of a store. + const size_t i = S.find("="); + StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II}); + } + + llvm::sort(StringInstrMap, + [](const StringInstrPair &a, const StringInstrPair &b) -> bool { + return (a.first < b.first); + }); + + for (auto &II : StringInstrMap) { + + LLVM_DEBUG({ + dbgs() << "Splicing "; + II.second->dump(); + dbgs() << " right before: "; + getPos()->dump(); + }); + + Changed = true; + MBB->splice(getPos(), MBB, II.second); + } + + return Changed; +} + +static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, + MachineBasicBlock *MBB) { + + bool Changed = false; + + // Calculates the distance of MI from the begining of its parent BB. + auto getInstrIdx = [](const MachineInstr &MI) { + unsigned i = 0; + for (auto &CurMI : *MI.getParent()) { + if (&CurMI == &MI) + return i; + i++; + } + return ~0U; + }; + + // Pre-Populate vector of instructions to reschedule so that we don't + // clobber the iterator. 
+ std::vector<MachineInstr *> Instructions; + for (auto &MI : *MBB) { + Instructions.push_back(&MI); + } + + std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers; + std::map<unsigned, MachineInstr *> MultiUserLookup; + unsigned UseToBringDefCloserToCount = 0; + std::vector<MachineInstr *> PseudoIdempotentInstructions; + std::vector<unsigned> PhysRegDefs; + for (auto *II : Instructions) { + for (unsigned i = 1; i < II->getNumOperands(); i++) { + MachineOperand &MO = II->getOperand(i); + if (!MO.isReg()) + continue; + + if (Register::isVirtualRegister(MO.getReg())) + continue; + + if (!MO.isDef()) + continue; + + PhysRegDefs.push_back(MO.getReg()); + } + } + + for (auto *II : Instructions) { + if (II->getNumOperands() == 0) + continue; + if (II->mayLoadOrStore()) + continue; + + MachineOperand &MO = II->getOperand(0); + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) + continue; + if (!MO.isDef()) + continue; + + bool IsPseudoIdempotent = true; + for (unsigned i = 1; i < II->getNumOperands(); i++) { + + if (II->getOperand(i).isImm()) { + continue; + } + + if (II->getOperand(i).isReg()) { + if (!Register::isVirtualRegister(II->getOperand(i).getReg())) + if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) == + PhysRegDefs.end()) { + continue; + } + } + + IsPseudoIdempotent = false; + break; + } + + if (IsPseudoIdempotent) { + PseudoIdempotentInstructions.push_back(II); + continue; + } + + LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); + + MachineInstr *Def = II; + unsigned Distance = ~0U; + MachineInstr *UseToBringDefCloserTo = nullptr; + MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); + for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) { + MachineInstr *UseInst = UO.getParent(); + + const unsigned DefLoc = getInstrIdx(*Def); + const unsigned UseLoc = getInstrIdx(*UseInst); + const unsigned Delta = (UseLoc - DefLoc); + + if (UseInst->getParent() != Def->getParent()) + continue; + if (DefLoc >= 
UseLoc) + continue; + + if (Delta < Distance) { + Distance = Delta; + UseToBringDefCloserTo = UseInst; + MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo; + } + } + + const auto BBE = MBB->instr_end(); + MachineBasicBlock::iterator DefI = BBE; + MachineBasicBlock::iterator UseI = BBE; + + for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) { + + if (DefI != BBE && UseI != BBE) + break; + + if (&*BBI == Def) { + DefI = BBI; + continue; + } + + if (&*BBI == UseToBringDefCloserTo) { + UseI = BBI; + continue; + } + } + + if (DefI == BBE || UseI == BBE) + continue; + + LLVM_DEBUG({ + dbgs() << "Splicing "; + DefI->dump(); + dbgs() << " right before: "; + UseI->dump(); + }); + + MultiUsers[UseToBringDefCloserTo].push_back(Def); + Changed = true; + MBB->splice(UseI, MBB, DefI); + } + + // Sort the defs for users of multiple defs lexographically. + for (const auto &E : MultiUserLookup) { + + auto UseI = + std::find_if(MBB->instr_begin(), MBB->instr_end(), + [&](MachineInstr &MI) -> bool { return &MI == E.second; }); + + if (UseI == MBB->instr_end()) + continue; + + LLVM_DEBUG( + dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";); + Changed |= rescheduleLexographically( + MultiUsers[E.second], MBB, + [&]() -> MachineBasicBlock::iterator { return UseI; }); + } + + PseudoIdempotentInstCount = PseudoIdempotentInstructions.size(); + LLVM_DEBUG( + dbgs() << "Rescheduling Idempotent Instructions Lexographically.";); + Changed |= rescheduleLexographically( + PseudoIdempotentInstructions, MBB, + [&]() -> MachineBasicBlock::iterator { return MBB->begin(); }); + + return Changed; +} + +static bool propagateLocalCopies(MachineBasicBlock *MBB) { + bool Changed = false; + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + std::vector<MachineInstr *> Copies; + for (MachineInstr &MI : MBB->instrs()) { + if (MI.isCopy()) + Copies.push_back(&MI); + } + + for (MachineInstr *MI : Copies) { + + if (!MI->getOperand(0).isReg()) + continue; + if 
(!MI->getOperand(1).isReg()) + continue; + + const Register Dst = MI->getOperand(0).getReg(); + const Register Src = MI->getOperand(1).getReg(); + + if (!Register::isVirtualRegister(Dst)) + continue; + if (!Register::isVirtualRegister(Src)) + continue; + // Not folding COPY instructions if regbankselect has not set the RCs. + // Why are we only considering Register Classes? Because the verifier + // sometimes gets upset if the register classes don't match even if the + // types do. A future patch might add COPY folding for matching types in + // pre-registerbankselect code. + if (!MRI.getRegClassOrNull(Dst)) + continue; + if (MRI.getRegClass(Dst) != MRI.getRegClass(Src)) + continue; + + std::vector<MachineOperand *> Uses; + for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) + Uses.push_back(&*UI); + for (auto *MO : Uses) + MO->setReg(Src); + + Changed = true; + MI->eraseFromParent(); + } + + return Changed; +} + +static bool doDefKillClear(MachineBasicBlock *MBB) { + bool Changed = false; + + for (auto &MI : *MBB) { + for (auto &MO : MI.operands()) { + if (!MO.isReg()) + continue; + if (!MO.isDef() && MO.isKill()) { + Changed = true; + MO.setIsKill(false); + } + + if (MO.isDef() && MO.isDead()) { + Changed = true; + MO.setIsDead(false); + } + } + } + + return Changed; +} + +static bool runOnBasicBlock(MachineBasicBlock *MBB, + std::vector<StringRef> &bbNames, + unsigned &basicBlockNum, NamedVRegCursor &NVC) { + + if (CanonicalizeBasicBlockNumber != ~0U) { + if (CanonicalizeBasicBlockNumber != basicBlockNum++) + return false; + LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() + << "\n";); + } + + if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) { + LLVM_DEBUG({ + dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName() + << "\n"; + }); + return false; + } + + LLVM_DEBUG({ + dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n"; + dbgs() << "\n\n================================================\n\n"; + 
}); + + bool Changed = false; + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + bbNames.push_back(MBB->getName()); + LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); + + LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n"; + MBB->dump();); + Changed |= propagateLocalCopies(MBB); + LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump();); + + LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump();); + unsigned IdempotentInstCount = 0; + Changed |= rescheduleCanonically(IdempotentInstCount, MBB); + LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); + + Changed |= NVC.renameVRegs(MBB); + + // Here we renumber the def vregs for the idempotent instructions from the top + // of the MachineBasicBlock so that they are named in the order that we sorted + // them alphabetically. Eventually we wont need SkipVRegs because we will use + // named vregs instead. + if (IdempotentInstCount) + NVC.skipVRegs(); + + auto MII = MBB->begin(); + for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) { + MachineInstr &MI = *MII++; + Changed = true; + Register vRegToRename = MI.getOperand(0).getReg(); + auto Rename = NVC.createVirtualRegister(vRegToRename); + + std::vector<MachineOperand *> RenameMOs; + for (auto &MO : MRI.reg_operands(vRegToRename)) { + RenameMOs.push_back(&MO); + } + + for (auto *MO : RenameMOs) { + MO->setReg(Rename); + } + } + + Changed |= doDefKillClear(MBB); + + LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); + dbgs() << "\n";); + LLVM_DEBUG( + dbgs() << "\n\n================================================\n\n"); + return Changed; +} + +bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { + + static unsigned functionNum = 0; + if (CanonicalizeFunctionNumber != ~0U) { + if (CanonicalizeFunctionNumber != functionNum++) + return false; + LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << 
MF.getName() + << "\n";); + } + + // we need a valid vreg to create a vreg type for skipping all those + // stray vreg numbers so reach alignment/canonical vreg values. + std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF); + + LLVM_DEBUG( + dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n"; + dbgs() << "\n\n================================================\n\n"; + dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n"; + for (auto MBB + : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs() + << "\n\n================================================\n\n";); + + std::vector<StringRef> BBNames; + + unsigned BBNum = 0; + + bool Changed = false; + + MachineRegisterInfo &MRI = MF.getRegInfo(); + NamedVRegCursor NVC(MRI); + for (auto MBB : RPOList) + Changed |= runOnBasicBlock(MBB, BBNames, BBNum, NVC); + + return Changed; +} |