diff options
Diffstat (limited to 'llvm/lib/CodeGen/MIRCanonicalizerPass.cpp')
| -rw-r--r-- | llvm/lib/CodeGen/MIRCanonicalizerPass.cpp | 485 | 
1 files changed, 485 insertions, 0 deletions
| diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp new file mode 100644 index 0000000000000..c9bb5461aa3c9 --- /dev/null +++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -0,0 +1,485 @@ +//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The purpose of this pass is to employ a canonical code transformation so +// that code compiled with slightly different IR passes can be diffed more +// effectively than otherwise. This is done by renaming vregs in a given +// LiveRange in a canonical way. This pass also does a pseudo-scheduling to +// move defs closer to their use in order to reduce diffs caused by slightly +// different schedules. +// +// Basic Usage: +// +// llc -o - -run-pass mir-canonicalizer example.mir +// +// Reorders instructions canonically. +// Renames virtual register operands canonically. +// Strips certain MIR artifacts (optionally). 
+// +//===----------------------------------------------------------------------===// + +#include "MIRVRegNamerUtils.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include <queue> + +using namespace llvm; + +namespace llvm { +extern char &MIRCanonicalizerID; +} // namespace llvm + +#define DEBUG_TYPE "mir-canonicalizer" + +static cl::opt<unsigned> +    CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), +                               cl::value_desc("N"), +                               cl::desc("Function number to canonicalize.")); + +static cl::opt<unsigned> CanonicalizeBasicBlockNumber( +    "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"), +    cl::desc("BasicBlock number to canonicalize.")); + +namespace { + +class MIRCanonicalizer : public MachineFunctionPass { +public: +  static char ID; +  MIRCanonicalizer() : MachineFunctionPass(ID) {} + +  StringRef getPassName() const override { +    return "Rename register operands in a canonical ordering."; +  } + +  void getAnalysisUsage(AnalysisUsage &AU) const override { +    AU.setPreservesCFG(); +    MachineFunctionPass::getAnalysisUsage(AU); +  } + +  bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // end anonymous namespace + +char MIRCanonicalizer::ID; + +char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; + +INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", +                      "Rename Register Operands Canonically", false, false) + +INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer", +                    "Rename Register Operands Canonically", false, false) + +static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) { +  if 
(MF.empty()) +    return {}; +  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); +  std::vector<MachineBasicBlock *> RPOList; +  for (auto MBB : RPOT) { +    RPOList.push_back(MBB); +  } + +  return RPOList; +} + +static bool +rescheduleLexographically(std::vector<MachineInstr *> instructions, +                          MachineBasicBlock *MBB, +                          std::function<MachineBasicBlock::iterator()> getPos) { + +  bool Changed = false; +  using StringInstrPair = std::pair<std::string, MachineInstr *>; +  std::vector<StringInstrPair> StringInstrMap; + +  for (auto *II : instructions) { +    std::string S; +    raw_string_ostream OS(S); +    II->print(OS); +    OS.flush(); + +    // Trim the assignment, or start from the begining in the case of a store. +    const size_t i = S.find("="); +    StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II}); +  } + +  llvm::sort(StringInstrMap, +             [](const StringInstrPair &a, const StringInstrPair &b) -> bool { +               return (a.first < b.first); +             }); + +  for (auto &II : StringInstrMap) { + +    LLVM_DEBUG({ +      dbgs() << "Splicing "; +      II.second->dump(); +      dbgs() << " right before: "; +      getPos()->dump(); +    }); + +    Changed = true; +    MBB->splice(getPos(), MBB, II.second); +  } + +  return Changed; +} + +static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, +                                  MachineBasicBlock *MBB) { + +  bool Changed = false; + +  // Calculates the distance of MI from the begining of its parent BB. +  auto getInstrIdx = [](const MachineInstr &MI) { +    unsigned i = 0; +    for (auto &CurMI : *MI.getParent()) { +      if (&CurMI == &MI) +        return i; +      i++; +    } +    return ~0U; +  }; + +  // Pre-Populate vector of instructions to reschedule so that we don't +  // clobber the iterator. 
+  std::vector<MachineInstr *> Instructions; +  for (auto &MI : *MBB) { +    Instructions.push_back(&MI); +  } + +  std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers; +  std::map<unsigned, MachineInstr *> MultiUserLookup; +  unsigned UseToBringDefCloserToCount = 0; +  std::vector<MachineInstr *> PseudoIdempotentInstructions; +  std::vector<unsigned> PhysRegDefs; +  for (auto *II : Instructions) { +    for (unsigned i = 1; i < II->getNumOperands(); i++) { +      MachineOperand &MO = II->getOperand(i); +      if (!MO.isReg()) +        continue; + +      if (Register::isVirtualRegister(MO.getReg())) +        continue; + +      if (!MO.isDef()) +        continue; + +      PhysRegDefs.push_back(MO.getReg()); +    } +  } + +  for (auto *II : Instructions) { +    if (II->getNumOperands() == 0) +      continue; +    if (II->mayLoadOrStore()) +      continue; + +    MachineOperand &MO = II->getOperand(0); +    if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) +      continue; +    if (!MO.isDef()) +      continue; + +    bool IsPseudoIdempotent = true; +    for (unsigned i = 1; i < II->getNumOperands(); i++) { + +      if (II->getOperand(i).isImm()) { +        continue; +      } + +      if (II->getOperand(i).isReg()) { +        if (!Register::isVirtualRegister(II->getOperand(i).getReg())) +          if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) == +              PhysRegDefs.end()) { +            continue; +          } +      } + +      IsPseudoIdempotent = false; +      break; +    } + +    if (IsPseudoIdempotent) { +      PseudoIdempotentInstructions.push_back(II); +      continue; +    } + +    LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); + +    MachineInstr *Def = II; +    unsigned Distance = ~0U; +    MachineInstr *UseToBringDefCloserTo = nullptr; +    MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); +    for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) { +      MachineInstr *UseInst = 
UO.getParent(); + +      const unsigned DefLoc = getInstrIdx(*Def); +      const unsigned UseLoc = getInstrIdx(*UseInst); +      const unsigned Delta = (UseLoc - DefLoc); + +      if (UseInst->getParent() != Def->getParent()) +        continue; +      if (DefLoc >= UseLoc) +        continue; + +      if (Delta < Distance) { +        Distance = Delta; +        UseToBringDefCloserTo = UseInst; +        MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo; +      } +    } + +    const auto BBE = MBB->instr_end(); +    MachineBasicBlock::iterator DefI = BBE; +    MachineBasicBlock::iterator UseI = BBE; + +    for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) { + +      if (DefI != BBE && UseI != BBE) +        break; + +      if (&*BBI == Def) { +        DefI = BBI; +        continue; +      } + +      if (&*BBI == UseToBringDefCloserTo) { +        UseI = BBI; +        continue; +      } +    } + +    if (DefI == BBE || UseI == BBE) +      continue; + +    LLVM_DEBUG({ +      dbgs() << "Splicing "; +      DefI->dump(); +      dbgs() << " right before: "; +      UseI->dump(); +    }); + +    MultiUsers[UseToBringDefCloserTo].push_back(Def); +    Changed = true; +    MBB->splice(UseI, MBB, DefI); +  } + +  // Sort the defs for users of multiple defs lexographically. 
+  for (const auto &E : MultiUserLookup) { + +    auto UseI = +        std::find_if(MBB->instr_begin(), MBB->instr_end(), +                     [&](MachineInstr &MI) -> bool { return &MI == E.second; }); + +    if (UseI == MBB->instr_end()) +      continue; + +    LLVM_DEBUG( +        dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";); +    Changed |= rescheduleLexographically( +        MultiUsers[E.second], MBB, +        [&]() -> MachineBasicBlock::iterator { return UseI; }); +  } + +  PseudoIdempotentInstCount = PseudoIdempotentInstructions.size(); +  LLVM_DEBUG( +      dbgs() << "Rescheduling Idempotent Instructions Lexographically.";); +  Changed |= rescheduleLexographically( +      PseudoIdempotentInstructions, MBB, +      [&]() -> MachineBasicBlock::iterator { return MBB->begin(); }); + +  return Changed; +} + +static bool propagateLocalCopies(MachineBasicBlock *MBB) { +  bool Changed = false; +  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + +  std::vector<MachineInstr *> Copies; +  for (MachineInstr &MI : MBB->instrs()) { +    if (MI.isCopy()) +      Copies.push_back(&MI); +  } + +  for (MachineInstr *MI : Copies) { + +    if (!MI->getOperand(0).isReg()) +      continue; +    if (!MI->getOperand(1).isReg()) +      continue; + +    const Register Dst = MI->getOperand(0).getReg(); +    const Register Src = MI->getOperand(1).getReg(); + +    if (!Register::isVirtualRegister(Dst)) +      continue; +    if (!Register::isVirtualRegister(Src)) +      continue; +    // Not folding COPY instructions if regbankselect has not set the RCs. +    // Why are we only considering Register Classes? Because the verifier +    // sometimes gets upset if the register classes don't match even if the +    // types do. A future patch might add COPY folding for matching types in +    // pre-registerbankselect code. 
+    if (!MRI.getRegClassOrNull(Dst)) +      continue; +    if (MRI.getRegClass(Dst) != MRI.getRegClass(Src)) +      continue; + +    std::vector<MachineOperand *> Uses; +    for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) +      Uses.push_back(&*UI); +    for (auto *MO : Uses) +      MO->setReg(Src); + +    Changed = true; +    MI->eraseFromParent(); +  } + +  return Changed; +} + +static bool doDefKillClear(MachineBasicBlock *MBB) { +  bool Changed = false; + +  for (auto &MI : *MBB) { +    for (auto &MO : MI.operands()) { +      if (!MO.isReg()) +        continue; +      if (!MO.isDef() && MO.isKill()) { +        Changed = true; +        MO.setIsKill(false); +      } + +      if (MO.isDef() && MO.isDead()) { +        Changed = true; +        MO.setIsDead(false); +      } +    } +  } + +  return Changed; +} + +static bool runOnBasicBlock(MachineBasicBlock *MBB, +                            std::vector<StringRef> &bbNames, +                            unsigned &basicBlockNum, NamedVRegCursor &NVC) { + +  if (CanonicalizeBasicBlockNumber != ~0U) { +    if (CanonicalizeBasicBlockNumber != basicBlockNum++) +      return false; +    LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() +                      << "\n";); +  } + +  if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) { +    LLVM_DEBUG({ +      dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName() +             << "\n"; +    }); +    return false; +  } + +  LLVM_DEBUG({ +    dbgs() << "\n\n  NEW BASIC BLOCK: " << MBB->getName() << "  \n\n"; +    dbgs() << "\n\n================================================\n\n"; +  }); + +  bool Changed = false; +  MachineFunction &MF = *MBB->getParent(); +  MachineRegisterInfo &MRI = MF.getRegInfo(); + +  bbNames.push_back(MBB->getName()); +  LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); + +  LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n"; +             MBB->dump();); 
+  Changed |= propagateLocalCopies(MBB); +  LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump();); + +  LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump();); +  unsigned IdempotentInstCount = 0; +  Changed |= rescheduleCanonically(IdempotentInstCount, MBB); +  LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); + +  Changed |= NVC.renameVRegs(MBB); + +  // Here we renumber the def vregs for the idempotent instructions from the top +  // of the MachineBasicBlock so that they are named in the order that we sorted +  // them alphabetically. Eventually we wont need SkipVRegs because we will use +  // named vregs instead. +  if (IdempotentInstCount) +    NVC.skipVRegs(); + +  auto MII = MBB->begin(); +  for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) { +    MachineInstr &MI = *MII++; +    Changed = true; +    Register vRegToRename = MI.getOperand(0).getReg(); +    auto Rename = NVC.createVirtualRegister(vRegToRename); + +    std::vector<MachineOperand *> RenameMOs; +    for (auto &MO : MRI.reg_operands(vRegToRename)) { +      RenameMOs.push_back(&MO); +    } + +    for (auto *MO : RenameMOs) { +      MO->setReg(Rename); +    } +  } + +  Changed |= doDefKillClear(MBB); + +  LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); +             dbgs() << "\n";); +  LLVM_DEBUG( +      dbgs() << "\n\n================================================\n\n"); +  return Changed; +} + +bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { + +  static unsigned functionNum = 0; +  if (CanonicalizeFunctionNumber != ~0U) { +    if (CanonicalizeFunctionNumber != functionNum++) +      return false; +    LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() +                      << "\n";); +  } + +  // we need a valid vreg to create a vreg type for skipping all those +  // stray vreg numbers so reach alignment/canonical vreg values. 
+  std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF); + +  LLVM_DEBUG( +      dbgs() << "\n\n  NEW MACHINE FUNCTION: " << MF.getName() << "  \n\n"; +      dbgs() << "\n\n================================================\n\n"; +      dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n"; +      for (auto MBB +           : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs() +      << "\n\n================================================\n\n";); + +  std::vector<StringRef> BBNames; + +  unsigned BBNum = 0; + +  bool Changed = false; + +  MachineRegisterInfo &MRI = MF.getRegInfo(); +  NamedVRegCursor NVC(MRI); +  for (auto MBB : RPOList) +    Changed |= runOnBasicBlock(MBB, BBNames, BBNum, NVC); + +  return Changed; +} | 
