diff options
Diffstat (limited to 'lib/CodeGen/MIRCanonicalizerPass.cpp')
| -rw-r--r-- | lib/CodeGen/MIRCanonicalizerPass.cpp | 625 | 
1 files changed, 625 insertions, 0 deletions
diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp new file mode 100644 index 0000000000000..4b676a60a8cd1 --- /dev/null +++ b/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -0,0 +1,625 @@ +//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The purpose of this pass is to employ a canonical code transformation so +// that code compiled with slightly different IR passes can be diffed more +// effectively than otherwise. This is done by renaming vregs in a given +// LiveRange in a canonical way. This pass also does a pseudo-scheduling to +// move defs closer to their use in order to reduce diffs caused by slightly +// different schedules. +// +// Basic Usage: +// +// llc -o - -run-pass mir-canonicalizer example.mir +// +// Reorders instructions canonically. +// Renames virtual register operands canonically. +// Strips certain MIR artifacts (optionally). 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/raw_ostream.h" + +#include <queue> + +using namespace llvm; + +namespace llvm { +extern char &MIRCanonicalizerID; +} // namespace llvm + +#define DEBUG_TYPE "mir-canonicalizer" + +static cl::opt<unsigned> +CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), +                           cl::value_desc("N"), +                           cl::desc("Function number to canonicalize.")); + +static cl::opt<unsigned> +CanonicalizeBasicBlockNumber("canon-nth-basicblock", cl::Hidden, cl::init(~0u), +                             cl::value_desc("N"), +                             cl::desc("BasicBlock number to canonicalize.")); + +namespace { + +class MIRCanonicalizer : public MachineFunctionPass { +public: +  static char ID; +  MIRCanonicalizer() : MachineFunctionPass(ID) {} + +  StringRef getPassName() const override { +    return "Rename register operands in a canonical ordering."; +  } + +  void getAnalysisUsage(AnalysisUsage &AU) const override { +    AU.setPreservesCFG(); +    MachineFunctionPass::getAnalysisUsage(AU); +  } + +  bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // end anonymous namespace + +enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate }; +class TypedVReg { +  VRType type; +  unsigned reg; + +public: +  TypedVReg(unsigned reg) : type(RSE_Reg), reg(reg) {} +  TypedVReg(VRType type) : type(type), reg(~0U) { +    assert(type != RSE_Reg && "Expected a non-register type."); +  } + +  bool isReg()        const { return type == RSE_Reg;          } +  bool isFrameIndex() const { return type == RSE_FrameIndex;   } +  bool isCandidate()  const { return type == 
RSE_NewCandidate; } + +  VRType getType() const { return type; } +  unsigned getReg() const { +    assert(this->isReg() && "Expected a virtual or physical register."); +    return reg; +  } +}; + +char MIRCanonicalizer::ID; + +char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; + +INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", +                      "Rename Register Operands Canonically", false, false) + +INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer", +                    "Rename Register Operands Canonically", false, false) + +static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) { +  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); +  std::vector<MachineBasicBlock *> RPOList; +  for (auto MBB : RPOT) { +    RPOList.push_back(MBB); +  } + +  return RPOList; +} + +// Set a dummy vreg. We use this vregs register class to generate throw-away +// vregs that are used to skip vreg numbers so that vreg numbers line up. +static unsigned GetDummyVReg(const MachineFunction &MF) { +  for (auto &MBB : MF) { +    for (auto &MI : MBB) { +      for (auto &MO : MI.operands()) { +        if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) +          continue; +        return MO.getReg(); +      } +    } +  } + +  return ~0U; +} + +static bool rescheduleCanonically(MachineBasicBlock *MBB) { + +  bool Changed = false; + +  // Calculates the distance of MI from the begining of its parent BB. +  auto getInstrIdx = [](const MachineInstr &MI) { +    unsigned i = 0; +    for (auto &CurMI : *MI.getParent()) { +      if (&CurMI == &MI) +        return i; +      i++; +    } +    return ~0U; +  }; + +  // Pre-Populate vector of instructions to reschedule so that we don't +  // clobber the iterator. 
+  std::vector<MachineInstr *> Instructions; +  for (auto &MI : *MBB) { +    Instructions.push_back(&MI); +  } + +  for (auto *II : Instructions) { +    if (II->getNumOperands() == 0) +      continue; + +    MachineOperand &MO = II->getOperand(0); +    if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) +      continue; + +    DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); + +    MachineInstr *Def = II; +    unsigned Distance = ~0U; +    MachineInstr *UseToBringDefCloserTo = nullptr; +    MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); +    for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) { +      MachineInstr *UseInst = UO.getParent(); + +      const unsigned DefLoc = getInstrIdx(*Def); +      const unsigned UseLoc = getInstrIdx(*UseInst); +      const unsigned Delta = (UseLoc - DefLoc); + +      if (UseInst->getParent() != Def->getParent()) +        continue; +      if (DefLoc >= UseLoc) +        continue; + +      if (Delta < Distance) { +        Distance = Delta; +        UseToBringDefCloserTo = UseInst; +      } +    } + +    const auto BBE = MBB->instr_end(); +    MachineBasicBlock::iterator DefI = BBE; +    MachineBasicBlock::iterator UseI = BBE; + +    for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) { + +      if (DefI != BBE && UseI != BBE) +        break; + +      if ((&*BBI != Def) && (&*BBI != UseToBringDefCloserTo)) +        continue; + +      if (&*BBI == Def) { +        DefI = BBI; +        continue; +      } + +      if (&*BBI == UseToBringDefCloserTo) { +        UseI = BBI; +        continue; +      } +    } + +    if (DefI == BBE || UseI == BBE) +      continue; + +    DEBUG({ +      dbgs() << "Splicing "; +      DefI->dump(); +      dbgs() << " right before: "; +      UseI->dump(); +    }); + +    Changed = true; +    MBB->splice(UseI, MBB, DefI); +  } + +  return Changed; +} + +/// Here we find our candidates. What makes an interesting candidate? 
+/// An candidate for a canonicalization tree root is normally any kind of +/// instruction that causes side effects such as a store to memory or a copy to +/// a physical register or a return instruction. We use these as an expression +/// tree root that we walk inorder to build a canonical walk which should result +/// in canoncal vreg renaming. +static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) { +  std::vector<MachineInstr *> Candidates; +  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + +  for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { +    MachineInstr *MI = &*II; + +    bool DoesMISideEffect = false; + +    if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) { +      const unsigned Dst = MI->getOperand(0).getReg(); +      DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst); + +      for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { +        if (DoesMISideEffect) break; +        DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent()); +      } +    } + +    if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect) +      continue; + +    DEBUG(dbgs() << "Found Candidate:  "; MI->dump();); +    Candidates.push_back(MI); +  } + +  return Candidates; +} + +static void doCandidateWalk(std::vector<TypedVReg> &VRegs, +                            std::queue<TypedVReg> &RegQueue, +                            std::vector<MachineInstr *> &VisitedMIs, +                            const MachineBasicBlock *MBB) { + +  const MachineFunction &MF = *MBB->getParent(); +  const MachineRegisterInfo &MRI = MF.getRegInfo(); + +  while (!RegQueue.empty()) { + +    auto TReg = RegQueue.front(); +    RegQueue.pop(); + +    if (TReg.isFrameIndex()) { +      DEBUG(dbgs() << "Popping frame index.\n";); +      VRegs.push_back(TypedVReg(RSE_FrameIndex)); +      continue; +    } + +    assert(TReg.isReg() && "Expected vreg or physreg."); +    unsigned Reg = TReg.getReg(); + +    if 
(TargetRegisterInfo::isVirtualRegister(Reg)) { +      DEBUG({ +        dbgs() << "Popping vreg "; +        MRI.def_begin(Reg)->dump(); +        dbgs() << "\n"; +      }); + +      if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) { +            return TR.isReg() && TR.getReg() == Reg; +          })) { +        VRegs.push_back(TypedVReg(Reg)); +      } +    } else { +      DEBUG(dbgs() << "Popping physreg.\n";); +      VRegs.push_back(TypedVReg(Reg)); +      continue; +    } + +    for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) { +      MachineInstr *Def = RI->getParent(); + +      if (Def->getParent() != MBB) +        continue; + +      if (llvm::any_of(VisitedMIs, +                       [&](const MachineInstr *VMI) { return Def == VMI; })) { +        break; +      } + +      DEBUG({ +        dbgs() << "\n========================\n"; +        dbgs() << "Visited MI: "; +        Def->dump(); +        dbgs() << "BB Name: " << Def->getParent()->getName() << "\n"; +        dbgs() << "\n========================\n"; +      }); +      VisitedMIs.push_back(Def); +      for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) { + +        MachineOperand &MO = Def->getOperand(I); +        if (MO.isFI()) { +          DEBUG(dbgs() << "Pushing frame index.\n";); +          RegQueue.push(TypedVReg(RSE_FrameIndex)); +        } + +        if (!MO.isReg()) +          continue; +        RegQueue.push(TypedVReg(MO.getReg())); +      } +    } +  } +} + +// TODO: Work to remove this in the future. One day when we have named vregs +// we should be able to form the canonical name based on some characteristic +// we see in that point of the expression tree (like if we were to name based +// on some sort of value numbering scheme). 
+static void SkipVRegs(unsigned &VRegGapIndex, MachineRegisterInfo &MRI, +                      const TargetRegisterClass *RC) { +  const unsigned VR_GAP = (++VRegGapIndex * 1000); + +  DEBUG({ +    dbgs() << "Adjusting per-BB VR_GAP for BB" << VRegGapIndex << " to " +           << VR_GAP << "\n"; +  }); + +  unsigned I = MRI.createVirtualRegister(RC); +  const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; +  while (I != E) { +    I = MRI.createVirtualRegister(RC); +  } +} + +static std::map<unsigned, unsigned> +GetVRegRenameMap(const std::vector<TypedVReg> &VRegs, +                 const std::vector<unsigned> &renamedInOtherBB, +                 MachineRegisterInfo &MRI, +                 const TargetRegisterClass *RC) { +  std::map<unsigned, unsigned> VRegRenameMap; +  unsigned LastRenameReg = MRI.createVirtualRegister(RC); +  bool FirstCandidate = true; + +  for (auto &vreg : VRegs) { +    if (vreg.isFrameIndex()) { +      // We skip one vreg for any frame index because there is a good chance +      // (especially when comparing SelectionDAG to GlobalISel generated MIR) +      // that in the other file we are just getting an incoming vreg that comes +      // from a copy from a frame index. So it's safe to skip by one. +      LastRenameReg = MRI.createVirtualRegister(RC); +      DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";); +      continue; +    } else if (vreg.isCandidate()) { + +      // After the first candidate, for every subsequent candidate, we skip mod +      // 10 registers so that the candidates are more likely to start at the +      // same vreg number making it more likely that the canonical walk from the +      // candidate insruction. We don't need to skip from the first candidate of +      // the BasicBlock because we already skip ahead several vregs for each BB. 
+      while (LastRenameReg % 10) { +        if (!FirstCandidate) break; +        LastRenameReg = MRI.createVirtualRegister(RC); + +        DEBUG({ +          dbgs() << "Skipping rename for new candidate " << LastRenameReg +                 << "\n"; +        }); +      } +      FirstCandidate = false; +      continue; +    } else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) { +      LastRenameReg = MRI.createVirtualRegister(RC); +      DEBUG({ +        dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n"; +      }); +      continue; +    } + +    auto Reg = vreg.getReg(); +    if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) { +      DEBUG(dbgs() << "Vreg " << Reg << " already renamed in other BB.\n";); +      continue; +    } + +    auto Rename = MRI.createVirtualRegister(MRI.getRegClass(Reg)); +    LastRenameReg = Rename; + +    if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) { +      DEBUG(dbgs() << "Mapping vreg ";); +      if (MRI.reg_begin(Reg) != MRI.reg_end()) { +        DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump();); +      } else { +        DEBUG(dbgs() << Reg;); +      } +      DEBUG(dbgs() << " to ";); +      if (MRI.reg_begin(Rename) != MRI.reg_end()) { +        DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump();); +      } else { +        DEBUG(dbgs() << Rename;); +      } +      DEBUG(dbgs() << "\n";); + +      VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename)); +    } +  } + +  return VRegRenameMap; +} + +static bool doVRegRenaming(std::vector<unsigned> &RenamedInOtherBB, +                           const std::map<unsigned, unsigned> &VRegRenameMap, +                           MachineRegisterInfo &MRI) { +  bool Changed = false; +  for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) { + +    auto VReg = I->first; +    auto Rename = I->second; + +    RenamedInOtherBB.push_back(Rename); + +    std::vector<MachineOperand *> RenameMOs; +    for (auto &MO : 
MRI.reg_operands(VReg)) { +      RenameMOs.push_back(&MO); +    } + +    for (auto *MO : RenameMOs) { +      Changed = true; +      MO->setReg(Rename); + +      if (!MO->isDef()) +        MO->setIsKill(false); +    } +  } + +  return Changed; +} + +static bool doDefKillClear(MachineBasicBlock *MBB) { +  bool Changed = false; + +  for (auto &MI : *MBB) { +    for (auto &MO : MI.operands()) { +      if (!MO.isReg()) +        continue; +      if (!MO.isDef() && MO.isKill()) { +        Changed = true; +        MO.setIsKill(false); +      } + +      if (MO.isDef() && MO.isDead()) { +        Changed = true; +        MO.setIsDead(false); +      } +    } +  } + +  return Changed; +} + +static bool runOnBasicBlock(MachineBasicBlock *MBB, +                            std::vector<StringRef> &bbNames, +                            std::vector<unsigned> &renamedInOtherBB, +                            unsigned &basicBlockNum, unsigned &VRegGapIndex) { + +  if (CanonicalizeBasicBlockNumber != ~0U) { +    if (CanonicalizeBasicBlockNumber != basicBlockNum++) +      return false; +    DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() << "\n";); +  } + +  if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) { +    DEBUG({ +      dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName() +             << "\n"; +    }); +    return false; +  } + +  DEBUG({ +    dbgs() << "\n\n  NEW BASIC BLOCK: " << MBB->getName() << "  \n\n"; +    dbgs() << "\n\n================================================\n\n"; +  }); + +  bool Changed = false; +  MachineFunction &MF = *MBB->getParent(); +  MachineRegisterInfo &MRI = MF.getRegInfo(); + +  const unsigned DummyVReg = GetDummyVReg(MF); +  const TargetRegisterClass *DummyRC = +    (DummyVReg == ~0U) ? 
nullptr : MRI.getRegClass(DummyVReg); +  if (!DummyRC) return false; + +  bbNames.push_back(MBB->getName()); +  DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); + +  DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump();); +  Changed |= rescheduleCanonically(MBB); +  DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); + +  std::vector<MachineInstr *> Candidates = populateCandidates(MBB); +  std::vector<MachineInstr *> VisitedMIs; +  std::copy(Candidates.begin(), Candidates.end(), +            std::back_inserter(VisitedMIs)); + +  std::vector<TypedVReg> VRegs; +  for (auto candidate : Candidates) { +    VRegs.push_back(TypedVReg(RSE_NewCandidate)); + +    std::queue<TypedVReg> RegQueue; + +    // Here we walk the vreg operands of a non-root node along our walk. +    // The root nodes are the original candidates (stores normally). +    // These are normally not the root nodes (except for the case of copies to +    // physical registers). +    for (unsigned i = 1; i < candidate->getNumOperands(); i++) { +      if (candidate->mayStore() || candidate->isBranch()) +        break; + +      MachineOperand &MO = candidate->getOperand(i); +      if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) +        continue; + +      DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";); +      RegQueue.push(TypedVReg(MO.getReg())); +    } + +    // Here we walk the root candidates. We start from the 0th operand because +    // the root is normally a store to a vreg. +    for (unsigned i = 0; i < candidate->getNumOperands(); i++) { + +      if (!candidate->mayStore() && !candidate->isBranch()) +        break; + +      MachineOperand &MO = candidate->getOperand(i); + +      // TODO: Do we want to only add vregs here? +      if (!MO.isReg() && !MO.isFI()) +        continue; + +      DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";); + +      RegQueue.push(MO.isReg() ? 
TypedVReg(MO.getReg()) : +                                  TypedVReg(RSE_FrameIndex)); +    } + +    doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB); +  } + +  // If we have populated no vregs to rename then bail. +  // The rest of this function does the vreg remaping. +  if (VRegs.size() == 0) +    return Changed; + +  // Skip some vregs, so we can recon where we'll land next. +  SkipVRegs(VRegGapIndex, MRI, DummyRC); + +  auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, DummyRC); +  Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI); +  Changed |= doDefKillClear(MBB); + +  DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); dbgs() << "\n";); +  DEBUG(dbgs() << "\n\n================================================\n\n"); +  return Changed; +} + +bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { + +  static unsigned functionNum = 0; +  if (CanonicalizeFunctionNumber != ~0U) { +    if (CanonicalizeFunctionNumber != functionNum++) +      return false; +    DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() << "\n";); +  } + +  // we need a valid vreg to create a vreg type for skipping all those +  // stray vreg numbers so reach alignment/canonical vreg values. +  std::vector<MachineBasicBlock*> RPOList = GetRPOList(MF); + +  DEBUG( +    dbgs() << "\n\n  NEW MACHINE FUNCTION: " << MF.getName() << "  \n\n"; +    dbgs() << "\n\n================================================\n\n"; +    dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n"; +    for (auto MBB : RPOList) { +      dbgs() << MBB->getName() << "\n"; +    } +    dbgs() << "\n\n================================================\n\n"; +  ); + +  std::vector<StringRef> BBNames; +  std::vector<unsigned> RenamedInOtherBB; + +  unsigned GapIdx = 0; +  unsigned BBNum = 0; + +  bool Changed = false; + +  for (auto MBB : RPOList) +    Changed |= runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx); + +  return Changed; +} +  | 
