diff options
Diffstat (limited to 'llvm/lib/CodeGen/ModuloSchedule.cpp')
| -rw-r--r-- | llvm/lib/CodeGen/ModuloSchedule.cpp | 217 | 
1 files changed, 189 insertions, 28 deletions
| diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index 7ce3c5861801a..163e52d9199d1 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -13,6 +13,7 @@  #include "llvm/CodeGen/MachineLoopUtils.h"  #include "llvm/CodeGen/MachineRegisterInfo.h"  #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/InitializePasses.h"  #include "llvm/MC/MCContext.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h" @@ -1189,7 +1190,7 @@ void ModuloScheduleExpander::rewriteScheduledInstr(  bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {    if (!Phi.isPHI())      return false; -  unsigned DefCycle = Schedule.getCycle(&Phi); +  int DefCycle = Schedule.getCycle(&Phi);    int DefStage = Schedule.getStage(&Phi);    unsigned InitVal = 0; @@ -1198,7 +1199,7 @@ bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {    MachineInstr *Use = MRI.getVRegDef(LoopVal);    if (!Use || Use->isPHI())      return true; -  unsigned LoopCycle = Schedule.getCycle(Use); +  int LoopCycle = Schedule.getCycle(Use);    int LoopStage = Schedule.getStage(Use);    return (LoopCycle > DefCycle) || (LoopStage <= DefStage);  } @@ -1214,7 +1215,7 @@ namespace {  // Remove any dead phis in MBB. Dead phis either have only one block as input  // (in which case they are the identity) or have no uses.  void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI, -                       LiveIntervals *LIS) { +                       LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {    bool Changed = true;    while (Changed) {      Changed = false; @@ -1226,7 +1227,7 @@ void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,            LIS->RemoveMachineInstrFromMaps(MI);          MI.eraseFromParent();          Changed = true; -      } else if (MI.getNumExplicitOperands() == 3) { +      } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {          MRI.constrainRegClass(MI.getOperand(1).getReg(),                                MRI.getRegClass(MI.getOperand(0).getReg()));          MRI.replaceRegWith(MI.getOperand(0).getReg(), @@ -1582,6 +1583,133 @@ PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {    return NewBB;  } +void PeelingModuloScheduleExpander::filterInstructions(MachineBasicBlock *MB, +                                                       int MinStage) { +  for (auto I = MB->getFirstInstrTerminator()->getReverseIterator(); +       I != std::next(MB->getFirstNonPHI()->getReverseIterator());) { +    MachineInstr *MI = &*I++; +    int Stage = getStage(MI); +    if (Stage == -1 || Stage >= MinStage) +      continue; + +    for (MachineOperand &DefMO : MI->defs()) { +      SmallVector<std::pair<MachineInstr *, Register>, 4> Subs; +      for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) { +        // Only PHIs can use values from this block by construction. +        // Match with the equivalent PHI in B. +        assert(UseMI.isPHI()); +        Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(), +                                               MI->getParent()); +        Subs.emplace_back(&UseMI, Reg); +      } +      for (auto &Sub : Subs) +        Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0, +                                      *MRI.getTargetRegisterInfo()); +    } +    if (LIS) +      LIS->RemoveMachineInstrFromMaps(*MI); +    MI->eraseFromParent(); +  } +} + +void PeelingModuloScheduleExpander::moveStageBetweenBlocks( +    MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) { +  auto InsertPt = DestBB->getFirstNonPHI(); +  DenseMap<Register, Register> Remaps; +  for (auto I = SourceBB->getFirstNonPHI(); I != SourceBB->end();) { +    MachineInstr *MI = &*I++; +    if (MI->isPHI()) { +      // This is an illegal PHI. If we move any instructions using an illegal +      // PHI, we need to create a legal Phi +      Register PhiR = MI->getOperand(0).getReg(); +      auto RC = MRI.getRegClass(PhiR); +      Register NR = MRI.createVirtualRegister(RC); +      MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(), DebugLoc(), +                                 TII->get(TargetOpcode::PHI), NR) +                             .addReg(PhiR) +                             .addMBB(SourceBB); +      BlockMIs[{DestBB, CanonicalMIs[MI]}] = NI; +      CanonicalMIs[NI] = CanonicalMIs[MI]; +      Remaps[PhiR] = NR; +      continue; +    } +    if (getStage(MI) != Stage) +      continue; +    MI->removeFromParent(); +    DestBB->insert(InsertPt, MI); +    auto *KernelMI = CanonicalMIs[MI]; +    BlockMIs[{DestBB, KernelMI}] = MI; +    BlockMIs.erase({SourceBB, KernelMI}); +  } +  SmallVector<MachineInstr *, 4> PhiToDelete; +  for (MachineInstr &MI : DestBB->phis()) { +    assert(MI.getNumOperands() == 3); +    MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg()); +    // If the instruction referenced by the phi is moved inside the block +    // we don't need the phi anymore. +    if (getStage(Def) == Stage) { +      Register PhiReg = MI.getOperand(0).getReg(); +      MRI.replaceRegWith(MI.getOperand(0).getReg(), +                         Def->getOperand(0).getReg()); +      MI.getOperand(0).setReg(PhiReg); +      PhiToDelete.push_back(&MI); +    } +  } +  for (auto *P : PhiToDelete) +    P->eraseFromParent(); +  InsertPt = DestBB->getFirstNonPHI(); +  // Helper to clone Phi instructions into the destination block. We clone Phi +  // greedily to avoid combinatorial explosion of Phi instructions. +  auto clonePhi = [&](MachineInstr *Phi) { +    MachineInstr *NewMI = MF.CloneMachineInstr(Phi); +    DestBB->insert(InsertPt, NewMI); +    Register OrigR = Phi->getOperand(0).getReg(); +    Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR)); +    NewMI->getOperand(0).setReg(R); +    NewMI->getOperand(1).setReg(OrigR); +    NewMI->getOperand(2).setMBB(*DestBB->pred_begin()); +    Remaps[OrigR] = R; +    CanonicalMIs[NewMI] = CanonicalMIs[Phi]; +    BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI; +    PhiNodeLoopIteration[NewMI] = PhiNodeLoopIteration[Phi]; +    return R; +  }; +  for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) { +    for (MachineOperand &MO : I->uses()) { +      if (!MO.isReg()) +        continue; +      if (Remaps.count(MO.getReg())) +        MO.setReg(Remaps[MO.getReg()]); +      else { +        // If we are using a phi from the source block we need to add a new phi +        // pointing to the old one. +        MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg()); +        if (Use && Use->isPHI() && Use->getParent() == SourceBB) { +          Register R = clonePhi(Use); +          MO.setReg(R); +        } +      } +    } +  } +} + +Register +PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi, +                                                  MachineInstr *Phi) { +  unsigned distance = PhiNodeLoopIteration[Phi]; +  MachineInstr *CanonicalUse = CanonicalPhi; +  for (unsigned I = 0; I < distance; ++I) { +    assert(CanonicalUse->isPHI()); +    assert(CanonicalUse->getNumOperands() == 5); +    unsigned LoopRegIdx = 3, InitRegIdx = 1; +    if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent()) +      std::swap(LoopRegIdx, InitRegIdx); +    CanonicalUse = +        MRI.getVRegDef(CanonicalUse->getOperand(LoopRegIdx).getReg()); +  } +  return CanonicalUse->getOperand(0).getReg(); +} +  void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {    BitVector LS(Schedule.getNumStages(), true);    BitVector AS(Schedule.getNumStages(), true); @@ -1604,26 +1732,45 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {    // property that any value deffed in BB but used outside of BB is used by a    // PHI in the exiting block.    MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock(); - +  EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);    // Push out the epilogs, again in reverse order.    // We can't assume anything about the minumum loop trip count at this point, -  // so emit a fairly complex epilog: -  //  K[0, 1, 2]     // Kernel runs stages 0, 1, 2 -  //  E0[2] <- P1    // Epilog runs stage 2 only, so the state after is [0]. -  //  E1[1, 2] <- P0 // Epilog 1 moves the last item from stage 0 to stage 2. -  // -  // This creates a single-successor single-predecessor sequence of blocks for -  // each epilog, which are kept this way for simplicity at this stage and -  // cleaned up by the optimizer later. +  // so emit a fairly complex epilog. + +  // We first peel number of stages minus one epilogue. Then we remove dead +  // stages and reorder instructions based on their stage. If we have 3 stages +  // we generate first: +  // E0[3, 2, 1] +  // E1[3', 2'] +  // E2[3''] +  // And then we move instructions based on their stages to have: +  // E0[3] +  // E1[2, 3'] +  // E2[1, 2', 3''] +  // The transformation is legal because we only move instructions past +  // instructions of a previous loop iteration.    for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) { -    Epilogs.push_back(nullptr); -    for (int J = Schedule.getNumStages() - 1; J >= I; --J) { -      LS.reset(); -      LS[J] = 1; -      Epilogs.back() = peelKernel(LPD_Back); -      LiveStages[Epilogs.back()] = LS; -      AvailableStages[Epilogs.back()] = AS; +    Epilogs.push_back(peelKernel(LPD_Back)); +    MachineBasicBlock *B = Epilogs.back(); +    filterInstructions(B, Schedule.getNumStages() - I); +    // Keep track at which iteration each phi belongs to. We need it to know +    // what version of the variable to use during prologue/epilogue stitching. +    EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true); +    for (auto Phi = B->begin(), IE = B->getFirstNonPHI(); Phi != IE; ++Phi) +      PhiNodeLoopIteration[&*Phi] = Schedule.getNumStages() - I; +  } +  for (size_t I = 0; I < Epilogs.size(); I++) { +    LS.reset(); +    for (size_t J = I; J < Epilogs.size(); J++) { +      int Iteration = J; +      unsigned Stage = Schedule.getNumStages() - 1 + I - J; +      // Move stage one block at a time so that Phi nodes are updated correctly. +      for (size_t K = Iteration; K > I; K--) +        moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage); +      LS[Stage] = 1;      } +    LiveStages[Epilogs[I]] = LS; +    AvailableStages[Epilogs[I]] = AS;    }    // Now we've defined all the prolog and epilog blocks as a fallthrough @@ -1638,8 +1785,16 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {      for (MachineInstr &MI : (*EI)->phis()) {        Register Reg = MI.getOperand(1).getReg();        MachineInstr *Use = MRI.getUniqueVRegDef(Reg); -      if (Use && Use->getParent() == Pred) +      if (Use && Use->getParent() == Pred) { +        MachineInstr *CanonicalUse = CanonicalMIs[Use]; +        if (CanonicalUse->isPHI()) { +          // If the use comes from a phi we need to skip as many phi as the +          // distance between the epilogue and the kernel. Trace through the phi +          // chain to find the right value. +          Reg = getPhiCanonicalReg(CanonicalUse, Use); +        }          Reg = getEquivalentRegisterIn(Reg, *PI); +      }        MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));        MI.addOperand(MachineOperand::CreateMBB(*PI));      } @@ -1659,6 +1814,13 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {        rewriteUsesOf(MI);      }    } +  for (auto *MI : IllegalPhisToDelete) { +    if (LIS) +      LIS->RemoveMachineInstrFromMaps(*MI); +    MI->eraseFromParent(); +  } +  IllegalPhisToDelete.clear(); +    // Now all remapping has been done, we're free to optimize the generated code.    for (MachineBasicBlock *B : reverse(Blocks))      EliminateDeadPhis(B, MRI, LIS); @@ -1727,9 +1889,10 @@ void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {        R = MI->getOperand(1).getReg();      MRI.setRegClass(R, MRI.getRegClass(PhiR));      MRI.replaceRegWith(PhiR, R); -    if (LIS) -      LIS->RemoveMachineInstrFromMaps(*MI); -    MI->eraseFromParent(); +    // Postpone deleting the Phi as it may be referenced by BlockMIs and used +    // later to figure out how to remap registers. +    MI->getOperand(0).setReg(PhiR); +    IllegalPhisToDelete.push_back(MI);      return;    } @@ -1759,10 +1922,6 @@ void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {  }  void PeelingModuloScheduleExpander::fixupBranches() { -  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> Info = -      TII->analyzeLoopForPipelining(BB); -  assert(Info); -    // Work outwards from the kernel.    bool KernelDisposed = false;    int TC = Schedule.getNumStages() - 1; @@ -1818,6 +1977,8 @@ void PeelingModuloScheduleExpander::expand() {    BB = Schedule.getLoop()->getTopBlock();    Preheader = Schedule.getLoop()->getLoopPreheader();    LLVM_DEBUG(Schedule.dump()); +  Info = TII->analyzeLoopForPipelining(BB); +  assert(Info);    rewriteKernel();    peelPrologAndEpilogs(); | 
