| field | value | date |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2013-06-10 20:36:52 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2013-06-10 20:36:52 +0000 |
| commit | 59d6cff90eecf31cb3dd860c4e786674cfdd42eb (patch) | |
| tree | 909310b2e05119d1d6efda049977042abbb58bb1 /lib/CodeGen/MachineScheduler.cpp | |
| parent | 4a16efa3e43e35f0cc9efe3a67f620f0017c3d36 (diff) | |
Diffstat (limited to 'lib/CodeGen/MachineScheduler.cpp')

| mode | path | changes |
|---|---|---|
| -rw-r--r-- | lib/CodeGen/MachineScheduler.cpp | 322 |

1 file changed, 303 insertions, 19 deletions
```diff
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 5bd2349b50f6..fff6b2b4c062 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -51,7 +51,11 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
 static bool ViewMISchedDAGs = false;
 #endif // NDEBUG
 
-// Experimental heuristics
+// FIXME: remove this flag after initial testing. It should always be a good
+// thing.
+static cl::opt<bool> EnableCopyConstrain("misched-vcopy", cl::Hidden,
+    cl::desc("Constrain vreg copies."), cl::init(true));
+
 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
   cl::desc("Enable load clustering."), cl::init(true));
 
@@ -323,6 +327,10 @@ ScheduleDAGMI::~ScheduleDAGMI() {
   delete SchedImpl;
 }
 
+bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
+  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
+}
+
 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
   if (SuccSU != &ExitSU) {
     // Do not use WillCreateCycle, it assumes SD scheduling.
@@ -404,6 +412,8 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
   }
 }
 
+/// This is normally called from the main scheduler loop but may also be invoked
+/// by the scheduling strategy to perform additional code motion.
 void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
                                     MachineBasicBlock::iterator InsertPos) {
   // Advance RegionBegin if the first instruction moves down.
@@ -505,6 +515,14 @@ updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) {
     if ((int)NewMaxPressure[ID] > MaxUnits)
       MaxUnits = NewMaxPressure[ID];
   }
+  DEBUG(
+    for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) {
+      unsigned Limit = TRI->getRegPressureSetLimit(i);
+      if (NewMaxPressure[i] > Limit) {
+        dbgs() << "  " << TRI->getRegPressureSetName(i) << ": "
+               << NewMaxPressure[i] << " > " << Limit << "\n";
+      }
+    });
 }
 
 /// schedule - Called back from MachineScheduler::runOnMachineFunction
@@ -905,6 +923,184 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) {
 }
 
 //===----------------------------------------------------------------------===//
+// CopyConstrain - DAG post-processing to encourage copy elimination.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Post-process the DAG to create weak edges from all uses of a copy to
+/// the one use that defines the copy's source vreg, most likely an induction
+/// variable increment.
+class CopyConstrain : public ScheduleDAGMutation {
+  // Transient state.
+  SlotIndex RegionBeginIdx;
+  // RegionEndIdx is the slot index of the last non-debug instruction in the
+  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
+  SlotIndex RegionEndIdx;
+public:
+  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
+
+  virtual void apply(ScheduleDAGMI *DAG);
+
+protected:
+  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG);
+};
+} // anonymous
+
+/// constrainLocalCopy handles two possibilities:
+/// 1) Local src:
+/// I0:     = dst
+/// I1: src = ...
+/// I2:     = dst
+/// I3: dst = src (copy)
+/// (create pred->succ edges I0->I1, I2->I1)
+///
+/// 2) Local copy:
+/// I0: dst = src (copy)
+/// I1:     = dst
+/// I2: src = ...
+/// I3:     = dst
+/// (create pred->succ edges I1->I2, I3->I2)
+///
+/// Although the MachineScheduler is currently constrained to single blocks,
+/// this algorithm should handle extended blocks. An EBB is a set of
+/// contiguously numbered blocks such that the previous block in the EBB is
+/// always the single predecessor.
+void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) {
+  LiveIntervals *LIS = DAG->getLIS();
+  MachineInstr *Copy = CopySU->getInstr();
+
+  // Check for pure vreg copies.
+  unsigned SrcReg = Copy->getOperand(1).getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+    return;
+
+  unsigned DstReg = Copy->getOperand(0).getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+    return;
+
+  // Check if either the dest or source is local. If it's live across a back
+  // edge, it's not local. Note that if both vregs are live across the back
+  // edge, we cannot successfully constrain the copy without cyclic scheduling.
+  unsigned LocalReg = DstReg;
+  unsigned GlobalReg = SrcReg;
+  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
+  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
+    LocalReg = SrcReg;
+    GlobalReg = DstReg;
+    LocalLI = &LIS->getInterval(LocalReg);
+    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
+      return;
+  }
+  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
+
+  // Find the global segment after the start of the local LI.
+  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
+  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
+  // local live range. We could create edges from other global uses to the local
+  // start, but the coalescer should have already eliminated these cases, so
+  // don't bother dealing with it.
+  if (GlobalSegment == GlobalLI->end())
+    return;
+
+  // If GlobalSegment is killed at the LocalLI->start, the call to find()
+  // returned the next global segment. But if GlobalSegment overlaps with
+  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
+  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
+  if (GlobalSegment->contains(LocalLI->beginIndex()))
+    ++GlobalSegment;
+
+  if (GlobalSegment == GlobalLI->end())
+    return;
+
+  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
+  if (GlobalSegment != GlobalLI->begin()) {
+    // Two address defs have no hole.
+    if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end,
+                               GlobalSegment->start)) {
+      return;
+    }
+    // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
+    // it would be a disconnected component in the live range.
+    assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() &&
+           "Disconnected LRG within the scheduling region.");
+  }
+  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
+  if (!GlobalDef)
+    return;
+
+  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
+  if (!GlobalSU)
+    return;
+
+  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
+  // constraining the uses of the last local def to precede GlobalDef.
+  SmallVector<SUnit*,8> LocalUses;
+  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
+  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
+  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
+  for (SUnit::const_succ_iterator
+         I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
+       I != E; ++I) {
+    if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
+      continue;
+    if (I->getSUnit() == GlobalSU)
+      continue;
+    if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
+      return;
+    LocalUses.push_back(I->getSUnit());
+  }
+  // Open the top of the GlobalLI hole by constraining any earlier global uses
+  // to precede the start of LocalLI.
+  SmallVector<SUnit*,8> GlobalUses;
+  MachineInstr *FirstLocalDef =
+    LIS->getInstructionFromIndex(LocalLI->beginIndex());
+  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
+  for (SUnit::const_pred_iterator
+         I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
+    if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
+      continue;
+    if (I->getSUnit() == FirstLocalSU)
+      continue;
+    if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
+      return;
+    GlobalUses.push_back(I->getSUnit());
+  }
+  DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
+  // Add the weak edges.
+  for (SmallVectorImpl<SUnit*>::const_iterator
+         I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
+    DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
+          << GlobalSU->NodeNum << ")\n");
+    DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
+  }
+  for (SmallVectorImpl<SUnit*>::const_iterator
+         I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
+    DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
+          << FirstLocalSU->NodeNum << ")\n");
+    DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
+  }
+}
+
+/// \brief Callback from DAG postProcessing to create weak edges to encourage
+/// copy elimination.
+void CopyConstrain::apply(ScheduleDAGMI *DAG) {
+  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
+  if (FirstPos == DAG->end())
+    return;
+  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
+  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
+    &*priorNonDebug(DAG->end(), DAG->begin()));
+
+  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
+    SUnit *SU = &DAG->SUnits[Idx];
+    if (!SU->getInstr()->isCopy())
+      continue;
+
+    constrainLocalCopy(SU, DAG);
+  }
+}
+
+//===----------------------------------------------------------------------===//
 // ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
 //===----------------------------------------------------------------------===//
 
@@ -916,7 +1112,7 @@ public:
   /// Represent the type of SchedCandidate found within a single queue.
   /// pickNodeBidirectional depends on these listed by decreasing priority.
   enum CandReason {
-    NoCand, SingleExcess, SingleCritical, Cluster,
+    NoCand, PhysRegCopy, SingleExcess, SingleCritical, Cluster, Weak,
     ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
     TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
     NodeOrder};
@@ -1191,6 +1387,8 @@ protected:
                          const RegPressureTracker &RPTracker,
                          SchedCandidate &Candidate);
 
+  void reschedulePhysRegCopies(SUnit *SU, bool isTop);
+
 #ifndef NDEBUG
   void traceCandidate(const SchedCandidate &Cand);
 #endif
@@ -1339,6 +1537,8 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
   for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
        I != E; ++I) {
     unsigned L = getUnscheduledLatency(*I);
+    DEBUG(dbgs() << "  " << Available.getName()
+          << " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n');
     if (L > RemLatency)
       RemLatency = L;
   }
@@ -1349,10 +1549,13 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
       RemLatency = L;
   }
   unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
+  DEBUG(dbgs() << "  " << Available.getName()
+        << " ExpectedLatency " << ExpectedLatency
+        << " CP Limit " << CriticalPathLimit << '\n');
   if (RemLatency + ExpectedLatency >= CriticalPathLimit
       && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
     Policy.ReduceLatency = true;
-    DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
+    DEBUG(dbgs() << "  Increase ILP: " << Available.getName() << '\n');
   }
 }
 
@@ -1569,7 +1772,8 @@ void ConvergingScheduler::balanceZones(
   if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
       > (int)SchedModel->getLatencyFactor()) {
     CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
-    DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
+    DEBUG(dbgs() << "  Balance " << CriticalZone.Available.getName()
+          << " reduce "
           << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
           << '\n');
   }
@@ -1580,7 +1784,8 @@ void ConvergingScheduler::balanceZones(
   if ((int)(OppositeZone.ExpectedCount - OppositeCount)
       > (int)SchedModel->getLatencyFactor()) {
     OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
-    DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
+    DEBUG(dbgs() << "  Balance " << OppositeZone.Available.getName()
+          << " demand "
           << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
           << '\n');
   }
@@ -1604,7 +1809,7 @@ void ConvergingScheduler::checkResourceLimits(
     if (Top.CritResIdx != Rem.CritResIdx) {
       TopCand.Policy.ReduceResIdx = Top.CritResIdx;
       BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
-      DEBUG(dbgs() << "Reduce scheduled "
+      DEBUG(dbgs() << "  Reduce scheduled "
             << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
     }
     return;
@@ -1621,7 +1826,7 @@ void ConvergingScheduler::checkResourceLimits(
         && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
       TopCand.Policy.ReduceLatency = true;
       BotCand.Policy.ReduceLatency = true;
-      DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
+      DEBUG(dbgs() << "  Reduce scheduled latency " << Top.ExpectedLatency
             << " + " << Bot.ExpectedLatency << '\n');
     }
     return;
@@ -1696,6 +1901,34 @@ static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
 }
 
+/// Minimize physical register live ranges. Regalloc wants them adjacent to
+/// their physreg def/use.
+///
+/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
+/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
+/// with the operation that produces or consumes the physreg. We'll do this when
+/// regalloc has support for parallel copies.
+static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
+  const MachineInstr *MI = SU->getInstr();
+  if (!MI->isCopy())
+    return 0;
+
+  unsigned ScheduledOper = isTop ? 1 : 0;
+  unsigned UnscheduledOper = isTop ? 0 : 1;
+  // If we have already scheduled the physreg producer/consumer, immediately
+  // schedule the copy.
+  if (TargetRegisterInfo::isPhysicalRegister(
+        MI->getOperand(ScheduledOper).getReg()))
+    return 1;
+  // If the physreg is at the boundary, defer it. Otherwise schedule it
+  // immediately to free the dependent. We can hoist the copy later.
+  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
+  if (TargetRegisterInfo::isPhysicalRegister(
+        MI->getOperand(UnscheduledOper).getReg()))
+    return AtBoundary ? -1 : 1;
+  return 0;
+}
+
 /// Apply a set of heuristics to a new candidate. Heuristics are currently
 /// hierarchical. This may be more efficient than a graduated cost model because
 /// we don't need to evaluate all aspects of the model for each node in the
@@ -1723,6 +1956,12 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
     TryCand.Reason = NodeOrder;
     return;
   }
+
+  if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
+                 biasPhysRegCopy(Cand.SU, Zone.isTop()),
+                 TryCand, Cand, PhysRegCopy))
+    return;
+
   // Avoid exceeding the target's limit.
   if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
               Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
@@ -1749,12 +1988,16 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
   if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
                  TryCand, Cand, Cluster))
     return;
-  // Currently, weak edges are for clustering, so we hard-code that reason.
-  // However, deferring the current TryCand will not change Cand's reason.
+
+  // Weak edges are for clustering and other constraints.
+  //
+  // Deferring TryCand here does not change Cand's reason. This is good in the
+  // sense that a bad candidate shouldn't affect a previous candidate's
+  // goodness, but bad in that it is asymmetric and depends on queue order.
   CandReason OrigReason = Cand.Reason;
   if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
               getWeakLeft(Cand.SU, Zone.isTop()),
-              TryCand, Cand, Cluster)) {
+              TryCand, Cand, Weak)) {
     Cand.Reason = OrigReason;
     return;
   }
@@ -1825,20 +2068,20 @@ static bool compareRPDelta(const RegPressureDelta &LHS,
   // Avoid increasing the max critical pressure in the scheduled region.
   if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
-    DEBUG(dbgs() << "RP excess top - bot: "
+    DEBUG(dbgs() << "  RP excess top - bot: "
           << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
     return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
   }
   // Avoid increasing the max critical pressure in the scheduled region.
   if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
-    DEBUG(dbgs() << "RP critical top - bot: "
+    DEBUG(dbgs() << "  RP critical top - bot: "
           << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
           << '\n');
     return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
   }
   // Avoid increasing the max pressure of the entire region.
   if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
-    DEBUG(dbgs() << "RP current top - bot: "
+    DEBUG(dbgs() << "  RP current top - bot: "
          << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
          << '\n');
     return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
@@ -1851,9 +2094,11 @@ const char *ConvergingScheduler::getReasonStr(
   ConvergingScheduler::CandReason Reason) {
   switch (Reason) {
   case NoCand:         return "NOCAND    ";
+  case PhysRegCopy:    return "PREG-COPY ";
   case SingleExcess:   return "REG-EXCESS";
   case SingleCritical: return "REG-CRIT  ";
   case Cluster:        return "CLUSTER   ";
+  case Weak:           return "WEAK      ";
   case SingleMax:      return "REG-MAX   ";
   case MultiPressure:  return "REG-MULTI ";
   case ResourceReduce: return "RES-REDUCE";
@@ -1953,8 +2198,7 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
 static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
                       bool IsTop) {
-  DEBUG(dbgs() << "Pick " << (IsTop ? "Top" : "Bot")
-        << " SU(" << Cand.SU->NodeNum << ") "
+  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
         << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
 }
 
@@ -1964,10 +2208,12 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
   // efficient, but also provides the best heuristics for CriticalPSets.
   if (SUnit *SU = Bot.pickOnlyChoice()) {
     IsTopNode = false;
+    DEBUG(dbgs() << "Pick Bot NOCAND\n");
     return SU;
   }
   if (SUnit *SU = Top.pickOnlyChoice()) {
     IsTopNode = true;
+    DEBUG(dbgs() << "Pick Top NOCAND\n");
     return SU;
   }
   CandPolicy NoPolicy;
@@ -2065,21 +2311,53 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
   if (SU->isBottomReady())
     Bot.removeReady(SU);
 
-  DEBUG(dbgs() << "Scheduling " << *SU->getInstr());
+  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
   return SU;
 }
 
+void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
+
+  MachineBasicBlock::iterator InsertPos = SU->getInstr();
+  if (!isTop)
+    ++InsertPos;
+  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
+
+  // Find already scheduled copies with a single physreg dependence and move
+  // them just above the scheduled instruction.
+  for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
+       I != E; ++I) {
+    if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
+      continue;
+    SUnit *DepSU = I->getSUnit();
+    if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
+      continue;
+    MachineInstr *Copy = DepSU->getInstr();
+    if (!Copy->isCopy())
+      continue;
+    DEBUG(dbgs() << "  Rescheduling physreg copy ";
+          I->getSUnit()->dump(DAG));
+    DAG->moveInstruction(Copy, InsertPos);
+  }
+}
+
 /// Update the scheduler's state after scheduling a node. This is the same node
 /// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
 /// its state based on the current cycle before MachineSchedStrategy does.
+///
+/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
+/// them here. See comments in biasPhysRegCopy.
 void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   if (IsTopNode) {
     SU->TopReadyCycle = Top.CurrCycle;
     Top.bumpNode(SU);
+    if (SU->hasPhysRegUses)
+      reschedulePhysRegCopies(SU, true);
   }
   else {
     SU->BotReadyCycle = Bot.CurrCycle;
     Bot.bumpNode(SU);
+    if (SU->hasPhysRegDefs)
+      reschedulePhysRegCopies(SU, false);
   }
 }
 
@@ -2090,6 +2368,12 @@ static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
          "-misched-topdown incompatible with -misched-bottomup");
   ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
   // Register DAG post-processors.
+  //
+  // FIXME: extend the mutation API to allow earlier mutations to instantiate
+  // data and pass it to later mutations. Have a single mutation that gathers
+  // the interesting nodes in one pass.
+  if (EnableCopyConstrain)
+    DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
   if (EnableLoadCluster)
     DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
   if (EnableMacroFusion)
@@ -2179,12 +2463,12 @@ public:
     SUnit *SU = ReadyQ.back();
     ReadyQ.pop_back();
     IsTopNode = false;
-    DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
-          << *SU->getInstr()
+    DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
          << " ILP: " << DAG->getDFSResult()->getILP(SU)
          << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
          << DAG->getDFSResult()->getSubtreeLevel(
-            DAG->getDFSResult()->getSubtreeID(SU)) << '\n');
+            DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
+          << "Scheduling " << *SU->getInstr());
     return SU;
   }
```
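
A note on the mechanism, for readers coming to this patch cold: CopyConstrain plugs into the scheduler through the ScheduleDAGMutation hook and steers it with *weak* edges, which the scheduler weighs against its other heuristics rather than treating as hard ordering constraints. The sketch below shows the shape of such a mutation using only APIs that appear in this patch; the class name and the particular hint it adds are invented for illustration, not part of the change.

```cpp
// Illustrative sketch of a ScheduleDAGMutation in the same mold as
// CopyConstrain, assuming it sits inside MachineScheduler.cpp with the
// LLVM 3.3-era APIs shown above. "AdjacentCopyHint" is a made-up name.
namespace {
class AdjacentCopyHint : public ScheduleDAGMutation {
public:
  // Called back once per scheduling region, after DAG construction.
  virtual void apply(ScheduleDAGMI *DAG) {
    SUnit *PrevCopy = 0;
    for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
      SUnit *SU = &DAG->SUnits[Idx];
      if (!SU->getInstr()->isCopy())
        continue;
      // Weak edges are droppable hints; canAddEdge() rejects any edge that
      // would close a cycle, the same guard constrainLocalCopy relies on.
      if (PrevCopy && DAG->canAddEdge(SU, PrevCopy))
        DAG->addEdge(SU, SDep(PrevCopy, SDep::Weak));
      PrevCopy = SU;
    }
  }
};
} // anonymous

// Registration mirrors createConvergingSched above:
//   DAG->addMutation(new AdjacentCopyHint());
```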
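The physreg side of the patch is a two-part scheme: biasPhysRegCopy injects a three-valued priority at the very top of tryCandidate, and schedNode later calls reschedulePhysRegCopies to physically move already-scheduled single-use copies next to the instruction that produces or consumes the physreg. The following standalone program models just the bias decision; it is a paraphrase for exposition, not LLVM code, and the register scenarios in the comments are hypothetical.

```cpp
#include <cassert>

// Model of biasPhysRegCopy's result: +1 schedules the copy immediately,
// -1 defers it, 0 leaves the decision to the remaining heuristics.
struct CopyState {
  bool SchedOperIsPhys;   // physreg operand on the already-scheduled side
  bool UnschedOperIsPhys; // physreg operand on the unscheduled side
  bool AtBoundary;        // no preds/succs left toward the zone boundary
};

static int bias(const CopyState &C) {
  // Physreg producer/consumer already scheduled: place the copy next to it.
  if (C.SchedOperIsPhys)
    return 1;
  // Physreg side still unscheduled: keep boundary copies at the boundary,
  // otherwise schedule immediately to free the dependent instruction.
  if (C.UnschedOperIsPhys)
    return C.AtBoundary ? -1 : 1;
  return 0; // vreg-to-vreg copy: no physreg bias applies
}

int main() {
  CopyState ArgCopy  = { false, true, true };  // e.g. copy from an argument register at block top
  CopyState AfterMul = { true, false, false }; // e.g. copy of a MUL's fixed output register
  assert(bias(ArgCopy) == -1); // deferred: stays adjacent to the boundary
  assert(bias(AfterMul) == 1); // fires as soon as its producer is scheduled
  return 0;
}
```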
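Both behaviors stay switchable while under test: the patch gates the new mutation behind the hidden `misched-vcopy` flag (default on, with a FIXME to remove it after initial testing), alongside the existing `misched-cluster` flag for load clustering, so a regression can be isolated to a single DAG mutation by toggling flags rather than rebuilding.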
