diff options
Diffstat (limited to 'lib/CodeGen/MachineScheduler.cpp')
| -rw-r--r-- | lib/CodeGen/MachineScheduler.cpp | 1458 | 
1 files changed, 1013 insertions, 445 deletions
| diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index a1dc9481c639..a4817d09c0d3 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -18,11 +18,8 @@  #include "llvm/CodeGen/MachineScheduler.h"  #include "llvm/CodeGen/Passes.h"  #include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/RegisterPressure.h" -#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/ScheduleDAGILP.h"  #include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/MC/MCInstrItineraries.h"  #include "llvm/Analysis/AliasAnalysis.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h" @@ -35,10 +32,12 @@  using namespace llvm; -static cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, -                                  cl::desc("Force top-down list scheduling")); -static cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, -                                  cl::desc("Force bottom-up list scheduling")); +namespace llvm { +cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, +                           cl::desc("Force top-down list scheduling")); +cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, +                            cl::desc("Force bottom-up list scheduling")); +}  #ifndef NDEBUG  static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, @@ -50,6 +49,15 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,  static bool ViewMISchedDAGs = false;  #endif // NDEBUG +// Threshold to very roughly model an out-of-order processor's instruction +// buffers. If the actual value of this threshold matters much in practice, then +// it can be specified by the machine model. For now, it's an experimental +// tuning knob to determine when and if it matters. +static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden, +  cl::desc("Allow expected latency to exceed the critical path by N cycles " +           "before attempting to balance ILP"), +  cl::init(10U)); +  //===----------------------------------------------------------------------===//  // Machine Instruction Scheduling Pass and Registry  //===----------------------------------------------------------------------===// @@ -221,7 +229,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {      // The Scheduler may insert instructions during either schedule() or      // exitRegion(), even for empty regions. So the local iterators 'I' and      // 'RegionEnd' are invalid across these calls. -    unsigned RemainingCount = MBB->size(); +    unsigned RemainingInstrs = MBB->size();      for(MachineBasicBlock::iterator RegionEnd = MBB->end();          RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) { @@ -230,19 +238,19 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {            || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {          --RegionEnd;          // Count the boundary instruction. -        --RemainingCount; +        --RemainingInstrs;        }        // The next region starts above the previous region. Look backward in the        // instruction stream until we find the nearest boundary.        MachineBasicBlock::iterator I = RegionEnd; -      for(;I != MBB->begin(); --I, --RemainingCount) { +      for(;I != MBB->begin(); --I, --RemainingInstrs) {          if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))            break;        }        // Notify the scheduler of the region, even if we may skip scheduling        // it. Perhaps it still needs to be bundled. -      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingCount); +      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);        // Skip empty scheduling regions (0 or 1 schedulable instructions).        if (I == RegionEnd || I == llvm::prior(RegionEnd)) { @@ -252,11 +260,11 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {          continue;        }        DEBUG(dbgs() << "********** MI Scheduling **********\n"); -      DEBUG(dbgs() << MF->getFunction()->getName() +      DEBUG(dbgs() << MF->getName()              << ":BB#" << MBB->getNumber() << "\n  From: " << *I << "    To: ";              if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;              else dbgs() << "End"; -            dbgs() << " Remaining: " << RemainingCount << "\n"); +            dbgs() << " Remaining: " << RemainingInstrs << "\n");        // Schedule a region: possibly reorder instructions.        // This invalidates 'RegionEnd' and 'I'. @@ -269,7 +277,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {        // scheduler for the top of it's scheduled region.        RegionEnd = Scheduler->begin();      } -    assert(RemainingCount == 0 && "Instruction count mismatch!"); +    assert(RemainingInstrs == 0 && "Instruction count mismatch!");      Scheduler->finishBlock();    }    Scheduler->finalizeSchedule(); @@ -281,157 +289,20 @@ void MachineScheduler::print(raw_ostream &O, const Module* m) const {    // unimplemented  } -//===----------------------------------------------------------------------===// -// MachineSchedStrategy - Interface to a machine scheduling algorithm. -//===----------------------------------------------------------------------===// - -namespace { -class ScheduleDAGMI; - -/// MachineSchedStrategy - Interface used by ScheduleDAGMI to drive the selected -/// scheduling algorithm. -/// -/// If this works well and targets wish to reuse ScheduleDAGMI, we may expose it -/// in ScheduleDAGInstrs.h -class MachineSchedStrategy { -public: -  virtual ~MachineSchedStrategy() {} - -  /// Initialize the strategy after building the DAG for a new region. -  virtual void initialize(ScheduleDAGMI *DAG) = 0; - -  /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to -  /// schedule the node at the top of the unscheduled region. Otherwise it will -  /// be scheduled at the bottom. -  virtual SUnit *pickNode(bool &IsTopNode) = 0; - -  /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled a node. -  virtual void schedNode(SUnit *SU, bool IsTopNode) = 0; - -  /// When all predecessor dependencies have been resolved, free this node for -  /// top-down scheduling. -  virtual void releaseTopNode(SUnit *SU) = 0; -  /// When all successor dependencies have been resolved, free this node for -  /// bottom-up scheduling. -  virtual void releaseBottomNode(SUnit *SU) = 0; -}; -} // namespace +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ReadyQueue::dump() { +  dbgs() << Name << ": "; +  for (unsigned i = 0, e = Queue.size(); i < e; ++i) +    dbgs() << Queue[i]->NodeNum << " "; +  dbgs() << "\n"; +} +#endif  //===----------------------------------------------------------------------===//  // ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals  // preservation.  //===----------------------------------------------------------------------===// -namespace { -/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules -/// machine instructions while updating LiveIntervals. -class ScheduleDAGMI : public ScheduleDAGInstrs { -  AliasAnalysis *AA; -  RegisterClassInfo *RegClassInfo; -  MachineSchedStrategy *SchedImpl; - -  MachineBasicBlock::iterator LiveRegionEnd; - -  /// Register pressure in this region computed by buildSchedGraph. -  IntervalPressure RegPressure; -  RegPressureTracker RPTracker; - -  /// List of pressure sets that exceed the target's pressure limit before -  /// scheduling, listed in increasing set ID order. Each pressure set is paired -  /// with its max pressure in the currently scheduled regions. -  std::vector<PressureElement> RegionCriticalPSets; - -  /// The top of the unscheduled zone. -  MachineBasicBlock::iterator CurrentTop; -  IntervalPressure TopPressure; -  RegPressureTracker TopRPTracker; - -  /// The bottom of the unscheduled zone. -  MachineBasicBlock::iterator CurrentBottom; -  IntervalPressure BotPressure; -  RegPressureTracker BotRPTracker; - -#ifndef NDEBUG -  /// The number of instructions scheduled so far. Used to cut off the -  /// scheduler at the point determined by misched-cutoff. -  unsigned NumInstrsScheduled; -#endif -public: -  ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S): -    ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS), -    AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S), -    RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure), -    CurrentBottom(), BotRPTracker(BotPressure) { -#ifndef NDEBUG -    NumInstrsScheduled = 0; -#endif -  } - -  ~ScheduleDAGMI() { -    delete SchedImpl; -  } - -  MachineBasicBlock::iterator top() const { return CurrentTop; } -  MachineBasicBlock::iterator bottom() const { return CurrentBottom; } - -  /// Implement the ScheduleDAGInstrs interface for handling the next scheduling -  /// region. This covers all instructions in a block, while schedule() may only -  /// cover a subset. -  void enterRegion(MachineBasicBlock *bb, -                   MachineBasicBlock::iterator begin, -                   MachineBasicBlock::iterator end, -                   unsigned endcount); - -  /// Implement ScheduleDAGInstrs interface for scheduling a sequence of -  /// reorderable instructions. -  void schedule(); - -  /// Get current register pressure for the top scheduled instructions. -  const IntervalPressure &getTopPressure() const { return TopPressure; } -  const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; } - -  /// Get current register pressure for the bottom scheduled instructions. -  const IntervalPressure &getBotPressure() const { return BotPressure; } -  const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; } - -  /// Get register pressure for the entire scheduling region before scheduling. -  const IntervalPressure &getRegPressure() const { return RegPressure; } - -  const std::vector<PressureElement> &getRegionCriticalPSets() const { -    return RegionCriticalPSets; -  } - -  /// getIssueWidth - Return the max instructions per scheduling group. -  unsigned getIssueWidth() const { -    return (InstrItins && InstrItins->SchedModel) -      ? InstrItins->SchedModel->IssueWidth : 1; -  } - -  /// getNumMicroOps - Return the number of issue slots required for this MI. -  unsigned getNumMicroOps(MachineInstr *MI) const { -    if (!InstrItins) return 1; -    int UOps = InstrItins->getNumMicroOps(MI->getDesc().getSchedClass()); -    return (UOps >= 0) ? UOps : TII->getNumMicroOps(InstrItins, MI); -  } - -protected: -  void initRegPressure(); -  void updateScheduledPressure(std::vector<unsigned> NewMaxPressure); - -  void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); -  bool checkSchedLimit(); - -  void releaseRoots(); - -  void releaseSucc(SUnit *SU, SDep *SuccEdge); -  void releaseSuccessors(SUnit *SU); -  void releasePred(SUnit *SU, SDep *PredEdge); -  void releasePredecessors(SUnit *SU); - -  void placeDebugValues(); -}; -} // namespace -  /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When  /// NumPredsLeft reaches zero, release the successor node.  /// @@ -498,7 +369,7 @@ void ScheduleDAGMI::moveInstruction(MachineInstr *MI,    BB->splice(InsertPos, BB, MI);    // Update LiveIntervals -  LIS->handleMove(MI); +  LIS->handleMove(MI, /*UpdateFlags=*/true);    // Recede RegionBegin if an instruction moves above the first.    if (RegionBegin == InsertPos) @@ -565,6 +436,9 @@ void ScheduleDAGMI::initRegPressure() {    std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;    for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {      unsigned Limit = TRI->getRegPressureSetLimit(i); +    DEBUG(dbgs() << TRI->getRegPressureSetName(i) +          << "Limit " << Limit +          << " Actual " << RegionPressure[i] << "\n");      if (RegionPressure[i] > Limit)        RegionCriticalPSets.push_back(PressureElement(i, 0));    } @@ -587,6 +461,74 @@ updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {    }  } +/// schedule - Called back from MachineScheduler::runOnMachineFunction +/// after setting up the current scheduling region. [RegionBegin, RegionEnd) +/// only includes instructions that have DAG nodes, not scheduling boundaries. +/// +/// This is a skeletal driver, with all the functionality pushed into helpers, +/// so that it can be easilly extended by experimental schedulers. Generally, +/// implementing MachineSchedStrategy should be sufficient to implement a new +/// scheduling algorithm. However, if a scheduler further subclasses +/// ScheduleDAGMI then it will want to override this virtual method in order to +/// update any specialized state. +void ScheduleDAGMI::schedule() { +  buildDAGWithRegPressure(); + +  postprocessDAG(); + +  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) +          SUnits[su].dumpAll(this)); + +  if (ViewMISchedDAGs) viewGraph(); + +  initQueues(); + +  bool IsTopNode = false; +  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { +    assert(!SU->isScheduled && "Node already scheduled"); +    if (!checkSchedLimit()) +      break; + +    scheduleMI(SU, IsTopNode); + +    updateQueues(SU, IsTopNode); +  } +  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); + +  placeDebugValues(); + +  DEBUG({ +      unsigned BBNum = top()->getParent()->getNumber(); +      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; +      dumpSchedule(); +      dbgs() << '\n'; +    }); +} + +/// Build the DAG and setup three register pressure trackers. +void ScheduleDAGMI::buildDAGWithRegPressure() { +  // Initialize the register pressure tracker used by buildSchedGraph. +  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); + +  // Account for liveness generate by the region boundary. +  if (LiveRegionEnd != RegionEnd) +    RPTracker.recede(); + +  // Build the DAG, and compute current register pressure. +  buildSchedGraph(AA, &RPTracker); +  if (ViewMISchedDAGs) viewGraph(); + +  // Initialize top/bottom trackers after computing region pressure. +  initRegPressure(); +} + +/// Apply each ScheduleDAGMutation step in order. +void ScheduleDAGMI::postprocessDAG() { +  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { +    Mutations[i]->apply(this); +  } +} +  // Release all DAG roots for scheduling.  void ScheduleDAGMI::releaseRoots() {    SmallVector<SUnit*, 16> BotRoots; @@ -607,28 +549,10 @@ void ScheduleDAGMI::releaseRoots() {      SchedImpl->releaseBottomNode(*I);  } -/// schedule - Called back from MachineScheduler::runOnMachineFunction -/// after setting up the current scheduling region. [RegionBegin, RegionEnd) -/// only includes instructions that have DAG nodes, not scheduling boundaries. -void ScheduleDAGMI::schedule() { -  // Initialize the register pressure tracker used by buildSchedGraph. -  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); - -  // Account for liveness generate by the region boundary. -  if (LiveRegionEnd != RegionEnd) -    RPTracker.recede(); - -  // Build the DAG, and compute current register pressure. -  buildSchedGraph(AA, &RPTracker); - -  // Initialize top/bottom trackers after computing region pressure. -  initRegPressure(); - -  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) -          SUnits[su].dumpAll(this)); - -  if (ViewMISchedDAGs) viewGraph(); +/// Identify DAG roots and setup scheduler queues. +void ScheduleDAGMI::initQueues() { +  // Initialize the strategy before modifying the DAG.    SchedImpl->initialize(this);    // Release edges from the special Entry node or to the special Exit node. @@ -638,61 +562,64 @@ void ScheduleDAGMI::schedule() {    // Release all DAG roots for scheduling.    releaseRoots(); +  SchedImpl->registerRoots(); +    CurrentTop = nextIfDebug(RegionBegin, RegionEnd);    CurrentBottom = RegionEnd; -  bool IsTopNode = false; -  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { -    if (!checkSchedLimit()) -      break; - -    // Move the instruction to its new location in the instruction stream. -    MachineInstr *MI = SU->getInstr(); - -    if (IsTopNode) { -      assert(SU->isTopReady() && "node still has unscheduled dependencies"); -      if (&*CurrentTop == MI) -        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); -      else { -        moveInstruction(MI, CurrentTop); -        TopRPTracker.setPos(MI); -      } +} -      // Update top scheduled pressure. -      TopRPTracker.advance(); -      assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); -      updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure); +/// Move an instruction and update register pressure. +void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { +  // Move the instruction to its new location in the instruction stream. +  MachineInstr *MI = SU->getInstr(); -      // Release dependent instructions for scheduling. -      releaseSuccessors(SU); +  if (IsTopNode) { +    assert(SU->isTopReady() && "node still has unscheduled dependencies"); +    if (&*CurrentTop == MI) +      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); +    else { +      moveInstruction(MI, CurrentTop); +      TopRPTracker.setPos(MI);      } + +    // Update top scheduled pressure. +    TopRPTracker.advance(); +    assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); +    updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure); +  } +  else { +    assert(SU->isBottomReady() && "node still has unscheduled dependencies"); +    MachineBasicBlock::iterator priorII = +      priorNonDebug(CurrentBottom, CurrentTop); +    if (&*priorII == MI) +      CurrentBottom = priorII;      else { -      assert(SU->isBottomReady() && "node still has unscheduled dependencies"); -      MachineBasicBlock::iterator priorII = -        priorNonDebug(CurrentBottom, CurrentTop); -      if (&*priorII == MI) -        CurrentBottom = priorII; -      else { -        if (&*CurrentTop == MI) { -          CurrentTop = nextIfDebug(++CurrentTop, priorII); -          TopRPTracker.setPos(CurrentTop); -        } -        moveInstruction(MI, CurrentBottom); -        CurrentBottom = MI; +      if (&*CurrentTop == MI) { +        CurrentTop = nextIfDebug(++CurrentTop, priorII); +        TopRPTracker.setPos(CurrentTop);        } -      // Update bottom scheduled pressure. -      BotRPTracker.recede(); -      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); -      updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure); - -      // Release dependent instructions for scheduling. -      releasePredecessors(SU); +      moveInstruction(MI, CurrentBottom); +      CurrentBottom = MI;      } -    SU->isScheduled = true; -    SchedImpl->schedNode(SU, IsTopNode); +    // Update bottom scheduled pressure. +    BotRPTracker.recede(); +    assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); +    updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);    } -  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); +} -  placeDebugValues(); +/// Update scheduler queues after scheduling an instruction. +void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { +  // Release dependent instructions for scheduling. +  if (IsTopNode) +    releaseSuccessors(SU); +  else +    releasePredecessors(SU); + +  SU->isScheduled = true; + +  // Notify the scheduling strategy after updating the DAG. +  SchedImpl->schedNode(SU, IsTopNode);  }  /// Reinsert any remaining debug_values, just like the PostRA scheduler. @@ -716,91 +643,146 @@ void ScheduleDAGMI::placeDebugValues() {    FirstDbgValue = NULL;  } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ScheduleDAGMI::dumpSchedule() const { +  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { +    if (SUnit *SU = getSUnit(&(*MI))) +      SU->dump(this); +    else +      dbgs() << "Missing SUnit\n"; +  } +} +#endif +  //===----------------------------------------------------------------------===//  // ConvergingScheduler - Implementation of the standard MachineSchedStrategy.  //===----------------------------------------------------------------------===//  namespace { -/// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience -/// methods for pushing and removing nodes. ReadyQueue's are uniquely identified -/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in. -class ReadyQueue { -  unsigned ID; -  std::string Name; -  std::vector<SUnit*> Queue; - +/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance +/// the schedule. +class ConvergingScheduler : public MachineSchedStrategy {  public: -  ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {} - -  unsigned getID() const { return ID; } - -  StringRef getName() const { return Name; } - -  // SU is in this queue if it's NodeQueueID is a superset of this ID. -  bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); } - -  bool empty() const { return Queue.empty(); } - -  unsigned size() const { return Queue.size(); } - -  typedef std::vector<SUnit*>::iterator iterator; +  /// Represent the type of SchedCandidate found within a single queue. +  /// pickNodeBidirectional depends on these listed by decreasing priority. +  enum CandReason { +    NoCand, SingleExcess, SingleCritical, ResourceReduce, ResourceDemand, +    BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce, +    SingleMax, MultiPressure, NextDefUse, NodeOrder}; -  iterator begin() { return Queue.begin(); } +#ifndef NDEBUG +  static const char *getReasonStr(ConvergingScheduler::CandReason Reason); +#endif -  iterator end() { return Queue.end(); } +  /// Policy for scheduling the next instruction in the candidate's zone. +  struct CandPolicy { +    bool ReduceLatency; +    unsigned ReduceResIdx; +    unsigned DemandResIdx; -  iterator find(SUnit *SU) { -    return std::find(Queue.begin(), Queue.end(), SU); -  } +    CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} +  }; -  void push(SUnit *SU) { -    Queue.push_back(SU); -    SU->NodeQueueId |= ID; -  } +  /// Status of an instruction's critical resource consumption. +  struct SchedResourceDelta { +    // Count critical resources in the scheduled region required by SU. +    unsigned CritResources; -  void remove(iterator I) { -    (*I)->NodeQueueId &= ~ID; -    *I = Queue.back(); -    Queue.pop_back(); -  } +    // Count critical resources from another region consumed by SU. +    unsigned DemandedResources; -  void dump() { -    dbgs() << Name << ": "; -    for (unsigned i = 0, e = Queue.size(); i < e; ++i) -      dbgs() << Queue[i]->NodeNum << " "; -    dbgs() << "\n"; -  } -}; +    SchedResourceDelta(): CritResources(0), DemandedResources(0) {} -/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance -/// the schedule. -class ConvergingScheduler : public MachineSchedStrategy { +    bool operator==(const SchedResourceDelta &RHS) const { +      return CritResources == RHS.CritResources +        && DemandedResources == RHS.DemandedResources; +    } +    bool operator!=(const SchedResourceDelta &RHS) const { +      return !operator==(RHS); +    } +  };    /// Store the state used by ConvergingScheduler heuristics, required for the    /// lifetime of one invocation of pickNode().    struct SchedCandidate { +    CandPolicy Policy; +      // The best SUnit candidate.      SUnit *SU; +    // The reason for this candidate. +    CandReason Reason; +      // Register pressure values for the best candidate.      RegPressureDelta RPDelta; -    SchedCandidate(): SU(NULL) {} +    // Critical resource consumption of the best candidate. +    SchedResourceDelta ResDelta; + +    SchedCandidate(const CandPolicy &policy) +    : Policy(policy), SU(NULL), Reason(NoCand) {} + +    bool isValid() const { return SU; } + +    // Copy the status of another candidate without changing policy. +    void setBest(SchedCandidate &Best) { +      assert(Best.Reason != NoCand && "uninitialized Sched candidate"); +      SU = Best.SU; +      Reason = Best.Reason; +      RPDelta = Best.RPDelta; +      ResDelta = Best.ResDelta; +    } + +    void initResourceDelta(const ScheduleDAGMI *DAG, +                           const TargetSchedModel *SchedModel); +  }; + +  /// Summarize the unscheduled region. +  struct SchedRemainder { +    // Critical path through the DAG in expected latency. +    unsigned CriticalPath; + +    // Unscheduled resources +    SmallVector<unsigned, 16> RemainingCounts; +    // Critical resource for the unscheduled zone. +    unsigned CritResIdx; +    // Number of micro-ops left to schedule. +    unsigned RemainingMicroOps; +    // Is the unscheduled zone resource limited. +    bool IsResourceLimited; + +    unsigned MaxRemainingCount; + +    void reset() { +      CriticalPath = 0; +      RemainingCounts.clear(); +      CritResIdx = 0; +      RemainingMicroOps = 0; +      IsResourceLimited = false; +      MaxRemainingCount = 0; +    } + +    SchedRemainder() { reset(); } + +    void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);    }; -  /// Represent the type of SchedCandidate found within a single queue. -  enum CandResult { -    NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure };    /// Each Scheduling boundary is associated with ready queues. It tracks the -  /// current cycle in whichever direction at has moved, and maintains the state +  /// current cycle in the direction of movement, and maintains the state    /// of "hazards" and other interlocks at the current cycle.    struct SchedBoundary {      ScheduleDAGMI *DAG; +    const TargetSchedModel *SchedModel; +    SchedRemainder *Rem;      ReadyQueue Available;      ReadyQueue Pending;      bool CheckPending; +    // For heuristics, keep a list of the nodes that immediately depend on the +    // most recently scheduled node. +    SmallPtrSet<const SUnit*, 8> NextSUs; +      ScheduleHazardRecognizer *HazardRec;      unsigned CurrCycle; @@ -809,29 +791,88 @@ class ConvergingScheduler : public MachineSchedStrategy {      /// MinReadyCycle - Cycle of the soonest available instruction.      unsigned MinReadyCycle; +    // The expected latency of the critical path in this scheduled zone. +    unsigned ExpectedLatency; + +    // Resources used in the scheduled zone beyond this boundary. +    SmallVector<unsigned, 16> ResourceCounts; + +    // Cache the critical resources ID in this scheduled zone. +    unsigned CritResIdx; + +    // Is the scheduled region resource limited vs. latency limited. +    bool IsResourceLimited; + +    unsigned ExpectedCount; + +    // Policy flag: attempt to find ILP until expected latency is covered. +    bool ShouldIncreaseILP; + +#ifndef NDEBUG      // Remember the greatest min operand latency.      unsigned MaxMinLatency; +#endif + +    void reset() { +      Available.clear(); +      Pending.clear(); +      CheckPending = false; +      NextSUs.clear(); +      HazardRec = 0; +      CurrCycle = 0; +      IssueCount = 0; +      MinReadyCycle = UINT_MAX; +      ExpectedLatency = 0; +      ResourceCounts.resize(1); +      assert(!ResourceCounts[0] && "nonzero count for bad resource"); +      CritResIdx = 0; +      IsResourceLimited = false; +      ExpectedCount = 0; +      ShouldIncreaseILP = false; +#ifndef NDEBUG +      MaxMinLatency = 0; +#endif +      // Reserve a zero-count for invalid CritResIdx. +      ResourceCounts.resize(1); +    }      /// Pending queues extend the ready queues with the same ID and the      /// PendingFlag set.      SchedBoundary(unsigned ID, const Twine &Name): -      DAG(0), Available(ID, Name+".A"), -      Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"), -      CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0), -      MinReadyCycle(UINT_MAX), MaxMinLatency(0) {} +      DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), +      Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") { +      reset(); +    }      ~SchedBoundary() { delete HazardRec; } +    void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, +              SchedRemainder *rem); +      bool isTop() const {        return Available.getID() == ConvergingScheduler::TopQID;      } +    unsigned getUnscheduledLatency(SUnit *SU) const { +      if (isTop()) +        return SU->getHeight(); +      return SU->getDepth(); +    } + +    unsigned getCriticalCount() const { +      return ResourceCounts[CritResIdx]; +    } +      bool checkHazard(SUnit *SU); +    void checkILPPolicy(); +      void releaseNode(SUnit *SU, unsigned ReadyCycle);      void bumpCycle(); +    void countResource(unsigned PIdx, unsigned Cycles); +      void bumpNode(SUnit *SU);      void releasePending(); @@ -841,10 +882,13 @@ class ConvergingScheduler : public MachineSchedStrategy {      SUnit *pickOnlyChoice();    }; +private:    ScheduleDAGMI *DAG; +  const TargetSchedModel *SchedModel;    const TargetRegisterInfo *TRI;    // State of the top and bottom scheduled instruction boundaries. +  SchedRemainder Rem;    SchedBoundary Top;    SchedBoundary Bot; @@ -857,7 +901,7 @@ public:    };    ConvergingScheduler(): -    DAG(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} +    DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}    virtual void initialize(ScheduleDAGMI *dag); @@ -869,28 +913,80 @@ public:    virtual void releaseBottomNode(SUnit *SU); +  virtual void registerRoots(); +  protected: -  SUnit *pickNodeBidrectional(bool &IsTopNode); +  void balanceZones( +    ConvergingScheduler::SchedBoundary &CriticalZone, +    ConvergingScheduler::SchedCandidate &CriticalCand, +    ConvergingScheduler::SchedBoundary &OppositeZone, +    ConvergingScheduler::SchedCandidate &OppositeCand); + +  void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand, +                           ConvergingScheduler::SchedCandidate &BotCand); + +  void tryCandidate(SchedCandidate &Cand, +                    SchedCandidate &TryCand, +                    SchedBoundary &Zone, +                    const RegPressureTracker &RPTracker, +                    RegPressureTracker &TempTracker); + +  SUnit *pickNodeBidirectional(bool &IsTopNode); + +  void pickNodeFromQueue(SchedBoundary &Zone, +                         const RegPressureTracker &RPTracker, +                         SchedCandidate &Candidate); -  CandResult pickNodeFromQueue(ReadyQueue &Q, -                               const RegPressureTracker &RPTracker, -                               SchedCandidate &Candidate);  #ifndef NDEBUG -  void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU, -                      PressureElement P = PressureElement()); +  void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);  #endif  };  } // namespace +void ConvergingScheduler::SchedRemainder:: +init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { +  reset(); +  if (!SchedModel->hasInstrSchedModel()) +    return; +  RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); +  for (std::vector<SUnit>::iterator +         I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { +    const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); +    RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC); +    for (TargetSchedModel::ProcResIter +           PI = SchedModel->getWriteProcResBegin(SC), +           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { +      unsigned PIdx = PI->ProcResourceIdx; +      unsigned Factor = SchedModel->getResourceFactor(PIdx); +      RemainingCounts[PIdx] += (Factor * PI->Cycles); +    } +  } +} + +void ConvergingScheduler::SchedBoundary:: +init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { +  reset(); +  DAG = dag; +  SchedModel = smodel; +  Rem = rem; +  if (SchedModel->hasInstrSchedModel()) +    ResourceCounts.resize(SchedModel->getNumProcResourceKinds()); +} +  void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {    DAG = dag; +  SchedModel = DAG->getSchedModel();    TRI = DAG->TRI; -  Top.DAG = dag; -  Bot.DAG = dag; +  Rem.init(DAG, SchedModel); +  Top.init(DAG, SchedModel, &Rem); +  Bot.init(DAG, SchedModel, &Rem); + +  // Initialize resource counts. -  // Initialize the HazardRecognizers. +  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or +  // are disabled, then these HazardRecs will be disabled. +  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();    const TargetMachine &TM = DAG->MF.getTarget(); -  const InstrItineraryData *Itin = TM.getInstrItineraryData();    Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);    Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); @@ -905,13 +1001,12 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) {    for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();         I != E; ++I) {      unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; -    unsigned Latency = -      DAG->computeOperandLatency(I->getSUnit(), SU, *I, /*FindMin=*/true); +    unsigned MinLatency = I->getMinLatency();  #ifndef NDEBUG -    Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency); +    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);  #endif -    if (SU->TopReadyCycle < PredReadyCycle + Latency) -      SU->TopReadyCycle = PredReadyCycle + Latency; +    if (SU->TopReadyCycle < PredReadyCycle + MinLatency) +      SU->TopReadyCycle = PredReadyCycle + MinLatency;    }    Top.releaseNode(SU, SU->TopReadyCycle);  } @@ -925,17 +1020,27 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) {    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();         I != E; ++I) {      unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; -    unsigned Latency = -      DAG->computeOperandLatency(SU, I->getSUnit(), *I, /*FindMin=*/true); +    unsigned MinLatency = I->getMinLatency();  #ifndef NDEBUG -    Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency); +    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);  #endif -    if (SU->BotReadyCycle < SuccReadyCycle + Latency) -      SU->BotReadyCycle = SuccReadyCycle + Latency; +    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) +      SU->BotReadyCycle = SuccReadyCycle + MinLatency;    }    Bot.releaseNode(SU, SU->BotReadyCycle);  } +void ConvergingScheduler::registerRoots() { +  Rem.CriticalPath = DAG->ExitSU.getDepth(); +  // Some roots may not feed into ExitSU. Check all of them in case. +  for (std::vector<SUnit*>::const_iterator +         I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { +    if ((*I)->getDepth() > Rem.CriticalPath) +      Rem.CriticalPath = (*I)->getDepth(); +  } +  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); +} +  /// Does this SU have a hazard within the current instruction group.  ///  /// The scheduler supports two modes of hazard recognition. The first is the @@ -953,14 +1058,27 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {    if (HazardRec->isEnabled())      return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; -  if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth()) +  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); +  if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) { +    DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops=" +          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');      return true; - +  }    return false;  } +/// If expected latency is covered, disable ILP policy. +void ConvergingScheduler::SchedBoundary::checkILPPolicy() { +  if (ShouldIncreaseILP +      && (IsResourceLimited || ExpectedLatency <= CurrCycle)) { +    ShouldIncreaseILP = false; +    DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n'); +  } +} +  void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,                                                       unsigned ReadyCycle) { +    if (ReadyCycle < MinReadyCycle)      MinReadyCycle = ReadyCycle; @@ -970,15 +1088,31 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,      Pending.push(SU);    else      Available.push(SU); + +  // Record this node as an immediate dependent of the scheduled node. +  NextSUs.insert(SU); + +  // If CriticalPath has been computed, then check if the unscheduled nodes +  // exceed the ILP window. Before registerRoots, CriticalPath==0. +  if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU) +                            > Rem->CriticalPath + ILPWindow)) { +    ShouldIncreaseILP = true; +    DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " " +          << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n'); +  }  }  /// Move the boundary of scheduled code by one cycle.  void ConvergingScheduler::SchedBoundary::bumpCycle() { -  unsigned Width = DAG->getIssueWidth(); +  unsigned Width = SchedModel->getIssueWidth();    IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; +  unsigned NextCycle = CurrCycle + 1;    assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); -  unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); +  if (MinReadyCycle > NextCycle) { +    IssueCount = 0; +    NextCycle = MinReadyCycle; +  }    if (!HazardRec->isEnabled()) {      // Bypass HazardRec virtual calls. @@ -994,11 +1128,39 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {      }    }    CheckPending = true; +  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); -  DEBUG(dbgs() << "*** " << Available.getName() << " cycle " +  DEBUG(dbgs() << "  *** " << Available.getName() << " cycle "          << CurrCycle << '\n');  } +/// Add the given processor resource to this scheduled zone. +void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx, +                                                       unsigned Cycles) { +  unsigned Factor = SchedModel->getResourceFactor(PIdx); +  DEBUG(dbgs() << "  " << SchedModel->getProcResource(PIdx)->Name +        << " +(" << Cycles << "x" << Factor +        << ") / " << SchedModel->getLatencyFactor() << '\n'); + +  unsigned Count = Factor * Cycles; +  ResourceCounts[PIdx] += Count; +  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); +  Rem->RemainingCounts[PIdx] -= Count; + +  // Reset MaxRemainingCount for sanity. +  Rem->MaxRemainingCount = 0; + +  // Check if this resource exceeds the current critical resource by a full +  // cycle. If so, it becomes the critical resource. +  if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx]) +      >= (int)SchedModel->getLatencyFactor()) { +    CritResIdx = PIdx; +    DEBUG(dbgs() << "  *** Critical resource " +          << SchedModel->getProcResource(PIdx)->Name << " x" +          << ResourceCounts[PIdx] << '\n'); +  } +} +  /// Move the boundary of scheduled code by one SUnit.  void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {    // Update the reservation table. @@ -1010,11 +1172,38 @@ void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {      }      HazardRec->EmitInstruction(SU);    } +  // Update resource counts and critical resource. +  if (SchedModel->hasInstrSchedModel()) { +    const MCSchedClassDesc *SC = DAG->getSchedClass(SU); +    Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC); +    for (TargetSchedModel::ProcResIter +           PI = SchedModel->getWriteProcResBegin(SC), +           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { +      countResource(PI->ProcResourceIdx, PI->Cycles); +    } +  } +  if (isTop()) { +    if (SU->getDepth() > ExpectedLatency) +      ExpectedLatency = SU->getDepth(); +  } +  else { +    if (SU->getHeight() > ExpectedLatency) +      ExpectedLatency = SU->getHeight(); +  } + +  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); +    // Check the instruction group dispatch limit.    // TODO: Check if this SU must end a dispatch group. -  IssueCount += DAG->getNumMicroOps(SU->getInstr()); -  if (IssueCount >= DAG->getIssueWidth()) { -    DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); +  IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); + +  // checkHazard prevents scheduling multiple instructions per cycle that exceed +  // issue width. However, we commonly reach the maximum. In this case +  // opportunistically bump the cycle to avoid uselessly checking everything in +  // the readyQ. Furthermore, a single instruction may produce more than one +  // cycle's worth of micro-ops. +  if (IssueCount >= SchedModel->getIssueWidth()) { +    DEBUG(dbgs() << "  *** Max instrs at cycle " << CurrCycle << '\n');      bumpCycle();    }  } @@ -1045,6 +1234,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() {      Pending.remove(Pending.begin()+i);      --i; --e;    } +  DEBUG(if (!Pending.empty()) Pending.dump());    CheckPending = false;  } @@ -1059,12 +1249,23 @@ void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {  }  /// If this queue only has one ready candidate, return it. As a side effect, -/// advance the cycle until at least one node is ready. If multiple instructions -/// are ready, return NULL. +/// defer any nodes that now hit a hazard, and advance the cycle until at least +/// one node is ready. If multiple instructions are ready, return NULL.  SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {    if (CheckPending)      releasePending(); +  if (IssueCount > 0) { +    // Defer any ready instrs that now have a hazard. +    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { +      if (checkHazard(*I)) { +        Pending.push(*I); +        I = Available.remove(I); +        continue; +      } +      ++I; +    } +  }    for (unsigned i = 0; Available.empty(); ++i) {      assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&             "permanent hazard"); (void)i; @@ -1076,18 +1277,262 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {    return NULL;  } -#ifndef NDEBUG -void ConvergingScheduler::traceCandidate(const char *Label, const ReadyQueue &Q, -                                         SUnit *SU, PressureElement P) { -  dbgs() << Label << " " << Q.getName() << " "; -  if (P.isValid()) -    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease -           << " "; -  else -    dbgs() << "     "; -  SU->dump(DAG); +/// Record the candidate policy for opposite zones with different critical +/// resources. +/// +/// If the CriticalZone is latency limited, don't force a policy for the +/// candidates here. Instead, When releasing each candidate, releaseNode +/// compares the region's critical path to the candidate's height or depth and +/// the scheduled zone's expected latency then sets ShouldIncreaseILP. +void ConvergingScheduler::balanceZones( +  ConvergingScheduler::SchedBoundary &CriticalZone, +  ConvergingScheduler::SchedCandidate &CriticalCand, +  ConvergingScheduler::SchedBoundary &OppositeZone, +  ConvergingScheduler::SchedCandidate &OppositeCand) { + +  if (!CriticalZone.IsResourceLimited) +    return; + +  SchedRemainder *Rem = CriticalZone.Rem; + +  // If the critical zone is overconsuming a resource relative to the +  // remainder, try to reduce it. +  unsigned RemainingCritCount = +    Rem->RemainingCounts[CriticalZone.CritResIdx]; +  if ((int)(Rem->MaxRemainingCount - RemainingCritCount) +      > (int)SchedModel->getLatencyFactor()) { +    CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx; +    DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce " +          << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name +          << '\n'); +  } +  // If the other zone is underconsuming a resource relative to the full zone, +  // try to increase it. +  unsigned OppositeCount = +    OppositeZone.ResourceCounts[CriticalZone.CritResIdx]; +  if ((int)(OppositeZone.ExpectedCount - OppositeCount) +      > (int)SchedModel->getLatencyFactor()) { +    OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx; +    DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand " +          << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name +          << '\n'); +  } +} + +/// Determine if the scheduled zones exceed resource limits or critical path and +/// set each candidate's ReduceHeight policy accordingly. +void ConvergingScheduler::checkResourceLimits( +  ConvergingScheduler::SchedCandidate &TopCand, +  ConvergingScheduler::SchedCandidate &BotCand) { + +  Bot.checkILPPolicy(); +  Top.checkILPPolicy(); +  if (Bot.ShouldIncreaseILP) +    BotCand.Policy.ReduceLatency = true; +  if (Top.ShouldIncreaseILP) +    TopCand.Policy.ReduceLatency = true; + +  // Handle resource-limited regions. +  if (Top.IsResourceLimited && Bot.IsResourceLimited +      && Top.CritResIdx == Bot.CritResIdx) { +    // If the scheduled critical resource in both zones is no longer the +    // critical remaining resource, attempt to reduce resource height both ways. +    if (Top.CritResIdx != Rem.CritResIdx) { +      TopCand.Policy.ReduceResIdx = Top.CritResIdx; +      BotCand.Policy.ReduceResIdx = Bot.CritResIdx; +      DEBUG(dbgs() << "Reduce scheduled " +            << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n'); +    } +    return; +  } +  // Handle latency-limited regions. +  if (!Top.IsResourceLimited && !Bot.IsResourceLimited) { +    // If the total scheduled expected latency exceeds the region's critical +    // path then reduce latency both ways. +    // +    // Just because a zone is not resource limited does not mean it is latency +    // limited. Unbuffered resource, such as max micro-ops may cause CurrCycle +    // to exceed expected latency. +    if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath) +        && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) { +      TopCand.Policy.ReduceLatency = true; +      BotCand.Policy.ReduceLatency = true; +      DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency +            << " + " << Bot.ExpectedLatency << '\n'); +    } +    return; +  } +  // The critical resource is different in each zone, so request balancing. + +  // Compute the cost of each zone. +  Rem.MaxRemainingCount = std::max( +    Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(), +    Rem.RemainingCounts[Rem.CritResIdx]); +  Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle); +  Top.ExpectedCount = std::max( +    Top.getCriticalCount(), +    Top.ExpectedCount * SchedModel->getLatencyFactor()); +  Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle); +  Bot.ExpectedCount = std::max( +    Bot.getCriticalCount(), +    Bot.ExpectedCount * SchedModel->getLatencyFactor()); + +  balanceZones(Top, TopCand, Bot, BotCand); +  balanceZones(Bot, BotCand, Top, TopCand); +} + +void ConvergingScheduler::SchedCandidate:: +initResourceDelta(const ScheduleDAGMI *DAG, +                  const TargetSchedModel *SchedModel) { +  if (!Policy.ReduceResIdx && !Policy.DemandResIdx) +    return; + +  const MCSchedClassDesc *SC = DAG->getSchedClass(SU); +  for (TargetSchedModel::ProcResIter +         PI = SchedModel->getWriteProcResBegin(SC), +         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { +    if (PI->ProcResourceIdx == Policy.ReduceResIdx) +      ResDelta.CritResources += PI->Cycles; +    if (PI->ProcResourceIdx == Policy.DemandResIdx) +      ResDelta.DemandedResources += PI->Cycles; +  } +} + +/// Return true if this heuristic determines order. +static bool tryLess(unsigned TryVal, unsigned CandVal, +                    ConvergingScheduler::SchedCandidate &TryCand, +                    ConvergingScheduler::SchedCandidate &Cand, +                    ConvergingScheduler::CandReason Reason) { +  if (TryVal < CandVal) { +    TryCand.Reason = Reason; +    return true; +  } +  if (TryVal > CandVal) { +    if (Cand.Reason > Reason) +      Cand.Reason = Reason; +    return true; +  } +  return false; +} +static bool tryGreater(unsigned TryVal, unsigned CandVal, +                       ConvergingScheduler::SchedCandidate &TryCand, +                       ConvergingScheduler::SchedCandidate &Cand, +                       ConvergingScheduler::CandReason Reason) { +  if (TryVal > CandVal) { +    TryCand.Reason = Reason; +    return true; +  } +  if (TryVal < CandVal) { +    if (Cand.Reason > Reason) +      Cand.Reason = Reason; +    return true; +  } +  return false; +} + +/// Apply a set of heursitics to a new candidate. Heuristics are currently +/// hierarchical. This may be more efficient than a graduated cost model because +/// we don't need to evaluate all aspects of the model for each node in the +/// queue. But it's really done to make the heuristics easier to debug and +/// statistically analyze. +/// +/// \param Cand provides the policy and current best candidate. +/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. +/// \param Zone describes the scheduled zone that we are extending. +/// \param RPTracker describes reg pressure within the scheduled zone. +/// \param TempTracker is a scratch pressure tracker to reuse in queries. +void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, +                                       SchedCandidate &TryCand, +                                       SchedBoundary &Zone, +                                       const RegPressureTracker &RPTracker, +                                       RegPressureTracker &TempTracker) { + +  // Always initialize TryCand's RPDelta. +  TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta, +                                  DAG->getRegionCriticalPSets(), +                                  DAG->getRegPressure().MaxSetPressure); + +  // Initialize the candidate if needed. +  if (!Cand.isValid()) { +    TryCand.Reason = NodeOrder; +    return; +  } +  // Avoid exceeding the target's limit. +  if (tryLess(TryCand.RPDelta.Excess.UnitIncrease, +              Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess)) +    return; +  if (Cand.Reason == SingleExcess) +    Cand.Reason = MultiPressure; + +  // Avoid increasing the max critical pressure in the scheduled region. +  if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease, +              Cand.RPDelta.CriticalMax.UnitIncrease, +              TryCand, Cand, SingleCritical)) +    return; +  if (Cand.Reason == SingleCritical) +    Cand.Reason = MultiPressure; + +  // Avoid critical resource consumption and balance the schedule. +  TryCand.initResourceDelta(DAG, SchedModel); +  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, +              TryCand, Cand, ResourceReduce)) +    return; +  if (tryGreater(TryCand.ResDelta.DemandedResources, +                 Cand.ResDelta.DemandedResources, +                 TryCand, Cand, ResourceDemand)) +    return; + +  // Avoid serializing long latency dependence chains. +  if (Cand.Policy.ReduceLatency) { +    if (Zone.isTop()) { +      if (Cand.SU->getDepth() * SchedModel->getLatencyFactor() +          > Zone.ExpectedCount) { +        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), +                    TryCand, Cand, TopDepthReduce)) +          return; +      } +      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), +                     TryCand, Cand, TopPathReduce)) +        return; +    } +    else { +      if (Cand.SU->getHeight() * SchedModel->getLatencyFactor() +          > Zone.ExpectedCount) { +        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), +                    TryCand, Cand, BotHeightReduce)) +          return; +      } +      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), +                     TryCand, Cand, BotPathReduce)) +        return; +    } +  } + +  // Avoid increasing the max pressure of the entire region. +  if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease, +              Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax)) +    return; +  if (Cand.Reason == SingleMax) +    Cand.Reason = MultiPressure; + +  // Prefer immediate defs/users of the last scheduled instruction. This is a +  // nice pressure avoidance strategy that also conserves the processor's +  // register renaming resources and keeps the machine code readable. +  if (Zone.NextSUs.count(TryCand.SU) && !Zone.NextSUs.count(Cand.SU)) { +    TryCand.Reason = NextDefUse; +    return; +  } +  if (!Zone.NextSUs.count(TryCand.SU) && Zone.NextSUs.count(Cand.SU)) { +    if (Cand.Reason > NextDefUse) +      Cand.Reason = NextDefUse; +    return; +  } +  // Fall through to original instruction order. +  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) +      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { +    TryCand.Reason = NodeOrder; +  }  } -#endif  /// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is  /// more desirable than RHS from scheduling standpoint. @@ -1098,109 +1543,144 @@ static bool compareRPDelta(const RegPressureDelta &LHS,    // have UnitIncrease==0, so are neutral.    // Avoid increasing the max critical pressure in the scheduled region. -  if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) +  if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) { +    DEBUG(dbgs() << "RP excess top - bot: " +          << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');      return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease; - +  }    // Avoid increasing the max critical pressure in the scheduled region. -  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) +  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) { +    DEBUG(dbgs() << "RP critical top - bot: " +          << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease) +          << '\n');      return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease; - +  }    // Avoid increasing the max pressure of the entire region. -  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) +  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) { +    DEBUG(dbgs() << "RP current top - bot: " +          << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease) +          << '\n');      return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease; - +  }    return false;  } +#ifndef NDEBUG +const char *ConvergingScheduler::getReasonStr( +  ConvergingScheduler::CandReason Reason) { +  switch (Reason) { +  case NoCand:         return "NOCAND    "; +  case SingleExcess:   return "REG-EXCESS"; +  case SingleCritical: return "REG-CRIT  "; +  case SingleMax:      return "REG-MAX   "; +  case MultiPressure:  return "REG-MULTI "; +  case ResourceReduce: return "RES-REDUCE"; +  case ResourceDemand: return "RES-DEMAND"; +  case TopDepthReduce: return "TOP-DEPTH "; +  case TopPathReduce:  return "TOP-PATH  "; +  case BotHeightReduce:return "BOT-HEIGHT"; +  case BotPathReduce:  return "BOT-PATH  "; +  case NextDefUse:     return "DEF-USE   "; +  case NodeOrder:      return "ORDER     "; +  }; +  llvm_unreachable("Unknown reason!"); +} + +void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand, +                                         const SchedBoundary &Zone) { +  const char *Label = getReasonStr(Cand.Reason); +  PressureElement P; +  unsigned ResIdx = 0; +  unsigned Latency = 0; +  switch (Cand.Reason) { +  default: +    break; +  case SingleExcess: +    P = Cand.RPDelta.Excess; +    break; +  case SingleCritical: +    P = Cand.RPDelta.CriticalMax; +    break; +  case SingleMax: +    P = Cand.RPDelta.CurrentMax; +    break; +  case ResourceReduce: +    ResIdx = Cand.Policy.ReduceResIdx; +    break; +  case ResourceDemand: +    ResIdx = Cand.Policy.DemandResIdx; +    break; +  case TopDepthReduce: +    Latency = Cand.SU->getDepth(); +    break; +  case TopPathReduce: +    Latency = Cand.SU->getHeight(); +    break; +  case BotHeightReduce: +    Latency = Cand.SU->getHeight(); +    break; +  case BotPathReduce: +    Latency = Cand.SU->getDepth(); +    break; +  } +  dbgs() << Label << " " << Zone.Available.getName() << " "; +  if (P.isValid()) +    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease +           << " "; +  else +    dbgs() << "     "; +  if (ResIdx) +    dbgs() << SchedModel->getProcResource(ResIdx)->Name << " "; +  else +    dbgs() << "        "; +  if (Latency) +    dbgs() << Latency << " cycles "; +  else +    dbgs() << "         "; +  Cand.SU->dump(DAG); +} +#endif +  /// Pick the best candidate from the top queue.  ///  /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during  /// DAG building. To adjust for the current scheduling location we need to  /// maintain the number of vreg uses remaining to be top-scheduled. -ConvergingScheduler::CandResult ConvergingScheduler:: -pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, -                  SchedCandidate &Candidate) { +void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, +                                            const RegPressureTracker &RPTracker, +                                            SchedCandidate &Cand) { +  ReadyQueue &Q = Zone.Available; +    DEBUG(Q.dump());    // getMaxPressureDelta temporarily modifies the tracker.    RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); -  // BestSU remains NULL if no top candidates beat the best existing candidate. -  CandResult FoundCandidate = NoCand;    for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { -    RegPressureDelta RPDelta; -    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, -                                    DAG->getRegionCriticalPSets(), -                                    DAG->getRegPressure().MaxSetPressure); - -    // Initialize the candidate if needed. -    if (!Candidate.SU) { -      Candidate.SU = *I; -      Candidate.RPDelta = RPDelta; -      FoundCandidate = NodeOrder; -      continue; -    } -    // Avoid exceeding the target's limit. -    if (RPDelta.Excess.UnitIncrease < Candidate.RPDelta.Excess.UnitIncrease) { -      DEBUG(traceCandidate("ECAND", Q, *I, RPDelta.Excess)); -      Candidate.SU = *I; -      Candidate.RPDelta = RPDelta; -      FoundCandidate = SingleExcess; -      continue; -    } -    if (RPDelta.Excess.UnitIncrease > Candidate.RPDelta.Excess.UnitIncrease) -      continue; -    if (FoundCandidate == SingleExcess) -      FoundCandidate = MultiPressure; - -    // Avoid increasing the max critical pressure in the scheduled region. -    if (RPDelta.CriticalMax.UnitIncrease -        < Candidate.RPDelta.CriticalMax.UnitIncrease) { -      DEBUG(traceCandidate("PCAND", Q, *I, RPDelta.CriticalMax)); -      Candidate.SU = *I; -      Candidate.RPDelta = RPDelta; -      FoundCandidate = SingleCritical; -      continue; -    } -    if (RPDelta.CriticalMax.UnitIncrease -        > Candidate.RPDelta.CriticalMax.UnitIncrease) -      continue; -    if (FoundCandidate == SingleCritical) -      FoundCandidate = MultiPressure; - -    // Avoid increasing the max pressure of the entire region. -    if (RPDelta.CurrentMax.UnitIncrease -        < Candidate.RPDelta.CurrentMax.UnitIncrease) { -      DEBUG(traceCandidate("MCAND", Q, *I, RPDelta.CurrentMax)); -      Candidate.SU = *I; -      Candidate.RPDelta = RPDelta; -      FoundCandidate = SingleMax; -      continue; -    } -    if (RPDelta.CurrentMax.UnitIncrease -        > Candidate.RPDelta.CurrentMax.UnitIncrease) -      continue; -    if (FoundCandidate == SingleMax) -      FoundCandidate = MultiPressure; - -    // Fall through to original instruction order. -    // Only consider node order if Candidate was chosen from this Q. -    if (FoundCandidate == NoCand) -      continue; -    if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) -        || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { -      DEBUG(traceCandidate("NCAND", Q, *I)); -      Candidate.SU = *I; -      Candidate.RPDelta = RPDelta; -      FoundCandidate = NodeOrder; +    SchedCandidate TryCand(Cand.Policy); +    TryCand.SU = *I; +    tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker); +    if (TryCand.Reason != NoCand) { +      // Initialize resource delta if needed in case future heuristics query it. +      if (TryCand.ResDelta == SchedResourceDelta()) +        TryCand.initResourceDelta(DAG, SchedModel); +      Cand.setBest(TryCand); +      DEBUG(traceCandidate(Cand, Zone));      } +    TryCand.SU = *I;    } -  return FoundCandidate; +} + +static void tracePick(const ConvergingScheduler::SchedCandidate &Cand, +                      bool IsTop) { +  DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot") +        << " SU(" << Cand.SU->NodeNum << ") " +        << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');  }  /// Pick the best candidate node from either the top or bottom queue. -SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) { +SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {    // Schedule as far as possible in the direction of no choice. This is most    // efficient, but also provides the best heuristics for CriticalPSets.    if (SUnit *SU = Bot.pickOnlyChoice()) { @@ -1211,11 +1691,14 @@ SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) {      IsTopNode = true;      return SU;    } -  SchedCandidate BotCand; +  CandPolicy NoPolicy; +  SchedCandidate BotCand(NoPolicy); +  SchedCandidate TopCand(NoPolicy); +  checkResourceLimits(TopCand, BotCand); +    // Prefer bottom scheduling when heuristics are silent. -  CandResult BotResult = pickNodeFromQueue(Bot.Available, -                                           DAG->getBotRPTracker(), BotCand); -  assert(BotResult != NoCand && "failed to find the first candidate"); +  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); +  assert(BotCand.Reason != NoCand && "failed to find the first candidate");    // If either Q has a single candidate that provides the least increase in    // Excess pressure, we can immediately schedule from that Q. @@ -1224,37 +1707,41 @@ SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) {    // affects picking from either Q. If scheduling in one direction must    // increase pressure for one of the excess PSets, then schedule in that    // direction first to provide more freedom in the other direction. -  if (BotResult == SingleExcess || BotResult == SingleCritical) { +  if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {      IsTopNode = false; +    tracePick(BotCand, IsTopNode);      return BotCand.SU;    }    // Check if the top Q has a better candidate. -  SchedCandidate TopCand; -  CandResult TopResult = pickNodeFromQueue(Top.Available, -                                           DAG->getTopRPTracker(), TopCand); -  assert(TopResult != NoCand && "failed to find the first candidate"); +  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); +  assert(TopCand.Reason != NoCand && "failed to find the first candidate"); -  if (TopResult == SingleExcess || TopResult == SingleCritical) { -    IsTopNode = true; -    return TopCand.SU; -  }    // If either Q has a single candidate that minimizes pressure above the    // original region's pressure pick it. -  if (BotResult == SingleMax) { +  if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) { +    if (TopCand.Reason < BotCand.Reason) { +      IsTopNode = true; +      tracePick(TopCand, IsTopNode); +      return TopCand.SU; +    }      IsTopNode = false; +    tracePick(BotCand, IsTopNode);      return BotCand.SU;    } -  if (TopResult == SingleMax) { +  // Check for a salient pressure difference and pick the best from either side. +  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {      IsTopNode = true; +    tracePick(TopCand, IsTopNode);      return TopCand.SU;    } -  // Check for a salient pressure difference and pick the best from either side. -  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) { +  // Otherwise prefer the bottom candidate, in node order if all else failed. +  if (TopCand.Reason < BotCand.Reason) {      IsTopNode = true; +    tracePick(TopCand, IsTopNode);      return TopCand.SU;    } -  // Otherwise prefer the bottom candidate in node order.    IsTopNode = false; +  tracePick(BotCand, IsTopNode);    return BotCand.SU;  } @@ -1266,33 +1753,34 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {      return NULL;    }    SUnit *SU; -  if (ForceTopDown) { -    SU = Top.pickOnlyChoice(); -    if (!SU) { -      SchedCandidate TopCand; -      CandResult TopResult = -        pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand); -      assert(TopResult != NoCand && "failed to find the first candidate"); -      (void)TopResult; -      SU = TopCand.SU; +  do { +    if (ForceTopDown) { +      SU = Top.pickOnlyChoice(); +      if (!SU) { +        CandPolicy NoPolicy; +        SchedCandidate TopCand(NoPolicy); +        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); +        assert(TopCand.Reason != NoCand && "failed to find the first candidate"); +        SU = TopCand.SU; +      } +      IsTopNode = true;      } -    IsTopNode = true; -  } -  else if (ForceBottomUp) { -    SU = Bot.pickOnlyChoice(); -    if (!SU) { -      SchedCandidate BotCand; -      CandResult BotResult = -        pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand); -      assert(BotResult != NoCand && "failed to find the first candidate"); -      (void)BotResult; -      SU = BotCand.SU; +    else if (ForceBottomUp) { +      SU = Bot.pickOnlyChoice(); +      if (!SU) { +        CandPolicy NoPolicy; +        SchedCandidate BotCand(NoPolicy); +        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); +        assert(BotCand.Reason != NoCand && "failed to find the first candidate"); +        SU = BotCand.SU; +      } +      IsTopNode = false;      } -    IsTopNode = false; -  } -  else { -    SU = pickNodeBidrectional(IsTopNode); -  } +    else { +      SU = pickNodeBidirectional(IsTopNode); +    } +  } while (SU->isScheduled); +    if (SU->isTopReady())      Top.removeReady(SU);    if (SU->isBottomReady()) @@ -1331,6 +1819,86 @@ ConvergingSchedRegistry("converge", "Standard converging scheduler.",                          createConvergingSched);  //===----------------------------------------------------------------------===// +// ILP Scheduler. Currently for experimental analysis of heuristics. +//===----------------------------------------------------------------------===// + +namespace { +/// \brief Order nodes by the ILP metric. +struct ILPOrder { +  ScheduleDAGILP *ILP; +  bool MaximizeILP; + +  ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {} + +  /// \brief Apply a less-than relation on node priority. +  bool operator()(const SUnit *A, const SUnit *B) const { +    // Return true if A comes after B in the Q. +    if (MaximizeILP) +      return ILP->getILP(A) < ILP->getILP(B); +    else +      return ILP->getILP(A) > ILP->getILP(B); +  } +}; + +/// \brief Schedule based on the ILP metric. +class ILPScheduler : public MachineSchedStrategy { +  ScheduleDAGILP ILP; +  ILPOrder Cmp; + +  std::vector<SUnit*> ReadyQ; +public: +  ILPScheduler(bool MaximizeILP) +  : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {} + +  virtual void initialize(ScheduleDAGMI *DAG) { +    ReadyQ.clear(); +    ILP.resize(DAG->SUnits.size()); +  } + +  virtual void registerRoots() { +    for (std::vector<SUnit*>::const_iterator +           I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) { +      ILP.computeILP(*I); +    } +  } + +  /// Implement MachineSchedStrategy interface. +  /// ----------------------------------------- + +  virtual SUnit *pickNode(bool &IsTopNode) { +    if (ReadyQ.empty()) return NULL; +    pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); +    SUnit *SU = ReadyQ.back(); +    ReadyQ.pop_back(); +    IsTopNode = false; +    DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr() +          << " ILP: " << ILP.getILP(SU) << '\n'); +    return SU; +  } + +  virtual void schedNode(SUnit *, bool) {} + +  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ } + +  virtual void releaseBottomNode(SUnit *SU) { +    ReadyQ.push_back(SU); +    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); +  } +}; +} // namespace + +static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { +  return new ScheduleDAGMI(C, new ILPScheduler(true)); +} +static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { +  return new ScheduleDAGMI(C, new ILPScheduler(false)); +} +static MachineSchedRegistry ILPMaxRegistry( +  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler); +static MachineSchedRegistry ILPMinRegistry( +  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler); + +//===----------------------------------------------------------------------===//  // Machine Instruction Shuffler for Correctness Testing  //===----------------------------------------------------------------------===// | 
