diff options
Diffstat (limited to 'contrib/llvm/lib/CodeGen/MachineScheduler.cpp')
| -rw-r--r-- | contrib/llvm/lib/CodeGen/MachineScheduler.cpp | 232 | 
1 files changed, 161 insertions, 71 deletions
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp index a48e54caf3fe..bcee15c7c75f 100644 --- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -49,6 +49,11 @@ DumpCriticalPathLength("misched-dcpl", cl::Hidden,  static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,    cl::desc("Pop up a window to show MISched dags after they are processed")); +/// In some situations a few uninteresting nodes depend on nearly all other +/// nodes in the graph, provide a cutoff to hide them. +static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, +  cl::desc("Hide nodes with more predecessor/successor than cutoff")); +  static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,    cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); @@ -106,7 +111,7 @@ public:    void print(raw_ostream &O, const Module* = nullptr) const override;  protected: -  void scheduleRegions(ScheduleDAGInstrs &Scheduler); +  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);  };  /// MachineScheduler runs after coalescing and before register allocation. @@ -146,7 +151,7 @@ char &llvm::MachineSchedulerID = MachineScheduler::ID;  INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler",                        "Machine Instruction Scheduler", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)  INITIALIZE_PASS_DEPENDENCY(SlotIndexes)  INITIALIZE_PASS_DEPENDENCY(LiveIntervals)  INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler", @@ -161,7 +166,7 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {    AU.setPreservesCFG();    AU.addRequiredID(MachineDominatorsID);    AU.addRequired<MachineLoopInfo>(); -  AU.addRequired<AliasAnalysis>(); +  AU.addRequired<AAResultsWrapperPass>();    AU.addRequired<TargetPassConfig>();    AU.addRequired<SlotIndexes>();    AU.addPreserved<SlotIndexes>(); @@ -315,14 +320,14 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {    } else if (!mf.getSubtarget().enableMachineScheduler())      return false; -  DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs())); +  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));    // Initialize the context of the pass.    MF = &mf;    MLI = &getAnalysis<MachineLoopInfo>();    MDT = &getAnalysis<MachineDominatorTree>();    PassConfig = &getAnalysis<TargetPassConfig>(); -  AA = &getAnalysis<AliasAnalysis>(); +  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();    LIS = &getAnalysis<LiveIntervals>(); @@ -335,7 +340,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {    // Instantiate the selected scheduler for this target, function, and    // optimization level.    std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler()); -  scheduleRegions(*Scheduler); +  scheduleRegions(*Scheduler, false);    DEBUG(LIS->dump());    if (VerifyScheduling) @@ -363,7 +368,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {    // Instantiate the selected scheduler for this target, function, and    // optimization level.    std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler()); -  scheduleRegions(*Scheduler); +  scheduleRegions(*Scheduler, true);    if (VerifyScheduling)      MF->verify(this, "After post machine scheduling."); @@ -383,15 +388,14 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {  static bool isSchedBoundary(MachineBasicBlock::iterator MI,                              MachineBasicBlock *MBB,                              MachineFunction *MF, -                            const TargetInstrInfo *TII, -                            bool IsPostRA) { +                            const TargetInstrInfo *TII) {    return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF);  }  /// Main driver for both MachineScheduler and PostMachineScheduler. -void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { +void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, +                                           bool FixKillFlags) {    const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); -  bool IsPostRA = Scheduler.isPostRA();    // Visit all machine basic blocks.    // @@ -400,7 +404,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {    for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();         MBB != MBBEnd; ++MBB) { -    Scheduler.startBlock(MBB); +    Scheduler.startBlock(&*MBB);  #ifndef NDEBUG      if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName()) @@ -429,7 +433,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {        // Avoid decrementing RegionEnd for blocks with no terminator.        if (RegionEnd != MBB->end() || -          isSchedBoundary(std::prev(RegionEnd), MBB, MF, TII, IsPostRA)) { +          isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {          --RegionEnd;          // Count the boundary instruction.          --RemainingInstrs; @@ -440,14 +444,14 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {        unsigned NumRegionInstrs = 0;        MachineBasicBlock::iterator I = RegionEnd;        for(;I != MBB->begin(); --I, --RemainingInstrs) { -        if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA)) +        if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII))            break;          if (!I->isDebugValue())            ++NumRegionInstrs;        }        // Notify the scheduler of the region, even if we may skip scheduling        // it. Perhaps it still needs to be bundled. -      Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs); +      Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);        // Skip empty scheduling regions (0 or 1 schedulable instructions).        if (I == RegionEnd || I == std::prev(RegionEnd)) { @@ -456,8 +460,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {          Scheduler.exitRegion();          continue;        } -      DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ? "PostRA " : "") -            << "MI Scheduling **********\n"); +      DEBUG(dbgs() << "********** MI Scheduling **********\n");        DEBUG(dbgs() << MF->getName()              << ":BB#" << MBB->getNumber() << " " << MBB->getName()              << "\n  From: " << *I << "    To: "; @@ -484,11 +487,11 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {      }      assert(RemainingInstrs == 0 && "Instruction count mismatch!");      Scheduler.finishBlock(); -    if (Scheduler.isPostRA()) { -      // FIXME: Ideally, no further passes should rely on kill flags. However, -      // thumb2 size reduction is currently an exception. -      Scheduler.fixupKills(MBB); -    } +    // FIXME: Ideally, no further passes should rely on kill flags. However, +    // thumb2 size reduction is currently an exception, so the PostMIScheduler +    // needs to do this. +    if (FixKillFlags) +        Scheduler.fixupKills(&*MBB);    }    Scheduler.finalizeSchedule();  } @@ -499,7 +502,7 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {  LLVM_DUMP_METHOD  void ReadyQueue::dump() { -  dbgs() << Name << ": "; +  dbgs() << "Queue " << Name << ": ";    for (unsigned i = 0, e = Queue.size(); i < e; ++i)      dbgs() << Queue[i]->NodeNum << " ";    dbgs() << "\n"; @@ -660,6 +663,9 @@ bool ScheduleDAGMI::checkSchedLimit() {  /// does not consider liveness or register pressure. It is useful for PostRA  /// scheduling and potentially other custom schedulers.  void ScheduleDAGMI::schedule() { +  DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n"); +  DEBUG(SchedImpl->dumpPolicy()); +    // Build the DAG.    buildSchedGraph(AA); @@ -682,7 +688,11 @@ void ScheduleDAGMI::schedule() {    initQueues(TopRoots, BotRoots);    bool IsTopNode = false; -  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { +  while (true) { +    DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n"); +    SUnit *SU = SchedImpl->pickNode(IsTopNode); +    if (!SU) break; +      assert(!SU->isScheduled && "Node already scheduled");      if (!checkSchedLimit())        break; @@ -900,6 +910,13 @@ void ScheduleDAGMILive::initRegPressure() {      updatePressureDiffs(LiveUses);    } +  DEBUG( +    dbgs() << "Top Pressure:\n"; +    dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); +    dbgs() << "Bottom Pressure:\n"; +    dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); +  ); +    assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");    // Cache the list of excess pressure sets in this region. This will also track @@ -976,18 +993,24 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {      }      // RegisterPressureTracker guarantees that readsReg is true for LiveUses.      assert(VNI && "No live value at use."); -    for (VReg2UseMap::iterator -           UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { -      SUnit *SU = UI->SU; -      DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") " -            << *SU->getInstr()); +    for (const VReg2SUnit &V2SU +         : make_range(VRegUses.find(Reg), VRegUses.end())) { +      SUnit *SU = V2SU.SU;        // If this use comes before the reaching def, it cannot be a last use, so        // descrease its pressure change.        if (!SU->isScheduled && SU != &ExitSU) {          LiveQueryResult LRQ            = LI.Query(LIS->getInstructionIndex(SU->getInstr())); -        if (LRQ.valueIn() == VNI) -          getPressureDiff(SU).addPressureChange(Reg, true, &MRI); +        if (LRQ.valueIn() == VNI) { +          PressureDiff &PDiff = getPressureDiff(SU); +          PDiff.addPressureChange(Reg, true, &MRI); +          DEBUG( +            dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") " +                   << *SU->getInstr(); +            dbgs() << "              to "; +            PDiff.dump(*TRI); +          ); +        }        }      }    } @@ -998,12 +1021,14 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {  /// only includes instructions that have DAG nodes, not scheduling boundaries.  ///  /// This is a skeletal driver, with all the functionality pushed into helpers, -/// so that it can be easilly extended by experimental schedulers. Generally, +/// so that it can be easily extended by experimental schedulers. Generally,  /// implementing MachineSchedStrategy should be sufficient to implement a new  /// scheduling algorithm. However, if a scheduler further subclasses  /// ScheduleDAGMILive then it will want to override this virtual method in order  /// to update any specialized state.  void ScheduleDAGMILive::schedule() { +  DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n"); +  DEBUG(SchedImpl->dumpPolicy());    buildDAGWithRegPressure();    Topo.InitDAGTopologicalSorting(); @@ -1017,8 +1042,16 @@ void ScheduleDAGMILive::schedule() {    // This may initialize a DFSResult to be used for queue priority.    SchedImpl->initialize(this); -  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) -          SUnits[su].dumpAll(this)); +  DEBUG( +    for (const SUnit &SU : SUnits) { +      SU.dumpAll(this); +      if (ShouldTrackPressure) { +        dbgs() << "  Pressure Diff      : "; +        getPressureDiff(&SU).dump(*TRI); +      } +      dbgs() << '\n'; +    } +  );    if (ViewMISchedDAGs) viewGraph();    // Initialize ready queues now that the DAG and priority data are finalized. @@ -1030,7 +1063,11 @@ void ScheduleDAGMILive::schedule() {    }    bool IsTopNode = false; -  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { +  while (true) { +    DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n"); +    SUnit *SU = SchedImpl->pickNode(IsTopNode); +    if (!SU) break; +      assert(!SU->isScheduled && "Node already scheduled");      if (!checkSchedLimit())        break; @@ -1149,14 +1186,15 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {      unsigned LiveOutHeight = DefSU->getHeight();      unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;      // Visit all local users of the vreg def. -    for (VReg2UseMap::iterator -           UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { -      if (UI->SU == &ExitSU) +    for (const VReg2SUnit &V2SU +         : make_range(VRegUses.find(Reg), VRegUses.end())) { +      SUnit *SU = V2SU.SU; +      if (SU == &ExitSU)          continue;        // Only consider uses of the phi.        LiveQueryResult LRQ = -        LI.Query(LIS->getInstructionIndex(UI->SU->getInstr())); +        LI.Query(LIS->getInstructionIndex(SU->getInstr()));        if (!LRQ.valueIn()->isPHIDef())          continue; @@ -1164,10 +1202,10 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {        // overestimate in strange cases. This allows cyclic latency to be        // estimated as the minimum slack of the vreg's depth or height.        unsigned CyclicLatency = 0; -      if (LiveOutDepth > UI->SU->getDepth()) -        CyclicLatency = LiveOutDepth - UI->SU->getDepth(); +      if (LiveOutDepth > SU->getDepth()) +        CyclicLatency = LiveOutDepth - SU->getDepth(); -      unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency; +      unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;        if (LiveInHeight > LiveOutHeight) {          if (LiveInHeight - LiveOutHeight < CyclicLatency)            CyclicLatency = LiveInHeight - LiveOutHeight; @@ -1176,7 +1214,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {          CyclicLatency = 0;        DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU(" -            << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n"); +            << SU->NodeNum << ") = " << CyclicLatency << "c\n");        if (CyclicLatency > MaxCyclicLatency)          MaxCyclicLatency = CyclicLatency;      } @@ -1203,6 +1241,11 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {        // Update top scheduled pressure.        TopRPTracker.advance();        assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); +      DEBUG( +        dbgs() << "Top Pressure:\n"; +        dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); +      ); +        updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);      }    } @@ -1225,6 +1268,11 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {        SmallVector<unsigned, 8> LiveUses;        BotRPTracker.recede(&LiveUses);        assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); +      DEBUG( +        dbgs() << "Bottom Pressure:\n"; +        dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); +      ); +        updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);        updatePressureDiffs(LiveUses);      } @@ -1349,25 +1397,49 @@ namespace {  /// \brief Post-process the DAG to create cluster edges between instructions  /// that may be fused by the processor into a single operation.  class MacroFusion : public ScheduleDAGMutation { -  const TargetInstrInfo *TII; +  const TargetInstrInfo &TII; +  const TargetRegisterInfo &TRI;  public: -  MacroFusion(const TargetInstrInfo *tii): TII(tii) {} +  MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) +    : TII(TII), TRI(TRI) {}    void apply(ScheduleDAGMI *DAG) override;  };  } // anonymous +/// Returns true if \p MI reads a register written by \p Other. +static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI, +                       const MachineInstr &Other) { +  for (const MachineOperand &MO : MI.uses()) { +    if (!MO.isReg() || !MO.readsReg()) +      continue; + +    unsigned Reg = MO.getReg(); +    if (Other.modifiesRegister(Reg, &TRI)) +      return true; +  } +  return false; +} +  /// \brief Callback from DAG postProcessing to create cluster edges to encourage  /// fused operations.  void MacroFusion::apply(ScheduleDAGMI *DAG) {    // For now, assume targets can only fuse with the branch. -  MachineInstr *Branch = DAG->ExitSU.getInstr(); +  SUnit &ExitSU = DAG->ExitSU; +  MachineInstr *Branch = ExitSU.getInstr();    if (!Branch)      return; -  for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) { -    SUnit *SU = &DAG->SUnits[--Idx]; -    if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch)) +  for (SUnit &SU : DAG->SUnits) { +    // SUnits with successors can't be schedule in front of the ExitSU. +    if (!SU.Succs.empty()) +      continue; +    // We only care if the node writes to a register that the branch reads. +    MachineInstr *Pred = SU.getInstr(); +    if (!HasDataDep(TRI, *Branch, *Pred)) +      continue; + +    if (!TII.shouldScheduleAdjacent(Pred, Branch))        continue;      // Create a single weak edge from SU to ExitSU. The only effect is to cause @@ -1376,11 +1448,11 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) {      // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling      // of SU, we could create an artificial edge from the deepest root, but it      // hasn't been needed yet. -    bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster)); +    bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));      (void)Success;      assert(Success && "No DAG nodes should be reachable from ExitSU"); -    DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n"); +    DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n");      break;    }  } @@ -2277,7 +2349,7 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {      Latency = Cand.SU->getDepth();      break;    } -  dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); +  dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);    if (P.isValid())      dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())             << ":" << P.getUnitInc() << " "; @@ -2438,6 +2510,14 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,    }  } +void GenericScheduler::dumpPolicy() { +  dbgs() << "GenericScheduler RegionPolicy: " +         << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure +         << " OnlyTopDown=" << RegionPolicy.OnlyTopDown +         << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp +         << "\n"; +} +  /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic  /// critical path by more cycles than it takes to drain the instruction buffer.  /// We estimate an upper bounds on in-flight instructions as: @@ -2499,11 +2579,13 @@ static bool tryPressure(const PressureChange &TryP,                          const PressureChange &CandP,                          GenericSchedulerBase::SchedCandidate &TryCand,                          GenericSchedulerBase::SchedCandidate &Cand, -                        GenericSchedulerBase::CandReason Reason) { -  int TryRank = TryP.getPSetOrMax(); -  int CandRank = CandP.getPSetOrMax(); +                        GenericSchedulerBase::CandReason Reason, +                        const TargetRegisterInfo *TRI, +                        const MachineFunction &MF) { +  unsigned TryPSet = TryP.getPSetOrMax(); +  unsigned CandPSet = CandP.getPSetOrMax();    // If both candidates affect the same set, go with the smallest increase. -  if (TryRank == CandRank) { +  if (TryPSet == CandPSet) {      return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,                     Reason);    } @@ -2513,6 +2595,13 @@ static bool tryPressure(const PressureChange &TryP,                   Reason)) {      return true;    } + +  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) : +                                 std::numeric_limits<int>::max(); + +  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) : +                                   std::numeric_limits<int>::max(); +    // If the candidates are decreasing pressure, reverse priority.    if (TryP.getUnitInc() < 0)      std::swap(TryRank, CandRank); @@ -2597,7 +2686,7 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,      }    }    DEBUG(if (TryCand.RPDelta.Excess.isValid()) -          dbgs() << "  SU(" << TryCand.SU->NodeNum << ") " +          dbgs() << "  Try  SU(" << TryCand.SU->NodeNum << ") "                   << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())                   << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n"); @@ -2615,13 +2704,15 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,    // Avoid exceeding the target's limit.    if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,                                                 Cand.RPDelta.Excess, -                                               TryCand, Cand, RegExcess)) +                                               TryCand, Cand, RegExcess, TRI, +                                               DAG->MF))      return;    // Avoid increasing the max critical pressure in the scheduled region.    if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,                                                 Cand.RPDelta.CriticalMax, -                                               TryCand, Cand, RegCritical)) +                                               TryCand, Cand, RegCritical, TRI, +                                               DAG->MF))      return;    // For loops that are acyclic path limited, aggressively schedule for latency. @@ -2657,7 +2748,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,    // Avoid increasing the max pressure of the entire region.    if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,                                                 Cand.RPDelta.CurrentMax, -                                               TryCand, Cand, RegMax)) +                                               TryCand, Cand, RegMax, TRI, +                                               DAG->MF))      return;    // Avoid critical resource consumption and balance the schedule. @@ -2672,8 +2764,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,    // Avoid serializing long latency dependence chains.    // For acyclic path limited loops, latency was already checked above. -  if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited -      && tryLatency(TryCand, Cand, Zone)) { +  if (!RegionPolicy.DisableLatencyHeuristic && Cand.Policy.ReduceLatency && +      !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone)) {      return;    } @@ -2727,12 +2819,12 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {    // efficient, but also provides the best heuristics for CriticalPSets.    if (SUnit *SU = Bot.pickOnlyChoice()) {      IsTopNode = false; -    DEBUG(dbgs() << "Pick Bot NOCAND\n"); +    DEBUG(dbgs() << "Pick Bot ONLY1\n");      return SU;    }    if (SUnit *SU = Top.pickOnlyChoice()) {      IsTopNode = true; -    DEBUG(dbgs() << "Pick Top NOCAND\n"); +    DEBUG(dbgs() << "Pick Top ONLY1\n");      return SU;    }    CandPolicy NoPolicy; @@ -2887,7 +2979,7 @@ static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {    if (EnableLoadCluster && DAG->TII->enableClusterLoads())      DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));    if (EnableMacroFusion) -    DAG->addMutation(make_unique<MacroFusion>(DAG->TII)); +    DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));    return DAG;  } @@ -3254,12 +3346,10 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {    }    static bool isNodeHidden(const SUnit *Node) { -    return (Node->Preds.size() > 10 || Node->Succs.size() > 10); -  } - -  static bool hasNodeAddressLabel(const SUnit *Node, -                                  const ScheduleDAG *Graph) { -    return false; +    if (ViewMISchedCutoff == 0) +      return false; +    return (Node->Preds.size() > ViewMISchedCutoff +         || Node->Succs.size() > ViewMISchedCutoff);    }    /// If you want to override the dot attributes printed for a particular  | 
