Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp')
-rw-r--r--   llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 3188
1 file changed, 3188 insertions, 0 deletions
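The header comment of the file added below describes the core approach: keep a priority queue of available SUnits, pop them one at a time in priority order once all of their users have been scheduled, and emit them bottom-up. What follows is a minimal standalone C++ sketch of that bottom-up list-scheduling loop, not LLVM code; the Node type, scheduleBottomUp function, and the Sethi-Ullman-style Priority field are hypothetical stand-ins, and the legality, hazard, and register-interference checks of the real scheduler are omitted.

#include <algorithm>
#include <cstdio>
#include <queue>
#include <vector>

// Hypothetical, simplified node: Preds are the operand nodes that must be
// scheduled *after* this node in a bottom-up schedule.
struct Node {
  int Priority;            // stand-in for a Sethi-Ullman number
  std::vector<int> Preds;  // indices of predecessor (operand) nodes
  int SuccsLeft = 0;       // users of this node not yet scheduled
};

// Bottom-up list scheduling: repeatedly pop the highest-priority node whose
// users have all been scheduled, then release its predecessors. The sequence
// is reversed at the end to obtain program order.
std::vector<int> scheduleBottomUp(std::vector<Node> &Nodes) {
  for (const Node &N : Nodes)
    for (int P : N.Preds)
      ++Nodes[P].SuccsLeft;

  auto Cmp = [&Nodes](int A, int B) {
    return Nodes[A].Priority < Nodes[B].Priority;  // max-heap on Priority
  };
  std::priority_queue<int, std::vector<int>, decltype(Cmp)> Available(Cmp);
  for (int I = 0, E = (int)Nodes.size(); I != E; ++I)
    if (Nodes[I].SuccsLeft == 0)  // roots: nodes with no unscheduled users
      Available.push(I);

  std::vector<int> Sequence;
  while (!Available.empty()) {
    int SU = Available.top();
    Available.pop();
    Sequence.push_back(SU);
    // Releasing predecessors may make new nodes available.
    for (int P : Nodes[SU].Preds)
      if (--Nodes[P].SuccsLeft == 0)
        Available.push(P);
  }
  std::reverse(Sequence.begin(), Sequence.end());
  return Sequence;
}

int main() {
  // Tiny DAG: node 2 uses nodes 0 and 1; node 0 has the higher priority.
  std::vector<Node> Nodes = {{2, {}}, {1, {}}, {0, {0, 1}}};
  for (int Id : scheduleBottomUp(Nodes))
    std::printf("%d ", Id);  // prints a valid bottom-up order: "1 0 2"
  std::printf("\n");
}

The diff body below is kept verbatim as committed.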
| diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp new file mode 100644 index 000000000000..ff806bdb822c --- /dev/null +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -0,0 +1,3188 @@ +//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This implements bottom-up and top-down register pressure reduction list +// schedulers, using standard algorithms.  The basic approach uses a priority +// queue of available nodes to schedule.  One at a time, nodes are taken from +// the priority queue (thus in priority order), checked for legality to +// schedule, and emitted if legal. +// +//===----------------------------------------------------------------------===// + +#include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <iterator> +#include <limits> +#include <memory> +#include <utility> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "pre-RA-sched" + +STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); +STATISTIC(NumUnfolds,    "Number of nodes unfolded"); +STATISTIC(NumDups,       "Number of duplicated nodes"); +STATISTIC(NumPRCopies,   "Number of physical register copies"); + +static RegisterScheduler +  burrListDAGScheduler("list-burr", +                       "Bottom-up register reduction list scheduling", +                       createBURRListDAGScheduler); + +static RegisterScheduler +  sourceListDAGScheduler("source", +                         "Similar to list-burr but schedules in source " +                         "order when possible", +                         createSourceListDAGScheduler); + +static RegisterScheduler +  hybridListDAGScheduler("list-hybrid", +                         "Bottom-up register pressure aware list scheduling " +                         "which tries to balance latency and register pressure", +                         
createHybridListDAGScheduler); + +static RegisterScheduler +  ILPListDAGScheduler("list-ilp", +                      "Bottom-up register pressure aware list scheduling " +                      "which tries to balance ILP and register pressure", +                      createILPListDAGScheduler); + +static cl::opt<bool> DisableSchedCycles( +  "disable-sched-cycles", cl::Hidden, cl::init(false), +  cl::desc("Disable cycle-level precision during preRA scheduling")); + +// Temporary sched=list-ilp flags until the heuristics are robust. +// Some options are also available under sched=list-hybrid. +static cl::opt<bool> DisableSchedRegPressure( +  "disable-sched-reg-pressure", cl::Hidden, cl::init(false), +  cl::desc("Disable regpressure priority in sched=list-ilp")); +static cl::opt<bool> DisableSchedLiveUses( +  "disable-sched-live-uses", cl::Hidden, cl::init(true), +  cl::desc("Disable live use priority in sched=list-ilp")); +static cl::opt<bool> DisableSchedVRegCycle( +  "disable-sched-vrcycle", cl::Hidden, cl::init(false), +  cl::desc("Disable virtual register cycle interference checks")); +static cl::opt<bool> DisableSchedPhysRegJoin( +  "disable-sched-physreg-join", cl::Hidden, cl::init(false), +  cl::desc("Disable physreg def-use affinity")); +static cl::opt<bool> DisableSchedStalls( +  "disable-sched-stalls", cl::Hidden, cl::init(true), +  cl::desc("Disable no-stall priority in sched=list-ilp")); +static cl::opt<bool> DisableSchedCriticalPath( +  "disable-sched-critical-path", cl::Hidden, cl::init(false), +  cl::desc("Disable critical path priority in sched=list-ilp")); +static cl::opt<bool> DisableSchedHeight( +  "disable-sched-height", cl::Hidden, cl::init(false), +  cl::desc("Disable scheduled-height priority in sched=list-ilp")); +static cl::opt<bool> Disable2AddrHack( +  "disable-2addr-hack", cl::Hidden, cl::init(true), +  cl::desc("Disable scheduler's two-address hack")); + +static cl::opt<int> MaxReorderWindow( +  "max-sched-reorder", cl::Hidden, cl::init(6), +  cl::desc("Number of instructions to allow ahead of the critical path " +           "in sched=list-ilp")); + +static cl::opt<unsigned> AvgIPC( +  "sched-avg-ipc", cl::Hidden, cl::init(1), +  cl::desc("Average inst/cycle whan no target itinerary exists.")); + +namespace { + +//===----------------------------------------------------------------------===// +/// ScheduleDAGRRList - The actual register reduction list scheduler +/// implementation.  This supports both top-down and bottom-up scheduling. +/// +class ScheduleDAGRRList : public ScheduleDAGSDNodes { +private: +  /// NeedLatency - True if the scheduler will make use of latency information. +  bool NeedLatency; + +  /// AvailableQueue - The priority queue to use for the available SUnits. +  SchedulingPriorityQueue *AvailableQueue; + +  /// PendingQueue - This contains all of the instructions whose operands have +  /// been issued, but their results are not ready yet (due to the latency of +  /// the operation).  Once the operands becomes available, the instruction is +  /// added to the AvailableQueue. +  std::vector<SUnit *> PendingQueue; + +  /// HazardRec - The hazard recognizer to use. +  ScheduleHazardRecognizer *HazardRec; + +  /// CurCycle - The current scheduler state corresponds to this cycle. +  unsigned CurCycle = 0; + +  /// MinAvailableCycle - Cycle of the soonest available instruction. +  unsigned MinAvailableCycle; + +  /// IssueCount - Count instructions issued in this cycle +  /// Currently valid only for bottom-up scheduling. 
+  unsigned IssueCount; + +  /// LiveRegDefs - A set of physical registers and their definition +  /// that are "live". These nodes must be scheduled before any other nodes that +  /// modifies the registers can be scheduled. +  unsigned NumLiveRegs; +  std::unique_ptr<SUnit*[]> LiveRegDefs; +  std::unique_ptr<SUnit*[]> LiveRegGens; + +  // Collect interferences between physical register use/defs. +  // Each interference is an SUnit and set of physical registers. +  SmallVector<SUnit*, 4> Interferences; + +  using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>; + +  LRegsMapT LRegsMap; + +  /// Topo - A topological ordering for SUnits which permits fast IsReachable +  /// and similar queries. +  ScheduleDAGTopologicalSort Topo; + +  // Hack to keep track of the inverse of FindCallSeqStart without more crazy +  // DAG crawling. +  DenseMap<SUnit*, SUnit*> CallSeqEndForStart; + +public: +  ScheduleDAGRRList(MachineFunction &mf, bool needlatency, +                    SchedulingPriorityQueue *availqueue, +                    CodeGenOpt::Level OptLevel) +    : ScheduleDAGSDNodes(mf), +      NeedLatency(needlatency), AvailableQueue(availqueue), +      Topo(SUnits, nullptr) { +    const TargetSubtargetInfo &STI = mf.getSubtarget(); +    if (DisableSchedCycles || !NeedLatency) +      HazardRec = new ScheduleHazardRecognizer(); +    else +      HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); +  } + +  ~ScheduleDAGRRList() override { +    delete HazardRec; +    delete AvailableQueue; +  } + +  void Schedule() override; + +  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; } + +  /// IsReachable - Checks if SU is reachable from TargetSU. +  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { +    return Topo.IsReachable(SU, TargetSU); +  } + +  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will +  /// create a cycle. +  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { +    return Topo.WillCreateCycle(SU, TargetSU); +  } + +  /// AddPredQueued - Queues and update to add a predecessor edge to SUnit SU. +  /// This returns true if this is a new predecessor. +  /// Does *NOT* update the topological ordering! It just queues an update. +  void AddPredQueued(SUnit *SU, const SDep &D) { +    Topo.AddPredQueued(SU, D.getSUnit()); +    SU->addPred(D); +  } + +  /// AddPred - adds a predecessor edge to SUnit SU. +  /// This returns true if this is a new predecessor. +  /// Updates the topological ordering if required. +  void AddPred(SUnit *SU, const SDep &D) { +    Topo.AddPred(SU, D.getSUnit()); +    SU->addPred(D); +  } + +  /// RemovePred - removes a predecessor edge from SUnit SU. +  /// This returns true if an edge was removed. +  /// Updates the topological ordering if required. 
+  void RemovePred(SUnit *SU, const SDep &D) { +    Topo.RemovePred(SU, D.getSUnit()); +    SU->removePred(D); +  } + +private: +  bool isReady(SUnit *SU) { +    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() || +      AvailableQueue->isReady(SU); +  } + +  void ReleasePred(SUnit *SU, const SDep *PredEdge); +  void ReleasePredecessors(SUnit *SU); +  void ReleasePending(); +  void AdvanceToCycle(unsigned NextCycle); +  void AdvancePastStalls(SUnit *SU); +  void EmitNode(SUnit *SU); +  void ScheduleNodeBottomUp(SUnit*); +  void CapturePred(SDep *PredEdge); +  void UnscheduleNodeBottomUp(SUnit*); +  void RestoreHazardCheckerBottomUp(); +  void BacktrackBottomUp(SUnit*, SUnit*); +  SUnit *TryUnfoldSU(SUnit *); +  SUnit *CopyAndMoveSuccessors(SUnit*); +  void InsertCopiesAndMoveSuccs(SUnit*, unsigned, +                                const TargetRegisterClass*, +                                const TargetRegisterClass*, +                                SmallVectorImpl<SUnit*>&); +  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&); + +  void releaseInterferences(unsigned Reg = 0); + +  SUnit *PickNodeToScheduleBottomUp(); +  void ListScheduleBottomUp(); + +  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. +  SUnit *CreateNewSUnit(SDNode *N) { +    unsigned NumSUnits = SUnits.size(); +    SUnit *NewNode = newSUnit(N); +    // Update the topological ordering. +    if (NewNode->NodeNum >= NumSUnits) +      Topo.MarkDirty(); +    return NewNode; +  } + +  /// CreateClone - Creates a new SUnit from an existing one. +  SUnit *CreateClone(SUnit *N) { +    unsigned NumSUnits = SUnits.size(); +    SUnit *NewNode = Clone(N); +    // Update the topological ordering. +    if (NewNode->NodeNum >= NumSUnits) +      Topo.MarkDirty(); +    return NewNode; +  } + +  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't +  /// need actual latency information but the hybrid scheduler does. +  bool forceUnitLatencies() const override { +    return !NeedLatency; +  } +}; + +}  // end anonymous namespace + +/// GetCostForDef - Looks up the register class and cost for a given definition. +/// Typically this just means looking up the representative register class, +/// but for untyped values (MVT::Untyped) it means inspecting the node's +/// opcode to determine what register class is being generated. +static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, +                          const TargetLowering *TLI, +                          const TargetInstrInfo *TII, +                          const TargetRegisterInfo *TRI, +                          unsigned &RegClass, unsigned &Cost, +                          const MachineFunction &MF) { +  MVT VT = RegDefPos.GetValue(); + +  // Special handling for untyped values.  These values can only come from +  // the expansion of custom DAG-to-DAG patterns. +  if (VT == MVT::Untyped) { +    const SDNode *Node = RegDefPos.GetNode(); + +    // Special handling for CopyFromReg of untyped values. 
+    if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) { +      unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); +      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg); +      RegClass = RC->getID(); +      Cost = 1; +      return; +    } + +    unsigned Opcode = Node->getMachineOpcode(); +    if (Opcode == TargetOpcode::REG_SEQUENCE) { +      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); +      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); +      RegClass = RC->getID(); +      Cost = 1; +      return; +    } + +    unsigned Idx = RegDefPos.GetIdx(); +    const MCInstrDesc Desc = TII->get(Opcode); +    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF); +    RegClass = RC->getID(); +    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a +    // better way to determine it. +    Cost = 1; +  } else { +    RegClass = TLI->getRepRegClassFor(VT)->getID(); +    Cost = TLI->getRepRegClassCostFor(VT); +  } +} + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGRRList::Schedule() { +  LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) +                    << " '" << BB->getName() << "' **********\n"); + +  CurCycle = 0; +  IssueCount = 0; +  MinAvailableCycle = +      DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max(); +  NumLiveRegs = 0; +  // Allocate slots for each physical register, plus one for a special register +  // to track the virtual resource of a calling sequence. +  LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]()); +  LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]()); +  CallSeqEndForStart.clear(); +  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); + +  // Build the scheduling graph. +  BuildSchedGraph(nullptr); + +  LLVM_DEBUG(dump()); +  Topo.MarkDirty(); + +  AvailableQueue->initNodes(SUnits); + +  HazardRec->Reset(); + +  // Execute the actual scheduling loop. +  ListScheduleBottomUp(); + +  AvailableQueue->releaseState(); + +  LLVM_DEBUG({ +    dbgs() << "*** Final schedule ***\n"; +    dumpSchedule(); +    dbgs() << '\n'; +  }); +} + +//===----------------------------------------------------------------------===// +//  Bottom-Up Scheduling +//===----------------------------------------------------------------------===// + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to +/// the AvailableQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { +  SUnit *PredSU = PredEdge->getSUnit(); + +#ifndef NDEBUG +  if (PredSU->NumSuccsLeft == 0) { +    dbgs() << "*** Scheduling failed! ***\n"; +    dumpNode(*PredSU); +    dbgs() << " has been released too many times!\n"; +    llvm_unreachable(nullptr); +  } +#endif +  --PredSU->NumSuccsLeft; + +  if (!forceUnitLatencies()) { +    // Updating predecessor's height. This is now the cycle when the +    // predecessor can be scheduled without causing a pipeline stall. +    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); +  } + +  // If all the node's successors are scheduled, this node is ready +  // to be scheduled. Ignore the special EntrySU node. 
+  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { +    PredSU->isAvailable = true; + +    unsigned Height = PredSU->getHeight(); +    if (Height < MinAvailableCycle) +      MinAvailableCycle = Height; + +    if (isReady(PredSU)) { +      AvailableQueue->push(PredSU); +    } +    // CapturePred and others may have left the node in the pending queue, avoid +    // adding it twice. +    else if (!PredSU->isPending) { +      PredSU->isPending = true; +      PendingQueue.push_back(PredSU); +    } +  } +} + +/// IsChainDependent - Test if Outer is reachable from Inner through +/// chain dependencies. +static bool IsChainDependent(SDNode *Outer, SDNode *Inner, +                             unsigned NestLevel, +                             const TargetInstrInfo *TII) { +  SDNode *N = Outer; +  while (true) { +    if (N == Inner) +      return true; +    // For a TokenFactor, examine each operand. There may be multiple ways +    // to get to the CALLSEQ_BEGIN, but we need to find the path with the +    // most nesting in order to ensure that we find the corresponding match. +    if (N->getOpcode() == ISD::TokenFactor) { +      for (const SDValue &Op : N->op_values()) +        if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII)) +          return true; +      return false; +    } +    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. +    if (N->isMachineOpcode()) { +      if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { +        ++NestLevel; +      } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) { +        if (NestLevel == 0) +          return false; +        --NestLevel; +      } +    } +    // Otherwise, find the chain and continue climbing. +    for (const SDValue &Op : N->op_values()) +      if (Op.getValueType() == MVT::Other) { +        N = Op.getNode(); +        goto found_chain_operand; +      } +    return false; +  found_chain_operand:; +    if (N->getOpcode() == ISD::EntryToken) +      return false; +  } +} + +/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate +/// the corresponding (lowered) CALLSEQ_BEGIN node. +/// +/// NestLevel and MaxNested are used in recursion to indcate the current level +/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum +/// level seen so far. +/// +/// TODO: It would be better to give CALLSEQ_END an explicit operand to point +/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it. +static SDNode * +FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, +                 const TargetInstrInfo *TII) { +  while (true) { +    // For a TokenFactor, examine each operand. There may be multiple ways +    // to get to the CALLSEQ_BEGIN, but we need to find the path with the +    // most nesting in order to ensure that we find the corresponding match. +    if (N->getOpcode() == ISD::TokenFactor) { +      SDNode *Best = nullptr; +      unsigned BestMaxNest = MaxNest; +      for (const SDValue &Op : N->op_values()) { +        unsigned MyNestLevel = NestLevel; +        unsigned MyMaxNest = MaxNest; +        if (SDNode *New = FindCallSeqStart(Op.getNode(), +                                           MyNestLevel, MyMaxNest, TII)) +          if (!Best || (MyMaxNest > BestMaxNest)) { +            Best = New; +            BestMaxNest = MyMaxNest; +          } +      } +      assert(Best); +      MaxNest = BestMaxNest; +      return Best; +    } +    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. 
+    if (N->isMachineOpcode()) { +      if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { +        ++NestLevel; +        MaxNest = std::max(MaxNest, NestLevel); +      } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) { +        assert(NestLevel != 0); +        --NestLevel; +        if (NestLevel == 0) +          return N; +      } +    } +    // Otherwise, find the chain and continue climbing. +    for (const SDValue &Op : N->op_values()) +      if (Op.getValueType() == MVT::Other) { +        N = Op.getNode(); +        goto found_chain_operand; +      } +    return nullptr; +  found_chain_operand:; +    if (N->getOpcode() == ISD::EntryToken) +      return nullptr; +  } +} + +/// Call ReleasePred for each predecessor, then update register live def/gen. +/// Always update LiveRegDefs for a register dependence even if the current SU +/// also defines the register. This effectively create one large live range +/// across a sequence of two-address node. This is important because the +/// entire chain must be scheduled together. Example: +/// +/// flags = (3) add +/// flags = (2) addc flags +/// flags = (1) addc flags +/// +/// results in +/// +/// LiveRegDefs[flags] = 3 +/// LiveRegGens[flags] = 1 +/// +/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid +/// interference on flags. +void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { +  // Bottom up: release predecessors +  for (SDep &Pred : SU->Preds) { +    ReleasePred(SU, &Pred); +    if (Pred.isAssignedRegDep()) { +      // This is a physical register dependency and it's impossible or +      // expensive to copy the register. Make sure nothing that can +      // clobber the register is scheduled between the predecessor and +      // this node. +      SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef; +      assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) && +             "interference on register dependence"); +      LiveRegDefs[Pred.getReg()] = Pred.getSUnit(); +      if (!LiveRegGens[Pred.getReg()]) { +        ++NumLiveRegs; +        LiveRegGens[Pred.getReg()] = SU; +      } +    } +  } + +  // If we're scheduling a lowered CALLSEQ_END, find the corresponding +  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between +  // these nodes, to prevent other calls from being interscheduled with them. +  unsigned CallResource = TRI->getNumRegs(); +  if (!LiveRegDefs[CallResource]) +    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) +      if (Node->isMachineOpcode() && +          Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { +        unsigned NestLevel = 0; +        unsigned MaxNest = 0; +        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); +        assert(N && "Must find call sequence start"); + +        SUnit *Def = &SUnits[N->getNodeId()]; +        CallSeqEndForStart[Def] = SU; + +        ++NumLiveRegs; +        LiveRegDefs[CallResource] = Def; +        LiveRegGens[CallResource] = SU; +        break; +      } +} + +/// Check to see if any of the pending instructions are ready to issue.  If +/// so, add them to the available queue. +void ScheduleDAGRRList::ReleasePending() { +  if (DisableSchedCycles) { +    assert(PendingQueue.empty() && "pending instrs not allowed in this mode"); +    return; +  } + +  // If the available queue is empty, it is safe to reset MinAvailableCycle. 
+  if (AvailableQueue->empty()) +    MinAvailableCycle = std::numeric_limits<unsigned>::max(); + +  // Check to see if any of the pending instructions are ready to issue.  If +  // so, add them to the available queue. +  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { +    unsigned ReadyCycle = PendingQueue[i]->getHeight(); +    if (ReadyCycle < MinAvailableCycle) +      MinAvailableCycle = ReadyCycle; + +    if (PendingQueue[i]->isAvailable) { +      if (!isReady(PendingQueue[i])) +          continue; +      AvailableQueue->push(PendingQueue[i]); +    } +    PendingQueue[i]->isPending = false; +    PendingQueue[i] = PendingQueue.back(); +    PendingQueue.pop_back(); +    --i; --e; +  } +} + +/// Move the scheduler state forward by the specified number of Cycles. +void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { +  if (NextCycle <= CurCycle) +    return; + +  IssueCount = 0; +  AvailableQueue->setCurCycle(NextCycle); +  if (!HazardRec->isEnabled()) { +    // Bypass lots of virtual calls in case of long latency. +    CurCycle = NextCycle; +  } +  else { +    for (; CurCycle != NextCycle; ++CurCycle) { +      HazardRec->RecedeCycle(); +    } +  } +  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the +  // available Q to release pending nodes at least once before popping. +  ReleasePending(); +} + +/// Move the scheduler state forward until the specified node's dependents are +/// ready and can be scheduled with no resource conflicts. +void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { +  if (DisableSchedCycles) +    return; + +  // FIXME: Nodes such as CopyFromReg probably should not advance the current +  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node +  // has predecessors the cycle will be advanced when they are scheduled. +  // But given the crude nature of modeling latency though such nodes, we +  // currently need to treat these nodes like real instructions. +  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; + +  unsigned ReadyCycle = SU->getHeight(); + +  // Bump CurCycle to account for latency. We assume the latency of other +  // available instructions may be hidden by the stall (not a full pipe stall). +  // This updates the hazard recognizer's cycle before reserving resources for +  // this instruction. +  AdvanceToCycle(ReadyCycle); + +  // Calls are scheduled in their preceding cycle, so don't conflict with +  // hazards from instructions after the call. EmitNode will reset the +  // scoreboard state before emitting the call. +  if (SU->isCall) +    return; + +  // FIXME: For resource conflicts in very long non-pipelined stages, we +  // should probably skip ahead here to avoid useless scoreboard checks. +  int Stalls = 0; +  while (true) { +    ScheduleHazardRecognizer::HazardType HT = +      HazardRec->getHazardType(SU, -Stalls); + +    if (HT == ScheduleHazardRecognizer::NoHazard) +      break; + +    ++Stalls; +  } +  AdvanceToCycle(CurCycle + Stalls); +} + +/// Record this SUnit in the HazardRecognizer. +/// Does not update CurCycle. +void ScheduleDAGRRList::EmitNode(SUnit *SU) { +  if (!HazardRec->isEnabled()) +    return; + +  // Check for phys reg copy. 
+  if (!SU->getNode()) +    return; + +  switch (SU->getNode()->getOpcode()) { +  default: +    assert(SU->getNode()->isMachineOpcode() && +           "This target-independent node should not be scheduled."); +    break; +  case ISD::MERGE_VALUES: +  case ISD::TokenFactor: +  case ISD::LIFETIME_START: +  case ISD::LIFETIME_END: +  case ISD::CopyToReg: +  case ISD::CopyFromReg: +  case ISD::EH_LABEL: +    // Noops don't affect the scoreboard state. Copies are likely to be +    // removed. +    return; +  case ISD::INLINEASM: +  case ISD::INLINEASM_BR: +    // For inline asm, clear the pipeline state. +    HazardRec->Reset(); +    return; +  } +  if (SU->isCall) { +    // Calls are scheduled with their preceding instructions. For bottom-up +    // scheduling, clear the pipeline state before emitting. +    HazardRec->Reset(); +  } + +  HazardRec->EmitInstruction(SU); +} + +static void resetVRegCycle(SUnit *SU); + +/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending +/// count of its predecessors. If a predecessor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { +  LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); +  LLVM_DEBUG(dumpNode(*SU)); + +#ifndef NDEBUG +  if (CurCycle < SU->getHeight()) +    LLVM_DEBUG(dbgs() << "   Height [" << SU->getHeight() +                      << "] pipeline stall!\n"); +#endif + +  // FIXME: Do not modify node height. It may interfere with +  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the +  // node its ready cycle can aid heuristics, and after scheduling it can +  // indicate the scheduled cycle. +  SU->setHeightToAtLeast(CurCycle); + +  // Reserve resources for the scheduled instruction. +  EmitNode(SU); + +  Sequence.push_back(SU); + +  AvailableQueue->scheduledNode(SU); + +  // If HazardRec is disabled, and each inst counts as one cycle, then +  // advance CurCycle before ReleasePredecessors to avoid useless pushes to +  // PendingQueue for schedulers that implement HasReadyFilter. +  if (!HazardRec->isEnabled() && AvgIPC < 2) +    AdvanceToCycle(CurCycle + 1); + +  // Update liveness of predecessors before successors to avoid treating a +  // two-address node as a live range def. +  ReleasePredecessors(SU); + +  // Release all the implicit physical register defs that are live. +  for (SDep &Succ : SU->Succs) { +    // LiveRegDegs[Succ.getReg()] != SU when SU is a two-address node. +    if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) { +      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); +      --NumLiveRegs; +      LiveRegDefs[Succ.getReg()] = nullptr; +      LiveRegGens[Succ.getReg()] = nullptr; +      releaseInterferences(Succ.getReg()); +    } +  } +  // Release the special call resource dependence, if this is the beginning +  // of a call. 
+  unsigned CallResource = TRI->getNumRegs(); +  if (LiveRegDefs[CallResource] == SU) +    for (const SDNode *SUNode = SU->getNode(); SUNode; +         SUNode = SUNode->getGluedNode()) { +      if (SUNode->isMachineOpcode() && +          SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) { +        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); +        --NumLiveRegs; +        LiveRegDefs[CallResource] = nullptr; +        LiveRegGens[CallResource] = nullptr; +        releaseInterferences(CallResource); +      } +    } + +  resetVRegCycle(SU); + +  SU->isScheduled = true; + +  // Conditions under which the scheduler should eagerly advance the cycle: +  // (1) No available instructions +  // (2) All pipelines full, so available instructions must have hazards. +  // +  // If HazardRec is disabled, the cycle was pre-advanced before calling +  // ReleasePredecessors. In that case, IssueCount should remain 0. +  // +  // Check AvailableQueue after ReleasePredecessors in case of zero latency. +  if (HazardRec->isEnabled() || AvgIPC > 1) { +    if (SU->getNode() && SU->getNode()->isMachineOpcode()) +      ++IssueCount; +    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit()) +        || (!HazardRec->isEnabled() && IssueCount == AvgIPC)) +      AdvanceToCycle(CurCycle + 1); +  } +} + +/// CapturePred - This does the opposite of ReleasePred. Since SU is being +/// unscheduled, increase the succ left count of its predecessors. Remove +/// them from AvailableQueue if necessary. +void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { +  SUnit *PredSU = PredEdge->getSUnit(); +  if (PredSU->isAvailable) { +    PredSU->isAvailable = false; +    if (!PredSU->isPending) +      AvailableQueue->remove(PredSU); +  } + +  assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() && +         "NumSuccsLeft will overflow!"); +  ++PredSU->NumSuccsLeft; +} + +/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and +/// its predecessor states to reflect the change. +void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { +  LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); +  LLVM_DEBUG(dumpNode(*SU)); + +  for (SDep &Pred : SU->Preds) { +    CapturePred(&Pred); +    if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){ +      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); +      assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() && +             "Physical register dependency violated?"); +      --NumLiveRegs; +      LiveRegDefs[Pred.getReg()] = nullptr; +      LiveRegGens[Pred.getReg()] = nullptr; +      releaseInterferences(Pred.getReg()); +    } +  } + +  // Reclaim the special call resource dependence, if this is the beginning +  // of a call. +  unsigned CallResource = TRI->getNumRegs(); +  for (const SDNode *SUNode = SU->getNode(); SUNode; +       SUNode = SUNode->getGluedNode()) { +    if (SUNode->isMachineOpcode() && +        SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) { +      SUnit *SeqEnd = CallSeqEndForStart[SU]; +      assert(SeqEnd && "Call sequence start/end must be known"); +      assert(!LiveRegDefs[CallResource]); +      assert(!LiveRegGens[CallResource]); +      ++NumLiveRegs; +      LiveRegDefs[CallResource] = SU; +      LiveRegGens[CallResource] = SeqEnd; +    } +  } + +  // Release the special call resource dependence, if this is the end +  // of a call. 
+  if (LiveRegGens[CallResource] == SU) +    for (const SDNode *SUNode = SU->getNode(); SUNode; +         SUNode = SUNode->getGluedNode()) { +      if (SUNode->isMachineOpcode() && +          SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { +        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); +        assert(LiveRegDefs[CallResource]); +        assert(LiveRegGens[CallResource]); +        --NumLiveRegs; +        LiveRegDefs[CallResource] = nullptr; +        LiveRegGens[CallResource] = nullptr; +        releaseInterferences(CallResource); +      } +    } + +  for (auto &Succ : SU->Succs) { +    if (Succ.isAssignedRegDep()) { +      auto Reg = Succ.getReg(); +      if (!LiveRegDefs[Reg]) +        ++NumLiveRegs; +      // This becomes the nearest def. Note that an earlier def may still be +      // pending if this is a two-address node. +      LiveRegDefs[Reg] = SU; + +      // Update LiveRegGen only if was empty before this unscheduling. +      // This is to avoid incorrect updating LiveRegGen set in previous run. +      if (!LiveRegGens[Reg]) { +        // Find the successor with the lowest height. +        LiveRegGens[Reg] = Succ.getSUnit(); +        for (auto &Succ2 : SU->Succs) { +          if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg && +              Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight()) +            LiveRegGens[Reg] = Succ2.getSUnit(); +        } +      } +    } +  } +  if (SU->getHeight() < MinAvailableCycle) +    MinAvailableCycle = SU->getHeight(); + +  SU->setHeightDirty(); +  SU->isScheduled = false; +  SU->isAvailable = true; +  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) { +    // Don't make available until backtracking is complete. +    SU->isPending = true; +    PendingQueue.push_back(SU); +  } +  else { +    AvailableQueue->push(SU); +  } +  AvailableQueue->unscheduledNode(SU); +} + +/// After backtracking, the hazard checker needs to be restored to a state +/// corresponding the current cycle. +void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { +  HazardRec->Reset(); + +  unsigned LookAhead = std::min((unsigned)Sequence.size(), +                                HazardRec->getMaxLookAhead()); +  if (LookAhead == 0) +    return; + +  std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead); +  unsigned HazardCycle = (*I)->getHeight(); +  for (auto E = Sequence.end(); I != E; ++I) { +    SUnit *SU = *I; +    for (; SU->getHeight() > HazardCycle; ++HazardCycle) { +      HazardRec->RecedeCycle(); +    } +    EmitNode(SU); +  } +} + +/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in +/// BTCycle in order to schedule a specific node. 
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) { +  SUnit *OldSU = Sequence.back(); +  while (true) { +    Sequence.pop_back(); +    // FIXME: use ready cycle instead of height +    CurCycle = OldSU->getHeight(); +    UnscheduleNodeBottomUp(OldSU); +    AvailableQueue->setCurCycle(CurCycle); +    if (OldSU == BtSU) +      break; +    OldSU = Sequence.back(); +  } + +  assert(!SU->isSucc(OldSU) && "Something is wrong!"); + +  RestoreHazardCheckerBottomUp(); + +  ReleasePending(); + +  ++NumBacktracks; +} + +static bool isOperandOf(const SUnit *SU, SDNode *N) { +  for (const SDNode *SUNode = SU->getNode(); SUNode; +       SUNode = SUNode->getGluedNode()) { +    if (SUNode->isOperandOf(N)) +      return true; +  } +  return false; +} + +/// TryUnfold - Attempt to unfold +SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { +  SDNode *N = SU->getNode(); +  // Use while over if to ease fall through. +  SmallVector<SDNode *, 2> NewNodes; +  if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) +    return nullptr; + +  // unfolding an x86 DEC64m operation results in store, dec, load which +  // can't be handled here so quit +  if (NewNodes.size() == 3) +    return nullptr; + +  assert(NewNodes.size() == 2 && "Expected a load folding node!"); + +  N = NewNodes[1]; +  SDNode *LoadNode = NewNodes[0]; +  unsigned NumVals = N->getNumValues(); +  unsigned OldNumVals = SU->getNode()->getNumValues(); + +  // LoadNode may already exist. This can happen when there is another +  // load from the same location and producing the same type of value +  // but it has different alignment or volatileness. +  bool isNewLoad = true; +  SUnit *LoadSU; +  if (LoadNode->getNodeId() != -1) { +    LoadSU = &SUnits[LoadNode->getNodeId()]; +    // If LoadSU has already been scheduled, we should clone it but +    // this would negate the benefit to unfolding so just return SU. +    if (LoadSU->isScheduled) +      return SU; +    isNewLoad = false; +  } else { +    LoadSU = CreateNewSUnit(LoadNode); +    LoadNode->setNodeId(LoadSU->NodeNum); + +    InitNumRegDefsLeft(LoadSU); +    computeLatency(LoadSU); +  } + +  bool isNewN = true; +  SUnit *NewSU; +  // This can only happen when isNewLoad is false. +  if (N->getNodeId() != -1) { +    NewSU = &SUnits[N->getNodeId()]; +    // If NewSU has already been scheduled, we need to clone it, but this +    // negates the benefit to unfolding so just return SU. +    if (NewSU->isScheduled) { +      return SU; +    } +    isNewN = false; +  } else { +    NewSU = CreateNewSUnit(N); +    N->setNodeId(NewSU->NodeNum); + +    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); +    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { +      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { +        NewSU->isTwoAddress = true; +        break; +      } +    } +    if (MCID.isCommutable()) +      NewSU->isCommutable = true; + +    InitNumRegDefsLeft(NewSU); +    computeLatency(NewSU); +  } + +  LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); + +  // Now that we are committed to unfolding replace DAG Uses. +  for (unsigned i = 0; i != NumVals; ++i) +    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); +  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1), +                                 SDValue(LoadNode, 1)); + +  // Record all the edges to and from the old SU, by category. 
+  SmallVector<SDep, 4> ChainPreds; +  SmallVector<SDep, 4> ChainSuccs; +  SmallVector<SDep, 4> LoadPreds; +  SmallVector<SDep, 4> NodePreds; +  SmallVector<SDep, 4> NodeSuccs; +  for (SDep &Pred : SU->Preds) { +    if (Pred.isCtrl()) +      ChainPreds.push_back(Pred); +    else if (isOperandOf(Pred.getSUnit(), LoadNode)) +      LoadPreds.push_back(Pred); +    else +      NodePreds.push_back(Pred); +  } +  for (SDep &Succ : SU->Succs) { +    if (Succ.isCtrl()) +      ChainSuccs.push_back(Succ); +    else +      NodeSuccs.push_back(Succ); +  } + +  // Now assign edges to the newly-created nodes. +  for (const SDep &Pred : ChainPreds) { +    RemovePred(SU, Pred); +    if (isNewLoad) +      AddPredQueued(LoadSU, Pred); +  } +  for (const SDep &Pred : LoadPreds) { +    RemovePred(SU, Pred); +    if (isNewLoad) +      AddPredQueued(LoadSU, Pred); +  } +  for (const SDep &Pred : NodePreds) { +    RemovePred(SU, Pred); +    AddPredQueued(NewSU, Pred); +  } +  for (SDep D : NodeSuccs) { +    SUnit *SuccDep = D.getSUnit(); +    D.setSUnit(SU); +    RemovePred(SuccDep, D); +    D.setSUnit(NewSU); +    AddPredQueued(SuccDep, D); +    // Balance register pressure. +    if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled && +        !D.isCtrl() && NewSU->NumRegDefsLeft > 0) +      --NewSU->NumRegDefsLeft; +  } +  for (SDep D : ChainSuccs) { +    SUnit *SuccDep = D.getSUnit(); +    D.setSUnit(SU); +    RemovePred(SuccDep, D); +    if (isNewLoad) { +      D.setSUnit(LoadSU); +      AddPredQueued(SuccDep, D); +    } +  } + +  // Add a data dependency to reflect that NewSU reads the value defined +  // by LoadSU. +  SDep D(LoadSU, SDep::Data, 0); +  D.setLatency(LoadSU->Latency); +  AddPredQueued(NewSU, D); + +  if (isNewLoad) +    AvailableQueue->addNode(LoadSU); +  if (isNewN) +    AvailableQueue->addNode(NewSU); + +  ++NumUnfolds; + +  if (NewSU->NumSuccsLeft == 0) +    NewSU->isAvailable = true; + +  return NewSU; +} + +/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled +/// successors to the newly created node. +SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { +  SDNode *N = SU->getNode(); +  if (!N) +    return nullptr; + +  LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n"); +  LLVM_DEBUG(dumpNode(*SU)); + +  if (N->getGluedNode() && +      !TII->canCopyGluedNodeDuringSchedule(N)) { +    LLVM_DEBUG( +        dbgs() +        << "Giving up because it has incoming glue and the target does not " +           "want to copy it\n"); +    return nullptr; +  } + +  SUnit *NewSU; +  bool TryUnfold = false; +  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { +    MVT VT = N->getSimpleValueType(i); +    if (VT == MVT::Glue) { +      LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n"); +      return nullptr; +    } else if (VT == MVT::Other) +      TryUnfold = true; +  } +  for (const SDValue &Op : N->op_values()) { +    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); +    if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) { +      LLVM_DEBUG( +          dbgs() << "Giving up because it one of the operands is glue and " +                    "the target does not want to copy it\n"); +      return nullptr; +    } +  } + +  // If possible unfold instruction. 
+  if (TryUnfold) { +    SUnit *UnfoldSU = TryUnfoldSU(SU); +    if (!UnfoldSU) +      return nullptr; +    SU = UnfoldSU; +    N = SU->getNode(); +    // If this can be scheduled don't bother duplicating and just return +    if (SU->NumSuccsLeft == 0) +      return SU; +  } + +  LLVM_DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n"); +  NewSU = CreateClone(SU); + +  // New SUnit has the exact same predecessors. +  for (SDep &Pred : SU->Preds) +    if (!Pred.isArtificial()) +      AddPredQueued(NewSU, Pred); + +  // Make sure the clone comes after the original. (InstrEmitter assumes +  // this ordering.) +  AddPredQueued(NewSU, SDep(SU, SDep::Artificial)); + +  // Only copy scheduled successors. Cut them from old node's successor +  // list and move them over. +  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; +  for (SDep &Succ : SU->Succs) { +    if (Succ.isArtificial()) +      continue; +    SUnit *SuccSU = Succ.getSUnit(); +    if (SuccSU->isScheduled) { +      SDep D = Succ; +      D.setSUnit(NewSU); +      AddPredQueued(SuccSU, D); +      D.setSUnit(SU); +      DelDeps.push_back(std::make_pair(SuccSU, D)); +    } +  } +  for (auto &DelDep : DelDeps) +    RemovePred(DelDep.first, DelDep.second); + +  AvailableQueue->updateNode(SU); +  AvailableQueue->addNode(NewSU); + +  ++NumDups; +  return NewSU; +} + +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, +                                              const TargetRegisterClass *DestRC, +                                              const TargetRegisterClass *SrcRC, +                                              SmallVectorImpl<SUnit*> &Copies) { +  SUnit *CopyFromSU = CreateNewSUnit(nullptr); +  CopyFromSU->CopySrcRC = SrcRC; +  CopyFromSU->CopyDstRC = DestRC; + +  SUnit *CopyToSU = CreateNewSUnit(nullptr); +  CopyToSU->CopySrcRC = DestRC; +  CopyToSU->CopyDstRC = SrcRC; + +  // Only copy scheduled successors. Cut them from old node's successor +  // list and move them over. +  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; +  for (SDep &Succ : SU->Succs) { +    if (Succ.isArtificial()) +      continue; +    SUnit *SuccSU = Succ.getSUnit(); +    if (SuccSU->isScheduled) { +      SDep D = Succ; +      D.setSUnit(CopyToSU); +      AddPredQueued(SuccSU, D); +      DelDeps.push_back(std::make_pair(SuccSU, Succ)); +    } +    else { +      // Avoid scheduling the def-side copy before other successors. Otherwise +      // we could introduce another physreg interference on the copy and +      // continue inserting copies indefinitely. +      AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial)); +    } +  } +  for (auto &DelDep : DelDeps) +    RemovePred(DelDep.first, DelDep.second); + +  SDep FromDep(SU, SDep::Data, Reg); +  FromDep.setLatency(SU->Latency); +  AddPredQueued(CopyFromSU, FromDep); +  SDep ToDep(CopyFromSU, SDep::Data, 0); +  ToDep.setLatency(CopyFromSU->Latency); +  AddPredQueued(CopyToSU, ToDep); + +  AvailableQueue->updateNode(SU); +  AvailableQueue->addNode(CopyFromSU); +  AvailableQueue->addNode(CopyToSU); +  Copies.push_back(CopyFromSU); +  Copies.push_back(CopyToSU); + +  ++NumPRCopies; +} + +/// getPhysicalRegisterVT - Returns the ValueType of the physical register +/// definition of the specified node. +/// FIXME: Move to SelectionDAG? 
+static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, +                                 const TargetInstrInfo *TII) { +  unsigned NumRes; +  if (N->getOpcode() == ISD::CopyFromReg) { +    // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type. +    NumRes = 1; +  } else { +    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); +    assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); +    NumRes = MCID.getNumDefs(); +    for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { +      if (Reg == *ImpDef) +        break; +      ++NumRes; +    } +  } +  return N->getSimpleValueType(NumRes); +} + +/// CheckForLiveRegDef - Return true and update live register vector if the +/// specified register def of the specified SUnit clobbers any "live" registers. +static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, +                               SUnit **LiveRegDefs, +                               SmallSet<unsigned, 4> &RegAdded, +                               SmallVectorImpl<unsigned> &LRegs, +                               const TargetRegisterInfo *TRI) { +  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) { + +    // Check if Ref is live. +    if (!LiveRegDefs[*AliasI]) continue; + +    // Allow multiple uses of the same def. +    if (LiveRegDefs[*AliasI] == SU) continue; + +    // Add Reg to the set of interfering live regs. +    if (RegAdded.insert(*AliasI).second) { +      LRegs.push_back(*AliasI); +    } +  } +} + +/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered +/// by RegMask, and add them to LRegs. +static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, +                                     ArrayRef<SUnit*> LiveRegDefs, +                                     SmallSet<unsigned, 4> &RegAdded, +                                     SmallVectorImpl<unsigned> &LRegs) { +  // Look at all live registers. Skip Reg0 and the special CallResource. +  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) { +    if (!LiveRegDefs[i]) continue; +    if (LiveRegDefs[i] == SU) continue; +    if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue; +    if (RegAdded.insert(i).second) +      LRegs.push_back(i); +  } +} + +/// getNodeRegMask - Returns the register mask attached to an SDNode, if any. +static const uint32_t *getNodeRegMask(const SDNode *N) { +  for (const SDValue &Op : N->op_values()) +    if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode())) +      return RegOp->getRegMask(); +  return nullptr; +} + +/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay +/// scheduling of the given node to satisfy live physical register dependencies. +/// If the specific node is the last one that's available to schedule, do +/// whatever is necessary (i.e. backtracking or cloning) to make it possible. +bool ScheduleDAGRRList:: +DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { +  if (NumLiveRegs == 0) +    return false; + +  SmallSet<unsigned, 4> RegAdded; +  // If this node would clobber any "live" register, then it's not ready. +  // +  // If SU is the currently live definition of the same register that it uses, +  // then we are free to schedule it. 
+  for (SDep &Pred : SU->Preds) { +    if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU) +      CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(), +                         RegAdded, LRegs, TRI); +  } + +  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { +    if (Node->getOpcode() == ISD::INLINEASM || +        Node->getOpcode() == ISD::INLINEASM_BR) { +      // Inline asm can clobber physical defs. +      unsigned NumOps = Node->getNumOperands(); +      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) +        --NumOps;  // Ignore the glue operand. + +      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { +        unsigned Flags = +          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); +        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + +        ++i; // Skip the ID value. +        if (InlineAsm::isRegDefKind(Flags) || +            InlineAsm::isRegDefEarlyClobberKind(Flags) || +            InlineAsm::isClobberKind(Flags)) { +          // Check for def of register or earlyclobber register. +          for (; NumVals; --NumVals, ++i) { +            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); +            if (Register::isPhysicalRegister(Reg)) +              CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); +          } +        } else +          i += NumVals; +      } +      continue; +    } + +    if (!Node->isMachineOpcode()) +      continue; +    // If we're in the middle of scheduling a call, don't begin scheduling +    // another call. Also, don't allow any physical registers to be live across +    // the call. +    if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { +      // Check the special calling-sequence resource. +      unsigned CallResource = TRI->getNumRegs(); +      if (LiveRegDefs[CallResource]) { +        SDNode *Gen = LiveRegGens[CallResource]->getNode(); +        while (SDNode *Glued = Gen->getGluedNode()) +          Gen = Glued; +        if (!IsChainDependent(Gen, Node, 0, TII) && +            RegAdded.insert(CallResource).second) +          LRegs.push_back(CallResource); +      } +    } +    if (const uint32_t *RegMask = getNodeRegMask(Node)) +      CheckForLiveRegDefMasked(SU, RegMask, +                               makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()), +                               RegAdded, LRegs); + +    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); +    if (MCID.hasOptionalDef()) { +      // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit. +      // This operand can be either a def of CPSR, if the S bit is set; or a use +      // of %noreg.  When the OptionalDef is set to a valid register, we need to +      // handle it in the same way as an ImplicitDef. 
+      for (unsigned i = 0; i < MCID.getNumDefs(); ++i) +        if (MCID.OpInfo[i].isOptionalDef()) { +          const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues()); +          unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg(); +          CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); +        } +    } +    if (!MCID.ImplicitDefs) +      continue; +    for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) +      CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); +  } + +  return !LRegs.empty(); +} + +void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { +  // Add the nodes that aren't ready back onto the available list. +  for (unsigned i = Interferences.size(); i > 0; --i) { +    SUnit *SU = Interferences[i-1]; +    LRegsMapT::iterator LRegsPos = LRegsMap.find(SU); +    if (Reg) { +      SmallVectorImpl<unsigned> &LRegs = LRegsPos->second; +      if (!is_contained(LRegs, Reg)) +        continue; +    } +    SU->isPending = false; +    // The interfering node may no longer be available due to backtracking. +    // Furthermore, it may have been made available again, in which case it is +    // now already in the AvailableQueue. +    if (SU->isAvailable && !SU->NodeQueueId) { +      LLVM_DEBUG(dbgs() << "    Repushing SU #" << SU->NodeNum << '\n'); +      AvailableQueue->push(SU); +    } +    if (i < Interferences.size()) +      Interferences[i-1] = Interferences.back(); +    Interferences.pop_back(); +    LRegsMap.erase(LRegsPos); +  } +} + +/// Return a node that can be scheduled in this cycle. Requirements: +/// (1) Ready: latency has been satisfied +/// (2) No Hazards: resources are available +/// (3) No Interferences: may unschedule to break register interferences. +SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { +  SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop(); +  auto FindAvailableNode = [&]() { +    while (CurSU) { +      SmallVector<unsigned, 4> LRegs; +      if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) +        break; +      LLVM_DEBUG(dbgs() << "    Interfering reg "; +                 if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource"; +                 else dbgs() << printReg(LRegs[0], TRI); +                 dbgs() << " SU #" << CurSU->NodeNum << '\n'); +      std::pair<LRegsMapT::iterator, bool> LRegsPair = +        LRegsMap.insert(std::make_pair(CurSU, LRegs)); +      if (LRegsPair.second) { +        CurSU->isPending = true;  // This SU is not in AvailableQueue right now. +        Interferences.push_back(CurSU); +      } +      else { +        assert(CurSU->isPending && "Interferences are pending"); +        // Update the interference with current live regs. +        LRegsPair.first->second = LRegs; +      } +      CurSU = AvailableQueue->pop(); +    } +  }; +  FindAvailableNode(); +  if (CurSU) +    return CurSU; + +  // We query the topological order in the loop body, so make sure outstanding +  // updates are applied before entering it (we only enter the loop if there +  // are some interferences). If we make changes to the ordering, we exit +  // the loop. + +  // All candidates are delayed due to live physical reg dependencies. +  // Try backtracking, code duplication, or inserting cross class copies +  // to resolve it. +  for (SUnit *TrySU : Interferences) { +    SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; + +    // Try unscheduling up to the point where it's safe to schedule +    // this node. 
+    SUnit *BtSU = nullptr; +    unsigned LiveCycle = std::numeric_limits<unsigned>::max(); +    for (unsigned Reg : LRegs) { +      if (LiveRegGens[Reg]->getHeight() < LiveCycle) { +        BtSU = LiveRegGens[Reg]; +        LiveCycle = BtSU->getHeight(); +      } +    } +    if (!WillCreateCycle(TrySU, BtSU))  { +      // BacktrackBottomUp mutates Interferences! +      BacktrackBottomUp(TrySU, BtSU); + +      // Force the current node to be scheduled before the node that +      // requires the physical reg dep. +      if (BtSU->isAvailable) { +        BtSU->isAvailable = false; +        if (!BtSU->isPending) +          AvailableQueue->remove(BtSU); +      } +      LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum +                        << ") to SU(" << TrySU->NodeNum << ")\n"); +      AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial)); + +      // If one or more successors has been unscheduled, then the current +      // node is no longer available. +      if (!TrySU->isAvailable || !TrySU->NodeQueueId) { +        LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n"); +        CurSU = AvailableQueue->pop(); +      } else { +        LLVM_DEBUG(dbgs() << "TrySU available\n"); +        // Available and in AvailableQueue +        AvailableQueue->remove(TrySU); +        CurSU = TrySU; +      } +      FindAvailableNode(); +      // Interferences has been mutated. We must break. +      break; +    } +  } + +  if (!CurSU) { +    // Can't backtrack. If it's too expensive to copy the value, then try +    // duplicate the nodes that produces these "too expensive to copy" +    // values to break the dependency. In case even that doesn't work, +    // insert cross class copies. +    // If it's not too expensive, i.e. cost != -1, issue copies. +    SUnit *TrySU = Interferences[0]; +    SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; +    assert(LRegs.size() == 1 && "Can't handle this yet!"); +    unsigned Reg = LRegs[0]; +    SUnit *LRDef = LiveRegDefs[Reg]; +    MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); +    const TargetRegisterClass *RC = +      TRI->getMinimalPhysRegClass(Reg, VT); +    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); + +    // If cross copy register class is the same as RC, then it must be possible +    // copy the value directly. Do not try duplicate the def. +    // If cross copy register class is not the same as RC, then it's possible to +    // copy the value but it require cross register class copies and it is +    // expensive. +    // If cross copy register class is null, then it's not possible to copy +    // the value at all. +    SUnit *NewDef = nullptr; +    if (DestRC != RC) { +      NewDef = CopyAndMoveSuccessors(LRDef); +      if (!DestRC && !NewDef) +        report_fatal_error("Can't handle live physical register dependency!"); +    } +    if (!NewDef) { +      // Issue copies, these can be expensive cross register class copies. 
+      SmallVector<SUnit*, 2> Copies; +      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); +      LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum +                        << " to SU #" << Copies.front()->NodeNum << "\n"); +      AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial)); +      NewDef = Copies.back(); +    } + +    LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum +                      << " to SU #" << TrySU->NodeNum << "\n"); +    LiveRegDefs[Reg] = NewDef; +    AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial)); +    TrySU->isAvailable = false; +    CurSU = NewDef; +  } +  assert(CurSU && "Unable to resolve live physical register dependencies!"); +  return CurSU; +} + +/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up +/// schedulers. +void ScheduleDAGRRList::ListScheduleBottomUp() { +  // Release any predecessors of the special Exit node. +  ReleasePredecessors(&ExitSU); + +  // Add root to Available queue. +  if (!SUnits.empty()) { +    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; +    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); +    RootSU->isAvailable = true; +    AvailableQueue->push(RootSU); +  } + +  // While Available queue is not empty, grab the node with the highest +  // priority. If it is not ready put it back.  Schedule the node. +  Sequence.reserve(SUnits.size()); +  while (!AvailableQueue->empty() || !Interferences.empty()) { +    LLVM_DEBUG(dbgs() << "\nExamining Available:\n"; +               AvailableQueue->dump(this)); + +    // Pick the best node to schedule taking all constraints into +    // consideration. +    SUnit *SU = PickNodeToScheduleBottomUp(); + +    AdvancePastStalls(SU); + +    ScheduleNodeBottomUp(SU); + +    while (AvailableQueue->empty() && !PendingQueue.empty()) { +      // Advance the cycle to free resources. Skip ahead to the next ready SU. +      assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() && +             "MinAvailableCycle uninitialized"); +      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle)); +    } +  } + +  // Reverse the order if it is bottom up. +  std::reverse(Sequence.begin(), Sequence.end()); + +#ifndef NDEBUG +  VerifyScheduledSequence(/*isBottomUp=*/true); +#endif +} + +namespace { + +class RegReductionPQBase; + +struct queue_sort { +  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } +}; + +#ifndef NDEBUG +template<class SF> +struct reverse_sort : public queue_sort { +  SF &SortFunc; + +  reverse_sort(SF &sf) : SortFunc(sf) {} + +  bool operator()(SUnit* left, SUnit* right) const { +    // reverse left/right rather than simply !SortFunc(left, right) +    // to expose different paths in the comparison logic. +    return SortFunc(right, left); +  } +}; +#endif // NDEBUG + +/// bu_ls_rr_sort - Priority function for bottom up register pressure +// reduction scheduler. +struct bu_ls_rr_sort : public queue_sort { +  enum { +    IsBottomUp = true, +    HasReadyFilter = false +  }; + +  RegReductionPQBase *SPQ; + +  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} + +  bool operator()(SUnit* left, SUnit* right) const; +}; + +// src_ls_rr_sort - Priority function for source order scheduler. 
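// --- Editorial sketch, not part of the original file ------------------------
// ListScheduleBottomUp() above follows the classic list-scheduling shape: seed
// the queue with nodes that have no successors, repeatedly pop the best
// candidate, release its predecessors, and reverse the sequence at the end so
// it reads top-down.  A toy version over a hypothetical DAG type, ignoring
// cycles, hazards and interferences:
#include <algorithm>
#include <queue>
#include <vector>

struct ToyNode { std::vector<unsigned> Succs; };  // indices of data successors

inline std::vector<unsigned>
toyListScheduleBottomUp(const std::vector<ToyNode> &Nodes) {
  std::vector<unsigned> SuccsLeft(Nodes.size());
  std::vector<std::vector<unsigned>> Preds(Nodes.size());
  for (unsigned I = 0; I != Nodes.size(); ++I) {
    SuccsLeft[I] = Nodes[I].Succs.size();
    for (unsigned S : Nodes[I].Succs)
      Preds[S].push_back(I);
  }
  std::priority_queue<unsigned> Available;  // "priority" is just the index here
  for (unsigned I = 0; I != Nodes.size(); ++I)
    if (SuccsLeft[I] == 0)
      Available.push(I);                    // roots of the bottom-up walk
  std::vector<unsigned> Sequence;
  while (!Available.empty()) {
    unsigned SU = Available.top();
    Available.pop();
    Sequence.push_back(SU);
    for (unsigned P : Preds[SU])            // "release" the predecessors
      if (--SuccsLeft[P] == 0)
        Available.push(P);
  }
  std::reverse(Sequence.begin(), Sequence.end());  // emit in top-down order
  return Sequence;
}
// ----------------------------------------------------------------------------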
+struct src_ls_rr_sort : public queue_sort { +  enum { +    IsBottomUp = true, +    HasReadyFilter = false +  }; + +  RegReductionPQBase *SPQ; + +  src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} + +  bool operator()(SUnit* left, SUnit* right) const; +}; + +// hybrid_ls_rr_sort - Priority function for hybrid scheduler. +struct hybrid_ls_rr_sort : public queue_sort { +  enum { +    IsBottomUp = true, +    HasReadyFilter = false +  }; + +  RegReductionPQBase *SPQ; + +  hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} + +  bool isReady(SUnit *SU, unsigned CurCycle) const; + +  bool operator()(SUnit* left, SUnit* right) const; +}; + +// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) +// scheduler. +struct ilp_ls_rr_sort : public queue_sort { +  enum { +    IsBottomUp = true, +    HasReadyFilter = false +  }; + +  RegReductionPQBase *SPQ; + +  ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} + +  bool isReady(SUnit *SU, unsigned CurCycle) const; + +  bool operator()(SUnit* left, SUnit* right) const; +}; + +class RegReductionPQBase : public SchedulingPriorityQueue { +protected: +  std::vector<SUnit *> Queue; +  unsigned CurQueueId = 0; +  bool TracksRegPressure; +  bool SrcOrder; + +  // SUnits - The SUnits for the current graph. +  std::vector<SUnit> *SUnits; + +  MachineFunction &MF; +  const TargetInstrInfo *TII; +  const TargetRegisterInfo *TRI; +  const TargetLowering *TLI; +  ScheduleDAGRRList *scheduleDAG = nullptr; + +  // SethiUllmanNumbers - The SethiUllman number for each node. +  std::vector<unsigned> SethiUllmanNumbers; + +  /// RegPressure - Tracking current reg pressure per register class. +  std::vector<unsigned> RegPressure; + +  /// RegLimit - Tracking the number of allocatable registers per register +  /// class. 
+  std::vector<unsigned> RegLimit; + +public: +  RegReductionPQBase(MachineFunction &mf, +                     bool hasReadyFilter, +                     bool tracksrp, +                     bool srcorder, +                     const TargetInstrInfo *tii, +                     const TargetRegisterInfo *tri, +                     const TargetLowering *tli) +    : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp), +      SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) { +    if (TracksRegPressure) { +      unsigned NumRC = TRI->getNumRegClasses(); +      RegLimit.resize(NumRC); +      RegPressure.resize(NumRC); +      std::fill(RegLimit.begin(), RegLimit.end(), 0); +      std::fill(RegPressure.begin(), RegPressure.end(), 0); +      for (const TargetRegisterClass *RC : TRI->regclasses()) +        RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF); +    } +  } + +  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { +    scheduleDAG = scheduleDag; +  } + +  ScheduleHazardRecognizer* getHazardRec() { +    return scheduleDAG->getHazardRec(); +  } + +  void initNodes(std::vector<SUnit> &sunits) override; + +  void addNode(const SUnit *SU) override; + +  void updateNode(const SUnit *SU) override; + +  void releaseState() override { +    SUnits = nullptr; +    SethiUllmanNumbers.clear(); +    std::fill(RegPressure.begin(), RegPressure.end(), 0); +  } + +  unsigned getNodePriority(const SUnit *SU) const; + +  unsigned getNodeOrdering(const SUnit *SU) const { +    if (!SU->getNode()) return 0; + +    return SU->getNode()->getIROrder(); +  } + +  bool empty() const override { return Queue.empty(); } + +  void push(SUnit *U) override { +    assert(!U->NodeQueueId && "Node in the queue already"); +    U->NodeQueueId = ++CurQueueId; +    Queue.push_back(U); +  } + +  void remove(SUnit *SU) override { +    assert(!Queue.empty() && "Queue is empty!"); +    assert(SU->NodeQueueId != 0 && "Not in queue!"); +    std::vector<SUnit *>::iterator I = llvm::find(Queue, SU); +    if (I != std::prev(Queue.end())) +      std::swap(*I, Queue.back()); +    Queue.pop_back(); +    SU->NodeQueueId = 0; +  } + +  bool tracksRegPressure() const override { return TracksRegPressure; } + +  void dumpRegPressure() const; + +  bool HighRegPressure(const SUnit *SU) const; + +  bool MayReduceRegPressure(SUnit *SU) const; + +  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; + +  void scheduledNode(SUnit *SU) override; + +  void unscheduledNode(SUnit *SU) override; + +protected: +  bool canClobber(const SUnit *SU, const SUnit *Op); +  void AddPseudoTwoAddrDeps(); +  void PrescheduleNodesWithMultipleUses(); +  void CalculateSethiUllmanNumbers(); +}; + +template<class SF> +static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) { +  std::vector<SUnit *>::iterator Best = Q.begin(); +  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I) +    if (Picker(*Best, *I)) +      Best = I; +  SUnit *V = *Best; +  if (Best != std::prev(Q.end())) +    std::swap(*Best, Q.back()); +  Q.pop_back(); +  return V; +} + +template<class SF> +SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) { +#ifndef NDEBUG +  if (DAG->StressSched) { +    reverse_sort<SF> RPicker(Picker); +    return popFromQueueImpl(Q, RPicker); +  } +#endif +  (void)DAG; +  return popFromQueueImpl(Q, Picker); +} + +//===----------------------------------------------------------------------===// +//                RegReductionPriorityQueue Definition 
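// --- Editorial sketch, not part of the original file ------------------------
// popFromQueueImpl() above keeps the queue as an unsorted vector: a linear
// scan picks the best element (Picker(A, B) == true meaning B should be
// scheduled before A) and the winner is removed with the same swap-with-back
// trick used by RegReductionPQBase::remove().  A standalone version over ints
// with a caller-supplied picker (the container must be non-empty):
#include <iterator>
#include <utility>
#include <vector>

template <class PickerT>
int popBest(std::vector<int> &Q, PickerT Picker) {
  auto Best = Q.begin();
  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
    if (Picker(*Best, *I))            // *I beats the current best
      Best = I;
  int V = *Best;
  if (Best != std::prev(Q.end()))
    std::swap(*Best, Q.back());       // O(1) removal; element order is unused
  Q.pop_back();
  return V;
}
// Example: popBest(Q, [](int A, int B) { return B > A; }) pops the maximum.
// ----------------------------------------------------------------------------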
+//===----------------------------------------------------------------------===// +// +// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers +// to reduce register pressure. +// +template<class SF> +class RegReductionPriorityQueue : public RegReductionPQBase { +  SF Picker; + +public: +  RegReductionPriorityQueue(MachineFunction &mf, +                            bool tracksrp, +                            bool srcorder, +                            const TargetInstrInfo *tii, +                            const TargetRegisterInfo *tri, +                            const TargetLowering *tli) +    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder, +                         tii, tri, tli), +      Picker(this) {} + +  bool isBottomUp() const override { return SF::IsBottomUp; } + +  bool isReady(SUnit *U) const override { +    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); +  } + +  SUnit *pop() override { +    if (Queue.empty()) return nullptr; + +    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); +    V->NodeQueueId = 0; +    return V; +  } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +  LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override { +    // Emulate pop() without clobbering NodeQueueIds. +    std::vector<SUnit *> DumpQueue = Queue; +    SF DumpPicker = Picker; +    while (!DumpQueue.empty()) { +      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); +      dbgs() << "Height " << SU->getHeight() << ": "; +      DAG->dumpNode(*SU); +    } +  } +#endif +}; + +using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>; +using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>; +using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>; +using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +//           Static Node Priority for Register Pressure Reduction +//===----------------------------------------------------------------------===// + +// Check for special nodes that bypass scheduling heuristics. +// Currently this pushes TokenFactor nodes down, but may be used for other +// pseudo-ops as well. +// +// Return -1 to schedule right above left, 1 for left above right. +// Return 0 if no bias exists. +static int checkSpecialNodes(const SUnit *left, const SUnit *right) { +  bool LSchedLow = left->isScheduleLow; +  bool RSchedLow = right->isScheduleLow; +  if (LSchedLow != RSchedLow) +    return LSchedLow < RSchedLow ? 1 : -1; +  return 0; +} + +/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. +/// Smaller number is the higher priority. +static unsigned +CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { +  if (SUNumbers[SU->NodeNum] != 0) +    return SUNumbers[SU->NodeNum]; + +  // Use WorkList to avoid stack overflow on excessively large IRs. +  struct WorkState { +    WorkState(const SUnit *SU) : SU(SU) {} +    const SUnit *SU; +    unsigned PredsProcessed = 0; +  }; + +  SmallVector<WorkState, 16> WorkList; +  WorkList.push_back(SU); +  while (!WorkList.empty()) { +    auto &Temp = WorkList.back(); +    auto *TempSU = Temp.SU; +    bool AllPredsKnown = true; +    // Try to find a non-evaluated pred and push it into the processing stack. 
+    for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) { +      auto &Pred = TempSU->Preds[P]; +      if (Pred.isCtrl()) continue;  // ignore chain preds +      SUnit *PredSU = Pred.getSUnit(); +      if (SUNumbers[PredSU->NodeNum] == 0) { +#ifndef NDEBUG +        // In debug mode, check that we don't have such element in the stack. +        for (auto It : WorkList) +          assert(It.SU != PredSU && "Trying to push an element twice?"); +#endif +        // Next time start processing this one starting from the next pred. +        Temp.PredsProcessed = P + 1; +        WorkList.push_back(PredSU); +        AllPredsKnown = false; +        break; +      } +    } + +    if (!AllPredsKnown) +      continue; + +    // Once all preds are known, we can calculate the answer for this one. +    unsigned SethiUllmanNumber = 0; +    unsigned Extra = 0; +    for (const SDep &Pred : TempSU->Preds) { +      if (Pred.isCtrl()) continue;  // ignore chain preds +      SUnit *PredSU = Pred.getSUnit(); +      unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum]; +      assert(PredSethiUllman > 0 && "We should have evaluated this pred!"); +      if (PredSethiUllman > SethiUllmanNumber) { +        SethiUllmanNumber = PredSethiUllman; +        Extra = 0; +      } else if (PredSethiUllman == SethiUllmanNumber) +        ++Extra; +    } + +    SethiUllmanNumber += Extra; +    if (SethiUllmanNumber == 0) +      SethiUllmanNumber = 1; +    SUNumbers[TempSU->NodeNum] = SethiUllmanNumber; +    WorkList.pop_back(); +  } + +  assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!"); +  return SUNumbers[SU->NodeNum]; +} + +/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all +/// scheduling units. +void RegReductionPQBase::CalculateSethiUllmanNumbers() { +  SethiUllmanNumbers.assign(SUnits->size(), 0); + +  for (const SUnit &SU : *SUnits) +    CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers); +} + +void RegReductionPQBase::addNode(const SUnit *SU) { +  unsigned SUSize = SethiUllmanNumbers.size(); +  if (SUnits->size() > SUSize) +    SethiUllmanNumbers.resize(SUSize*2, 0); +  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); +} + +void RegReductionPQBase::updateNode(const SUnit *SU) { +  SethiUllmanNumbers[SU->NodeNum] = 0; +  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); +} + +// Lower priority means schedule further down. For bottom-up scheduling, lower +// priority SUs are scheduled before higher priority SUs. +unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { +  assert(SU->NodeNum < SethiUllmanNumbers.size()); +  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; +  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) +    // CopyToReg should be close to its uses to facilitate coalescing and +    // avoid spilling. +    return 0; +  if (Opc == TargetOpcode::EXTRACT_SUBREG || +      Opc == TargetOpcode::SUBREG_TO_REG || +      Opc == TargetOpcode::INSERT_SUBREG) +    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be +    // close to their uses to facilitate coalescing. +    return 0; +  if (SU->NumSuccs == 0 && SU->NumPreds != 0) +    // If SU does not have a register use, i.e. it doesn't produce a value +    // that would be consumed (e.g. store), then it terminates a chain of +    // computation.  Give it a large SethiUllman number so it will be +    // scheduled right before its predecessors that it doesn't lengthen +    // their live ranges. 
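// --- Editorial sketch, not part of the original file ------------------------
// CalcNodeSethiUllmanNumber() above is an iterative, stack-safe form of the
// classic Sethi-Ullman recurrence: a node's number is the maximum over its
// non-chain operands plus one for every additional operand that ties that
// maximum, and leaves are clamped to 1.  A recursive toy version for a plain
// expression tree (ToyExprNode is hypothetical; the real code memoizes results
// in SUNumbers, so shared operands are not recomputed):
#include <vector>

struct ToyExprNode { std::vector<const ToyExprNode *> Preds; };

inline unsigned toySethiUllman(const ToyExprNode *N) {
  unsigned Max = 0, Extra = 0;
  for (const ToyExprNode *P : N->Preds) {
    unsigned V = toySethiUllman(P);
    if (V > Max) { Max = V; Extra = 0; }
    else if (V == Max) ++Extra;
  }
  unsigned Num = Max + Extra;
  return Num ? Num : 1;               // a leaf needs one register
}
// Worked example, (a*b)+(c*d): each leaf gets 1, each multiply gets 1 + 1 = 2
// (two tying operands), and the add gets 2 + 1 = 3 -- the number of registers
// needed to evaluate the tree without spilling.
// ----------------------------------------------------------------------------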
+    return 0xffff; +  if (SU->NumPreds == 0 && SU->NumSuccs != 0) +    // If SU does not have a register def, schedule it close to its uses +    // because it does not lengthen any live ranges. +    return 0; +#if 1 +  return SethiUllmanNumbers[SU->NodeNum]; +#else +  unsigned Priority = SethiUllmanNumbers[SU->NodeNum]; +  if (SU->isCallOp) { +    // FIXME: This assumes all of the defs are used as call operands. +    int NP = (int)Priority - SU->getNode()->getNumValues(); +    return (NP > 0) ? NP : 0; +  } +  return Priority; +#endif +} + +//===----------------------------------------------------------------------===// +//                     Register Pressure Tracking +//===----------------------------------------------------------------------===// + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const { +  for (const TargetRegisterClass *RC : TRI->regclasses()) { +    unsigned Id = RC->getID(); +    unsigned RP = RegPressure[Id]; +    if (!RP) continue; +    LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / " +                      << RegLimit[Id] << '\n'); +  } +} +#endif + +bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { +  if (!TLI) +    return false; + +  for (const SDep &Pred : SU->Preds) { +    if (Pred.isCtrl()) +      continue; +    SUnit *PredSU = Pred.getSUnit(); +    // NumRegDefsLeft is zero when enough uses of this node have been scheduled +    // to cover the number of registers defined (they are all live). +    if (PredSU->NumRegDefsLeft == 0) { +      continue; +    } +    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); +         RegDefPos.IsValid(); RegDefPos.Advance()) { +      unsigned RCId, Cost; +      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); + +      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) +        return true; +    } +  } +  return false; +} + +bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { +  const SDNode *N = SU->getNode(); + +  if (!N->isMachineOpcode() || !SU->NumSuccs) +    return false; + +  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); +  for (unsigned i = 0; i != NumDefs; ++i) { +    MVT VT = N->getSimpleValueType(i); +    if (!N->hasAnyUseOfValue(i)) +      continue; +    unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); +    if (RegPressure[RCId] >= RegLimit[RCId]) +      return true; +  } +  return false; +} + +// Compute the register pressure contribution by this instruction by count up +// for uses that are not live and down for defs. Only count register classes +// that are already under high pressure. As a side effect, compute the number of +// uses of registers that are already live. +// +// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure +// so could probably be factored. +int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { +  LiveUses = 0; +  int PDiff = 0; +  for (const SDep &Pred : SU->Preds) { +    if (Pred.isCtrl()) +      continue; +    SUnit *PredSU = Pred.getSUnit(); +    // NumRegDefsLeft is zero when enough uses of this node have been scheduled +    // to cover the number of registers defined (they are all live). 
+    if (PredSU->NumRegDefsLeft == 0) { +      if (PredSU->getNode()->isMachineOpcode()) +        ++LiveUses; +      continue; +    } +    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); +         RegDefPos.IsValid(); RegDefPos.Advance()) { +      MVT VT = RegDefPos.GetValue(); +      unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); +      if (RegPressure[RCId] >= RegLimit[RCId]) +        ++PDiff; +    } +  } +  const SDNode *N = SU->getNode(); + +  if (!N || !N->isMachineOpcode() || !SU->NumSuccs) +    return PDiff; + +  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); +  for (unsigned i = 0; i != NumDefs; ++i) { +    MVT VT = N->getSimpleValueType(i); +    if (!N->hasAnyUseOfValue(i)) +      continue; +    unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); +    if (RegPressure[RCId] >= RegLimit[RCId]) +      --PDiff; +  } +  return PDiff; +} + +void RegReductionPQBase::scheduledNode(SUnit *SU) { +  if (!TracksRegPressure) +    return; + +  if (!SU->getNode()) +    return; + +  for (const SDep &Pred : SU->Preds) { +    if (Pred.isCtrl()) +      continue; +    SUnit *PredSU = Pred.getSUnit(); +    // NumRegDefsLeft is zero when enough uses of this node have been scheduled +    // to cover the number of registers defined (they are all live). +    if (PredSU->NumRegDefsLeft == 0) { +      continue; +    } +    // FIXME: The ScheduleDAG currently loses information about which of a +    // node's values is consumed by each dependence. Consequently, if the node +    // defines multiple register classes, we don't know which to pressurize +    // here. Instead the following loop consumes the register defs in an +    // arbitrary order. At least it handles the common case of clustered loads +    // to the same class. For precise liveness, each SDep needs to indicate the +    // result number. But that tightly couples the ScheduleDAG with the +    // SelectionDAG making updates tricky. A simpler hack would be to attach a +    // value type or register class to SDep. +    // +    // The most important aspect of register tracking is balancing the increase +    // here with the reduction further below. Note that this SU may use multiple +    // defs in PredSU. The can't be determined here, but we've already +    // compensated by reducing NumRegDefsLeft in PredSU during +    // ScheduleDAGSDNodes::AddSchedEdges. +    --PredSU->NumRegDefsLeft; +    unsigned SkipRegDefs = PredSU->NumRegDefsLeft; +    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); +         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { +      if (SkipRegDefs) +        continue; + +      unsigned RCId, Cost; +      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); +      RegPressure[RCId] += Cost; +      break; +    } +  } + +  // We should have this assert, but there may be dead SDNodes that never +  // materialize as SUnits, so they don't appear to generate liveness. +  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses"); +  int SkipRegDefs = (int)SU->NumRegDefsLeft; +  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG); +       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { +    if (SkipRegDefs > 0) +      continue; +    unsigned RCId, Cost; +    GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); +    if (RegPressure[RCId] < Cost) { +      // Register pressure tracking is imprecise. This can happen. But we try +      // hard not to let it happen because it likely results in poor scheduling. 
+      LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum +                        << ") has too many regdefs\n"); +      RegPressure[RCId] = 0; +    } +    else { +      RegPressure[RCId] -= Cost; +    } +  } +  LLVM_DEBUG(dumpRegPressure()); +} + +void RegReductionPQBase::unscheduledNode(SUnit *SU) { +  if (!TracksRegPressure) +    return; + +  const SDNode *N = SU->getNode(); +  if (!N) return; + +  if (!N->isMachineOpcode()) { +    if (N->getOpcode() != ISD::CopyToReg) +      return; +  } else { +    unsigned Opc = N->getMachineOpcode(); +    if (Opc == TargetOpcode::EXTRACT_SUBREG || +        Opc == TargetOpcode::INSERT_SUBREG || +        Opc == TargetOpcode::SUBREG_TO_REG || +        Opc == TargetOpcode::REG_SEQUENCE || +        Opc == TargetOpcode::IMPLICIT_DEF) +      return; +  } + +  for (const SDep &Pred : SU->Preds) { +    if (Pred.isCtrl()) +      continue; +    SUnit *PredSU = Pred.getSUnit(); +    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only +    // counts data deps. +    if (PredSU->NumSuccsLeft != PredSU->Succs.size()) +      continue; +    const SDNode *PN = PredSU->getNode(); +    if (!PN->isMachineOpcode()) { +      if (PN->getOpcode() == ISD::CopyFromReg) { +        MVT VT = PN->getSimpleValueType(0); +        unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); +        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); +      } +      continue; +    } +    unsigned POpc = PN->getMachineOpcode(); +    if (POpc == TargetOpcode::IMPLICIT_DEF) +      continue; +    if (POpc == TargetOpcode::EXTRACT_SUBREG || +        POpc == TargetOpcode::INSERT_SUBREG || +        POpc == TargetOpcode::SUBREG_TO_REG) { +      MVT VT = PN->getSimpleValueType(0); +      unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); +      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); +      continue; +    } +    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); +    for (unsigned i = 0; i != NumDefs; ++i) { +      MVT VT = PN->getSimpleValueType(i); +      if (!PN->hasAnyUseOfValue(i)) +        continue; +      unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); +      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) +        // Register pressure tracking is imprecise. This can happen. +        RegPressure[RCId] = 0; +      else +        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); +    } +  } + +  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() +  // may transfer data dependencies to CopyToReg. +  if (SU->NumSuccs && N->isMachineOpcode()) { +    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); +    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { +      MVT VT = N->getSimpleValueType(i); +      if (VT == MVT::Glue || VT == MVT::Other) +        continue; +      if (!N->hasAnyUseOfValue(i)) +        continue; +      unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); +      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); +    } +  } + +  LLVM_DEBUG(dumpRegPressure()); +} + +//===----------------------------------------------------------------------===// +//           Dynamic Node Priority for Register Pressure Reduction +//===----------------------------------------------------------------------===// + +/// closestSucc - Returns the scheduled cycle of the successor which is +/// closest to the current cycle. 
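// --- Editorial sketch, not part of the original file ------------------------
// scheduledNode()/unscheduledNode() above maintain an approximate per-class
// pressure count for the bottom-up walk: scheduling a node makes the values it
// reads live (their defs are still unscheduled, so pressure rises in the
// operand's class) and closes the live ranges it defines (pressure falls,
// clamped at zero because the model is imprecise), and HighRegPressure()
// compares the count against the per-class limit.  A minimal model with
// hypothetical class ids and costs:
#include <vector>

struct ToyPressureTracker {
  std::vector<unsigned> RegPressure;  // current pressure, per register class
  std::vector<unsigned> RegLimit;     // allocatable registers, per class

  bool isHigh(unsigned RC, unsigned Cost) const {      // cf. HighRegPressure()
    return RegPressure[RC] + Cost >= RegLimit[RC];
  }
  void valueBecomesLive(unsigned RC, unsigned Cost) {  // operand of scheduled node
    RegPressure[RC] += Cost;
  }
  void valueRetired(unsigned RC, unsigned Cost) {      // its def got scheduled
    RegPressure[RC] = RegPressure[RC] < Cost ? 0 : RegPressure[RC] - Cost;
  }
};
// ----------------------------------------------------------------------------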
+static unsigned closestSucc(const SUnit *SU) { +  unsigned MaxHeight = 0; +  for (const SDep &Succ : SU->Succs) { +    if (Succ.isCtrl()) continue;  // ignore chain succs +    unsigned Height = Succ.getSUnit()->getHeight(); +    // If there are bunch of CopyToRegs stacked up, they should be considered +    // to be at the same position. +    if (Succ.getSUnit()->getNode() && +        Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) +      Height = closestSucc(Succ.getSUnit())+1; +    if (Height > MaxHeight) +      MaxHeight = Height; +  } +  return MaxHeight; +} + +/// calcMaxScratches - Returns an cost estimate of the worse case requirement +/// for scratch registers, i.e. number of data dependencies. +static unsigned calcMaxScratches(const SUnit *SU) { +  unsigned Scratches = 0; +  for (const SDep &Pred : SU->Preds) { +    if (Pred.isCtrl()) continue;  // ignore chain preds +    Scratches++; +  } +  return Scratches; +} + +/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are +/// CopyFromReg from a virtual register. +static bool hasOnlyLiveInOpers(const SUnit *SU) { +  bool RetVal = false; +  for (const SDep &Pred : SU->Preds) { +    if (Pred.isCtrl()) continue; +    const SUnit *PredSU = Pred.getSUnit(); +    if (PredSU->getNode() && +        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { +      unsigned Reg = +        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); +      if (Register::isVirtualRegister(Reg)) { +        RetVal = true; +        continue; +      } +    } +    return false; +  } +  return RetVal; +} + +/// hasOnlyLiveOutUses - Return true if SU has only value successors that are +/// CopyToReg to a virtual register. This SU def is probably a liveout and +/// it has no other use. It should be scheduled closer to the terminator. +static bool hasOnlyLiveOutUses(const SUnit *SU) { +  bool RetVal = false; +  for (const SDep &Succ : SU->Succs) { +    if (Succ.isCtrl()) continue; +    const SUnit *SuccSU = Succ.getSUnit(); +    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { +      unsigned Reg = +        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); +      if (Register::isVirtualRegister(Reg)) { +        RetVal = true; +        continue; +      } +    } +    return false; +  } +  return RetVal; +} + +// Set isVRegCycle for a node with only live in opers and live out uses. Also +// set isVRegCycle for its CopyFromReg operands. +// +// This is only relevant for single-block loops, in which case the VRegCycle +// node is likely an induction variable in which the operand and target virtual +// registers should be coalesced (e.g. pre/post increment values). Setting the +// isVRegCycle flag helps the scheduler prioritize other uses of the same +// CopyFromReg so that this node becomes the virtual register "kill". This +// avoids interference between the values live in and out of the block and +// eliminates a copy inside the loop. +static void initVRegCycle(SUnit *SU) { +  if (DisableSchedVRegCycle) +    return; + +  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) +    return; + +  LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); + +  SU->isVRegCycle = true; + +  for (const SDep &Pred : SU->Preds) { +    if (Pred.isCtrl()) continue; +    Pred.getSUnit()->isVRegCycle = true; +  } +} + +// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of +// CopyFromReg operands. We should no longer penalize other uses of this VReg. 
+static void resetVRegCycle(SUnit *SU) { +  if (!SU->isVRegCycle) +    return; + +  for (const SDep &Pred : SU->Preds) { +    if (Pred.isCtrl()) continue;  // ignore chain preds +    SUnit *PredSU = Pred.getSUnit(); +    if (PredSU->isVRegCycle) { +      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && +             "VRegCycle def must be CopyFromReg"); +      Pred.getSUnit()->isVRegCycle = false; +    } +  } +} + +// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This +// means a node that defines the VRegCycle has not been scheduled yet. +static bool hasVRegCycleUse(const SUnit *SU) { +  // If this SU also defines the VReg, don't hoist it as a "use". +  if (SU->isVRegCycle) +    return false; + +  for (const SDep &Pred : SU->Preds) { +    if (Pred.isCtrl()) continue;  // ignore chain preds +    if (Pred.getSUnit()->isVRegCycle && +        Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { +      LLVM_DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n"); +      return true; +    } +  } +  return false; +} + +// Check for either a dependence (latency) or resource (hazard) stall. +// +// Note: The ScheduleHazardRecognizer interface requires a non-const SU. +static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { +  if ((int)SPQ->getCurCycle() < Height) return true; +  if (SPQ->getHazardRec()->getHazardType(SU, 0) +      != ScheduleHazardRecognizer::NoHazard) +    return true; +  return false; +} + +// Return -1 if left has higher priority, 1 if right has higher priority. +// Return 0 if latency-based priority is equivalent. +static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, +                            RegReductionPQBase *SPQ) { +  // Scheduling an instruction that uses a VReg whose postincrement has not yet +  // been scheduled will induce a copy. Model this as an extra cycle of latency. +  int LPenalty = hasVRegCycleUse(left) ? 1 : 0; +  int RPenalty = hasVRegCycleUse(right) ? 1 : 0; +  int LHeight = (int)left->getHeight() + LPenalty; +  int RHeight = (int)right->getHeight() + RPenalty; + +  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) && +    BUHasStall(left, LHeight, SPQ); +  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) && +    BUHasStall(right, RHeight, SPQ); + +  // If scheduling one of the node will cause a pipeline stall, delay it. +  // If scheduling either one of the node will cause a pipeline stall, sort +  // them according to their height. +  if (LStall) { +    if (!RStall) +      return 1; +    if (LHeight != RHeight) +      return LHeight > RHeight ? 1 : -1; +  } else if (RStall) +    return -1; + +  // If either node is scheduling for latency, sort them by height/depth +  // and latency. +  if (!checkPref || (left->SchedulingPref == Sched::ILP || +                     right->SchedulingPref == Sched::ILP)) { +    // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer +    // is enabled, grouping instructions by cycle, then its height is already +    // covered so only its depth matters. We also reach this point if both stall +    // but have the same height. +    if (!SPQ->getHazardRec()->isEnabled()) { +      if (LHeight != RHeight) +        return LHeight > RHeight ? 
1 : -1; +    } +    int LDepth = left->getDepth() - LPenalty; +    int RDepth = right->getDepth() - RPenalty; +    if (LDepth != RDepth) { +      LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum +                        << ") depth " << LDepth << " vs SU (" << right->NodeNum +                        << ") depth " << RDepth << "\n"); +      return LDepth < RDepth ? 1 : -1; +    } +    if (left->Latency != right->Latency) +      return left->Latency > right->Latency ? 1 : -1; +  } +  return 0; +} + +static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { +  // Schedule physical register definitions close to their use. This is +  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as +  // long as shortening physreg live ranges is generally good, we can defer +  // creating a subtarget hook. +  if (!DisableSchedPhysRegJoin) { +    bool LHasPhysReg = left->hasPhysRegDefs; +    bool RHasPhysReg = right->hasPhysRegDefs; +    if (LHasPhysReg != RHasPhysReg) { +      #ifndef NDEBUG +      static const char *const PhysRegMsg[] = { " has no physreg", +                                                " defines a physreg" }; +      #endif +      LLVM_DEBUG(dbgs() << "  SU (" << left->NodeNum << ") " +                        << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum +                        << ") " << PhysRegMsg[RHasPhysReg] << "\n"); +      return LHasPhysReg < RHasPhysReg; +    } +  } + +  // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down. +  unsigned LPriority = SPQ->getNodePriority(left); +  unsigned RPriority = SPQ->getNodePriority(right); + +  // Be really careful about hoisting call operands above previous calls. +  // Only allows it if it would reduce register pressure. +  if (left->isCall && right->isCallOp) { +    unsigned RNumVals = right->getNode()->getNumValues(); +    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0; +  } +  if (right->isCall && left->isCallOp) { +    unsigned LNumVals = left->getNode()->getNumValues(); +    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0; +  } + +  if (LPriority != RPriority) +    return LPriority > RPriority; + +  // One or both of the nodes are calls and their sethi-ullman numbers are the +  // same, then keep source order. +  if (left->isCall || right->isCall) { +    unsigned LOrder = SPQ->getNodeOrdering(left); +    unsigned ROrder = SPQ->getNodeOrdering(right); + +    // Prefer an ordering where the lower the non-zero order number, the higher +    // the preference. +    if ((LOrder || ROrder) && LOrder != ROrder) +      return LOrder != 0 && (LOrder < ROrder || ROrder == 0); +  } + +  // Try schedule def + use closer when Sethi-Ullman numbers are the same. +  // e.g. +  // t1 = op t2, c1 +  // t3 = op t4, c2 +  // +  // and the following instructions are both ready. +  // t2 = op c3 +  // t4 = op c4 +  // +  // Then schedule t2 = op first. +  // i.e. +  // t4 = op c4 +  // t2 = op c3 +  // t1 = op t2, c1 +  // t3 = op t4, c2 +  // +  // This creates more short live intervals. +  unsigned LDist = closestSucc(left); +  unsigned RDist = closestSucc(right); +  if (LDist != RDist) +    return LDist < RDist; + +  // How many registers becomes live when the node is scheduled. 
+  unsigned LScratch = calcMaxScratches(left); +  unsigned RScratch = calcMaxScratches(right); +  if (LScratch != RScratch) +    return LScratch > RScratch; + +  // Comparing latency against a call makes little sense unless the node +  // is register pressure-neutral. +  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0)) +    return (left->NodeQueueId > right->NodeQueueId); + +  // Do not compare latencies when one or both of the nodes are calls. +  if (!DisableSchedCycles && +      !(left->isCall || right->isCall)) { +    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); +    if (result != 0) +      return result > 0; +  } +  else { +    if (left->getHeight() != right->getHeight()) +      return left->getHeight() > right->getHeight(); + +    if (left->getDepth() != right->getDepth()) +      return left->getDepth() < right->getDepth(); +  } + +  assert(left->NodeQueueId && right->NodeQueueId && +         "NodeQueueId cannot be zero"); +  return (left->NodeQueueId > right->NodeQueueId); +} + +// Bottom up +bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { +  if (int res = checkSpecialNodes(left, right)) +    return res > 0; + +  return BURRSort(left, right, SPQ); +} + +// Source order, otherwise bottom up. +bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { +  if (int res = checkSpecialNodes(left, right)) +    return res > 0; + +  unsigned LOrder = SPQ->getNodeOrdering(left); +  unsigned ROrder = SPQ->getNodeOrdering(right); + +  // Prefer an ordering where the lower the non-zero order number, the higher +  // the preference. +  if ((LOrder || ROrder) && LOrder != ROrder) +    return LOrder != 0 && (LOrder < ROrder || ROrder == 0); + +  return BURRSort(left, right, SPQ); +} + +// If the time between now and when the instruction will be ready can cover +// the spill code, then avoid adding it to the ready queue. This gives long +// stalls highest priority and allows hoisting across calls. It should also +// speed up processing the available queue. +bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { +  static const unsigned ReadyDelay = 3; + +  if (SPQ->MayReduceRegPressure(SU)) return true; + +  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false; + +  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) +      != ScheduleHazardRecognizer::NoHazard) +    return false; + +  return true; +} + +// Return true if right should be scheduled with higher priority than left. +bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { +  if (int res = checkSpecialNodes(left, right)) +    return res > 0; + +  if (left->isCall || right->isCall) +    // No way to compute latency of calls. +    return BURRSort(left, right, SPQ); + +  bool LHigh = SPQ->HighRegPressure(left); +  bool RHigh = SPQ->HighRegPressure(right); +  // Avoid causing spills. If register pressure is high, schedule for +  // register pressure reduction. 
+  if (LHigh && !RHigh) { +    LLVM_DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU(" +                      << right->NodeNum << ")\n"); +    return true; +  } +  else if (!LHigh && RHigh) { +    LLVM_DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU(" +                      << left->NodeNum << ")\n"); +    return false; +  } +  if (!LHigh && !RHigh) { +    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); +    if (result != 0) +      return result > 0; +  } +  return BURRSort(left, right, SPQ); +} + +// Schedule as many instructions in each cycle as possible. So don't make an +// instruction available unless it is ready in the current cycle. +bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { +  if (SU->getHeight() > CurCycle) return false; + +  if (SPQ->getHazardRec()->getHazardType(SU, 0) +      != ScheduleHazardRecognizer::NoHazard) +    return false; + +  return true; +} + +static bool canEnableCoalescing(SUnit *SU) { +  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; +  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) +    // CopyToReg should be close to its uses to facilitate coalescing and +    // avoid spilling. +    return true; + +  if (Opc == TargetOpcode::EXTRACT_SUBREG || +      Opc == TargetOpcode::SUBREG_TO_REG || +      Opc == TargetOpcode::INSERT_SUBREG) +    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be +    // close to their uses to facilitate coalescing. +    return true; + +  if (SU->NumPreds == 0 && SU->NumSuccs != 0) +    // If SU does not have a register def, schedule it close to its uses +    // because it does not lengthen any live ranges. +    return true; + +  return false; +} + +// list-ilp is currently an experimental scheduler that allows various +// heuristics to be enabled prior to the normal register reduction logic. +bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { +  if (int res = checkSpecialNodes(left, right)) +    return res > 0; + +  if (left->isCall || right->isCall) +    // No way to compute latency of calls. 
+    return BURRSort(left, right, SPQ); + +  unsigned LLiveUses = 0, RLiveUses = 0; +  int LPDiff = 0, RPDiff = 0; +  if (!DisableSchedRegPressure || !DisableSchedLiveUses) { +    LPDiff = SPQ->RegPressureDiff(left, LLiveUses); +    RPDiff = SPQ->RegPressureDiff(right, RLiveUses); +  } +  if (!DisableSchedRegPressure && LPDiff != RPDiff) { +    LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum +                      << "): " << LPDiff << " != SU(" << right->NodeNum +                      << "): " << RPDiff << "\n"); +    return LPDiff > RPDiff; +  } + +  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { +    bool LReduce = canEnableCoalescing(left); +    bool RReduce = canEnableCoalescing(right); +    if (LReduce && !RReduce) return false; +    if (RReduce && !LReduce) return true; +  } + +  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { +    LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses +                      << " != SU(" << right->NodeNum << "): " << RLiveUses +                      << "\n"); +    return LLiveUses < RLiveUses; +  } + +  if (!DisableSchedStalls) { +    bool LStall = BUHasStall(left, left->getHeight(), SPQ); +    bool RStall = BUHasStall(right, right->getHeight(), SPQ); +    if (LStall != RStall) +      return left->getHeight() > right->getHeight(); +  } + +  if (!DisableSchedCriticalPath) { +    int spread = (int)left->getDepth() - (int)right->getDepth(); +    if (std::abs(spread) > MaxReorderWindow) { +      LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " +                        << left->getDepth() << " != SU(" << right->NodeNum +                        << "): " << right->getDepth() << "\n"); +      return left->getDepth() < right->getDepth(); +    } +  } + +  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { +    int spread = (int)left->getHeight() - (int)right->getHeight(); +    if (std::abs(spread) > MaxReorderWindow) +      return left->getHeight() > right->getHeight(); +  } + +  return BURRSort(left, right, SPQ); +} + +void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { +  SUnits = &sunits; +  // Add pseudo dependency edges for two-address nodes. +  if (!Disable2AddrHack) +    AddPseudoTwoAddrDeps(); +  // Reroute edges to nodes with multiple uses. +  if (!TracksRegPressure && !SrcOrder) +    PrescheduleNodesWithMultipleUses(); +  // Calculate node priorities. +  CalculateSethiUllmanNumbers(); + +  // For single block loops, mark nodes that look like canonical IV increments. 
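// --- Editorial sketch, not part of the original file ------------------------
// BURRSort() and the comparators layered on it (source order, hybrid, ILP) are
// all tie-break cascades: test the most significant criterion, fall through to
// the next only on a tie, and finish with the queue id so the result is always
// deterministic.  The generic shape, with hypothetical keys whose comparison
// directions mirror the BURRSort code above:
struct ToySortKeys {
  unsigned Priority;    // e.g. the Sethi-Ullman based getNodePriority()
  unsigned DistToUse;   // e.g. closestSucc()
  unsigned Scratches;   // e.g. calcMaxScratches()
  unsigned QueueId;     // never equal between two queued nodes
};

inline bool toyCompare(const ToySortKeys &L, const ToySortKeys &R) {
  if (L.Priority != R.Priority)
    return L.Priority > R.Priority;
  if (L.DistToUse != R.DistToUse)
    return L.DistToUse < R.DistToUse;
  if (L.Scratches != R.Scratches)
    return L.Scratches > R.Scratches;
  return L.QueueId > R.QueueId;       // deterministic last resort
}
// ----------------------------------------------------------------------------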
+  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) +    for (SUnit &SU : sunits) +      initVRegCycle(&SU); +} + +//===----------------------------------------------------------------------===// +//                    Preschedule for Register Pressure +//===----------------------------------------------------------------------===// + +bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { +  if (SU->isTwoAddress) { +    unsigned Opc = SU->getNode()->getMachineOpcode(); +    const MCInstrDesc &MCID = TII->get(Opc); +    unsigned NumRes = MCID.getNumDefs(); +    unsigned NumOps = MCID.getNumOperands() - NumRes; +    for (unsigned i = 0; i != NumOps; ++i) { +      if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) { +        SDNode *DU = SU->getNode()->getOperand(i).getNode(); +        if (DU->getNodeId() != -1 && +            Op->OrigNode == &(*SUnits)[DU->getNodeId()]) +          return true; +      } +    } +  } +  return false; +} + +/// canClobberReachingPhysRegUse - True if SU would clobber one of it's +/// successor's explicit physregs whose definition can reach DepSU. +/// i.e. DepSU should not be scheduled above SU. +static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, +                                         ScheduleDAGRRList *scheduleDAG, +                                         const TargetInstrInfo *TII, +                                         const TargetRegisterInfo *TRI) { +  const MCPhysReg *ImpDefs +    = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); +  const uint32_t *RegMask = getNodeRegMask(SU->getNode()); +  if(!ImpDefs && !RegMask) +    return false; + +  for (const SDep &Succ : SU->Succs) { +    SUnit *SuccSU = Succ.getSUnit(); +    for (const SDep &SuccPred : SuccSU->Preds) { +      if (!SuccPred.isAssignedRegDep()) +        continue; + +      if (RegMask && +          MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) && +          scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit())) +        return true; + +      if (ImpDefs) +        for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef) +          // Return true if SU clobbers this physical register use and the +          // definition of the register reaches from DepSU. IsReachable queries +          // a topological forward sort of the DAG (following the successors). +          if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) && +              scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit())) +            return true; +    } +  } +  return false; +} + +/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's +/// physical register defs. 
+static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, +                                  const TargetInstrInfo *TII, +                                  const TargetRegisterInfo *TRI) { +  SDNode *N = SuccSU->getNode(); +  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); +  const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); +  assert(ImpDefs && "Caller should check hasPhysRegDefs"); +  for (const SDNode *SUNode = SU->getNode(); SUNode; +       SUNode = SUNode->getGluedNode()) { +    if (!SUNode->isMachineOpcode()) +      continue; +    const MCPhysReg *SUImpDefs = +      TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); +    const uint32_t *SURegMask = getNodeRegMask(SUNode); +    if (!SUImpDefs && !SURegMask) +      continue; +    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { +      MVT VT = N->getSimpleValueType(i); +      if (VT == MVT::Glue || VT == MVT::Other) +        continue; +      if (!N->hasAnyUseOfValue(i)) +        continue; +      unsigned Reg = ImpDefs[i - NumDefs]; +      if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg)) +        return true; +      if (!SUImpDefs) +        continue; +      for (;*SUImpDefs; ++SUImpDefs) { +        unsigned SUReg = *SUImpDefs; +        if (TRI->regsOverlap(Reg, SUReg)) +          return true; +      } +    } +  } +  return false; +} + +/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses +/// are not handled well by the general register pressure reduction +/// heuristics. When presented with code like this: +/// +///      N +///    / | +///   /  | +///  U  store +///  | +/// ... +/// +/// the heuristics tend to push the store up, but since the +/// operand of the store has another use (U), this would increase +/// the length of that other use (the U->N edge). +/// +/// This function transforms code like the above to route U's +/// dependence through the store when possible, like this: +/// +///      N +///      || +///      || +///     store +///       | +///       U +///       | +///      ... +/// +/// This results in the store being scheduled immediately +/// after N, which shortens the U->N live range, reducing +/// register pressure. +void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { +  // Visit all the nodes in topological order, working top-down. +  for (SUnit &SU : *SUnits) { +    // For now, only look at nodes with no data successors, such as stores. +    // These are especially important, due to the heuristics in +    // getNodePriority for nodes with no data successors. +    if (SU.NumSuccs != 0) +      continue; +    // For now, only look at nodes with exactly one data predecessor. +    if (SU.NumPreds != 1) +      continue; +    // Avoid prescheduling copies to virtual registers, which don't behave +    // like other nodes from the perspective of scheduling heuristics. +    if (SDNode *N = SU.getNode()) +      if (N->getOpcode() == ISD::CopyToReg && +          Register::isVirtualRegister( +              cast<RegisterSDNode>(N->getOperand(1))->getReg())) +        continue; + +    SDNode *PredFrameSetup = nullptr; +    for (const SDep &Pred : SU.Preds) +      if (Pred.isCtrl() && Pred.getSUnit()) { +        // Find the predecessor which is not data dependence. 
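// --- Editorial sketch, not part of the original file ------------------------
// The rerouting pictured in the comment above, restated on a toy adjacency
// list: every data successor of Pred other than SU is re-attached below SU, so
// the store-like node SU is scheduled immediately after Pred and the other
// users no longer extend Pred's live range past it.  ToyGraph is hypothetical
// and assumes SU is already a successor of Pred:
#include <vector>

struct ToyGraph {
  std::vector<std::vector<unsigned>> Succs;  // Succs[N] = data successors of N

  void rerouteThrough(unsigned Pred, unsigned SU) {
    for (unsigned S : Succs[Pred])
      if (S != SU)
        Succs[SU].push_back(S);     // U now depends on the store-like node
    Succs[Pred].assign(1, SU);      // Pred keeps only its edge to SU
  }
};
// ----------------------------------------------------------------------------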
+        SDNode *PredND = Pred.getSUnit()->getNode(); + +        // If PredND is FrameSetup, we should not pre-scheduled the node, +        // or else, when bottom up scheduling, ADJCALLSTACKDOWN and +        // ADJCALLSTACKUP may hold CallResource too long and make other +        // calls can't be scheduled. If there's no other available node +        // to schedule, the schedular will try to rename the register by +        // creating copy to avoid the conflict which will fail because +        // CallResource is not a real physical register. +        if (PredND && PredND->isMachineOpcode() && +            (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) { +          PredFrameSetup = PredND; +          break; +        } +      } +    // Skip the node has FrameSetup parent. +    if (PredFrameSetup != nullptr) +      continue; + +    // Locate the single data predecessor. +    SUnit *PredSU = nullptr; +    for (const SDep &Pred : SU.Preds) +      if (!Pred.isCtrl()) { +        PredSU = Pred.getSUnit(); +        break; +      } +    assert(PredSU); + +    // Don't rewrite edges that carry physregs, because that requires additional +    // support infrastructure. +    if (PredSU->hasPhysRegDefs) +      continue; +    // Short-circuit the case where SU is PredSU's only data successor. +    if (PredSU->NumSuccs == 1) +      continue; +    // Avoid prescheduling to copies from virtual registers, which don't behave +    // like other nodes from the perspective of scheduling heuristics. +    if (SDNode *N = SU.getNode()) +      if (N->getOpcode() == ISD::CopyFromReg && +          Register::isVirtualRegister( +              cast<RegisterSDNode>(N->getOperand(1))->getReg())) +        continue; + +    // Perform checks on the successors of PredSU. +    for (const SDep &PredSucc : PredSU->Succs) { +      SUnit *PredSuccSU = PredSucc.getSUnit(); +      if (PredSuccSU == &SU) continue; +      // If PredSU has another successor with no data successors, for +      // now don't attempt to choose either over the other. +      if (PredSuccSU->NumSuccs == 0) +        goto outer_loop_continue; +      // Don't break physical register dependencies. +      if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs) +        if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI)) +          goto outer_loop_continue; +      // Don't introduce graph cycles. +      if (scheduleDAG->IsReachable(&SU, PredSuccSU)) +        goto outer_loop_continue; +    } + +    // Ok, the transformation is safe and the heuristics suggest it is +    // profitable. Update the graph. +    LLVM_DEBUG( +        dbgs() << "    Prescheduling SU #" << SU.NodeNum << " next to PredSU #" +               << PredSU->NodeNum +               << " to guide scheduling in the presence of multiple uses\n"); +    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { +      SDep Edge = PredSU->Succs[i]; +      assert(!Edge.isAssignedRegDep()); +      SUnit *SuccSU = Edge.getSUnit(); +      if (SuccSU != &SU) { +        Edge.setSUnit(PredSU); +        scheduleDAG->RemovePred(SuccSU, Edge); +        scheduleDAG->AddPredQueued(&SU, Edge); +        Edge.setSUnit(&SU); +        scheduleDAG->AddPredQueued(SuccSU, Edge); +        --i; +      } +    } +  outer_loop_continue:; +  } +} + +/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses +/// it as a def&use operand. Add a pseudo control edge from it to the other +/// node (if it won't create a cycle) so the two-address one will be scheduled +/// first (lower in the schedule). 
If both nodes are two-address, favor the +/// one that has a CopyToReg use (more likely to be a loop induction update). +/// If both are two-address, but one is commutable while the other is not +/// commutable, favor the one that's not commutable. +void RegReductionPQBase::AddPseudoTwoAddrDeps() { +  for (SUnit &SU : *SUnits) { +    if (!SU.isTwoAddress) +      continue; + +    SDNode *Node = SU.getNode(); +    if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode()) +      continue; + +    bool isLiveOut = hasOnlyLiveOutUses(&SU); +    unsigned Opc = Node->getMachineOpcode(); +    const MCInstrDesc &MCID = TII->get(Opc); +    unsigned NumRes = MCID.getNumDefs(); +    unsigned NumOps = MCID.getNumOperands() - NumRes; +    for (unsigned j = 0; j != NumOps; ++j) { +      if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1) +        continue; +      SDNode *DU = SU.getNode()->getOperand(j).getNode(); +      if (DU->getNodeId() == -1) +        continue; +      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; +      if (!DUSU) +        continue; +      for (const SDep &Succ : DUSU->Succs) { +        if (Succ.isCtrl()) +          continue; +        SUnit *SuccSU = Succ.getSUnit(); +        if (SuccSU == &SU) +          continue; +        // Be conservative. Ignore if nodes aren't at roughly the same +        // depth and height. +        if (SuccSU->getHeight() < SU.getHeight() && +            (SU.getHeight() - SuccSU->getHeight()) > 1) +          continue; +        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge +        // constrains whatever is using the copy, instead of the copy +        // itself. In the case that the copy is coalesced, this +        // preserves the intent of the pseudo two-address heurietics. +        while (SuccSU->Succs.size() == 1 && +               SuccSU->getNode()->isMachineOpcode() && +               SuccSU->getNode()->getMachineOpcode() == +                 TargetOpcode::COPY_TO_REGCLASS) +          SuccSU = SuccSU->Succs.front().getSUnit(); +        // Don't constrain non-instruction nodes. +        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) +          continue; +        // Don't constrain nodes with physical register defs if the +        // predecessor can clobber them. +        if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) { +          if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI)) +            continue; +        } +        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; +        // these may be coalesced away. We want them close to their uses. 
+        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); +        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || +            SuccOpc == TargetOpcode::INSERT_SUBREG || +            SuccOpc == TargetOpcode::SUBREG_TO_REG) +          continue; +        if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) && +            (!canClobber(SuccSU, DUSU) || +             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || +             (!SU.isCommutable && SuccSU->isCommutable)) && +            !scheduleDAG->IsReachable(SuccSU, &SU)) { +          LLVM_DEBUG(dbgs() +                     << "    Adding a pseudo-two-addr edge from SU #" +                     << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); +          scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial)); +        } +      } +    } +  } +} + +//===----------------------------------------------------------------------===// +//                         Public Constructor Functions +//===----------------------------------------------------------------------===// + +ScheduleDAGSDNodes * +llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, +                                 CodeGenOpt::Level OptLevel) { +  const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); +  const TargetInstrInfo *TII = STI.getInstrInfo(); +  const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + +  BURegReductionPriorityQueue *PQ = +    new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr); +  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); +  PQ->setScheduleDAG(SD); +  return SD; +} + +ScheduleDAGSDNodes * +llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, +                                   CodeGenOpt::Level OptLevel) { +  const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); +  const TargetInstrInfo *TII = STI.getInstrInfo(); +  const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + +  SrcRegReductionPriorityQueue *PQ = +    new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr); +  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); +  PQ->setScheduleDAG(SD); +  return SD; +} + +ScheduleDAGSDNodes * +llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, +                                   CodeGenOpt::Level OptLevel) { +  const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); +  const TargetInstrInfo *TII = STI.getInstrInfo(); +  const TargetRegisterInfo *TRI = STI.getRegisterInfo(); +  const TargetLowering *TLI = IS->TLI; + +  HybridBURRPriorityQueue *PQ = +    new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); + +  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); +  PQ->setScheduleDAG(SD); +  return SD; +} + +ScheduleDAGSDNodes * +llvm::createILPListDAGScheduler(SelectionDAGISel *IS, +                                CodeGenOpt::Level OptLevel) { +  const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); +  const TargetInstrInfo *TII = STI.getInstrInfo(); +  const TargetRegisterInfo *TRI = STI.getRegisterInfo(); +  const TargetLowering *TLI = IS->TLI; + +  ILPBURRPriorityQueue *PQ = +    new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); +  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); +  PQ->setScheduleDAG(SD); +  return SD; +} | 
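// --- Editorial usage note, not part of the original file --------------------
// Each factory above builds the matching priority queue, wires it to a
// ScheduleDAGRRList, and returns the pair for SelectionDAGISel to drive.  As
// far as I can tell, the heuristic can also be chosen on the llc command line
// through the pre-RA scheduling option these schedulers register under, e.g.
// (flag spelling assumed):
//
//   llc -pre-RA-sched=source input.ll -o input.s
// ----------------------------------------------------------------------------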
