src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2019-12-20 19:53:05 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2019-12-20 19:53:05 +0000
commit	0b57cec536236d46e3dba9bd041533462f33dbb7 (patch)
tree	56229dbdbbf76d18580f72f789003db17246c8d9 /contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
parent	718ef55ec7785aae63f98f8ca05dc07ed399c16d (diff)

Notes

Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp')

-rw-r--r--

contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp

3757

1 files changed, 3757 insertions, 0 deletions

diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
new file mode 100644
index 000000000000..ae1170ad1be6
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp

@@ -0,0 +1,3757 @@

+//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//

+//

+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

+// See https://llvm.org/LICENSE.txt for license information.

+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

+//

+//===----------------------------------------------------------------------===//

+//

+// MachineScheduler schedules machine instructions after phi elimination. It

+// preserves LiveIntervals so it can be invoked before register allocation.

+//

+//===----------------------------------------------------------------------===//

+#include "llvm/CodeGen/MachineScheduler.h"

+#include "llvm/ADT/ArrayRef.h"

+#include "llvm/ADT/BitVector.h"

+#include "llvm/ADT/DenseMap.h"

+#include "llvm/ADT/PriorityQueue.h"

+#include "llvm/ADT/STLExtras.h"

+#include "llvm/ADT/SmallVector.h"

+#include "llvm/ADT/iterator_range.h"

+#include "llvm/Analysis/AliasAnalysis.h"

+#include "llvm/CodeGen/LiveInterval.h"

+#include "llvm/CodeGen/LiveIntervals.h"

+#include "llvm/CodeGen/MachineBasicBlock.h"

+#include "llvm/CodeGen/MachineDominators.h"

+#include "llvm/CodeGen/MachineFunction.h"

+#include "llvm/CodeGen/MachineFunctionPass.h"

+#include "llvm/CodeGen/MachineInstr.h"

+#include "llvm/CodeGen/MachineLoopInfo.h"

+#include "llvm/CodeGen/MachineOperand.h"

+#include "llvm/CodeGen/MachinePassRegistry.h"

+#include "llvm/CodeGen/MachineRegisterInfo.h"

+#include "llvm/CodeGen/Passes.h"

+#include "llvm/CodeGen/RegisterClassInfo.h"

+#include "llvm/CodeGen/RegisterPressure.h"

+#include "llvm/CodeGen/ScheduleDAG.h"

+#include "llvm/CodeGen/ScheduleDAGInstrs.h"

+#include "llvm/CodeGen/ScheduleDAGMutation.h"

+#include "llvm/CodeGen/ScheduleDFS.h"

+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"

+#include "llvm/CodeGen/SlotIndexes.h"

+#include "llvm/CodeGen/TargetFrameLowering.h"

+#include "llvm/CodeGen/TargetInstrInfo.h"

+#include "llvm/CodeGen/TargetLowering.h"

+#include "llvm/CodeGen/TargetPassConfig.h"

+#include "llvm/CodeGen/TargetRegisterInfo.h"

+#include "llvm/CodeGen/TargetSchedule.h"

+#include "llvm/CodeGen/TargetSubtargetInfo.h"

+#include "llvm/Config/llvm-config.h"

+#include "llvm/MC/LaneBitmask.h"

+#include "llvm/Pass.h"

+#include "llvm/Support/CommandLine.h"

+#include "llvm/Support/Compiler.h"

+#include "llvm/Support/Debug.h"

+#include "llvm/Support/ErrorHandling.h"

+#include "llvm/Support/GraphWriter.h"

+#include "llvm/Support/MachineValueType.h"

+#include "llvm/Support/raw_ostream.h"

+#include <algorithm>

+#include <cassert>

+#include <cstdint>

+#include <iterator>

+#include <limits>

+#include <memory>

+#include <string>

+#include <tuple>

+#include <utility>

+#include <vector>

+using namespace llvm;

+#define DEBUG_TYPE "machine-scheduler"

+namespace llvm {

+cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,

+ cl::desc("Force top-down list scheduling"));

+cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,

+ cl::desc("Force bottom-up list scheduling"));

+cl::opt<bool>

+DumpCriticalPathLength("misched-dcpl", cl::Hidden,

+ cl::desc("Print critical path length to stdout"));

+} // end namespace llvm

+#ifndef NDEBUG

+static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,

+ cl::desc("Pop up a window to show MISched dags after they are processed"));

+/// In some situations a few uninteresting nodes depend on nearly all other

+/// nodes in the graph; provide a cutoff to hide them.

+static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,

+ cl::desc("Hide nodes with more predecessor/successor than cutoff"));

+static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,

+ cl::desc("Stop scheduling after N instructions"), cl::init(~0U));

+static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,

+ cl::desc("Only schedule this function"));

+static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,

+ cl::desc("Only schedule this MBB#"));

+static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,

+ cl::desc("Print schedule DAGs"));

+#else

+static const bool ViewMISchedDAGs = false;

+static const bool PrintDAGs = false;

+#endif // NDEBUG

+/// Avoid quadratic complexity in unusually large basic blocks by limiting the

+/// size of the ready lists.

+static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,

+ cl::desc("Limit ready list to N instructions"), cl::init(256));

+static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,

+ cl::desc("Enable register pressure scheduling."), cl::init(true));

+static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,

+ cl::desc("Enable cyclic critical path analysis."), cl::init(true));

+static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,

+ cl::desc("Enable memop clustering."),

+ cl::init(true));

+static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,

+ cl::desc("Verify machine instrs before and after machine scheduling"));

+// DAG subtrees must have at least this many nodes.

+static const unsigned MinSubtreeSize = 8;

+// Pin the vtables to this file.

+void MachineSchedStrategy::anchor() {}

+void ScheduleDAGMutation::anchor() {}

+//===----------------------------------------------------------------------===//

+// Machine Instruction Scheduling Pass and Registry

+//===----------------------------------------------------------------------===//

+MachineSchedContext::MachineSchedContext() {

+ RegClassInfo = new RegisterClassInfo();

+MachineSchedContext::~MachineSchedContext() {

+ delete RegClassInfo;

+namespace {

+/// Base class for a machine scheduler class that can run at any point.

+class MachineSchedulerBase : public MachineSchedContext,

+ public MachineFunctionPass {

+public:

+ MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}

+ void print(raw_ostream &O, const Module* = nullptr) const override;

+protected:

+ void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);

+};

+/// MachineScheduler runs after coalescing and before register allocation.

+class MachineScheduler : public MachineSchedulerBase {

+public:

+ MachineScheduler();

+ void getAnalysisUsage(AnalysisUsage &AU) const override;

+ bool runOnMachineFunction(MachineFunction&) override;

+ static char ID; // Class identification, replacement for typeinfo

+protected:

+ ScheduleDAGInstrs *createMachineScheduler();

+};

+/// PostMachineScheduler runs after register allocation, shortly before code
+/// emission.

+class PostMachineScheduler : public MachineSchedulerBase {

+public:

+ PostMachineScheduler();

+ void getAnalysisUsage(AnalysisUsage &AU) const override;

+ bool runOnMachineFunction(MachineFunction&) override;

+ static char ID; // Class identification, replacement for typeinfo

+protected:

+ ScheduleDAGInstrs *createPostMachineScheduler();

+};

+} // end anonymous namespace

+char MachineScheduler::ID = 0;

+char &llvm::MachineSchedulerID = MachineScheduler::ID;

+INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,

+ "Machine Instruction Scheduler", false, false)

+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)

+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)

+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)

+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)

+INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,

+ "Machine Instruction Scheduler", false, false)

+MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {

+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());

+void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {

+ AU.setPreservesCFG();

+ AU.addRequiredID(MachineDominatorsID);

+ AU.addRequired<MachineLoopInfo>();

+ AU.addRequired<AAResultsWrapperPass>();

+ AU.addRequired<TargetPassConfig>();

+ AU.addRequired<SlotIndexes>();

+ AU.addPreserved<SlotIndexes>();

+ AU.addRequired<LiveIntervals>();

+ AU.addPreserved<LiveIntervals>();

+ MachineFunctionPass::getAnalysisUsage(AU);

+char PostMachineScheduler::ID = 0;

+char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;

+INITIALIZE_PASS(PostMachineScheduler, "postmisched",

+ "PostRA Machine Instruction Scheduler", false, false)

+PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {

+ initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());

+void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {

+ AU.setPreservesCFG();

+ AU.addRequiredID(MachineDominatorsID);

+ AU.addRequired<MachineLoopInfo>();

+ AU.addRequired<TargetPassConfig>();

+ MachineFunctionPass::getAnalysisUsage(AU);

+MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>

+ MachineSchedRegistry::Registry;

+/// A dummy default scheduler factory indicates whether the scheduler

+/// is overridden on the command line.

+static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {

+ return nullptr;

+/// MachineSchedOpt allows command line selection of the scheduler.

+static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,

+ RegisterPassParser<MachineSchedRegistry>>

+MachineSchedOpt("misched",

+ cl::init(&useDefaultMachineSched), cl::Hidden,

+ cl::desc("Machine instruction scheduler to use"));

+static MachineSchedRegistry

+DefaultSchedRegistry("default", "Use the target's default scheduler choice.",

+ useDefaultMachineSched);

+static cl::opt<bool> EnableMachineSched(

+ "enable-misched",

+ cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),

+ cl::Hidden);

+static cl::opt<bool> EnablePostRAMachineSched(

+ "enable-post-misched",

+ cl::desc("Enable the post-ra machine instruction scheduling pass."),

+ cl::init(true), cl::Hidden);

+/// Decrement this iterator until reaching the top or a non-debug instr.

+static MachineBasicBlock::const_iterator

+priorNonDebug(MachineBasicBlock::const_iterator I,

+ MachineBasicBlock::const_iterator Beg) {

+ assert(I != Beg && "reached the top of the region, cannot decrement");

+ while (--I != Beg) {

+ if (!I->isDebugInstr())

+ break;

+ }

+ return I;

+/// Non-const version.

+static MachineBasicBlock::iterator

+priorNonDebug(MachineBasicBlock::iterator I,

+ MachineBasicBlock::const_iterator Beg) {

+ return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)

+ .getNonConstIterator();

+/// If this iterator is a debug value, increment until reaching the End or a

+/// non-debug instruction.

+static MachineBasicBlock::const_iterator

+nextIfDebug(MachineBasicBlock::const_iterator I,

+ MachineBasicBlock::const_iterator End) {

+ for(; I != End; ++I) {

+ if (!I->isDebugInstr())

+ break;

+ }

+ return I;

+/// Non-const version.

+static MachineBasicBlock::iterator

+nextIfDebug(MachineBasicBlock::iterator I,

+ MachineBasicBlock::const_iterator End) {

+ return nextIfDebug(MachineBasicBlock::const_iterator(I), End)

+ .getNonConstIterator();

+/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.

+ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {

+ // Select the scheduler, or set the default.

+ MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;

+ if (Ctor != useDefaultMachineSched)

+ return Ctor(this);

+ // Get the default scheduler set by the target for this function.

+ ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);

+ if (Scheduler)

+ return Scheduler;

+ // Default to GenericScheduler.

+ return createGenericSchedLive(this);

+/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by

+/// the caller. We don't have a command line option to override the postRA

+/// scheduler. The Target must configure it.

+ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {

+ // Get the postRA scheduler set by the target for this function.

+ ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);

+ if (Scheduler)

+ return Scheduler;

+ // Default to GenericScheduler.

+ return createGenericSchedPostRA(this);

+/// Top-level MachineScheduler pass driver.

+///

+/// Visit blocks in function order. Divide each block into scheduling regions

+/// and visit them bottom-up. Visiting regions bottom-up is not required, but is

+/// consistent with the DAG builder, which traverses the interior of the

+/// scheduling regions bottom-up.

+///

+/// This design avoids exposing scheduling boundaries to the DAG builder,

+/// simplifying the DAG builder's support for "special" target instructions.

+/// At the same time the design allows target schedulers to operate across

+/// scheduling boundaries, for example to bundle the boundary instructions

+/// without reordering them. This creates complexity, because the target

+/// scheduler must update the RegionBegin and RegionEnd positions cached by

+/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler

+/// design would be to split blocks at scheduling boundaries, but LLVM has a

+/// general bias against block splitting purely for implementation simplicity.

+bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {

+ if (skipFunction(mf.getFunction()))

+ return false;

+ if (EnableMachineSched.getNumOccurrences()) {

+ if (!EnableMachineSched)

+ return false;

+ } else if (!mf.getSubtarget().enableMachineScheduler())

+ return false;

+ LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));

+ // Initialize the context of the pass.

+ MF = &mf;

+ MLI = &getAnalysis<MachineLoopInfo>();

+ MDT = &getAnalysis<MachineDominatorTree>();

+ PassConfig = &getAnalysis<TargetPassConfig>();

+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

+ LIS = &getAnalysis<LiveIntervals>();

+ if (VerifyScheduling) {

+ LLVM_DEBUG(LIS->dump());

+ MF->verify(this, "Before machine scheduling.");

+ }

+ RegClassInfo->runOnMachineFunction(*MF);

+ // Instantiate the selected scheduler for this target, function, and

+ // optimization level.

+ std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());

+ scheduleRegions(*Scheduler, false);

+ LLVM_DEBUG(LIS->dump());

+ if (VerifyScheduling)

+ MF->verify(this, "After machine scheduling.");

+ return true;

+bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {

+ if (skipFunction(mf.getFunction()))

+ return false;

+ if (EnablePostRAMachineSched.getNumOccurrences()) {

+ if (!EnablePostRAMachineSched)

+ return false;

+ } else if (!mf.getSubtarget().enablePostRAScheduler()) {

+ LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");

+ return false;

+ }

+ LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));

+ // Initialize the context of the pass.

+ MF = &mf;

+ MLI = &getAnalysis<MachineLoopInfo>();

+ PassConfig = &getAnalysis<TargetPassConfig>();

+ if (VerifyScheduling)

+ MF->verify(this, "Before post machine scheduling.");

+ // Instantiate the selected scheduler for this target, function, and

+ // optimization level.

+ std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());

+ scheduleRegions(*Scheduler, true);

+ if (VerifyScheduling)

+ MF->verify(this, "After post machine scheduling.");

+ return true;

+/// Return true if the given instruction should not be included in a scheduling

+/// region.

+///

+/// MachineScheduler does not currently support scheduling across calls. To

+/// handle calls, the DAG builder needs to be modified to create register

+/// anti/output dependencies on the registers clobbered by the call's regmask

+/// operand. In PreRA scheduling, the stack pointer adjustment already prevents

+/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce

+/// the boundary, but there would be no benefit to postRA scheduling across

+/// calls this late anyway.

+static bool isSchedBoundary(MachineBasicBlock::iterator MI,

+ MachineBasicBlock *MBB,

+ MachineFunction *MF,

+ const TargetInstrInfo *TII) {

+ return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);

+/// A region of an MBB for scheduling.

+namespace {

+struct SchedRegion {

+ /// RegionBegin is the first instruction in the scheduling region, and

+ /// RegionEnd is either MBB->end() or the scheduling boundary after the

+ /// last instruction in the scheduling region. These iterators cannot refer

+ /// to instructions outside of the identified scheduling region because

+ /// those may be reordered before scheduling this region.

+ MachineBasicBlock::iterator RegionBegin;

+ MachineBasicBlock::iterator RegionEnd;

+ unsigned NumRegionInstrs;

+ SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,

+ unsigned N) :

+ RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}

+};

+} // end anonymous namespace

+using MBBRegionsVector = SmallVector<SchedRegion, 16>;

+static void

+getSchedRegions(MachineBasicBlock *MBB,

+ MBBRegionsVector &Regions,

+ bool RegionsTopDown) {

+ MachineFunction *MF = MBB->getParent();

+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

+ MachineBasicBlock::iterator I = nullptr;

+ for(MachineBasicBlock::iterator RegionEnd = MBB->end();

+ RegionEnd != MBB->begin(); RegionEnd = I) {

+ // Avoid decrementing RegionEnd for blocks with no terminator.

+ if (RegionEnd != MBB->end() ||

+ isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {

+ --RegionEnd;

+ }

+ // The next region starts above the previous region. Look backward in the

+ // instruction stream until we find the nearest boundary.

+ unsigned NumRegionInstrs = 0;

+ I = RegionEnd;

+ for (;I != MBB->begin(); --I) {

+ MachineInstr &MI = *std::prev(I);

+ if (isSchedBoundary(&MI, &*MBB, MF, TII))

+ break;

+ if (!MI.isDebugInstr()) {

+ // MBB::size() uses instr_iterator to count. Here we need a bundle to

+ // count as a single instruction.

+ ++NumRegionInstrs;

+ }

+ // It's possible we found a scheduling region that only has debug

+ // instructions. Don't bother scheduling these.

+ if (NumRegionInstrs != 0)

+ Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));

+ }

+ if (RegionsTopDown)

+ std::reverse(Regions.begin(), Regions.end());

+/// Main driver for both MachineScheduler and PostMachineScheduler.

+void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,

+ bool FixKillFlags) {

+ // Visit all machine basic blocks.

+ //

+ // TODO: Visit blocks in global postorder or postorder within the bottom-up

+ // loop tree. Then we can optionally compute global RegPressure.

+ for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();

+ MBB != MBBEnd; ++MBB) {

+ Scheduler.startBlock(&*MBB);

+#ifndef NDEBUG

+ if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())

+ continue;

+ if (SchedOnlyBlock.getNumOccurrences()

+ && (int)SchedOnlyBlock != MBB->getNumber())

+ continue;

+#endif

+ // Break the block into scheduling regions [I, RegionEnd). RegionEnd

+ // points to the scheduling boundary at the bottom of the region. The DAG

+ // does not include RegionEnd, but the region does (i.e. the next

+ // RegionEnd is above the previous RegionBegin). If the current block has

+ // no terminator then RegionEnd == MBB->end() for the bottom region.

+ //

+ // All the regions of MBB are first found and stored in MBBRegions, which

+ // will be processed (MBB) top-down if initialized with true.

+ //

+ // The Scheduler may insert instructions during either schedule() or

+ // exitRegion(), even for empty regions. So the local iterators 'I' and

+ // 'RegionEnd' are invalid across these calls. Instructions must not be

+ // added to other regions than the current one without updating MBBRegions.

+ MBBRegionsVector MBBRegions;

+ getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());

+ for (MBBRegionsVector::iterator R = MBBRegions.begin();

+ R != MBBRegions.end(); ++R) {

+ MachineBasicBlock::iterator I = R->RegionBegin;

+ MachineBasicBlock::iterator RegionEnd = R->RegionEnd;

+ unsigned NumRegionInstrs = R->NumRegionInstrs;

+ // Notify the scheduler of the region, even if we may skip scheduling

+ // it. Perhaps it still needs to be bundled.

+ Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);

+ // Skip empty scheduling regions (0 or 1 schedulable instructions).

+ if (I == RegionEnd || I == std::prev(RegionEnd)) {

+ // Close the current region. Bundle the terminator if needed.

+ // This invalidates 'RegionEnd' and 'I'.

+ Scheduler.exitRegion();

+ continue;

+ }

+ LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");

+ LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)

+ << " " << MBB->getName() << "\n From: " << *I

+ << " To: ";

+ if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;

+ else dbgs() << "End";

+ dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

+ if (DumpCriticalPathLength) {

+ errs() << MF->getName();

+ errs() << ":%bb. " << MBB->getNumber();

+ errs() << " " << MBB->getName() << " \n";

+ }

+ // Schedule a region: possibly reorder instructions.

+ // This invalidates the original region iterators.

+ Scheduler.schedule();

+ // Close the current region.

+ Scheduler.exitRegion();

+ }

+ Scheduler.finishBlock();

+ // FIXME: Ideally, no further passes should rely on kill flags. However,

+ // thumb2 size reduction is currently an exception, so the PostMIScheduler

+ // needs to do this.

+ if (FixKillFlags)

+ Scheduler.fixupKills(*MBB);

+ }

+ Scheduler.finalizeSchedule();

+void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {

+ // unimplemented

+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

+LLVM_DUMP_METHOD void ReadyQueue::dump() const {

+ dbgs() << "Queue " << Name << ": ";

+ for (const SUnit *SU : Queue)

+ dbgs() << SU->NodeNum << " ";

+ dbgs() << "\n";

+#endif

+//===----------------------------------------------------------------------===//

+// ScheduleDAGMI - Basic machine instruction scheduling. This is

+// independent of PreRA/PostRA scheduling and involves no extra book-keeping for

+// virtual registers.

+//===----------------------------------------------------------------------===//

+// Provide a vtable anchor.

+ScheduleDAGMI::~ScheduleDAGMI() = default;

+/// releaseSucc - Decrement the NumPredsLeft count of a successor. When

+/// NumPredsLeft reaches zero, release the successor node.

+///

+/// FIXME: Adjust SuccSU height based on MinLatency.

+void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {

+ SUnit *SuccSU = SuccEdge->getSUnit();

+ if (SuccEdge->isWeak()) {

+ --SuccSU->WeakPredsLeft;

+ if (SuccEdge->isCluster())

+ NextClusterSucc = SuccSU;

+ return;

+ }

+#ifndef NDEBUG

+ if (SuccSU->NumPredsLeft == 0) {

+ dbgs() << "*** Scheduling failed! ***\n";

+ dumpNode(*SuccSU);

+ dbgs() << " has been released too many times!\n";

+ llvm_unreachable(nullptr);

+ }

+#endif

+ // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,

+ // CurrCycle may have advanced since then.

+ if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())

+ SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();

+ --SuccSU->NumPredsLeft;

+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)

+ SchedImpl->releaseTopNode(SuccSU);

+/// releaseSuccessors - Call releaseSucc on each of SU's successors.

+void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {

+ for (SDep &Succ : SU->Succs)

+ releaseSucc(SU, &Succ);

+/// releasePred - Decrement the NumSuccsLeft count of a predecessor. When

+/// NumSuccsLeft reaches zero, release the predecessor node.

+///

+/// FIXME: Adjust PredSU height based on MinLatency.

+void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {

+ SUnit *PredSU = PredEdge->getSUnit();

+ if (PredEdge->isWeak()) {

+ --PredSU->WeakSuccsLeft;

+ if (PredEdge->isCluster())

+ NextClusterPred = PredSU;

+ return;

+ }

+#ifndef NDEBUG

+ if (PredSU->NumSuccsLeft == 0) {

+ dbgs() << "*** Scheduling failed! ***\n";

+ dumpNode(*PredSU);

+ dbgs() << " has been released too many times!\n";

+ llvm_unreachable(nullptr);

+ }

+#endif

+ // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,

+ // CurrCycle may have advanced since then.

+ if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())

+ PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();

+ --PredSU->NumSuccsLeft;

+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)

+ SchedImpl->releaseBottomNode(PredSU);

+/// releasePredecessors - Call releasePred on each of SU's predecessors.

+void ScheduleDAGMI::releasePredecessors(SUnit *SU) {

+ for (SDep &Pred : SU->Preds)

+ releasePred(SU, &Pred);

+void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {

+ ScheduleDAGInstrs::startBlock(bb);

+ SchedImpl->enterMBB(bb);

+void ScheduleDAGMI::finishBlock() {

+ SchedImpl->leaveMBB();

+ ScheduleDAGInstrs::finishBlock();

+/// enterRegion - Called back from MachineSchedulerBase::scheduleRegions after

+/// crossing a scheduling boundary. [begin, end) includes all instructions in

+/// the region, including the boundary itself and single-instruction regions

+/// that don't get scheduled.

+void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,

+ MachineBasicBlock::iterator begin,

+ MachineBasicBlock::iterator end,

+ unsigned regioninstrs)

+ ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);

+ SchedImpl->initPolicy(begin, end, regioninstrs);

+/// This is normally called from the main scheduler loop but may also be invoked

+/// by the scheduling strategy to perform additional code motion.

+void ScheduleDAGMI::moveInstruction(

+ MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {

+ // Advance RegionBegin if the first instruction moves down.

+ if (&*RegionBegin == MI)

+ ++RegionBegin;

+ // Update the instruction stream.

+ BB->splice(InsertPos, BB, MI);

+ // Update LiveIntervals

+ if (LIS)

+ LIS->handleMove(*MI, /*UpdateFlags=*/true);

+ // Recede RegionBegin if an instruction moves above the first.

+ if (RegionBegin == InsertPos)

+ RegionBegin = MI;

+bool ScheduleDAGMI::checkSchedLimit() {

+#ifndef NDEBUG

+ if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {

+ CurrentTop = CurrentBottom;

+ return false;

+ }

+ ++NumInstrsScheduled;

+#endif

+ return true;

+/// Per-region scheduling driver, called back from

+/// MachineSchedulerBase::scheduleRegions. This is a simplified driver that

+/// does not consider liveness or register pressure. It is useful for PostRA

+/// scheduling and potentially other custom schedulers.

+void ScheduleDAGMI::schedule() {

+ LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");

+ LLVM_DEBUG(SchedImpl->dumpPolicy());

+ // Build the DAG.

+ buildSchedGraph(AA);

+ postprocessDAG();

+ SmallVector<SUnit*, 8> TopRoots, BotRoots;

+ findRootsAndBiasEdges(TopRoots, BotRoots);

+ LLVM_DEBUG(dump());

+ if (PrintDAGs) dump();

+ if (ViewMISchedDAGs) viewGraph();

+ // Initialize the strategy before modifying the DAG.

+ // This may initialize a DFSResult to be used for queue priority.

+ SchedImpl->initialize(this);

+ // Initialize ready queues now that the DAG and priority data are finalized.

+ initQueues(TopRoots, BotRoots);

+ bool IsTopNode = false;

+ while (true) {

+ LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");

+ SUnit *SU = SchedImpl->pickNode(IsTopNode);

+ if (!SU) break;

+ assert(!SU->isScheduled && "Node already scheduled");

+ if (!checkSchedLimit())

+ break;

+ MachineInstr *MI = SU->getInstr();

+ if (IsTopNode) {

+ assert(SU->isTopReady() && "node still has unscheduled dependencies");

+ if (&*CurrentTop == MI)

+ CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);

+ else

+ moveInstruction(MI, CurrentTop);

+ } else {

+ assert(SU->isBottomReady() && "node still has unscheduled dependencies");

+ MachineBasicBlock::iterator priorII =

+ priorNonDebug(CurrentBottom, CurrentTop);

+ if (&*priorII == MI)

+ CurrentBottom = priorII;

+ else {

+ if (&*CurrentTop == MI)

+ CurrentTop = nextIfDebug(++CurrentTop, priorII);

+ moveInstruction(MI, CurrentBottom);

+ CurrentBottom = MI;

+ }

+ // Notify the scheduling strategy before updating the DAG.

+ // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues

+ // runs, it can then use the accurate ReadyCycle time to determine whether

+ // newly released nodes can move to the readyQ.

+ SchedImpl->schedNode(SU, IsTopNode);

+ updateQueues(SU, IsTopNode);

+ }

+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

+ placeDebugValues();

+ LLVM_DEBUG({

+ dbgs() << "*** Final schedule for "

+ << printMBBReference(*begin()->getParent()) << " ***\n";

+ dumpSchedule();

+ dbgs() << '\n';

+ });

+/// Apply each ScheduleDAGMutation step in order.

+void ScheduleDAGMI::postprocessDAG() {

+ for (auto &m : Mutations)

+ m->apply(this);

+void ScheduleDAGMI::

+findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,

+ SmallVectorImpl<SUnit*> &BotRoots) {

+ for (SUnit &SU : SUnits) {

+ assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");

+ // Order predecessors so DFSResult follows the critical path.

+ SU.biasCriticalPath();

+ // A SUnit is ready to top schedule if it has no predecessors.

+ if (!SU.NumPredsLeft)

+ TopRoots.push_back(&SU);

+ // A SUnit is ready to bottom schedule if it has no successors.

+ if (!SU.NumSuccsLeft)

+ BotRoots.push_back(&SU);

+ }

+ ExitSU.biasCriticalPath();

+/// Identify DAG roots and setup scheduler queues.

+void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,

+ ArrayRef<SUnit*> BotRoots) {

+ NextClusterSucc = nullptr;

+ NextClusterPred = nullptr;

+ // Release all DAG roots for scheduling, not including EntrySU/ExitSU.

+ //

+ // Nodes with unreleased weak edges can still be roots.

+ // Release top roots in forward order.

+ for (SUnit *SU : TopRoots)

+ SchedImpl->releaseTopNode(SU);

+ // Release bottom roots in reverse order so the higher priority nodes appear

+ // first. This is more natural and slightly more efficient.

+ for (SmallVectorImpl<SUnit*>::const_reverse_iterator

+ I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {

+ SchedImpl->releaseBottomNode(*I);

+ }

+ releaseSuccessors(&EntrySU);

+ releasePredecessors(&ExitSU);

+ SchedImpl->registerRoots();

+ // Advance past initial DebugValues.

+ CurrentTop = nextIfDebug(RegionBegin, RegionEnd);

+ CurrentBottom = RegionEnd;

+/// Update scheduler queues after scheduling an instruction.

+void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {

+ // Release dependent instructions for scheduling.

+ if (IsTopNode)

+ releaseSuccessors(SU);

+ else

+ releasePredecessors(SU);

+ SU->isScheduled = true;

+/// Reinsert any remaining debug_values, just like the PostRA scheduler. Clears DbgValues and FirstDbgValue when done.

+void ScheduleDAGMI::placeDebugValues() {

+ // If first instruction was a DBG_VALUE then put it back.

+ if (FirstDbgValue) {

+ BB->splice(RegionBegin, BB, FirstDbgValue);

+ RegionBegin = FirstDbgValue; // The region now starts at the restored DBG_VALUE.

+ }

+ for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator

+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { // Walk DbgValues back to front; DI is one past the entry being processed.

+ std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);

+ MachineInstr *DbgValue = P.first;

+ MachineBasicBlock::iterator OrigPrevMI = P.second; // Instruction that DbgValue is re-inserted after.

+ if (&*RegionBegin == DbgValue)

+ ++RegionBegin; // Step RegionBegin off the instruction that is about to be moved.

+ BB->splice(++OrigPrevMI, BB, DbgValue); // Places DbgValue immediately after the recorded predecessor.

+ if (OrigPrevMI == std::prev(RegionEnd))

+ RegionEnd = DbgValue;

+ }

+ DbgValues.clear();

+ FirstDbgValue = nullptr;

+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

+LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const { // Debug helper: dump the SUnit of every instruction in [begin(), end()) in order.

+ for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {

+ if (SUnit *SU = getSUnit(&(*MI)))

+ dumpNode(*SU);

+ else

+ dbgs() << "Missing SUnit\n"; // Instruction in the region has no associated SUnit.

+ }

+#endif

+//===----------------------------------------------------------------------===//

+// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals

+// preservation.

+//===----------------------------------------------------------------------===//

+ScheduleDAGMILive::~ScheduleDAGMILive() { // Releases the DFSResult held through a raw pointer.

+ delete DFSResult; // Safe when DFSResult is null: deleting a null pointer is a no-op.

+void ScheduleDAGMILive::collectVRegUses(SUnit &SU) { // Record in VRegUses each virtual register read by SU's instruction, at most once per (register, SU) pair.

+ const MachineInstr &MI = *SU.getInstr();

+ for (const MachineOperand &MO : MI.operands()) {

+ if (!MO.isReg())

+ continue;

+ if (!MO.readsReg())

+ continue;

+ if (TrackLaneMasks && !MO.isUse()) // With lane masks tracked, operands that are not uses are skipped.

+ continue;

+ unsigned Reg = MO.getReg();

+ if (!TargetRegisterInfo::isVirtualRegister(Reg))

+ continue;

+ // Ignore re-defs: with lane masks tracked, skip Reg if this instruction also has a non-dead def of it.

+ if (TrackLaneMasks) {

+ bool FoundDef = false;

+ for (const MachineOperand &MO2 : MI.operands()) {

+ if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {

+ FoundDef = true;

+ break;

+ }

+ if (FoundDef)

+ continue;

+ }

+ // Record this local VReg use, unless SU is already recorded for Reg.

+ VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);

+ for (; UI != VRegUses.end(); ++UI) {

+ if (UI->SU == &SU)

+ break;

+ }

+ if (UI == VRegUses.end())

+ VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU)); // The entry is stored with an empty lane mask.

+ }

+/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after

+/// crossing a scheduling boundary. [begin, end) includes all instructions in

+/// the region, including the boundary itself and single-instruction regions

+/// that don't get scheduled. Also caches the strategy's pressure-tracking policy for this region.

+void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,

+ MachineBasicBlock::iterator begin,

+ MachineBasicBlock::iterator end,

+ unsigned regioninstrs)

+ // ScheduleDAGMI initializes SchedImpl's per-region policy.

+ ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);

+ // For convenience remember the end of the liveness region: one instruction past RegionEnd unless RegionEnd is the block end.

+ LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);

+ SUPressureDiffs.clear();

+ ShouldTrackPressure = SchedImpl->shouldTrackPressure();

+ ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();

+ assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&

+ "ShouldTrackLaneMasks requires ShouldTrackPressure");

+// Set up the register pressure trackers for the top-scheduled and

+// bottom-scheduled regions, and cache the pressure sets that exceed their limit.

+void ScheduleDAGMILive::initRegPressure() {

+ VRegUses.clear();

+ VRegUses.setUniverse(MRI.getNumVirtRegs());

+ for (SUnit &SU : SUnits)

+ collectVRegUses(SU);

+ TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,

+ ShouldTrackLaneMasks, false);

+ BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,

+ ShouldTrackLaneMasks, false);

+ // Close the RPTracker to finalize live ins.

+ RPTracker.closeRegion();

+ LLVM_DEBUG(RPTracker.dump());

+ // Initialize the live ins and live outs.

+ TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);

+ BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

+ // Close one end of the tracker so we can call

+ // getMaxUpward/DownwardPressureDelta before advancing across any

+ // instructions. This converts currently live regs into live ins/outs.

+ TopRPTracker.closeTop();

+ BotRPTracker.closeBottom();

+ BotRPTracker.initLiveThru(RPTracker);

+ if (!BotRPTracker.getLiveThru().empty()) {

+ TopRPTracker.initLiveThru(BotRPTracker.getLiveThru()); // Share the bottom tracker's live-through data with the top tracker.

+ LLVM_DEBUG(dbgs() << "Live Thru: ";

+ dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));

+ };

+ // For each live out vreg reduce the pressure change associated with other

+ // uses of the same vreg below the live-out reaching def.

+ updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);

+ // Account for liveness generated by the region boundary.

+ if (LiveRegionEnd != RegionEnd) {

+ SmallVector<RegisterMaskPair, 8> LiveUses;

+ BotRPTracker.recede(&LiveUses); // Step the bottom tracker up over the boundary instruction, collecting its live uses.

+ updatePressureDiffs(LiveUses);

+ }

+ LLVM_DEBUG(dbgs() << "Top Pressure:\n";

+ dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);

+ dbgs() << "Bottom Pressure:\n";

+ dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););

+ assert((BotRPTracker.getPos() == RegionEnd ||

+ (RegionEnd->isDebugInstr() &&

+ BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&

+ "Can't find the region bottom");

+ // Cache the list of excess pressure sets in this region. This will also track

+ // the max pressure in the scheduled code for these sets.

+ RegionCriticalPSets.clear();

+ const std::vector<unsigned> &RegionPressure =

+ RPTracker.getPressure().MaxSetPressure;

+ for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { // Sets are appended in increasing index order.

+ unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);

+ if (RegionPressure[i] > Limit) {

+ LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit

+ << " Actual " << RegionPressure[i] << "\n");

+ RegionCriticalPSets.push_back(PressureChange(i));

+ }

+ LLVM_DEBUG(dbgs() << "Excess PSets: ";

+ for (const PressureChange &RCPS

+ : RegionCriticalPSets) dbgs()

+ << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";

+ dbgs() << "\n");

+void ScheduleDAGMILive::

+updateScheduledPressure(const SUnit *SU,

+ const std::vector<unsigned> &NewMaxPressure) { // For each pressure set changed by SU, raise the recorded max of the matching critical set and log sets close to their limit.

+ const PressureDiff &PDiff = getPressureDiff(SU);

+ unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();

+ for (const PressureChange &PC : PDiff) {

+ if (!PC.isValid()) // The first invalid entry ends the list of changes.

+ break;

+ unsigned ID = PC.getPSet();

+ while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID) // Merge-style scan; presumably PDiff is ordered by pressure-set ID like RegionCriticalPSets — TODO confirm.

+ ++CritIdx;

+ if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {

+ if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()

+ && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max()) // Only record values that fit in int16_t.

+ RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);

+ }

+ unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);

+ if (NewMaxPressure[ID] >= Limit - 2) { // NOTE(review): Limit is unsigned, so Limit - 2 wraps when Limit < 2; only debug output depends on this check.

+ LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "

+ << NewMaxPressure[ID]

+ << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")

+ << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]

+ << " livethru)\n");

+ }

+/// Update the PressureDiff array for liveness after scheduling this

+/// instruction. Only virtual registers in LiveUses are processed, and only unscheduled SUnits other than ExitSU are adjusted.

+void ScheduleDAGMILive::updatePressureDiffs(

+ ArrayRef<RegisterMaskPair> LiveUses) {

+ for (const RegisterMaskPair &P : LiveUses) {

+ unsigned Reg = P.RegUnit;

+ /// FIXME: Currently assuming single-use physregs.

+ if (!TRI->isVirtualRegister(Reg))

+ continue;

+ if (ShouldTrackLaneMasks) {

+ // If the register has just become live then other uses won't change

+ // this fact anymore => decrement pressure.

+ // If the register has just become dead then other uses make it come

+ // back to life => increment pressure.

+ bool Decrement = P.LaneMask.any(); // A non-empty lane mask selects the decrement case.

+ for (const VReg2SUnit &V2SU

+ : make_range(VRegUses.find(Reg), VRegUses.end())) {

+ SUnit &SU = *V2SU.SU;

+ if (SU.isScheduled || &SU == &ExitSU)

+ continue;

+ PressureDiff &PDiff = getPressureDiff(&SU);

+ PDiff.addPressureChange(Reg, Decrement, &MRI);

+ LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "

+ << printReg(Reg, TRI) << ':'

+ << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();

+ dbgs() << " to "; PDiff.dump(*TRI););

+ }

+ } else {

+ assert(P.LaneMask.any());

+ LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");

+ // This may be called before CurrentBottom has been initialized. However,

+ // BotRPTracker must have a valid position. We want the value live into the

+ // instruction or live out of the block, so ask for the previous

+ // instruction's live-out.

+ const LiveInterval &LI = LIS->getInterval(Reg);

+ VNInfo *VNI;

+ MachineBasicBlock::const_iterator I =

+ nextIfDebug(BotRPTracker.getPos(), BB->end());

+ if (I == BB->end())

+ VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); // No instruction below: use the value live at the end of the block.

+ else {

+ LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));

+ VNI = LRQ.valueIn();

+ }

+ // RegisterPressureTracker guarantees that readsReg is true for LiveUses.

+ assert(VNI && "No live value at use.");

+ for (const VReg2SUnit &V2SU

+ : make_range(VRegUses.find(Reg), VRegUses.end())) {

+ SUnit *SU = V2SU.SU;

+ // If this use comes before the reaching def, it cannot be a last use,

+ // so decrease its pressure change.

+ if (!SU->isScheduled && SU != &ExitSU) {

+ LiveQueryResult LRQ =

+ LI.Query(LIS->getInstructionIndex(*SU->getInstr()));

+ if (LRQ.valueIn() == VNI) { // Only uses that read the same value number as the live use are adjusted.

+ PressureDiff &PDiff = getPressureDiff(SU);

+ PDiff.addPressureChange(Reg, true, &MRI);

+ LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "

+ << *SU->getInstr();

+ dbgs() << " to "; PDiff.dump(*TRI););

+ }

+void ScheduleDAGMILive::dump() const { // Debug dump of every DAG node; the body is compiled out unless asserts or LLVM_ENABLE_DUMP are enabled.

+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

+ if (EntrySU.getInstr() != nullptr) // The boundary node is dumped only when it has an instruction attached.

+ dumpNodeAll(EntrySU);

+ for (const SUnit &SU : SUnits) {

+ dumpNodeAll(SU);

+ if (ShouldTrackPressure) {

+ dbgs() << " Pressure Diff : ";

+ getPressureDiff(&SU).dump(*TRI);

+ }

+ dbgs() << " Single Issue : ";

+ if (SchedModel.mustBeginGroup(SU.getInstr()) &&

+ SchedModel.mustEndGroup(SU.getInstr())) // Reported as single-issue when the instruction must both begin and end a group.

+ dbgs() << "true;";

+ else

+ dbgs() << "false;";

+ dbgs() << '\n';

+ }

+ if (ExitSU.getInstr() != nullptr)

+ dumpNodeAll(ExitSU);

+#endif

+/// schedule - Called back from MachineScheduler::runOnMachineFunction

+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)

+/// only includes instructions that have DAG nodes, not scheduling boundaries.

+///

+/// This is a skeletal driver, with all the functionality pushed into helpers,

+/// so that it can be easily extended by experimental schedulers. Generally,

+/// implementing MachineSchedStrategy should be sufficient to implement a new

+/// scheduling algorithm. However, if a scheduler further subclasses

+/// ScheduleDAGMILive then it will want to override this virtual method in order

+/// to update any specialized state.

+void ScheduleDAGMILive::schedule() {

+ LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");

+ LLVM_DEBUG(SchedImpl->dumpPolicy());

+ buildDAGWithRegPressure();

+ postprocessDAG();

+ SmallVector<SUnit*, 8> TopRoots, BotRoots;

+ findRootsAndBiasEdges(TopRoots, BotRoots);

+ // Initialize the strategy before modifying the DAG.

+ // This may initialize a DFSResult to be used for queue priority.

+ SchedImpl->initialize(this);

+ LLVM_DEBUG(dump());

+ if (PrintDAGs) dump();

+ if (ViewMISchedDAGs) viewGraph();

+ // Initialize ready queues now that the DAG and priority data are finalized.

+ initQueues(TopRoots, BotRoots);

+ bool IsTopNode = false; // Set by pickNode() to say which end of the zone the picked node belongs to.

+ while (true) {

+ LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");

+ SUnit *SU = SchedImpl->pickNode(IsTopNode);

+ if (!SU) break; // The strategy has no more nodes to offer.

+ assert(!SU->isScheduled && "Node already scheduled");

+ if (!checkSchedLimit()) // Stop scheduling as soon as checkSchedLimit() returns false.

+ break;

+ scheduleMI(SU, IsTopNode);

+ if (DFSResult) {

+ unsigned SubtreeID = DFSResult->getSubtreeID(SU);

+ if (!ScheduledTrees.test(SubtreeID)) { // First node scheduled from this subtree: notify DFSResult and the strategy once.

+ ScheduledTrees.set(SubtreeID);

+ DFSResult->scheduleTree(SubtreeID);

+ SchedImpl->scheduleTree(SubtreeID);

+ }

+ // Notify the scheduling strategy after updating the DAG.

+ SchedImpl->schedNode(SU, IsTopNode);

+ updateQueues(SU, IsTopNode);

+ }

+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

+ placeDebugValues();

+ LLVM_DEBUG({

+ dbgs() << "*** Final schedule for "

+ << printMBBReference(*begin()->getParent()) << " ***\n";

+ dumpSchedule();

+ dbgs() << '\n';

+ });

+/// Build the DAG and set up three register pressure trackers. Without pressure tracking, only the DAG is built.

+void ScheduleDAGMILive::buildDAGWithRegPressure() {

+ if (!ShouldTrackPressure) {

+ RPTracker.reset();

+ RegionCriticalPSets.clear();

+ buildSchedGraph(AA); // Plain DAG construction with no pressure bookkeeping.

+ return;

+ }

+ // Initialize the register pressure tracker used by buildSchedGraph.

+ RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,

+ ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);

+ // Account for liveness generated by the region boundary.

+ if (LiveRegionEnd != RegionEnd)

+ RPTracker.recede();

+ // Build the DAG, and compute current register pressure.

+ buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);

+ // Initialize top/bottom trackers after computing region pressure.

+ initRegPressure();

+void ScheduleDAGMILive::computeDFSResult() { // (Re)compute DFSResult over the current SUnits and reset the scheduled-subtree bit vector.

+ if (!DFSResult)

+ DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize); // Allocated on first use and reused on later calls; deleted in the destructor.

+ DFSResult->clear();

+ ScheduledTrees.clear();

+ DFSResult->resize(SUnits.size());

+ DFSResult->compute(SUnits);

+ ScheduledTrees.resize(DFSResult->getNumSubtrees()); // One bit per subtree.

+/// Compute the max cyclic critical path through the DAG. The scheduling DAG

+/// only provides the critical path for single block loops. To handle loops that

+/// span blocks, we could use the vreg path latencies provided by

+/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently

+/// available for use in the scheduler.

+///

+/// The cyclic path estimation identifies a def-use pair that crosses the back

+/// edge and considers the depth and height of the nodes. For example, consider

+/// the following instruction sequence where each instruction has unit latency

+/// and defines an eponymous virtual register:

+///

+/// a->b(a,c)->c(b)->d(c)->exit

+///

+/// The cyclic critical path is two cycles: b->c->b

+/// The acyclic critical path is four cycles: a->b->c->d->exit

+/// LiveOutHeight = height(c) = len(c->d->exit) = 2

+/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3

+/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4

+/// LiveInDepth = depth(b) = len(a->b) = 1

+///

+/// LiveOutDepth - LiveInDepth = 3 - 1 = 2

+/// LiveInHeight - LiveOutHeight = 4 - 2 = 2

+/// CyclicCriticalPath = min(2, 2) = 2

+///

+/// This could be relevant to PostRA scheduling, but is currently implemented

+/// assuming LiveIntervals.

+unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {

+ // This only applies to single block loop.

+ if (!BB->isSuccessor(BB))

+ return 0;

+ unsigned MaxCyclicLatency = 0;

+ // Visit each live out vreg def to find def/use pairs that cross iterations.

+ for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {

+ unsigned Reg = P.RegUnit;

+ if (!TRI->isVirtualRegister(Reg))

+ continue;

+ const LiveInterval &LI = LIS->getInterval(Reg);

+ const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); // Value of Reg that is live at the end of the block.

+ if (!DefVNI)

+ continue;

+ MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);

+ const SUnit *DefSU = getSUnit(DefMI);

+ if (!DefSU) // The defining instruction has no node in this DAG.

+ continue;

+ unsigned LiveOutHeight = DefSU->getHeight();

+ unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;

+ // Visit all local users of the vreg def.

+ for (const VReg2SUnit &V2SU

+ : make_range(VRegUses.find(Reg), VRegUses.end())) {

+ SUnit *SU = V2SU.SU;

+ if (SU == &ExitSU)

+ continue;

+ // Only consider uses of the phi.

+ LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));

+ if (!LRQ.valueIn()->isPHIDef()) // NOTE(review): valueIn() is dereferenced without a null check; presumably every recorded use has a live-in value — confirm.

+ continue;

+ // Assume that a path spanning two iterations is a cycle, which could

+ // overestimate in strange cases. This allows cyclic latency to be

+ // estimated as the minimum slack of the vreg's depth or height.

+ unsigned CyclicLatency = 0;

+ if (LiveOutDepth > SU->getDepth())

+ CyclicLatency = LiveOutDepth - SU->getDepth();

+ unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;

+ if (LiveInHeight > LiveOutHeight) {

+ if (LiveInHeight - LiveOutHeight < CyclicLatency)

+ CyclicLatency = LiveInHeight - LiveOutHeight; // Keep the smaller of the depth-based and height-based estimates.

+ } else

+ CyclicLatency = 0;

+ LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("

+ << SU->NodeNum << ") = " << CyclicLatency << "c\n");

+ if (CyclicLatency > MaxCyclicLatency)

+ MaxCyclicLatency = CyclicLatency;

+ }

+ LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");

+ return MaxCyclicLatency;

+/// Release ExitSU predecessors and setup scheduler queues (via the base class). Re-position

+/// the Top RP tracker in case the region beginning has changed.

+void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,

+ ArrayRef<SUnit*> BotRoots) {

+ ScheduleDAGMI::initQueues(TopRoots, BotRoots);

+ if (ShouldTrackPressure) {

+ assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");

+ TopRPTracker.setPos(CurrentTop); // Move the tracker to CurrentTop, which the base class set past any leading debug instructions.

+ }

+/// Move an instruction and update register pressure. IsTopNode selects whether SU is placed at the top or the bottom of the unscheduled zone.

+void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {

+ // Move the instruction to its new location in the instruction stream.

+ MachineInstr *MI = SU->getInstr();

+ if (IsTopNode) {

+ assert(SU->isTopReady() && "node still has unscheduled dependencies");

+ if (&*CurrentTop == MI)

+ CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); // Already in place: just advance the top cursor.

+ else {

+ moveInstruction(MI, CurrentTop);

+ TopRPTracker.setPos(MI);

+ }

+ if (ShouldTrackPressure) {

+ // Update top scheduled pressure.

+ RegisterOperands RegOpers;

+ RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);

+ if (ShouldTrackLaneMasks) {

+ // Adjust liveness and add missing dead+read-undef flags.

+ SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();

+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);

+ } else {

+ // Adjust for missing dead-def flags.

+ RegOpers.detectDeadDefs(*MI, *LIS);

+ }

+ TopRPTracker.advance(RegOpers);

+ assert(TopRPTracker.getPos() == CurrentTop && "out of sync");

+ LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(

+ TopRPTracker.getRegSetPressureAtPos(), TRI););

+ updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);

+ }

+ } else {

+ assert(SU->isBottomReady() && "node still has unscheduled dependencies");

+ MachineBasicBlock::iterator priorII =

+ priorNonDebug(CurrentBottom, CurrentTop);

+ if (&*priorII == MI)

+ CurrentBottom = priorII; // Already in place: just move the bottom cursor up.

+ else {

+ if (&*CurrentTop == MI) { // MI is at the top cursor: advance the cursor before MI is moved away.

+ CurrentTop = nextIfDebug(++CurrentTop, priorII);

+ TopRPTracker.setPos(CurrentTop);

+ }

+ moveInstruction(MI, CurrentBottom);

+ CurrentBottom = MI;

+ BotRPTracker.setPos(CurrentBottom);

+ }

+ if (ShouldTrackPressure) {

+ RegisterOperands RegOpers;

+ RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);

+ if (ShouldTrackLaneMasks) {

+ // Adjust liveness and add missing dead+read-undef flags.

+ SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();

+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);

+ } else {

+ // Adjust for missing dead-def flags.

+ RegOpers.detectDeadDefs(*MI, *LIS);

+ }

+ if (BotRPTracker.getPos() != CurrentBottom)

+ BotRPTracker.recedeSkipDebugValues();

+ SmallVector<RegisterMaskPair, 8> LiveUses;

+ BotRPTracker.recede(RegOpers, &LiveUses);

+ assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");

+ LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(

+ BotRPTracker.getRegSetPressureAtPos(), TRI););

+ updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);

+ updatePressureDiffs(LiveUses); // Propagate the live uses found while receding to the remaining unscheduled nodes.

+ }

+//===----------------------------------------------------------------------===//

+// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.

+//===----------------------------------------------------------------------===//

+namespace {

+/// Post-process the DAG to create cluster edges between neighboring

+/// loads or between neighboring stores (selected by the IsLoad flag).

+class BaseMemOpClusterMutation : public ScheduleDAGMutation {

+ struct MemOpInfo { // One memory operation: its SUnit plus the base operand and offset reported by the target.

+ SUnit *SU;

+ const MachineOperand *BaseOp;

+ int64_t Offset;

+ MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs)

+ : SU(su), BaseOp(Op), Offset(ofs) {}

+ bool operator<(const MemOpInfo &RHS) const { // Sort key: base operand type first, then base, offset and node number.

+ if (BaseOp->getType() != RHS.BaseOp->getType())

+ return BaseOp->getType() < RHS.BaseOp->getType();

+ if (BaseOp->isReg())

+ return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <

+ std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,

+ RHS.SU->NodeNum);

+ if (BaseOp->isFI()) {

+ const MachineFunction &MF =

+ *BaseOp->getParent()->getParent()->getParent();

+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();

+ bool StackGrowsDown = TFI.getStackGrowthDirection() ==

+ TargetFrameLowering::StackGrowsDown;

+ // Can't use tuple comparison here since we might need to use a

+ // different order when the stack grows down.

+ if (BaseOp->getIndex() != RHS.BaseOp->getIndex())

+ return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()

+ : BaseOp->getIndex() < RHS.BaseOp->getIndex();

+ if (Offset != RHS.Offset)

+ return StackGrowsDown ? Offset > RHS.Offset : Offset < RHS.Offset;

+ return SU->NodeNum < RHS.SU->NodeNum; // The node number breaks ties in ascending order regardless of stack direction.

+ }

+ llvm_unreachable("MemOpClusterMutation only supports register or frame "

+ "index bases.");

+ }

+ };

+ const TargetInstrInfo *TII;

+ const TargetRegisterInfo *TRI;

+ bool IsLoad; // True to cluster loads, false to cluster stores.

+public:

+ BaseMemOpClusterMutation(const TargetInstrInfo *tii,

+ const TargetRegisterInfo *tri, bool IsLoad)

+ : TII(tii), TRI(tri), IsLoad(IsLoad) {}

+ void apply(ScheduleDAGInstrs *DAGInstrs) override;

+protected:

+ void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG);

+};

+class StoreClusterMutation : public BaseMemOpClusterMutation { // Store variant: constructs the base with IsLoad = false.

+public:

+ StoreClusterMutation(const TargetInstrInfo *tii,

+ const TargetRegisterInfo *tri)

+ : BaseMemOpClusterMutation(tii, tri, false) {}

+};

+class LoadClusterMutation : public BaseMemOpClusterMutation { // Load variant: constructs the base with IsLoad = true.

+public:

+ LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)

+ : BaseMemOpClusterMutation(tii, tri, true) {}

+};

+} // end anonymous namespace

+namespace llvm {

+std::unique_ptr<ScheduleDAGMutation>

+createLoadClusterDAGMutation(const TargetInstrInfo *TII,

+ const TargetRegisterInfo *TRI) { // Factory for the load-clustering mutation; returns null when EnableMemOpCluster is false.

+ return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)

+ : nullptr;

+std::unique_ptr<ScheduleDAGMutation>

+createStoreClusterDAGMutation(const TargetInstrInfo *TII,

+ const TargetRegisterInfo *TRI) { // Factory for the store-clustering mutation; returns null when EnableMemOpCluster is false.

+ return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)

+ : nullptr;

+} // end namespace llvm

+void BaseMemOpClusterMutation::clusterNeighboringMemOps(

+ ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) { // Sort MemOps by base/offset and add Cluster edges between adjacent pairs the target approves.

+ SmallVector<MemOpInfo, 32> MemOpRecords;

+ for (SUnit *SU : MemOps) {

+ const MachineOperand *BaseOp;

+ int64_t Offset;

+ if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI)) // Ops for which the target reports no base and offset are left out.

+ MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));

+ }

+ if (MemOpRecords.size() < 2) // A pair needs at least two records; this also keeps End - 1 below from wrapping.

+ return;

+ llvm::sort(MemOpRecords);

+ unsigned ClusterLength = 1; // Number of ops in the cluster being grown; passed to the target hook.

+ for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {

+ SUnit *SUa = MemOpRecords[Idx].SU;

+ SUnit *SUb = MemOpRecords[Idx+1].SU;

+ if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,

+ *MemOpRecords[Idx + 1].BaseOp,

+ ClusterLength) &&

+ DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {

+ LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("

+ << SUb->NodeNum << ")\n");

+ // Copy successor edges from SUa to SUb. Interleaving computation

+ // dependent on SUa can prevent load combining due to register reuse.

+ // Predecessor edges do not need to be copied from SUb to SUa since nearby

+ // loads should have effectively the same inputs.

+ for (const SDep &Succ : SUa->Succs) {

+ if (Succ.getSUnit() == SUb)

+ continue;

+ LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum

+ << ")\n");

+ DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));

+ }

+ ++ClusterLength;

+ } else

+ ClusterLength = 1; // The pair was not clustered: start a new cluster at the next op.

+ }

+/// Callback from DAG postProcessing to create cluster edges for loads or stores, depending on IsLoad.

+void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {

+ // Map DAG NodeNum to store chain ID.

+ DenseMap<unsigned, unsigned> StoreChainIDs;

+ // Map each store chain to a set of dependent MemOps.

+ SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;

+ for (SUnit &SU : DAG->SUnits) {

+ if ((IsLoad && !SU.getInstr()->mayLoad()) ||

+ (!IsLoad && !SU.getInstr()->mayStore())) // Skip instructions that are not the kind of memory op this mutation handles.

+ continue;

+ unsigned ChainPredID = DAG->SUnits.size(); // Default ID, used when SU has no control-dependence predecessor.

+ for (const SDep &Pred : SU.Preds) {

+ if (Pred.isCtrl()) { // The first control predecessor found identifies the chain.

+ ChainPredID = Pred.getSUnit()->NodeNum;

+ break;

+ }

+ // Check if this chain-like pred has been seen

+ // before. ChainPredID==MaxNodeID at the top of the schedule.

+ unsigned NumChains = StoreChainDependents.size();

+ std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =

+ StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));

+ if (Result.second) // New chain: add an empty bucket for it.

+ StoreChainDependents.resize(NumChains + 1);

+ StoreChainDependents[Result.first->second].push_back(&SU);

+ }

+ // Iterate over the store chains.

+ for (auto &SCD : StoreChainDependents)

+ clusterNeighboringMemOps(SCD, DAG);

+//===----------------------------------------------------------------------===//

+// CopyConstrain - DAG post-processing to encourage copy elimination.

+//===----------------------------------------------------------------------===//

+namespace {

+/// Post-process the DAG to create weak edges from all uses of a copy to

+/// the one use that defines the copy's source vreg, most likely an induction

+/// variable increment.

+class CopyConstrain : public ScheduleDAGMutation {

+ // Transient state, set by apply() for the current region.

+ SlotIndex RegionBeginIdx;

+ // RegionEndIdx is the slot index of the last non-debug instruction in the

+ // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.

+ SlotIndex RegionEndIdx;

+public:

+ CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {} // Both arguments are unused.

+ void apply(ScheduleDAGInstrs *DAGInstrs) override;

+protected:

+ void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);

+};

+} // end anonymous namespace

+namespace llvm {

+std::unique_ptr<ScheduleDAGMutation>

+createCopyConstrainDAGMutation(const TargetInstrInfo *TII,

+ const TargetRegisterInfo *TRI) { // Factory for the CopyConstrain mutation; always returns a new instance.

+ return llvm::make_unique<CopyConstrain>(TII, TRI);

+} // end namespace llvm

+/// constrainLocalCopy handles two possibilities:

+/// 1) Local src:

+/// I0: = dst

+/// I1: src = ...

+/// I2: = dst

+/// I3: dst = src (copy)

+/// (create pred->succ edges I0->I1, I2->I1)

+///

+/// 2) Local copy:

+/// I0: dst = src (copy)

+/// I1: = dst

+/// I2: src = ...

+/// I3: = dst

+/// (create pred->succ edges I1->I2, I3->I2)

+///

+/// Although the MachineScheduler is currently constrained to single blocks,

+/// this algorithm should handle extended blocks. An EBB is a set of

+/// contiguously numbered blocks such that the previous block in the EBB is

+/// always the single predecessor.

+void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {

+ LiveIntervals *LIS = DAG->getLIS();

+ MachineInstr *Copy = CopySU->getInstr();

+ // Check for pure vreg copies.

+ const MachineOperand &SrcOp = Copy->getOperand(1);

+ unsigned SrcReg = SrcOp.getReg();

+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())

+ return;

+ const MachineOperand &DstOp = Copy->getOperand(0);

+ unsigned DstReg = DstOp.getReg();

+ if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())

+ return;

+ // Check if either the dest or source is local. If it's live across a back

+ // edge, it's not local. Note that if both vregs are live across the back

+ // edge, we cannot successfully constrain the copy without cyclic scheduling.

+ // If both the copy's source and dest are local live intervals, then we

+ // should treat the dest as the global for the purpose of adding

+ // constraints. This adds edges from source's other uses to the copy.

+ unsigned LocalReg = SrcReg;

+ unsigned GlobalReg = DstReg;

+ LiveInterval *LocalLI = &LIS->getInterval(LocalReg);

+ if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) { // The source is not local: try the roles the other way round.

+ LocalReg = DstReg;

+ GlobalReg = SrcReg;

+ LocalLI = &LIS->getInterval(LocalReg);

+ if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))

+ return;

+ }

+ LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);

+ // Find the global segment after the start of the local LI.

+ LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());

+ // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a

+ // local live range. We could create edges from other global uses to the local

+ // start, but the coalescer should have already eliminated these cases, so

+ // don't bother dealing with it.

+ if (GlobalSegment == GlobalLI->end())

+ return;

+ // If GlobalSegment is killed at the LocalLI->start, the call to find()

+ // returned the next global segment. But if GlobalSegment overlaps with

+ // LocalLI->start, then advance to the next segment. If a hole in GlobalLI

+ // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.

+ if (GlobalSegment->contains(LocalLI->beginIndex()))

+ ++GlobalSegment;

+ if (GlobalSegment == GlobalLI->end())

+ return;

+ // Check if GlobalLI contains a hole in the vicinity of LocalLI.

+ if (GlobalSegment != GlobalLI->begin()) {

+ // Two address defs have no hole.

+ if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,

+ GlobalSegment->start)) {

+ return;

+ }

+ // If the prior global segment may be defined by the same two-address

+ // instruction that also defines LocalLI, then can't make a hole here.

+ if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,

+ LocalLI->beginIndex())) {

+ return;

+ }

+ // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise

+ // it would be a disconnected component in the live range.

+ assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&

+ "Disconnected LRG within the scheduling region.");

+ }

+ MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);

+ if (!GlobalDef)

+ return;

+ SUnit *GlobalSU = DAG->getSUnit(GlobalDef);

+ if (!GlobalSU)

+ return;

+ // GlobalDef is the bottom of the GlobalLI hole. Open the hole by

+ // constraining the uses of the last local def to precede GlobalDef.

+ SmallVector<SUnit*,8> LocalUses;

+ const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());

+ MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def); // NOTE(review): LastLocalVN is dereferenced without a null check — confirm it cannot be null for a local interval.

+ SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);

+ for (const SDep &Succ : LastLocalSU->Succs) { // NOTE(review): LastLocalSU is used without a null check — presumably the def always has a node in this DAG; confirm.

+ if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)

+ continue;

+ if (Succ.getSUnit() == GlobalSU)

+ continue;

+ if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit())) // Give up on the whole copy if any required edge cannot be added.

+ return;

+ LocalUses.push_back(Succ.getSUnit());

+ }

+ // Open the top of the GlobalLI hole by constraining any earlier global uses

+ // to precede the start of LocalLI.

+ SmallVector<SUnit*,8> GlobalUses;

+ MachineInstr *FirstLocalDef =

+ LIS->getInstructionFromIndex(LocalLI->beginIndex());

+ SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);

+ for (const SDep &Pred : GlobalSU->Preds) {

+ if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)

+ continue;

+ if (Pred.getSUnit() == FirstLocalSU)

+ continue;

+ if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))

+ return;

+ GlobalUses.push_back(Pred.getSUnit());

+ }

+ LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");

+ // Add the weak edges. Nothing is added until every candidate edge above has passed canAddEdge().

+ for (SmallVectorImpl<SUnit*>::const_iterator

+ I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {

+ LLVM_DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("

+ << GlobalSU->NodeNum << ")\n");

+ DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));

+ }

+ for (SmallVectorImpl<SUnit*>::const_iterator

+ I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {

+ LLVM_DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("

+ << FirstLocalSU->NodeNum << ")\n");

+ DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));

+ }

+/// Callback from DAG postProcessing to create weak edges to encourage

+/// copy elimination.

+void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {

+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);

+ assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");

+ MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());

+ if (FirstPos == DAG->end())

+ return;

+ RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);

+ RegionEndIdx = DAG->getLIS()->getInstructionIndex(

+ *priorNonDebug(DAG->end(), DAG->begin()));

+ for (SUnit &SU : DAG->SUnits) {

+ if (!SU.getInstr()->isCopy())

+ continue;

+ constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));

+ }

+//===----------------------------------------------------------------------===//

+// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler

+// and possibly other custom schedulers.

+//===----------------------------------------------------------------------===//

/// Sentinel cycle value (all bits set) marking a reserved-cycle slot that has
/// never been written.
static const unsigned InvalidCycle = std::numeric_limits<unsigned>::max();

+SchedBoundary::~SchedBoundary() { delete HazardRec; }

/// Decide whether a SchedBoundary is resource limited.
///
/// \param LFactor        scaling factor applied to \p Latency.
/// \param Count          scaled resource usage count.
/// \param Latency        latency in cycles.
/// \param AfterSchedNode true when checking right after scheduling a node; in
///                       that case merely reaching the limit already counts.
/// \return true if the resource count exceeds the scaled latency by more than
///         (or, after scheduling a node, by at least) \p LFactor.
static bool checkResourceLimit(unsigned LFactor, unsigned Count,
                               unsigned Latency, bool AfterSchedNode) {
  // The difference is taken in unsigned arithmetic and then reinterpreted as
  // signed so that "Count below scaled latency" shows up as a negative value.
  const int Excess = (int)(Count - (Latency * LFactor));
  const int Limit = (int)LFactor;
  return AfterSchedNode ? (Excess >= Limit) : (Excess > Limit);
}

+void SchedBoundary::reset() {

+ // A new HazardRec is created for each DAG and owned by SchedBoundary.

+ // Destroying and reconstructing it is very expensive though. So keep

+ // invalid, placeholder HazardRecs.

+ if (HazardRec && HazardRec->isEnabled()) {

+ delete HazardRec;

+ HazardRec = nullptr;

+ }

+ Available.clear();

+ Pending.clear();

+ CheckPending = false;

+ CurrCycle = 0;

+ CurrMOps = 0;

+ MinReadyCycle = std::numeric_limits<unsigned>::max();

+ ExpectedLatency = 0;

+ DependentLatency = 0;

+ RetiredMOps = 0;

+ MaxExecutedResCount = 0;

+ ZoneCritResIdx = 0;

+ IsResourceLimited = false;

+ ReservedCycles.clear();

+ ReservedCyclesIndex.clear();

+#ifndef NDEBUG

+ // Track the maximum number of stall cycles that could arise either from the

+ // latency of a DAG edge or the number of cycles that a processor resource is

+ // reserved (SchedBoundary::ReservedCycles).

+ MaxObservedStall = 0;

+#endif

+ // Reserve a zero-count for invalid CritResIdx.

+ ExecutedResCounts.resize(1);

+ assert(!ExecutedResCounts[0] && "nonzero count for bad resource");

+void SchedRemainder::

+init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {

+ reset();

+ if (!SchedModel->hasInstrSchedModel())

+ return;

+ RemainingCounts.resize(SchedModel->getNumProcResourceKinds());

+ for (SUnit &SU : DAG->SUnits) {

+ const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);

+ RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)

+ * SchedModel->getMicroOpFactor();

+ for (TargetSchedModel::ProcResIter

+ PI = SchedModel->getWriteProcResBegin(SC),

+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {

+ unsigned PIdx = PI->ProcResourceIdx;

+ unsigned Factor = SchedModel->getResourceFactor(PIdx);

+ RemainingCounts[PIdx] += (Factor * PI->Cycles);

+ }

+void SchedBoundary::

+init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {

+ reset();

+ DAG = dag;

+ SchedModel = smodel;

+ Rem = rem;

+ if (SchedModel->hasInstrSchedModel()) {

+ unsigned ResourceCount = SchedModel->getNumProcResourceKinds();

+ ReservedCyclesIndex.resize(ResourceCount);

+ ExecutedResCounts.resize(ResourceCount);

+ unsigned NumUnits = 0;

+ for (unsigned i = 0; i < ResourceCount; ++i) {

+ ReservedCyclesIndex[i] = NumUnits;

+ NumUnits += SchedModel->getProcResource(i)->NumUnits;

+ }

+ ReservedCycles.resize(NumUnits, InvalidCycle);

+ }

+/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat

+/// these "soft stalls" differently than the hard stall cycles based on CPU

+/// resources and computed by checkHazard(). A fully in-order model

+/// (MicroOpBufferSize==0) will not make use of this since instructions are not

+/// available for scheduling until they are ready. However, a weaker in-order

+/// model may use this for heuristics. For example, if a processor has in-order

+/// behavior when reading certain resources, this may come into play.

+unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {

+ if (!SU->isUnbuffered)

+ return 0;

+ unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);

+ if (ReadyCycle > CurrCycle)

+ return ReadyCycle - CurrCycle;

+ return 0;

+/// Compute the next cycle at which the given processor resource unit

+/// can be scheduled.

+unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,

+ unsigned Cycles) {

+ unsigned NextUnreserved = ReservedCycles[InstanceIdx];

+ // If this resource has never been used, always return cycle zero.

+ if (NextUnreserved == InvalidCycle)

+ return 0;

+ // For bottom-up scheduling add the cycles needed for the current operation.

+ if (!isTop())

+ NextUnreserved += Cycles;

+ return NextUnreserved;

+/// Compute the next cycle at which the given processor resource can be

+/// scheduled. Returns the next cycle and the index of the processor resource

+/// instance in the reserved cycles vector.

+std::pair<unsigned, unsigned>

+SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {

+ unsigned MinNextUnreserved = InvalidCycle;

+ unsigned InstanceIdx = 0;

+ unsigned StartIndex = ReservedCyclesIndex[PIdx];

+ unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;

+ assert(NumberOfInstances > 0 &&

+ "Cannot have zero instances of a ProcResource");

+ for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;

+ ++I) {

+ unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);

+ if (MinNextUnreserved > NextUnreserved) {

+ InstanceIdx = I;

+ MinNextUnreserved = NextUnreserved;

+ }

+ return std::make_pair(MinNextUnreserved, InstanceIdx);

+/// Does this SU have a hazard within the current instruction group.

+///

+/// The scheduler supports two modes of hazard recognition. The first is the

+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that

+/// supports highly complicated in-order reservation tables

+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.

+///

+/// The second is a streamlined mechanism that checks for hazards based on

+/// simple counters that the scheduler itself maintains. It explicitly checks

+/// for instruction dispatch limitations, including the number of micro-ops that

+/// can dispatch per cycle.

+///

+/// TODO: Also check whether the SU must start a new group.

+bool SchedBoundary::checkHazard(SUnit *SU) {

+ if (HazardRec->isEnabled()

+ && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {

+ return true;

+ }

+ unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());

+ if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {

+ LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="

+ << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');

+ return true;

+ }

+ if (CurrMOps > 0 &&

+ ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||

+ (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {

+ LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "

+ << (isTop() ? "begin" : "end") << " group\n");

+ return true;

+ }

+ if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {

+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);

+ for (const MCWriteProcResEntry &PE :

+ make_range(SchedModel->getWriteProcResBegin(SC),

+ SchedModel->getWriteProcResEnd(SC))) {

+ unsigned ResIdx = PE.ProcResourceIdx;

+ unsigned Cycles = PE.Cycles;

+ unsigned NRCycle, InstanceIdx;

+ std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles);

+ if (NRCycle > CurrCycle) {

+#ifndef NDEBUG

+ MaxObservedStall = std::max(Cycles, MaxObservedStall);

+#endif

+ LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "

+ << SchedModel->getResourceName(ResIdx)

+ << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'

+ << "=" << NRCycle << "c\n");

+ return true;

+ }

+ return false;

+// Find the unscheduled node in ReadySUs with the highest latency.

+unsigned SchedBoundary::

+findMaxLatency(ArrayRef<SUnit*> ReadySUs) {

+ SUnit *LateSU = nullptr;

+ unsigned RemLatency = 0;

+ for (SUnit *SU : ReadySUs) {

+ unsigned L = getUnscheduledLatency(SU);

+ if (L > RemLatency) {

+ RemLatency = L;

+ LateSU = SU;

+ }

+ if (LateSU) {

+ LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("

+ << LateSU->NodeNum << ") " << RemLatency << "c\n");

+ }

+ return RemLatency;

+// Count resources in this zone and the remaining unscheduled

+// instruction. Return the max count, scaled. Set OtherCritIdx to the critical

+// resource index, or zero if the zone is issue limited.

+unsigned SchedBoundary::

+getOtherResourceCount(unsigned &OtherCritIdx) {

+ OtherCritIdx = 0;

+ if (!SchedModel->hasInstrSchedModel())

+ return 0;

+ unsigned OtherCritCount = Rem->RemIssueCount

+ + (RetiredMOps * SchedModel->getMicroOpFactor());

+ LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "

+ << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');

+ for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();

+ PIdx != PEnd; ++PIdx) {

+ unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];

+ if (OtherCount > OtherCritCount) {

+ OtherCritCount = OtherCount;

+ OtherCritIdx = PIdx;

+ }

+ if (OtherCritIdx) {

+ LLVM_DEBUG(

+ dbgs() << " " << Available.getName() << " + Remain CritRes: "

+ << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)

+ << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");

+ }

+ return OtherCritCount;

+void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {

+ assert(SU->getInstr() && "Scheduled SUnit must have instr");

+#ifndef NDEBUG

+ // ReadyCycle was been bumped up to the CurrCycle when this node was

+ // scheduled, but CurrCycle may have been eagerly advanced immediately after

+ // scheduling, so may now be greater than ReadyCycle.

+ if (ReadyCycle > CurrCycle)

+ MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);

+#endif

+ if (ReadyCycle < MinReadyCycle)

+ MinReadyCycle = ReadyCycle;

+ // Check for interlocks first. For the purpose of other heuristics, an

+ // instruction that cannot issue appears as if it's not in the ReadyQueue.

+ bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;

+ if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||

+ Available.size() >= ReadyListLimit)

+ Pending.push(SU);

+ else

+ Available.push(SU);

+/// Move the boundary of scheduled code by one cycle.

+void SchedBoundary::bumpCycle(unsigned NextCycle) {

+ if (SchedModel->getMicroOpBufferSize() == 0) {

+ assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&

+ "MinReadyCycle uninitialized");

+ if (MinReadyCycle > NextCycle)

+ NextCycle = MinReadyCycle;

+ }

+ // Update the current micro-ops, which will issue in the next cycle.

+ unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);

+ CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;

+ // Decrement DependentLatency based on the next cycle.

+ if ((NextCycle - CurrCycle) > DependentLatency)

+ DependentLatency = 0;

+ else

+ DependentLatency -= (NextCycle - CurrCycle);

+ if (!HazardRec->isEnabled()) {

+ // Bypass HazardRec virtual calls.

+ CurrCycle = NextCycle;

+ } else {

+ // Bypass getHazardType calls in case of long latency.

+ for (; CurrCycle != NextCycle; ++CurrCycle) {

+ if (isTop())

+ HazardRec->AdvanceCycle();

+ else

+ HazardRec->RecedeCycle();

+ }

+ CheckPending = true;

+ IsResourceLimited =

+ checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),

+ getScheduledLatency(), true);

+ LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()

+ << '\n');

+void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {

+ ExecutedResCounts[PIdx] += Count;

+ if (ExecutedResCounts[PIdx] > MaxExecutedResCount)

+ MaxExecutedResCount = ExecutedResCounts[PIdx];

+/// Add the given processor resource to this scheduled zone.

+///

+/// \param Cycles indicates the number of consecutive (non-pipelined) cycles

+/// during which this resource is consumed.

+///

+/// \return the next cycle at which the instruction may execute without

+/// oversubscribing resources.

+unsigned SchedBoundary::

+countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {

+ unsigned Factor = SchedModel->getResourceFactor(PIdx);

+ unsigned Count = Factor * Cycles;

+ LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"

+ << Cycles << "x" << Factor << "u\n");

+ // Update Executed resources counts.

+ incExecutedResources(PIdx, Count);

+ assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");

+ Rem->RemainingCounts[PIdx] -= Count;

+ // Check if this resource exceeds the current critical resource. If so, it

+ // becomes the critical resource.

+ if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {

+ ZoneCritResIdx = PIdx;

+ LLVM_DEBUG(dbgs() << " *** Critical resource "

+ << SchedModel->getResourceName(PIdx) << ": "

+ << getResourceCount(PIdx) / SchedModel->getLatencyFactor()

+ << "c\n");

+ }

+ // For reserved resources, record the highest cycle using the resource.

+ unsigned NextAvailable, InstanceIdx;

+ std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles);

+ if (NextAvailable > CurrCycle) {

+ LLVM_DEBUG(dbgs() << " Resource conflict: "

+ << SchedModel->getResourceName(PIdx)

+ << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'

+ << " reserved until @" << NextAvailable << "\n");

+ }

+ return NextAvailable;

+/// Move the boundary of scheduled code by one SUnit.

+void SchedBoundary::bumpNode(SUnit *SU) {

+ // Update the reservation table.

+ if (HazardRec->isEnabled()) {

+ if (!isTop() && SU->isCall) {

+ // Calls are scheduled with their preceding instructions. For bottom-up

+ // scheduling, clear the pipeline state before emitting.

+ HazardRec->Reset();

+ }

+ HazardRec->EmitInstruction(SU);

+ // Scheduling an instruction may have made pending instructions available.

+ CheckPending = true;

+ }

+ // checkHazard should prevent scheduling multiple instructions per cycle that

+ // exceed the issue width.

+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);

+ unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());

+ assert(

+ (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&

+ "Cannot schedule this instruction's MicroOps in the current cycle.");

+ unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);

+ LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");

+ unsigned NextCycle = CurrCycle;

+ switch (SchedModel->getMicroOpBufferSize()) {

+ case 0:

+ assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");

+ break;

+ case 1:

+ if (ReadyCycle > NextCycle) {

+ NextCycle = ReadyCycle;

+ LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");

+ }

+ break;

+ default:

+ // We don't currently model the OOO reorder buffer, so consider all

+ // scheduled MOps to be "retired". We do loosely model in-order resource

+ // latency. If this instruction uses an in-order resource, account for any

+ // likely stall cycles.

+ if (SU->isUnbuffered && ReadyCycle > NextCycle)

+ NextCycle = ReadyCycle;

+ break;

+ }

+ RetiredMOps += IncMOps;

+ // Update resource counts and critical resource.

+ if (SchedModel->hasInstrSchedModel()) {

+ unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();

+ assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");

+ Rem->RemIssueCount -= DecRemIssue;

+ if (ZoneCritResIdx) {

+ // Scale scheduled micro-ops for comparing with the critical resource.

+ unsigned ScaledMOps =

+ RetiredMOps * SchedModel->getMicroOpFactor();

+ // If scaled micro-ops are now more than the previous critical resource by

+ // a full cycle, then micro-ops issue becomes critical.

+ if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))

+ >= (int)SchedModel->getLatencyFactor()) {

+ ZoneCritResIdx = 0;

+ LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "

+ << ScaledMOps / SchedModel->getLatencyFactor()

+ << "c\n");

+ }

+ for (TargetSchedModel::ProcResIter

+ PI = SchedModel->getWriteProcResBegin(SC),

+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {

+ unsigned RCycle =

+ countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);

+ if (RCycle > NextCycle)

+ NextCycle = RCycle;

+ }

+ if (SU->hasReservedResource) {

+ // For reserved resources, record the highest cycle using the resource.

+ // For top-down scheduling, this is the cycle in which we schedule this

+ // instruction plus the number of cycles the operations reserves the

+ // resource. For bottom-up is it simply the instruction's cycle.

+ for (TargetSchedModel::ProcResIter

+ PI = SchedModel->getWriteProcResBegin(SC),

+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {

+ unsigned PIdx = PI->ProcResourceIdx;

+ if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {

+ unsigned ReservedUntil, InstanceIdx;

+ std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0);

+ if (isTop()) {

+ ReservedCycles[InstanceIdx] =

+ std::max(ReservedUntil, NextCycle + PI->Cycles);

+ } else

+ ReservedCycles[InstanceIdx] = NextCycle;

+ }

+ // Update ExpectedLatency and DependentLatency.

+ unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;

+ unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;

+ if (SU->getDepth() > TopLatency) {

+ TopLatency = SU->getDepth();

+ LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("

+ << SU->NodeNum << ") " << TopLatency << "c\n");

+ }

+ if (SU->getHeight() > BotLatency) {

+ BotLatency = SU->getHeight();

+ LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("

+ << SU->NodeNum << ") " << BotLatency << "c\n");

+ }

+ // If we stall for any reason, bump the cycle.

+ if (NextCycle > CurrCycle)

+ bumpCycle(NextCycle);

+ else

+ // After updating ZoneCritResIdx and ExpectedLatency, check if we're

+ // resource limited. If a stall occurred, bumpCycle does this.

+ IsResourceLimited =

+ checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),

+ getScheduledLatency(), true);

+ // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle

+ // resets CurrMOps. Loop to handle instructions with more MOps than issue in

+ // one cycle. Since we commonly reach the max MOps here, opportunistically

+ // bump the cycle to avoid uselessly checking everything in the readyQ.

+ CurrMOps += IncMOps;

+ // Bump the cycle count for issue group constraints.

+ // This must be done after NextCycle has been adjust for all other stalls.

+ // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set

+ // currCycle to X.

+ if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||

+ (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {

+ LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")

+ << " group\n");

+ bumpCycle(++NextCycle);

+ }

+ while (CurrMOps >= SchedModel->getIssueWidth()) {

+ LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "

+ << CurrCycle << '\n');

+ bumpCycle(++NextCycle);

+ }

+ LLVM_DEBUG(dumpScheduledState());

+/// Release pending ready nodes in to the available queue. This makes them

+/// visible to heuristics.

+void SchedBoundary::releasePending() {

+ // If the available queue is empty, it is safe to reset MinReadyCycle.

+ if (Available.empty())

+ MinReadyCycle = std::numeric_limits<unsigned>::max();

+ // Check to see if any of the pending instructions are ready to issue. If

+ // so, add them to the available queue.

+ bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;

+ for (unsigned i = 0, e = Pending.size(); i != e; ++i) {

+ SUnit *SU = *(Pending.begin()+i);

+ unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

+ if (ReadyCycle < MinReadyCycle)

+ MinReadyCycle = ReadyCycle;

+ if (!IsBuffered && ReadyCycle > CurrCycle)

+ continue;

+ if (checkHazard(SU))

+ continue;

+ if (Available.size() >= ReadyListLimit)

+ break;

+ Available.push(SU);

+ Pending.remove(Pending.begin()+i);

+ --i; --e;

+ }

+ CheckPending = false;

+/// Remove SU from the ready set for this boundary.

+void SchedBoundary::removeReady(SUnit *SU) {

+ if (Available.isInQueue(SU))

+ Available.remove(Available.find(SU));

+ else {

+ assert(Pending.isInQueue(SU) && "bad ready count");

+ Pending.remove(Pending.find(SU));

+ }

+/// If this queue only has one ready candidate, return it. As a side effect,

+/// defer any nodes that now hit a hazard, and advance the cycle until at least

+/// one node is ready. If multiple instructions are ready, return NULL.

+SUnit *SchedBoundary::pickOnlyChoice() {

+ if (CheckPending)

+ releasePending();

+ if (CurrMOps > 0) {

+ // Defer any ready instrs that now have a hazard.

+ for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {

+ if (checkHazard(*I)) {

+ Pending.push(*I);

+ I = Available.remove(I);

+ continue;

+ }

+ ++I;

+ }

+ for (unsigned i = 0; Available.empty(); ++i) {

+// FIXME: Re-enable assert once PR20057 is resolved.

+// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&

+// "permanent hazard");

+ (void)i;

+ bumpCycle(CurrCycle + 1);

+ releasePending();

+ }

+ LLVM_DEBUG(Pending.dump());

+ LLVM_DEBUG(Available.dump());

+ if (Available.size() == 1)

+ return *Available.begin();

+ return nullptr;

+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

+// This is useful information to dump after bumpNode.

+// Note that the Queue contents are more useful before pickNodeFromQueue.

+LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {

+ unsigned ResFactor;

+ unsigned ResCount;

+ if (ZoneCritResIdx) {

+ ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);

+ ResCount = getResourceCount(ZoneCritResIdx);

+ } else {

+ ResFactor = SchedModel->getMicroOpFactor();

+ ResCount = RetiredMOps * ResFactor;

+ }

+ unsigned LFactor = SchedModel->getLatencyFactor();

+ dbgs() << Available.getName() << " @" << CurrCycle << "c\n"

+ << " Retired: " << RetiredMOps;

+ dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";

+ dbgs() << "\n Critical: " << ResCount / LFactor << "c, "

+ << ResCount / ResFactor << " "

+ << SchedModel->getResourceName(ZoneCritResIdx)

+ << "\n ExpectedLatency: " << ExpectedLatency << "c\n"

+ << (IsResourceLimited ? " - Resource" : " - Latency")

+ << " limited.\n";

+#endif

+//===----------------------------------------------------------------------===//

+// GenericScheduler - Generic implementation of MachineSchedStrategy.

+//===----------------------------------------------------------------------===//

+void GenericSchedulerBase::SchedCandidate::

+initResourceDelta(const ScheduleDAGMI *DAG,

+ const TargetSchedModel *SchedModel) {

+ if (!Policy.ReduceResIdx && !Policy.DemandResIdx)

+ return;

+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);

+ for (TargetSchedModel::ProcResIter

+ PI = SchedModel->getWriteProcResBegin(SC),

+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {

+ if (PI->ProcResourceIdx == Policy.ReduceResIdx)

+ ResDelta.CritResources += PI->Cycles;

+ if (PI->ProcResourceIdx == Policy.DemandResIdx)

+ ResDelta.DemandedResources += PI->Cycles;

+ }

+/// Compute remaining latency. We need this both to determine whether the

+/// overall schedule has become latency-limited and whether the instructions

+/// outside this zone are resource or latency limited.

+///

+/// The "dependent" latency is updated incrementally during scheduling as the

+/// max height/depth of scheduled nodes minus the cycles since it was

+/// scheduled:

+/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone

+///

+/// The "independent" latency is the max ready queue depth:

+/// ILat = max N.depth for N in Available|Pending

+///

+/// RemainingLatency is the greater of independent and dependent latency.

+///

+/// These computations are expensive, especially in DAGs with many edges, so

+/// only do them if necessary.

+static unsigned computeRemLatency(SchedBoundary &CurrZone) {

+ unsigned RemLatency = CurrZone.getDependentLatency();

+ RemLatency = std::max(RemLatency,

+ CurrZone.findMaxLatency(CurrZone.Available.elements()));

+ RemLatency = std::max(RemLatency,

+ CurrZone.findMaxLatency(CurrZone.Pending.elements()));

+ return RemLatency;

+/// Returns true if the current cycle plus remaning latency is greater than

+/// the critical path in the scheduling region.

+bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,

+ SchedBoundary &CurrZone,

+ bool ComputeRemLatency,

+ unsigned &RemLatency) const {

+ // The current cycle is already greater than the critical path, so we are

+ // already latency limited and don't need to compute the remaining latency.

+ if (CurrZone.getCurrCycle() > Rem.CriticalPath)

+ return true;

+ // If we haven't scheduled anything yet, then we aren't latency limited.

+ if (CurrZone.getCurrCycle() == 0)

+ return false;

+ if (ComputeRemLatency)

+ RemLatency = computeRemLatency(CurrZone);

+ return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;

+/// Set the CandPolicy given a scheduling zone given the current resources and

+/// latencies inside and outside the zone.

+void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,

+ SchedBoundary &CurrZone,

+ SchedBoundary *OtherZone) {

+ // Apply preemptive heuristics based on the total latency and resources

+ // inside and outside this zone. Potential stalls should be considered before

+ // following this policy.

+ // Compute the critical resource outside the zone.

+ unsigned OtherCritIdx = 0;

+ unsigned OtherCount =

+ OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;

+ bool OtherResLimited = false;

+ unsigned RemLatency = 0;

+ bool RemLatencyComputed = false;

+ if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {

+ RemLatency = computeRemLatency(CurrZone);

+ RemLatencyComputed = true;

+ OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),

+ OtherCount, RemLatency, false);

+ }

+ // Schedule aggressively for latency in PostRA mode. We don't check for

+ // acyclic latency during PostRA, and highly out-of-order processors will

+ // skip PostRA scheduling.

+ if (!OtherResLimited &&

+ (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,

+ RemLatency))) {

+ Policy.ReduceLatency |= true;

+ LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()

+ << " RemainingLatency " << RemLatency << " + "

+ << CurrZone.getCurrCycle() << "c > CritPath "

+ << Rem.CriticalPath << "\n");

+ }

+ // If the same resource is limiting inside and outside the zone, do nothing.

+ if (CurrZone.getZoneCritResIdx() == OtherCritIdx)

+ return;

+ LLVM_DEBUG(if (CurrZone.isResourceLimited()) {

+ dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "

+ << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";

+ } if (OtherResLimited) dbgs()

+ << " RemainingLimit: "

+ << SchedModel->getResourceName(OtherCritIdx) << "\n";

+ if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()

+ << " Latency limited both directions.\n");

+ if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)

+ Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();

+ if (OtherResLimited)

+ Policy.DemandResIdx = OtherCritIdx;

+#ifndef NDEBUG

+const char *GenericSchedulerBase::getReasonStr(

+ GenericSchedulerBase::CandReason Reason) {

+ switch (Reason) {

+ case NoCand: return "NOCAND ";

+ case Only1: return "ONLY1 ";

+ case PhysReg: return "PHYS-REG ";

+ case RegExcess: return "REG-EXCESS";

+ case RegCritical: return "REG-CRIT ";

+ case Stall: return "STALL ";

+ case Cluster: return "CLUSTER ";

+ case Weak: return "WEAK ";

+ case RegMax: return "REG-MAX ";

+ case ResourceReduce: return "RES-REDUCE";

+ case ResourceDemand: return "RES-DEMAND";

+ case TopDepthReduce: return "TOP-DEPTH ";

+ case TopPathReduce: return "TOP-PATH ";

+ case BotHeightReduce:return "BOT-HEIGHT";

+ case BotPathReduce: return "BOT-PATH ";

+ case NextDefUse: return "DEF-USE ";

+ case NodeOrder: return "ORDER ";

+ };

+ llvm_unreachable("Unknown reason!");

+void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {

+ PressureChange P;

+ unsigned ResIdx = 0;

+ unsigned Latency = 0;

+ switch (Cand.Reason) {

+ default:

+ break;

+ case RegExcess:

+ P = Cand.RPDelta.Excess;

+ break;

+ case RegCritical:

+ P = Cand.RPDelta.CriticalMax;

+ break;

+ case RegMax:

+ P = Cand.RPDelta.CurrentMax;

+ break;

+ case ResourceReduce:

+ ResIdx = Cand.Policy.ReduceResIdx;

+ break;

+ case ResourceDemand:

+ ResIdx = Cand.Policy.DemandResIdx;

+ break;

+ case TopDepthReduce:

+ Latency = Cand.SU->getDepth();

+ break;

+ case TopPathReduce:

+ Latency = Cand.SU->getHeight();

+ break;

+ case BotHeightReduce:

+ Latency = Cand.SU->getHeight();

+ break;

+ case BotPathReduce:

+ Latency = Cand.SU->getDepth();

+ break;

+ }

+ dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);

+ if (P.isValid())

+ dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())

+ << ":" << P.getUnitInc() << " ";

+ else

+ dbgs() << " ";

+ if (ResIdx)

+ dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";

+ else

+ dbgs() << " ";

+ if (Latency)

+ dbgs() << " " << Latency << " cycles ";

+ else

+ dbgs() << " ";

+ dbgs() << '\n';

+#endif

+namespace llvm {

+/// Return true if this heuristic determines order.

+bool tryLess(int TryVal, int CandVal,

+ GenericSchedulerBase::SchedCandidate &TryCand,

+ GenericSchedulerBase::SchedCandidate &Cand,

+ GenericSchedulerBase::CandReason Reason) {

+ if (TryVal < CandVal) {

+ TryCand.Reason = Reason;

+ return true;

+ }

+ if (TryVal > CandVal) {

+ if (Cand.Reason > Reason)

+ Cand.Reason = Reason;

+ return true;

+ }

+ return false;

+bool tryGreater(int TryVal, int CandVal,

+ GenericSchedulerBase::SchedCandidate &TryCand,

+ GenericSchedulerBase::SchedCandidate &Cand,

+ GenericSchedulerBase::CandReason Reason) {

+ if (TryVal > CandVal) {

+ TryCand.Reason = Reason;

+ return true;

+ }

+ if (TryVal < CandVal) {

+ if (Cand.Reason > Reason)

+ Cand.Reason = Reason;

+ return true;

+ }

+ return false;

+bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, // Latency comparison of TryCand against Cand for Zone; true if it decided the order.

+ GenericSchedulerBase::SchedCandidate &Cand,

+ SchedBoundary &Zone) {

+ if (Zone.isTop()) {

+ if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { // Depth is only compared when Cand's depth exceeds the zone's scheduled latency.

+ if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),

+ TryCand, Cand, GenericSchedulerBase::TopDepthReduce))

+ return true;

+ }

+ if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), // Otherwise prefer the larger height.

+ TryCand, Cand, GenericSchedulerBase::TopPathReduce))

+ return true;

+ } else { // Bottom zone: same two checks with height and depth swapped.

+ if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {

+ if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),

+ TryCand, Cand, GenericSchedulerBase::BotHeightReduce))

+ return true;

+ }

+ if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),

+ TryCand, Cand, GenericSchedulerBase::BotPathReduce))

+ return true;

+ }

+ return false; // Neither comparison separated the two candidates.

+} // end namespace llvm

+static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) { // Debug trace: prints "Pick Top"/"Pick Bot" followed by the reason string.

+ LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")

+ << GenericSchedulerBase::getReasonStr(Reason) << '\n');

+static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) { // Convenience overload: traces the candidate's own Reason and AtTop.

+ tracePick(Cand.Reason, Cand.AtTop);

+void GenericScheduler::initialize(ScheduleDAGMI *dag) { // Binds the strategy to dag and resets the model, zones, hazard recognizers and cached candidates.

+ assert(dag->hasVRegLiveness() &&

+ "(PreRA)GenericScheduler needs vreg liveness");

+ DAG = static_cast<ScheduleDAGMILive*>(dag); // NOTE(review): unchecked downcast; presumably justified by the assert above — confirm.

+ SchedModel = DAG->getSchedModel();

+ TRI = DAG->TRI;

+ Rem.init(DAG, SchedModel);

+ Top.init(DAG, SchedModel, &Rem); // Both zones share the same Rem object.

+ Bot.init(DAG, SchedModel, &Rem);

+ // Initialize resource counts.

+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or

+ // are disabled, then these HazardRecs will be disabled.

+ const InstrItineraryData *Itin = SchedModel->getInstrItineraries();

+ if (!Top.HazardRec) { // Create a recognizer only when the zone does not already have one.

+ Top.HazardRec =

+ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(

+ Itin, DAG);

+ }

+ if (!Bot.HazardRec) {

+ Bot.HazardRec =

+ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(

+ Itin, DAG);

+ }

+ TopCand.SU = nullptr; // Drop any candidates cached from a previous use.

+ BotCand.SU = nullptr;

+/// Initialize the per-region scheduling policy: defaults first, then the subtarget override, then command line options.

+void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,

+ MachineBasicBlock::iterator End,

+ unsigned NumRegionInstrs) {

+ const MachineFunction &MF = *Begin->getMF();

+ const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();

+ // Avoid setting up the register pressure tracker for small regions to save

+ // compile time. As a rough heuristic, only track pressure when the number of

+ // schedulable instructions exceeds half the integer register file.

+ RegionPolicy.ShouldTrackPressure = true; // Default kept when no type in the loop below is legal.

+ for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) { // Walks the value types from i32 downwards, stopping before i1.

+ MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;

+ if (TLI->isTypeLegal(LegalIntVT)) {

+ unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(

+ TLI->getRegClassFor(LegalIntVT));

+ RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); // No break follows: the last legal type visited has the final say.

+ }

+ // For generic targets, we default to bottom-up, because it's simpler and more

+ // compile-time optimizations have been implemented in that direction.

+ RegionPolicy.OnlyBottomUp = true;

+ // Allow the subtarget to override default policy.

+ MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);

+ // After subtarget overrides, apply command line options.

+ if (!EnableRegPressure) {

+ RegionPolicy.ShouldTrackPressure = false;

+ RegionPolicy.ShouldTrackLaneMasks = false;

+ }

+ // Check -misched-topdown/bottomup can force or unforce scheduling direction.

+ // e.g. -misched-bottomup=false allows scheduling in both directions.

+ assert((!ForceTopDown || !ForceBottomUp) &&

+ "-misched-topdown incompatible with -misched-bottomup");

+ if (ForceBottomUp.getNumOccurrences() > 0) { // Applied only when the option was actually given.

+ RegionPolicy.OnlyBottomUp = ForceBottomUp;

+ if (RegionPolicy.OnlyBottomUp)

+ RegionPolicy.OnlyTopDown = false; // Forcing one direction clears the other.

+ }

+ if (ForceTopDown.getNumOccurrences() > 0) {

+ RegionPolicy.OnlyTopDown = ForceTopDown;

+ if (RegionPolicy.OnlyTopDown)

+ RegionPolicy.OnlyBottomUp = false;

+ }

+void GenericScheduler::dumpPolicy() const { // Prints the three RegionPolicy flags to dbgs(); the body is empty unless dumping is compiled in.

+ // Cannot completely remove virtual function even in release mode.

+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

+ dbgs() << "GenericScheduler RegionPolicy: "

+ << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure

+ << " OnlyTopDown=" << RegionPolicy.OnlyTopDown

+ << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp

+ << "\n";

+#endif

+/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic

+/// critical path by more cycles than it takes to drain the instruction buffer.

+/// We estimate an upper bound on in-flight instructions as:

+///

+/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )

+/// InFlightIterations = AcyclicPath / CyclesPerIteration

+/// InFlightResources = InFlightIterations * LoopResources

+///

+/// TODO: Check execution resources in addition to IssueCount.

+void GenericScheduler::checkAcyclicLatency() {

+ if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath) // Nothing to do without a cyclic path, or when it is not shorter than the acyclic one.

+ return;

+ // Scaled number of cycles per loop iteration.

+ unsigned IterCount =

+ std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),

+ Rem.RemIssueCount);

+ // Scaled acyclic critical path.

+ unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();

+ // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop

+ unsigned InFlightCount =

+ (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount; // Adding IterCount-1 makes the division round up.

+ unsigned BufferLimit =

+ SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();

+ Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;

+ LLVM_DEBUG(

+ dbgs() << "IssueCycles="

+ << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "

+ << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()

+ << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount

+ << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()

+ << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";

+ if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");

+void GenericScheduler::registerRoots() { // Computes Rem.CriticalPath and, when enabled, the cyclic critical path.

+ Rem.CriticalPath = DAG->ExitSU.getDepth();

+ // Some roots may not feed into ExitSU. Check all of them in case.

+ for (const SUnit *SU : Bot.Available) { // Keep the maximum depth seen.

+ if (SU->getDepth() > Rem.CriticalPath)

+ Rem.CriticalPath = SU->getDepth();

+ }

+ LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');

+ if (DumpCriticalPathLength) { // Unlike the debug trace above, this goes to errs().

+ errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";

+ }

+ if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) { // Cyclic analysis is skipped when the model reports a zero micro-op buffer.

+ Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();

+ checkAcyclicLatency();

+ }

+namespace llvm {

+bool tryPressure(const PressureChange &TryP, // Compares two pressure changes; true if that comparison decided the order.

+ const PressureChange &CandP,

+ GenericSchedulerBase::SchedCandidate &TryCand,

+ GenericSchedulerBase::SchedCandidate &Cand,

+ GenericSchedulerBase::CandReason Reason,

+ const TargetRegisterInfo *TRI,

+ const MachineFunction &MF) {

+ // If one candidate decreases and the other increases, go with it.

+ // Invalid candidates have UnitInc==0.

+ if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,

+ Reason)) {

+ return true;

+ }

+ // Do not compare the magnitude of pressure changes between top and bottom

+ // boundary.

+ if (Cand.AtTop != TryCand.AtTop)

+ return false;

+ // If both candidates affect the same set in the same boundary, go with the

+ // smallest increase.

+ unsigned TryPSet = TryP.getPSetOrMax();

+ unsigned CandPSet = CandP.getPSetOrMax();

+ if (TryPSet == CandPSet) {

+ return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,

+ Reason);

+ }

+ int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) : // An invalid change gets the maximum int as its rank.

+ std::numeric_limits<int>::max();

+ int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :

+ std::numeric_limits<int>::max();

+ // If the candidates are decreasing pressure, reverse priority.

+ if (TryP.getUnitInc() < 0) // Checking TryP alone suffices: the first tryGreater tied, so both sides agree on "decreasing".

+ std::swap(TryRank, CandRank);

+ return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);

+unsigned getWeakLeft(const SUnit *SU, bool isTop) { // Selects the weak-edge counter for the direction: WeakPredsLeft at the top, WeakSuccsLeft at the bottom.

+ return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;

+/// Minimize physical register live ranges. Regalloc wants them adjacent to

+/// their physreg def/use. Returns a bias of 1, -1, or 0 (no bias).

+///

+/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf

+/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled

+/// with the operation that produces or consumes the physreg. We'll do this when

+/// regalloc has support for parallel copies.

+int biasPhysReg(const SUnit *SU, bool isTop) {

+ const MachineInstr *MI = SU->getInstr();

+ if (MI->isCopy()) {

+ unsigned ScheduledOper = isTop ? 1 : 0; // Operand indices 1 and 0 swap roles with the scheduling direction.

+ unsigned UnscheduledOper = isTop ? 0 : 1;

+ // If we have already scheduled the physreg producer/consumer, immediately

+ // schedule the copy.

+ if (TargetRegisterInfo::isPhysicalRegister(

+ MI->getOperand(ScheduledOper).getReg()))

+ return 1;

+ // If the physreg is at the boundary, defer it. Otherwise schedule it

+ // immediately to free the dependent. We can hoist the copy later.

+ bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;

+ if (TargetRegisterInfo::isPhysicalRegister(

+ MI->getOperand(UnscheduledOper).getReg()))

+ return AtBoundary ? -1 : 1;

+ }

+ if (MI->isMoveImmediate()) {

+ // If we have a move immediate and all successors have been assigned, bias

+ // towards scheduling this later. Make sure all register defs are to

+ // physical registers.

+ bool DoBias = true;

+ for (const MachineOperand &Op : MI->defs()) {

+ if (Op.isReg() && !TargetRegisterInfo::isPhysicalRegister(Op.getReg())) { // A single non-physical register def cancels the bias.

+ DoBias = false;

+ break;

+ }

+ if (DoBias)

+ return isTop ? -1 : 1;

+ }

+ return 0; // No physreg-related preference.

+} // end namespace llvm

+void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU, // Fills Cand.SU/AtTop and, when pressure is tracked, Cand.RPDelta.

+ bool AtTop,

+ const RegPressureTracker &RPTracker,

+ RegPressureTracker &TempTracker) {

+ Cand.SU = SU;

+ Cand.AtTop = AtTop;

+ if (DAG->isTrackingPressure()) {

+ if (AtTop) {

+ TempTracker.getMaxDownwardPressureDelta(

+ Cand.SU->getInstr(),

+ Cand.RPDelta,

+ DAG->getRegionCriticalPSets(),

+ DAG->getRegPressure().MaxSetPressure);

+ } else {

+ if (VerifyScheduling) { // Under VerifyScheduling the delta comes from the mutable TempTracker rather than the const RPTracker.

+ TempTracker.getMaxUpwardPressureDelta(

+ Cand.SU->getInstr(),

+ &DAG->getPressureDiff(Cand.SU),

+ Cand.RPDelta,

+ DAG->getRegionCriticalPSets(),

+ DAG->getRegPressure().MaxSetPressure);

+ } else {

+ RPTracker.getUpwardPressureDelta(

+ Cand.SU->getInstr(),

+ DAG->getPressureDiff(Cand.SU),

+ Cand.RPDelta,

+ DAG->getRegionCriticalPSets(),

+ DAG->getRegPressure().MaxSetPressure);

+ }

+ LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs() // Trace only candidates whose Excess change is valid.

+ << " Try SU(" << Cand.SU->NodeNum << ") "

+ << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"

+ << Cand.RPDelta.Excess.getUnitInc() << "\n");

+/// Apply a set of heuristics to a new candidate. Heuristics are currently

+/// hierarchical. This may be more efficient than a graduated cost model because

+/// we don't need to evaluate all aspects of the model for each node in the

+/// queue. But it's really done to make the heuristics easier to debug and

+/// statistically analyze.

+///

+/// \param Cand provides the policy and current best candidate.

+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.

+/// \param Zone describes the scheduled zone that we are extending, or nullptr

+/// if Cand is from a different zone than TryCand.

+void GenericScheduler::tryCandidate(SchedCandidate &Cand,

+ SchedCandidate &TryCand,

+ SchedBoundary *Zone) const {

+ // Initialize the candidate if needed.

+ if (!Cand.isValid()) { // No best candidate yet: TryCand wins by default.

+ TryCand.Reason = NodeOrder;

+ return;

+ }

+ // Bias PhysReg Defs and copies to their uses and defined respectively.

+ if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),

+ biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))

+ return;

+ // Avoid exceeding the target's limit.

+ if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,

+ Cand.RPDelta.Excess,

+ TryCand, Cand, RegExcess, TRI,

+ DAG->MF))

+ return;

+ // Avoid increasing the max critical pressure in the scheduled region.

+ if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,

+ Cand.RPDelta.CriticalMax,

+ TryCand, Cand, RegCritical, TRI,

+ DAG->MF))

+ return;

+ // We only compare a subset of features when comparing nodes between

+ // Top and Bottom boundary. Some properties are simply incomparable, in many

+ // other instances we should only override the other boundary if something

+ // is a clear good pick on one boundary. Skip heuristics that are more

+ // "tie-breaking" in nature.

+ bool SameBoundary = Zone != nullptr;

+ if (SameBoundary) {

+ // For loops that are acyclic path limited, aggressively schedule for

+ // latency. Within a single cycle, whenever CurrMOps > 0, allow normal

+ // heuristics to take precedence.

+ if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&

+ tryLatency(TryCand, Cand, *Zone))

+ return;

+ // Prioritize instructions that read unbuffered resources by stall cycles.

+ if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),

+ Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))

+ return;

+ }

+ // Keep clustered nodes together to encourage downstream peephole

+ // optimizations which may reduce resource requirements.

+ //

+ // This is a best effort to set things up for a post-RA pass. Optimizations

+ // like generating loads of multiple registers should ideally be done within

+ // the scheduler pass by combining the loads during DAG postprocessing.

+ const SUnit *CandNextClusterSU =

+ Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();

+ const SUnit *TryCandNextClusterSU =

+ TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();

+ if (tryGreater(TryCand.SU == TryCandNextClusterSU, // The bools compare as 0/1: being the next cluster node wins.

+ Cand.SU == CandNextClusterSU,

+ TryCand, Cand, Cluster))

+ return;

+ if (SameBoundary) {

+ // Weak edges are for clustering and other constraints.

+ if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),

+ getWeakLeft(Cand.SU, Cand.AtTop),

+ TryCand, Cand, Weak))

+ return;

+ }

+ // Avoid increasing the max pressure of the entire region.

+ if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,

+ Cand.RPDelta.CurrentMax,

+ TryCand, Cand, RegMax, TRI,

+ DAG->MF))

+ return;

+ if (SameBoundary) {

+ // Avoid critical resource consumption and balance the schedule.

+ TryCand.initResourceDelta(DAG, SchedModel); // TryCand's resource delta is computed here, right before its first use.

+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,

+ TryCand, Cand, ResourceReduce))

+ return;

+ if (tryGreater(TryCand.ResDelta.DemandedResources,

+ Cand.ResDelta.DemandedResources,

+ TryCand, Cand, ResourceDemand))

+ return;

+ // Avoid serializing long latency dependence chains.

+ // For acyclic path limited loops, latency was already checked above.

+ if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&

+ !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))

+ return;

+ // Fall through to original instruction order.

+ if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) // Top zone prefers the lower NodeNum, bottom zone the higher.

+ || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {

+ TryCand.Reason = NodeOrder;

+ }

+/// Pick the best candidate from the queue.

+///

+/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during

+/// DAG building. To adjust for the current scheduling location we need to

+/// maintain the number of vreg uses remaining to be top-scheduled.

+void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,

+ const CandPolicy &ZonePolicy,

+ const RegPressureTracker &RPTracker,

+ SchedCandidate &Cand) {

+ // getMaxPressureDelta temporarily modifies the tracker.

+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

+ ReadyQueue &Q = Zone.Available;

+ for (SUnit *SU : Q) { // Each available SU gets a fresh TryCand that is compared against the running best in Cand.

+ SchedCandidate TryCand(ZonePolicy);

+ initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);

+ // Pass SchedBoundary only when comparing nodes from the same boundary.

+ SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;

+ tryCandidate(Cand, TryCand, ZoneArg);

+ if (TryCand.Reason != NoCand) { // A reason other than NoCand means TryCand replaces Cand as the best.

+ // Initialize resource delta if needed in case future heuristics query it.

+ if (TryCand.ResDelta == SchedResourceDelta())

+ TryCand.initResourceDelta(DAG, SchedModel);

+ Cand.setBest(TryCand);

+ LLVM_DEBUG(traceCandidate(Cand));

+ }

+/// Pick the best candidate node from either the top or bottom queue. Sets IsTopNode to the zone the returned node came from.

+SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {

+ // Schedule as far as possible in the direction of no choice. This is most

+ // efficient, but also provides the best heuristics for CriticalPSets.

+ if (SUnit *SU = Bot.pickOnlyChoice()) { // The bottom zone is consulted before the top zone.

+ IsTopNode = false;

+ tracePick(Only1, false);

+ return SU;

+ }

+ if (SUnit *SU = Top.pickOnlyChoice()) {

+ IsTopNode = true;

+ tracePick(Only1, true);

+ return SU;

+ }

+ // Set the bottom-up policy based on the state of the current bottom zone and

+ // the instructions outside the zone, including the top zone.

+ CandPolicy BotPolicy;

+ setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);

+ // Set the top-down policy based on the state of the current top zone and

+ // the instructions outside the zone, including the bottom zone.

+ CandPolicy TopPolicy;

+ setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

+ // See if BotCand is still valid (because we previously scheduled from Top).

+ LLVM_DEBUG(dbgs() << "Picking from Bot:\n");

+ if (!BotCand.isValid() || BotCand.SU->isScheduled || // Re-pick when the cached candidate is missing, already scheduled, or its policy changed.

+ BotCand.Policy != BotPolicy) {

+ BotCand.reset(CandPolicy());

+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);

+ assert(BotCand.Reason != NoCand && "failed to find the first candidate");

+ } else {

+ LLVM_DEBUG(traceCandidate(BotCand));

+#ifndef NDEBUG

+ if (VerifyScheduling) { // Verification only: a fresh pick must match the cached candidate.

+ SchedCandidate TCand;

+ TCand.reset(CandPolicy());

+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);

+ assert(TCand.SU == BotCand.SU &&

+ "Last pick result should correspond to re-picking right now");

+ }

+#endif

+ }

+ // Check if the top Q has a better candidate.

+ LLVM_DEBUG(dbgs() << "Picking from Top:\n");

+ if (!TopCand.isValid() || TopCand.SU->isScheduled || // Same caching rule as for BotCand above.

+ TopCand.Policy != TopPolicy) {

+ TopCand.reset(CandPolicy());

+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);

+ assert(TopCand.Reason != NoCand && "failed to find the first candidate");

+ } else {

+ LLVM_DEBUG(traceCandidate(TopCand));

+#ifndef NDEBUG

+ if (VerifyScheduling) {

+ SchedCandidate TCand;

+ TCand.reset(CandPolicy());

+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);

+ assert(TCand.SU == TopCand.SU &&

+ "Last pick result should correspond to re-picking right now");

+ }

+#endif

+ }

+ // Pick best from BotCand and TopCand.

+ assert(BotCand.isValid());

+ assert(TopCand.isValid());

+ SchedCandidate Cand = BotCand; // BotCand is the incumbent; TopCand has to beat it.

+ TopCand.Reason = NoCand; // Cleared so that any other value afterwards means TopCand won.

+ tryCandidate(Cand, TopCand, nullptr); // nullptr zone: the two candidates come from different boundaries.

+ if (TopCand.Reason != NoCand) {

+ Cand.setBest(TopCand);

+ LLVM_DEBUG(traceCandidate(Cand));

+ }

+ IsTopNode = Cand.AtTop;

+ tracePick(Cand);

+ return Cand.SU;

+/// Pick the best node to balance the schedule. Implements MachineSchedStrategy. Returns nullptr once top and bottom have met.

+SUnit *GenericScheduler::pickNode(bool &IsTopNode) {

+ if (DAG->top() == DAG->bottom()) {

+ assert(Top.Available.empty() && Top.Pending.empty() &&

+ Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");

+ return nullptr;

+ }

+ SUnit *SU;

+ do { // Repeat until the picked SU is one that has not been scheduled yet.

+ if (RegionPolicy.OnlyTopDown) {

+ SU = Top.pickOnlyChoice();

+ if (!SU) { // No single forced choice: run the heuristics over the top queue.

+ CandPolicy NoPolicy;

+ TopCand.reset(NoPolicy);

+ pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);

+ assert(TopCand.Reason != NoCand && "failed to find a candidate");

+ tracePick(TopCand);

+ SU = TopCand.SU;

+ }

+ IsTopNode = true;

+ } else if (RegionPolicy.OnlyBottomUp) {

+ SU = Bot.pickOnlyChoice();

+ if (!SU) { // Same as the top-down branch, applied to the bottom queue.

+ CandPolicy NoPolicy;

+ BotCand.reset(NoPolicy);

+ pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);

+ assert(BotCand.Reason != NoCand && "failed to find a candidate");

+ tracePick(BotCand);

+ SU = BotCand.SU;

+ }

+ IsTopNode = false;

+ } else {

+ SU = pickNodeBidirectional(IsTopNode);

+ }

+ } while (SU->isScheduled);

+ if (SU->isTopReady()) // The node is removed from every ready queue it is currently in.

+ Top.removeReady(SU);

+ if (SU->isBottomReady())

+ Bot.removeReady(SU);

+ LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "

+ << *SU->getInstr());

+ return SU;

+void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) { // Moves qualifying physreg copies / move-immediates next to SU's instruction.

+ MachineBasicBlock::iterator InsertPos = SU->getInstr();

+ if (!isTop) // Bottom-up: insert after SU's instruction instead of before it.

+ ++InsertPos;

+ SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;

+ // Find already scheduled copies with a single physreg dependence and move

+ // them just above the scheduled instruction.

+ for (SDep &Dep : Deps) {

+ if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg())) // Only data dependences on a physical register are considered.

+ continue;

+ SUnit *DepSU = Dep.getSUnit();

+ if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) // Skip nodes that have more than one edge on the side facing SU.

+ continue;

+ MachineInstr *Copy = DepSU->getInstr();

+ if (!Copy->isCopy() && !Copy->isMoveImmediate())

+ continue;

+ LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";

+ DAG->dumpNode(*Dep.getSUnit()));

+ DAG->moveInstruction(Copy, InsertPos);

+ }

+/// Update the scheduler's state after scheduling a node. This is the same node

+/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to

+/// update its state based on the current cycle before MachineSchedStrategy

+/// does.

+///

+/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling

+/// them here. See comments in biasPhysReg.

+void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {

+ if (IsTopNode) {

+ SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); // The ready cycle is never allowed to fall below the zone's current cycle.

+ Top.bumpNode(SU);

+ if (SU->hasPhysRegUses) // Top zone reschedules for physreg uses, bottom zone for physreg defs.

+ reschedulePhysReg(SU, true);

+ } else {

+ SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());

+ Bot.bumpNode(SU);

+ if (SU->hasPhysRegDefs)

+ reschedulePhysReg(SU, false);

+ }

+/// Create the standard converging machine scheduler. This will be used as the

+/// default scheduler if the target does not set a default.

+ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {

+ ScheduleDAGMILive *DAG =

+ new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C)); // Raw pointer is returned to the caller; ownership is not visible here.

+ // Register DAG post-processors.

+ //

+ // FIXME: extend the mutation API to allow earlier mutations to instantiate

+ // data and pass it to later mutations. Have a single mutation that gathers

+ // the interesting nodes in one pass.

+ DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));

+ return DAG;

+static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) { // Factory used for the "converge" registry entry. NOTE(review): "Convering" looks like a typo for "Converging".

+ return createGenericSchedLive(C);

+static MachineSchedRegistry // Registers the generic scheduler factory under the name "converge".

+GenericSchedRegistry("converge", "Standard converging scheduler.",

+ createConveringSched);

+//===----------------------------------------------------------------------===//

+// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.

+//===----------------------------------------------------------------------===//

+void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) { // Binds the strategy to Dag; only the Top zone is initialized here.

+ DAG = Dag;

+ SchedModel = DAG->getSchedModel();

+ TRI = DAG->TRI;

+ Rem.init(DAG, SchedModel);

+ Top.init(DAG, SchedModel, &Rem);

+ BotRoots.clear();

+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty,

+ // or are disabled, then these HazardRecs will be disabled.

+ const InstrItineraryData *Itin = SchedModel->getInstrItineraries();

+ if (!Top.HazardRec) { // Create a recognizer only when the zone does not already have one.

+ Top.HazardRec =

+ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(

+ Itin, DAG);

+ }

+void PostGenericScheduler::registerRoots() { // Computes Rem.CriticalPath from ExitSU and the recorded bottom roots.

+ Rem.CriticalPath = DAG->ExitSU.getDepth();

+ // Some roots may not feed into ExitSU. Check all of them in case.

+ for (const SUnit *SU : BotRoots) { // Keep the maximum depth seen.

+ if (SU->getDepth() > Rem.CriticalPath)

+ Rem.CriticalPath = SU->getDepth();

+ }

+ LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');

+ if (DumpCriticalPathLength) { // Unlike the debug trace above, this goes to errs().

+ errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";

+ }

+/// Apply a set of heuristics to a new candidate for PostRA scheduling.

+///

+/// \param Cand provides the policy and current best candidate.

+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.

+void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,

+ SchedCandidate &TryCand) {

+ // Initialize the candidate if needed.

+ if (!Cand.isValid()) { // No best candidate yet: TryCand wins by default.

+ TryCand.Reason = NodeOrder;

+ return;

+ }

+ // Prioritize instructions that read unbuffered resources by stall cycles.

+ if (tryLess(Top.getLatencyStallCycles(TryCand.SU),

+ Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))

+ return;

+ // Keep clustered nodes together.

+ if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), // The bools compare as 0/1: being the next cluster successor wins.

+ Cand.SU == DAG->getNextClusterSucc(),

+ TryCand, Cand, Cluster))

+ return;

+ // Avoid critical resource consumption and balance the schedule.

+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,

+ TryCand, Cand, ResourceReduce))

+ return;

+ if (tryGreater(TryCand.ResDelta.DemandedResources,

+ Cand.ResDelta.DemandedResources,

+ TryCand, Cand, ResourceDemand))

+ return;

+ // Avoid serializing long latency dependence chains.

+ if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {

+ return;

+ }

+ // Fall through to original instruction order.

+ if (TryCand.SU->NodeNum < Cand.SU->NodeNum) // The lower NodeNum wins the final tie-break.

+ TryCand.Reason = NodeOrder;

+void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { // Scans Top.Available and leaves the best candidate in Cand.

+ ReadyQueue &Q = Top.Available;

+ for (SUnit *SU : Q) {

+ SchedCandidate TryCand(Cand.Policy);

+ TryCand.SU = SU;

+ TryCand.AtTop = true;

+ TryCand.initResourceDelta(DAG, SchedModel); // Computed up front because tryCandidate reads ResDelta.

+ tryCandidate(Cand, TryCand);

+ if (TryCand.Reason != NoCand) { // A reason other than NoCand means TryCand replaces Cand as the best.

+ Cand.setBest(TryCand);

+ LLVM_DEBUG(traceCandidate(Cand));

+ }

+/// Pick the next node to schedule. Always picks from the top zone; returns nullptr once top and bottom have met.

+SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {

+ if (DAG->top() == DAG->bottom()) {

+ assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");

+ return nullptr;

+ }

+ SUnit *SU;

+ do { // Repeat until the picked SU is one that has not been scheduled yet.

+ SU = Top.pickOnlyChoice();

+ if (SU) {

+ tracePick(Only1, true);

+ } else {

+ CandPolicy NoPolicy;

+ SchedCandidate TopCand(NoPolicy);

+ // Set the top-down policy based on the state of the current top zone and

+ // the instructions outside the zone, including the bottom zone.

+ setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);

+ pickNodeFromQueue(TopCand);

+ assert(TopCand.Reason != NoCand && "failed to find a candidate");

+ tracePick(TopCand);

+ SU = TopCand.SU;

+ }

+ } while (SU->isScheduled);

+ IsTopNode = true;

+ Top.removeReady(SU);

+ LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "

+ << *SU->getInstr());

+ return SU;

+/// Called after ScheduleDAGMI has scheduled an instruction and updated

+/// scheduled/remaining flags in the DAG nodes.

+void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { // IsTopNode is not read; the top zone is always updated.

+ SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); // The ready cycle is never allowed to fall below the zone's current cycle.

+ Top.bumpNode(SU);

+ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) { // Builds a ScheduleDAGMI driven by PostGenericScheduler, with kill-flag removal enabled.

+ return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),

+ /*RemoveKillFlags=*/true);

+//===----------------------------------------------------------------------===//

+// ILP Scheduler. Currently for experimental analysis of heuristics.

+//===----------------------------------------------------------------------===//

+namespace {

+/// Order nodes by the ILP metric. DFSResult and ScheduledTrees must be set before the comparator is called.

+struct ILPOrder {

+ const SchedDFSResult *DFSResult = nullptr;

+ const BitVector *ScheduledTrees = nullptr;

+ bool MaximizeILP; // Selects the direction of the final ILP comparison.

+ ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}

+ /// Apply a less-than relation on node priority.

+ ///

+ /// (Return true if A comes after B in the Q.)

+ bool operator()(const SUnit *A, const SUnit *B) const {

+ unsigned SchedTreeA = DFSResult->getSubtreeID(A);

+ unsigned SchedTreeB = DFSResult->getSubtreeID(B);

+ if (SchedTreeA != SchedTreeB) {

+ // Unscheduled trees have lower priority.

+ if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))

+ return ScheduledTrees->test(SchedTreeB);

+ // Trees with shallower connections have lower priority.

+ if (DFSResult->getSubtreeLevel(SchedTreeA)

+ != DFSResult->getSubtreeLevel(SchedTreeB)) {

+ return DFSResult->getSubtreeLevel(SchedTreeA)

+ < DFSResult->getSubtreeLevel(SchedTreeB);

+ }

+ if (MaximizeILP) // Tie-break on the nodes' own ILP values.

+ return DFSResult->getILP(A) < DFSResult->getILP(B);

+ else

+ return DFSResult->getILP(A) > DFSResult->getILP(B);

+ }

+};

+/// Schedule based on the ILP metric. ReadyQ is kept as a heap ordered by Cmp.

+class ILPScheduler : public MachineSchedStrategy {

+ ScheduleDAGMILive *DAG = nullptr;

+ ILPOrder Cmp;

+ std::vector<SUnit*> ReadyQ;

+public:

+ ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}

+ void initialize(ScheduleDAGMI *dag) override {

+ assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");

+ DAG = static_cast<ScheduleDAGMILive*>(dag); // NOTE(review): unchecked downcast; presumably justified by the assert above — confirm.

+ DAG->computeDFSResult();

+ Cmp.DFSResult = DAG->getDFSResult(); // Point the comparator at this DAG's DFS data.

+ Cmp.ScheduledTrees = &DAG->getScheduledTrees();

+ ReadyQ.clear();

+ }

+ void registerRoots() override {

+ // Restore the heap in ReadyQ with the updated DFS results.

+ std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);

+ }

+ /// Implement MachineSchedStrategy interface.

+ /// -----------------------------------------

+ /// Callback to select the highest priority node from the ready Q.

+ SUnit *pickNode(bool &IsTopNode) override {

+ if (ReadyQ.empty()) return nullptr;

+ std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); // Moves the top of the heap to the back, where it is taken off below.

+ SUnit *SU = ReadyQ.back();

+ ReadyQ.pop_back();

+ IsTopNode = false; // This strategy only ever reports bottom-up picks.

+ LLVM_DEBUG(dbgs() << "Pick node "

+ << "SU(" << SU->NodeNum << ") "

+ << " ILP: " << DAG->getDFSResult()->getILP(SU)

+ << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)

+ << " @"

+ << DAG->getDFSResult()->getSubtreeLevel(

+ DAG->getDFSResult()->getSubtreeID(SU))

+ << '\n'

+ << "Scheduling " << *SU->getInstr());

+ return SU;

+ }

+ /// Scheduler callback to notify that a new subtree is scheduled.

+ void scheduleTree(unsigned SubtreeID) override {

+ std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); // SubtreeID is not read; the whole heap is rebuilt.

+ }

+ /// Callback after a node is scheduled. Mark a newly scheduled tree, notify

+ /// DFSResults, and resort the priority Q. NOTE(review): the body below only asserts; the described work is not done here.

+ void schedNode(SUnit *SU, bool IsTopNode) override {

+ assert(!IsTopNode && "SchedDFSResult needs bottom-up");

+ }

+ void releaseTopNode(SUnit *) override { /*only called for top roots*/ }

+ void releaseBottomNode(SUnit *SU) override {

+ ReadyQ.push_back(SU);

+ std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); // Sift the appended node into heap position.

+ }

+};

+} // end anonymous namespace

+static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { // Factory for the "ilpmax" entry: ILPScheduler with MaximizeILP = true.

+ return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));

+static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { // Factory for the "ilpmin" entry: ILPScheduler with MaximizeILP = false.

+ return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));

+static MachineSchedRegistry ILPMaxRegistry( // Registers createILPMaxScheduler under the name "ilpmax".

+ "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);

+static MachineSchedRegistry ILPMinRegistry( // Registers createILPMinScheduler under the name "ilpmin".

+ "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);

+//===----------------------------------------------------------------------===//

+// Machine Instruction Shuffler for Correctness Testing

+//===----------------------------------------------------------------------===//

+#ifndef NDEBUG

+namespace {

+/// Apply a less-than relation on the node order, which corresponds to the

+/// instruction order prior to scheduling. IsReverse implements greater-than.

+template<bool IsReverse>

+struct SUnitOrder {

+ bool operator()(SUnit *A, SUnit *B) const {

+ if (IsReverse) // Template constant: each instantiation only ever takes one branch.

+ return A->NodeNum > B->NodeNum;

+ else

+ return A->NodeNum < B->NodeNum;

+ }

+};

+/// Scheduling strategy that reorders instructions as much as possible.
+///
+/// Released nodes are kept in two priority queues, one per direction, and
+/// pickNode() draws from the queue of the current direction. When constructed
+/// with alternate == true the direction flips after every successful pick.
+class InstructionShuffler : public MachineSchedStrategy {
+  bool IsAlternating;
+  bool IsTopDown;
+
+  // TopQ is ordered by less-than (SUnitOrder<false>), so the node with the
+  // highest number has the highest priority and the latest instructions are
+  // scheduled first.
+  PriorityQueue<SUnit *, std::vector<SUnit *>, SUnitOrder<false>> TopQ;
+
+  // BottomQ uses greater-than as the queue priority for bottom-up scheduling.
+  PriorityQueue<SUnit *, std::vector<SUnit *>, SUnitOrder<true>> BottomQ;
+
+  /// Pop entries off \p Queue until one that is not yet scheduled turns up.
+  /// \returns that node, or nullptr once the queue has been drained.
+  template <typename QueueT> static SUnit *popUnscheduled(QueueT &Queue) {
+    while (!Queue.empty()) {
+      SUnit *Candidate = Queue.top();
+      Queue.pop();
+      if (!Candidate->isScheduled)
+        return Candidate;
+    }
+    return nullptr;
+  }
+
+public:
+  InstructionShuffler(bool alternate, bool topdown)
+      : IsAlternating(alternate), IsTopDown(topdown) {}
+
+  /// Drop anything left in either queue before a new region is scheduled.
+  void initialize(ScheduleDAGMI *) override {
+    TopQ.clear();
+    BottomQ.clear();
+  }
+
+  /// Implement MachineSchedStrategy interface.
+  /// -----------------------------------------
+
+  /// Pick the next unscheduled node from the queue of the current direction.
+  ///
+  /// \param[out] IsTopNode  set to the direction the node was taken from; left
+  ///                        untouched when nullptr is returned.
+  /// \returns the chosen node, or nullptr if the current queue holds no
+  ///          unscheduled node (the direction is not flipped in that case).
+  SUnit *pickNode(bool &IsTopNode) override {
+    const bool FromTop = IsTopDown;
+    SUnit *Picked = FromTop ? popUnscheduled(TopQ) : popUnscheduled(BottomQ);
+    if (!Picked)
+      return nullptr;
+
+    IsTopNode = FromTop;
+    if (IsAlternating)
+      IsTopDown = !FromTop;
+    return Picked;
+  }
+
+  /// No per-node bookkeeping is needed.
+  void schedNode(SUnit *SU, bool IsTopNode) override {}
+
+  void releaseTopNode(SUnit *SU) override { TopQ.push(SU); }
+
+  void releaseBottomNode(SUnit *SU) override { BottomQ.push(SU); }
+};

+} // end anonymous namespace

+/// Factory for the "shuffle" scheduler.
+///
+/// When neither ForceTopDown nor ForceBottomUp is set, the shuffler alternates
+/// directions, starting top-down. Otherwise it stays in the forced direction.
+/// Setting both options trips the assertion below.
+///
+/// \param C  scheduling context forwarded to the ScheduleDAGMILive constructor.
+/// \returns a newly allocated DAG scheduler (allocated with `new`).
+static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
+  bool Alternate = !ForceTopDown && !ForceBottomUp;
+  bool TopDown = !ForceBottomUp;
+  // TopDown is false only when ForceBottomUp is set, so this fires exactly
+  // when both directions were forced at once.
+  assert((TopDown || !ForceTopDown) &&
+         "-misched-topdown incompatible with -misched-bottomup");
+  return new ScheduleDAGMILive(
+      C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
+}

+// Registry entry that exposes the shuffler factory under the name "shuffle".
+static MachineSchedRegistry
+    ShufflerRegistry("shuffle",
+                     "Shuffle machine instructions alternating directions",
+                     createInstructionShuffler);

+#endif // !NDEBUG

+//===----------------------------------------------------------------------===//

+// GraphWriter support for ScheduleDAGMILive.

+//===----------------------------------------------------------------------===//

+#ifndef NDEBUG

+namespace llvm {

+/// ScheduleDAGMI reuses the graph traits already defined for ScheduleDAG.
+template <>
+struct GraphTraits<ScheduleDAGMI *> : public GraphTraits<ScheduleDAG *> {};

+/// DOT rendering hooks for ScheduleDAGMI graphs.
+template <>
+struct DOTGraphTraits<ScheduleDAGMI *> : public DefaultDOTGraphTraits {
+  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+
+  /// The graph is named after the machine function of \p G.
+  static std::string getGraphName(const ScheduleDAG *G) {
+    return G->MF.getName();
+  }
+
+  static bool renderGraphFromBottomUp() { return true; }
+
+  /// Hide a node when its predecessor or successor count exceeds
+  /// ViewMISchedCutoff. A cutoff of zero hides nothing.
+  static bool isNodeHidden(const SUnit *Node) {
+    if (ViewMISchedCutoff == 0)
+      return false;
+    if (Node->Preds.size() > ViewMISchedCutoff)
+      return true;
+    return Node->Succs.size() > ViewMISchedCutoff;
+  }
+
+  /// If you want to override the dot attributes printed for a particular
+  /// edge, override this method.
+  ///
+  /// Artificial dependencies are drawn dashed cyan, control dependencies
+  /// dashed blue; every other edge gets no extra attributes.
+  static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI,
+                                       const ScheduleDAG *Graph) {
+    const char *Attrs = "";
+    if (EI.isArtificialDep())
+      Attrs = "color=cyan,style=dashed";
+    else if (EI.isCtrlDep())
+      Attrs = "color=blue,style=dashed";
+    return Attrs;
+  }
+
+  /// Label is "SU:<NodeNum>", followed by " I:<instruction count>" when a
+  /// DFS result is available.
+  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
+    std::string Label;
+    raw_string_ostream OS(Label);
+    const SchedDFSResult *DFS = lookupDFSResult(G);
+    OS << "SU:" << SU->NodeNum;
+    if (DFS)
+      OS << " I:" << DFS->getNumInstrs(SU);
+    return OS.str();
+  }
+
+  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
+    return G->getGraphNodeLabel(SU);
+  }
+
+  /// Every node is an Mrecord; when a DFS result is available the node is
+  /// also filled with a color derived from its subtree ID.
+  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
+    std::string Attrs("shape=Mrecord");
+    if (const SchedDFSResult *DFS = lookupDFSResult(G)) {
+      Attrs += ",style=filled,fillcolor=\"#";
+      Attrs += DOT::getColorString(DFS->getSubtreeID(N));
+      Attrs += '"';
+    }
+    return Attrs;
+  }
+
+private:
+  /// \returns the DFS result of \p G when it reports vreg liveness (in which
+  /// case it is treated as a ScheduleDAGMILive), otherwise nullptr.
+  static const SchedDFSResult *lookupDFSResult(const ScheduleDAG *G) {
+    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI *>(G);
+    if (!DAG->hasVRegLiveness())
+      return nullptr;
+    return static_cast<const ScheduleDAGMILive *>(G)->getDFSResult();
+  }
+};

+} // end namespace llvm

+#endif // NDEBUG

+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+/// \param Name   forwarded to ViewGraph.
+/// \param Title  forwarded to ViewGraph.
+///
+/// When NDEBUG is defined no graph is shown; a notice is written to errs()
+/// instead.
+void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
+#ifndef NDEBUG
+  ViewGraph(this, Name, false, Title);
+#else
+  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
+         << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}

+/// Out-of-line implementation with no arguments is handy for gdb.

+void ScheduleDAGMI::viewGraph() {

+ viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());