src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2021-12-25 22:36:56 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2022-05-14 11:44:01 +0000
commit	0eae32dcef82f6f06de6419a0d623d7def0cc8f6 (patch)
tree	55b7e05be47b835fd137915bee1e64026c35e71c /contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
parent	4824e7fd18a1223177218d4aec1b3c6c5c4a444e (diff)
parent	77fc4c146f0870ffb09c1afb823ccbe742c5e6ff (diff)

Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp')

-rw-r--r--

contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp

1009

1 files changed, 1009 insertions, 0 deletions

diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
new file mode 100644
index 000000000000..cbc5d9ec169b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp

@@ -0,0 +1,1009 @@

+//===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===//

+//

+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

+// See https://llvm.org/LICENSE.txt for license information.

+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

+//

+//===----------------------------------------------------------------------===//

+//

+// MachineScheduler schedules machine instructions after phi elimination. It

+// preserves LiveIntervals so it can be invoked before register allocation.

+//

+//===----------------------------------------------------------------------===//

+#include "llvm/CodeGen/VLIWMachineScheduler.h"

+#include "llvm/ADT/SmallVector.h"

+#include "llvm/CodeGen/DFAPacketizer.h"

+#include "llvm/CodeGen/MachineBasicBlock.h"

+#include "llvm/CodeGen/MachineFunction.h"

+#include "llvm/CodeGen/MachineInstr.h"

+#include "llvm/CodeGen/MachineLoopInfo.h"

+#include "llvm/CodeGen/RegisterClassInfo.h"

+#include "llvm/CodeGen/RegisterPressure.h"

+#include "llvm/CodeGen/ScheduleDAG.h"

+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"

+#include "llvm/CodeGen/TargetInstrInfo.h"

+#include "llvm/CodeGen/TargetOpcodes.h"

+#include "llvm/CodeGen/TargetRegisterInfo.h"

+#include "llvm/CodeGen/TargetSchedule.h"

+#include "llvm/CodeGen/TargetSubtargetInfo.h"

+#include "llvm/IR/Function.h"

+#include "llvm/Support/CommandLine.h"

+#include "llvm/Support/Debug.h"

+#include "llvm/Support/raw_ostream.h"

+#include <algorithm>

+#include <cassert>

+#include <iomanip>

+#include <limits>

+#include <memory>

+#include <sstream>

+using namespace llvm;

+#define DEBUG_TYPE "machine-scheduler"

+static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,

+ cl::ZeroOrMore, cl::init(false));

+static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,

+ cl::ZeroOrMore, cl::init(true));

+static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",

+ cl::Hidden, cl::ZeroOrMore,

+ cl::init(1));

+// Check if the scheduler should penalize instructions that are available to

+// early due to a zero-latency dependence.

+static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,

+ cl::ZeroOrMore, cl::init(true));

+// This value is used to determine if a register class is a high pressure set.

+// We compute the maximum number of registers needed and divided by the total

+// available. Then, we compare the result to this value.

+static cl::opt<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden,

+ cl::init(0.75f),

+ cl::desc("High register pressure threhold."));

+VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI,

+ const TargetSchedModel *SM)

+ : TII(STI.getInstrInfo()), SchedModel(SM) {

+ ResourcesModel = createPacketizer(STI);

+ // This hard requirement could be relaxed,

+ // but for now do not let it proceed.

+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");

+ Packet.reserve(SchedModel->getIssueWidth());

+ Packet.clear();

+ ResourcesModel->clearResources();

+void VLIWResourceModel::reset() {

+ Packet.clear();

+ ResourcesModel->clearResources();

+VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; }

+/// Return true if there is a dependence between SUd and SUu.

+bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) {

+ if (SUd->Succs.size() == 0)

+ return false;

+ for (const auto &S : SUd->Succs) {

+ // Since we do not add pseudos to packets, might as well

+ // ignore order dependencies.

+ if (S.isCtrl())

+ continue;

+ if (S.getSUnit() == SUu && S.getLatency() > 0)

+ return true;

+ }

+ return false;

+/// Check if scheduling of this SU is possible

+/// in the current packet.

+/// It is _not_ precise (statefull), it is more like

+/// another heuristic. Many corner cases are figured

+/// empirically.

+bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {

+ if (!SU || !SU->getInstr())

+ return false;

+ // First see if the pipeline could receive this instruction

+ // in the current cycle.

+ switch (SU->getInstr()->getOpcode()) {

+ default:

+ if (!ResourcesModel->canReserveResources(*SU->getInstr()))

+ return false;

+ break;

+ case TargetOpcode::EXTRACT_SUBREG:

+ case TargetOpcode::INSERT_SUBREG:

+ case TargetOpcode::SUBREG_TO_REG:

+ case TargetOpcode::REG_SEQUENCE:

+ case TargetOpcode::IMPLICIT_DEF:

+ case TargetOpcode::COPY:

+ case TargetOpcode::INLINEASM:

+ case TargetOpcode::INLINEASM_BR:

+ break;

+ }

+ // Now see if there are no other dependencies to instructions already

+ // in the packet.

+ if (IsTop) {

+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)

+ if (hasDependence(Packet[i], SU))

+ return false;

+ } else {

+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)

+ if (hasDependence(SU, Packet[i]))

+ return false;

+ }

+ return true;

+/// Keep track of available resources.

+bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {

+ bool startNewCycle = false;

+ // Artificially reset state.

+ if (!SU) {

+ reset();

+ TotalPackets++;

+ return false;

+ }

+ // If this SU does not fit in the packet or the packet is now full

+ // start a new one.

+ if (!isResourceAvailable(SU, IsTop) ||

+ Packet.size() >= SchedModel->getIssueWidth()) {

+ reset();

+ TotalPackets++;

+ startNewCycle = true;

+ }

+ switch (SU->getInstr()->getOpcode()) {

+ default:

+ ResourcesModel->reserveResources(*SU->getInstr());

+ break;

+ case TargetOpcode::EXTRACT_SUBREG:

+ case TargetOpcode::INSERT_SUBREG:

+ case TargetOpcode::SUBREG_TO_REG:

+ case TargetOpcode::REG_SEQUENCE:

+ case TargetOpcode::IMPLICIT_DEF:

+ case TargetOpcode::KILL:

+ case TargetOpcode::CFI_INSTRUCTION:

+ case TargetOpcode::EH_LABEL:

+ case TargetOpcode::COPY:

+ case TargetOpcode::INLINEASM:

+ case TargetOpcode::INLINEASM_BR:

+ break;

+ }

+ Packet.push_back(SU);

+#ifndef NDEBUG

+ LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");

+ for (unsigned i = 0, e = Packet.size(); i != e; ++i) {

+ LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");

+ LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");

+ LLVM_DEBUG(Packet[i]->getInstr()->dump());

+ }

+#endif

+ return startNewCycle;

+DFAPacketizer *

+VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const {

+ return STI.getInstrInfo()->CreateTargetScheduleState(STI);

+/// schedule - Called back from MachineScheduler::runOnMachineFunction

+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)

+/// only includes instructions that have DAG nodes, not scheduling boundaries.

+void VLIWMachineScheduler::schedule() {

+ LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "

+ << printMBBReference(*BB) << " " << BB->getName()

+ << " in_func " << BB->getParent()->getName()

+ << " at loop depth " << MLI->getLoopDepth(BB) << " \n");

+ buildDAGWithRegPressure();

+ Topo.InitDAGTopologicalSorting();

+ // Postprocess the DAG to add platform-specific artificial dependencies.

+ postprocessDAG();

+ SmallVector<SUnit *, 8> TopRoots, BotRoots;

+ findRootsAndBiasEdges(TopRoots, BotRoots);

+ // Initialize the strategy before modifying the DAG.

+ SchedImpl->initialize(this);

+ LLVM_DEBUG({

+ unsigned maxH = 0;

+ for (const SUnit &SU : SUnits)

+ if (SU.getHeight() > maxH)

+ maxH = SU.getHeight();

+ dbgs() << "Max Height " << maxH << "\n";

+ });

+ LLVM_DEBUG({

+ unsigned maxD = 0;

+ for (const SUnit &SU : SUnits)

+ if (SU.getDepth() > maxD)

+ maxD = SU.getDepth();

+ dbgs() << "Max Depth " << maxD << "\n";

+ });

+ LLVM_DEBUG(dump());

+ if (ViewMISchedDAGs)

+ viewGraph();

+ initQueues(TopRoots, BotRoots);

+ bool IsTopNode = false;

+ while (true) {

+ LLVM_DEBUG(

+ dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");

+ SUnit *SU = SchedImpl->pickNode(IsTopNode);

+ if (!SU)

+ break;

+ if (!checkSchedLimit())

+ break;

+ scheduleMI(SU, IsTopNode);

+ // Notify the scheduling strategy after updating the DAG.

+ SchedImpl->schedNode(SU, IsTopNode);

+ updateQueues(SU, IsTopNode);

+ }

+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

+ placeDebugValues();

+ LLVM_DEBUG({

+ dbgs() << "*** Final schedule for "

+ << printMBBReference(*begin()->getParent()) << " ***\n";

+ dumpSchedule();

+ dbgs() << '\n';

+ });

+void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {

+ DAG = static_cast<VLIWMachineScheduler *>(dag);

+ SchedModel = DAG->getSchedModel();

+ Top.init(DAG, SchedModel);

+ Bot.init(DAG, SchedModel);

+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or

+ // are disabled, then these HazardRecs will be disabled.

+ const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();

+ const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();

+ const TargetInstrInfo *TII = STI.getInstrInfo();

+ delete Top.HazardRec;

+ delete Bot.HazardRec;

+ Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);

+ Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);

+ delete Top.ResourceModel;

+ delete Bot.ResourceModel;

+ Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());

+ Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());

+ const std::vector<unsigned> &MaxPressure =

+ DAG->getRegPressure().MaxSetPressure;

+ HighPressureSets.assign(MaxPressure.size(), 0);

+ for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {

+ unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);

+ HighPressureSets[i] =

+ ((float)MaxPressure[i] > ((float)Limit * RPThreshold));

+ }

+ assert((!ForceTopDown || !ForceBottomUp) &&

+ "-misched-topdown incompatible with -misched-bottomup");

+VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(

+ const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {

+ return new VLIWResourceModel(STI, SchedModel);

+void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {

+ for (const SDep &PI : SU->Preds) {

+ unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;

+ unsigned MinLatency = PI.getLatency();

+#ifndef NDEBUG

+ Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);

+#endif

+ if (SU->TopReadyCycle < PredReadyCycle + MinLatency)

+ SU->TopReadyCycle = PredReadyCycle + MinLatency;

+ }

+ if (!SU->isScheduled)

+ Top.releaseNode(SU, SU->TopReadyCycle);

+void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {

+ assert(SU->getInstr() && "Scheduled SUnit must have instr");

+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E;

+ ++I) {

+ unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;

+ unsigned MinLatency = I->getLatency();

+#ifndef NDEBUG

+ Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);

+#endif

+ if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)

+ SU->BotReadyCycle = SuccReadyCycle + MinLatency;

+ }

+ if (!SU->isScheduled)

+ Bot.releaseNode(SU, SU->BotReadyCycle);

+ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {

+ delete ResourceModel;

+ delete HazardRec;

+/// Does this SU have a hazard within the current instruction group.

+///

+/// The scheduler supports two modes of hazard recognition. The first is the

+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that

+/// supports highly complicated in-order reservation tables

+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.

+///

+/// The second is a streamlined mechanism that checks for hazards based on

+/// simple counters that the scheduler itself maintains. It explicitly checks

+/// for instruction dispatch limitations, including the number of micro-ops that

+/// can dispatch per cycle.

+///

+/// TODO: Also check whether the SU must start a new group.

+bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {

+ if (HazardRec->isEnabled())

+ return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;

+ unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());

+ if (IssueCount + uops > SchedModel->getIssueWidth())

+ return true;

+ return false;

+void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(

+ SUnit *SU, unsigned ReadyCycle) {

+ if (ReadyCycle < MinReadyCycle)

+ MinReadyCycle = ReadyCycle;

+ // Check for interlocks first. For the purpose of other heuristics, an

+ // instruction that cannot issue appears as if it's not in the ReadyQueue.

+ if (ReadyCycle > CurrCycle || checkHazard(SU))

+ Pending.push(SU);

+ else

+ Available.push(SU);

+/// Move the boundary of scheduled code by one cycle.

+void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {

+ unsigned Width = SchedModel->getIssueWidth();

+ IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;

+ assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&

+ "MinReadyCycle uninitialized");

+ unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);

+ if (!HazardRec->isEnabled()) {

+ // Bypass HazardRec virtual calls.

+ CurrCycle = NextCycle;

+ } else {

+ // Bypass getHazardType calls in case of long latency.

+ for (; CurrCycle != NextCycle; ++CurrCycle) {

+ if (isTop())

+ HazardRec->AdvanceCycle();

+ else

+ HazardRec->RecedeCycle();

+ }

+ CheckPending = true;

+ LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "

+ << CurrCycle << '\n');

+/// Move the boundary of scheduled code by one SUnit.

+void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {

+ bool startNewCycle = false;

+ // Update the reservation table.

+ if (HazardRec->isEnabled()) {

+ if (!isTop() && SU->isCall) {

+ // Calls are scheduled with their preceding instructions. For bottom-up

+ // scheduling, clear the pipeline state before emitting.

+ HazardRec->Reset();

+ }

+ HazardRec->EmitInstruction(SU);

+ }

+ // Update DFA model.

+ startNewCycle = ResourceModel->reserveResources(SU, isTop());

+ // Check the instruction group dispatch limit.

+ // TODO: Check if this SU must end a dispatch group.

+ IssueCount += SchedModel->getNumMicroOps(SU->getInstr());

+ if (startNewCycle) {

+ LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');

+ bumpCycle();

+ } else

+ LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "

+ << CurrCycle << '\n');

+/// Release pending ready nodes in to the available queue. This makes them

+/// visible to heuristics.

+void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {

+ // If the available queue is empty, it is safe to reset MinReadyCycle.

+ if (Available.empty())

+ MinReadyCycle = std::numeric_limits<unsigned>::max();

+ // Check to see if any of the pending instructions are ready to issue. If

+ // so, add them to the available queue.

+ for (unsigned i = 0, e = Pending.size(); i != e; ++i) {

+ SUnit *SU = *(Pending.begin() + i);

+ unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

+ if (ReadyCycle < MinReadyCycle)

+ MinReadyCycle = ReadyCycle;

+ if (ReadyCycle > CurrCycle)

+ continue;

+ if (checkHazard(SU))

+ continue;

+ Available.push(SU);

+ Pending.remove(Pending.begin() + i);

+ --i;

+ --e;

+ }

+ CheckPending = false;

+/// Remove SU from the ready set for this boundary.

+void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {

+ if (Available.isInQueue(SU))

+ Available.remove(Available.find(SU));

+ else {

+ assert(Pending.isInQueue(SU) && "bad ready count");

+ Pending.remove(Pending.find(SU));

+ }

+/// If this queue only has one ready candidate, return it. As a side effect,

+/// advance the cycle until at least one node is ready. If multiple instructions

+/// are ready, return NULL.

+SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {

+ if (CheckPending)

+ releasePending();

+ auto AdvanceCycle = [this]() {

+ if (Available.empty())

+ return true;

+ if (Available.size() == 1 && Pending.size() > 0)

+ return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||

+ getWeakLeft(*Available.begin(), isTop()) != 0;

+ return false;

+ };

+ for (unsigned i = 0; AdvanceCycle(); ++i) {

+ assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&

+ "permanent hazard");

+ (void)i;

+ ResourceModel->reserveResources(nullptr, isTop());

+ bumpCycle();

+ releasePending();

+ }

+ if (Available.size() == 1)

+ return *Available.begin();

+ return nullptr;

+#ifndef NDEBUG

+void ConvergingVLIWScheduler::traceCandidate(const char *Label,

+ const ReadyQueue &Q, SUnit *SU,

+ int Cost, PressureChange P) {

+ dbgs() << Label << " " << Q.getName() << " ";

+ if (P.isValid())

+ dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"

+ << P.getUnitInc() << " ";

+ else

+ dbgs() << " ";

+ dbgs() << "cost(" << Cost << ")\t";

+ DAG->dumpNode(*SU);

+// Very detailed queue dump, to be used with higher verbosity levels.

+void ConvergingVLIWScheduler::readyQueueVerboseDump(

+ const RegPressureTracker &RPTracker, SchedCandidate &Candidate,

+ ReadyQueue &Q) {

+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

+ dbgs() << ">>> " << Q.getName() << "\n";

+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {

+ RegPressureDelta RPDelta;

+ TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,

+ DAG->getRegionCriticalPSets(),

+ DAG->getRegPressure().MaxSetPressure);

+ std::stringstream dbgstr;

+ dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";

+ dbgs() << dbgstr.str();

+ SchedulingCost(Q, *I, Candidate, RPDelta, true);

+ dbgs() << "\t";

+ (*I)->getInstr()->dump();

+ }

+ dbgs() << "\n";

+#endif

+/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor

+/// of SU, return true (we may have duplicates)

+static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {

+ if (SU->NumPredsLeft == 0)

+ return false;

+ for (auto &Pred : SU->Preds) {

+ // We found an available, but not scheduled, predecessor.

+ if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))

+ return false;

+ }

+ return true;

+/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor

+/// of SU, return true (we may have duplicates)

+static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {

+ if (SU->NumSuccsLeft == 0)

+ return false;

+ for (auto &Succ : SU->Succs) {

+ // We found an available, but not scheduled, successor.

+ if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))

+ return false;

+ }

+ return true;

+/// Check if the instruction changes the register pressure of a register in the

+/// high pressure set. The function returns a negative value if the pressure

+/// decreases and a positive value is the pressure increases. If the instruction

+/// doesn't use a high pressure register or doesn't change the register

+/// pressure, then return 0.

+int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {

+ PressureDiff &PD = DAG->getPressureDiff(SU);

+ for (auto &P : PD) {

+ if (!P.isValid())

+ continue;

+ // The pressure differences are computed bottom-up, so the comparision for

+ // an increase is positive in the bottom direction, but negative in the

+ // top-down direction.

+ if (HighPressureSets[P.getPSet()])

+ return (isBotUp ? P.getUnitInc() : -P.getUnitInc());

+ }

+ return 0;

+/// Single point to compute overall scheduling cost.

+/// TODO: More heuristics will be used soon.

+int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,

+ SchedCandidate &Candidate,

+ RegPressureDelta &Delta,

+ bool verbose) {

+ // Initial trivial priority.

+ int ResCount = 1;

+ // Do not waste time on a node that is already scheduled.

+ if (!SU || SU->isScheduled)

+ return ResCount;

+ LLVM_DEBUG(if (verbose) dbgs()

+ << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));

+ // Forced priority is high.

+ if (SU->isScheduleHigh) {

+ ResCount += PriorityOne;

+ LLVM_DEBUG(dbgs() << "H|");

+ }

+ unsigned IsAvailableAmt = 0;

+ // Critical path first.

+ if (Q.getID() == TopQID) {

+ if (Top.isLatencyBound(SU)) {

+ LLVM_DEBUG(if (verbose) dbgs() << "LB|");

+ ResCount += (SU->getHeight() * ScaleTwo);

+ }

+ LLVM_DEBUG(if (verbose) {

+ std::stringstream dbgstr;

+ dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";

+ dbgs() << dbgstr.str();

+ });

+ // If resources are available for it, multiply the

+ // chance of scheduling.

+ if (Top.ResourceModel->isResourceAvailable(SU, true)) {

+ IsAvailableAmt = (PriorityTwo + PriorityThree);

+ ResCount += IsAvailableAmt;

+ LLVM_DEBUG(if (verbose) dbgs() << "A|");

+ } else

+ LLVM_DEBUG(if (verbose) dbgs() << " |");

+ } else {

+ if (Bot.isLatencyBound(SU)) {

+ LLVM_DEBUG(if (verbose) dbgs() << "LB|");

+ ResCount += (SU->getDepth() * ScaleTwo);

+ }

+ LLVM_DEBUG(if (verbose) {

+ std::stringstream dbgstr;

+ dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";

+ dbgs() << dbgstr.str();

+ });

+ // If resources are available for it, multiply the

+ // chance of scheduling.

+ if (Bot.ResourceModel->isResourceAvailable(SU, false)) {

+ IsAvailableAmt = (PriorityTwo + PriorityThree);

+ ResCount += IsAvailableAmt;

+ LLVM_DEBUG(if (verbose) dbgs() << "A|");

+ } else

+ LLVM_DEBUG(if (verbose) dbgs() << " |");

+ }

+ unsigned NumNodesBlocking = 0;

+ if (Q.getID() == TopQID) {

+ // How many SUs does it block from scheduling?

+ // Look at all of the successors of this node.

+ // Count the number of nodes that

+ // this node is the sole unscheduled node for.

+ if (Top.isLatencyBound(SU))

+ for (const SDep &SI : SU->Succs)

+ if (isSingleUnscheduledPred(SI.getSUnit(), SU))

+ ++NumNodesBlocking;

+ } else {

+ // How many unscheduled predecessors block this node?

+ if (Bot.isLatencyBound(SU))

+ for (const SDep &PI : SU->Preds)

+ if (isSingleUnscheduledSucc(PI.getSUnit(), SU))

+ ++NumNodesBlocking;

+ }

+ ResCount += (NumNodesBlocking * ScaleTwo);

+ LLVM_DEBUG(if (verbose) {

+ std::stringstream dbgstr;

+ dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";

+ dbgs() << dbgstr.str();

+ });

+ // Factor in reg pressure as a heuristic.

+ if (!IgnoreBBRegPressure) {

+ // Decrease priority by the amount that register pressure exceeds the limit.

+ ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);

+ // Decrease priority if register pressure exceeds the limit.

+ ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);

+ // Decrease priority slightly if register pressure would increase over the

+ // current maximum.

+ ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);

+ // If there are register pressure issues, then we remove the value added for

+ // the instruction being available. The rationale is that we really don't

+ // want to schedule an instruction that causes a spill.

+ if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&

+ (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||

+ Delta.CurrentMax.getUnitInc()))

+ ResCount -= IsAvailableAmt;

+ LLVM_DEBUG(if (verbose) {

+ dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"

+ << Delta.CriticalMax.getUnitInc() << "/"

+ << Delta.CurrentMax.getUnitInc() << ")|";

+ });

+ }

+ // Give preference to a zero latency instruction if the dependent

+ // instruction is in the current packet.

+ if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {

+ for (const SDep &PI : SU->Preds) {

+ if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&

+ PI.getLatency() == 0 &&

+ Top.ResourceModel->isInPacket(PI.getSUnit())) {

+ ResCount += PriorityThree;

+ LLVM_DEBUG(if (verbose) dbgs() << "Z|");

+ }

+ } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {

+ for (const SDep &SI : SU->Succs) {

+ if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&

+ SI.getLatency() == 0 &&

+ Bot.ResourceModel->isInPacket(SI.getSUnit())) {

+ ResCount += PriorityThree;

+ LLVM_DEBUG(if (verbose) dbgs() << "Z|");

+ }

+ // If the instruction has a non-zero latency dependence with an instruction in

+ // the current packet, then it should not be scheduled yet. The case occurs

+ // when the dependent instruction is scheduled in a new packet, so the

+ // scheduler updates the current cycle and pending instructions become

+ // available.

+ if (CheckEarlyAvail) {

+ if (Q.getID() == TopQID) {

+ for (const auto &PI : SU->Preds) {

+ if (PI.getLatency() > 0 &&

+ Top.ResourceModel->isInPacket(PI.getSUnit())) {

+ ResCount -= PriorityOne;

+ LLVM_DEBUG(if (verbose) dbgs() << "D|");

+ }

+ } else {

+ for (const auto &SI : SU->Succs) {

+ if (SI.getLatency() > 0 &&

+ Bot.ResourceModel->isInPacket(SI.getSUnit())) {

+ ResCount -= PriorityOne;

+ LLVM_DEBUG(if (verbose) dbgs() << "D|");

+ }

+ LLVM_DEBUG(if (verbose) {

+ std::stringstream dbgstr;

+ dbgstr << "Total " << std::setw(4) << ResCount << ")";

+ dbgs() << dbgstr.str();

+ });

+ return ResCount;

+/// Pick the best candidate from the top queue.

+///

+/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during

+/// DAG building. To adjust for the current scheduling location we need to

+/// maintain the number of vreg uses remaining to be top-scheduled.

+ConvergingVLIWScheduler::CandResult

+ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone,

+ const RegPressureTracker &RPTracker,

+ SchedCandidate &Candidate) {

+ ReadyQueue &Q = Zone.Available;

+ LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)

+ readyQueueVerboseDump(RPTracker, Candidate, Q);

+ else Q.dump(););

+ // getMaxPressureDelta temporarily modifies the tracker.

+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

+ // BestSU remains NULL if no top candidates beat the best existing candidate.

+ CandResult FoundCandidate = NoCand;

+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {

+ RegPressureDelta RPDelta;

+ TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,

+ DAG->getRegionCriticalPSets(),

+ DAG->getRegPressure().MaxSetPressure);

+ int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);

+ // Initialize the candidate if needed.

+ if (!Candidate.SU) {

+ LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));

+ Candidate.SU = *I;

+ Candidate.RPDelta = RPDelta;

+ Candidate.SCost = CurrentCost;

+ FoundCandidate = NodeOrder;

+ continue;

+ }

+ // Choose node order for negative cost candidates. There is no good

+ // candidate in this case.

+ if (CurrentCost < 0 && Candidate.SCost < 0) {

+ if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||

+ (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {

+ LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));

+ Candidate.SU = *I;

+ Candidate.RPDelta = RPDelta;

+ Candidate.SCost = CurrentCost;

+ FoundCandidate = NodeOrder;

+ }

+ continue;

+ }

+ // Best cost.

+ if (CurrentCost > Candidate.SCost) {

+ LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));

+ Candidate.SU = *I;

+ Candidate.RPDelta = RPDelta;

+ Candidate.SCost = CurrentCost;

+ FoundCandidate = BestCost;

+ continue;

+ }

+ // Choose an instruction that does not depend on an artificial edge.

+ unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID));

+ unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID));

+ if (CurrWeak != CandWeak) {

+ if (CurrWeak < CandWeak) {

+ LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));

+ Candidate.SU = *I;

+ Candidate.RPDelta = RPDelta;

+ Candidate.SCost = CurrentCost;

+ FoundCandidate = Weak;

+ }

+ continue;

+ }

+ if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) {

+ unsigned CurrSize, CandSize;

+ if (Q.getID() == TopQID) {

+ CurrSize = (*I)->Succs.size();

+ CandSize = Candidate.SU->Succs.size();

+ } else {

+ CurrSize = (*I)->Preds.size();

+ CandSize = Candidate.SU->Preds.size();

+ }

+ if (CurrSize > CandSize) {

+ LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));

+ Candidate.SU = *I;

+ Candidate.RPDelta = RPDelta;

+ Candidate.SCost = CurrentCost;

+ FoundCandidate = BestCost;

+ }

+ // Keep the old candidate if it's a better candidate. That is, don't use

+ // the subsequent tie breaker.

+ if (CurrSize != CandSize)

+ continue;

+ }

+ // Tie breaker.

+ // To avoid scheduling indeterminism, we need a tie breaker

+ // for the case when cost is identical for two nodes.

+ if (UseNewerCandidate && CurrentCost == Candidate.SCost) {

+ if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||

+ (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {

+ LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));

+ Candidate.SU = *I;

+ Candidate.RPDelta = RPDelta;

+ Candidate.SCost = CurrentCost;

+ FoundCandidate = NodeOrder;

+ continue;

+ }

+ // Fall through to original instruction order.

+ // Only consider node order if Candidate was chosen from this Q.

+ if (FoundCandidate == NoCand)

+ continue;

+ }

+ return FoundCandidate;

+/// Pick the best candidate node from either the top or bottom queue.

+SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {

+ // Schedule as far as possible in the direction of no choice. This is most

+ // efficient, but also provides the best heuristics for CriticalPSets.

+ if (SUnit *SU = Bot.pickOnlyChoice()) {

+ LLVM_DEBUG(dbgs() << "Picked only Bottom\n");

+ IsTopNode = false;

+ return SU;

+ }

+ if (SUnit *SU = Top.pickOnlyChoice()) {

+ LLVM_DEBUG(dbgs() << "Picked only Top\n");

+ IsTopNode = true;

+ return SU;

+ }

+ SchedCandidate BotCand;

+ // Prefer bottom scheduling when heuristics are silent.

+ CandResult BotResult =

+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);

+ assert(BotResult != NoCand && "failed to find the first candidate");

+ // If either Q has a single candidate that provides the least increase in

+ // Excess pressure, we can immediately schedule from that Q.

+ //

+ // RegionCriticalPSets summarizes the pressure within the scheduled region and

+ // affects picking from either Q. If scheduling in one direction must

+ // increase pressure for one of the excess PSets, then schedule in that

+ // direction first to provide more freedom in the other direction.

+ if (BotResult == SingleExcess || BotResult == SingleCritical) {

+ LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n");

+ IsTopNode = false;

+ return BotCand.SU;

+ }

+ // Check if the top Q has a better candidate.

+ SchedCandidate TopCand;

+ CandResult TopResult =

+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);

+ assert(TopResult != NoCand && "failed to find the first candidate");

+ if (TopResult == SingleExcess || TopResult == SingleCritical) {

+ LLVM_DEBUG(dbgs() << "Prefered Top Node\n");

+ IsTopNode = true;

+ return TopCand.SU;

+ }

+ // If either Q has a single candidate that minimizes pressure above the

+ // original region's pressure pick it.

+ if (BotResult == SingleMax) {

+ LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");

+ IsTopNode = false;

+ return BotCand.SU;

+ }

+ if (TopResult == SingleMax) {

+ LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n");

+ IsTopNode = true;

+ return TopCand.SU;

+ }

+ if (TopCand.SCost > BotCand.SCost) {

+ LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n");

+ IsTopNode = true;

+ return TopCand.SU;

+ }

+ // Otherwise prefer the bottom candidate in node order.

+ LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n");

+ IsTopNode = false;

+ return BotCand.SU;

+/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.

+SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {

+ if (DAG->top() == DAG->bottom()) {

+ assert(Top.Available.empty() && Top.Pending.empty() &&

+ Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");

+ return nullptr;

+ }

+ SUnit *SU;

+ if (ForceTopDown) {

+ SU = Top.pickOnlyChoice();

+ if (!SU) {

+ SchedCandidate TopCand;

+ CandResult TopResult =

+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);

+ assert(TopResult != NoCand && "failed to find the first candidate");

+ (void)TopResult;

+ SU = TopCand.SU;

+ }

+ IsTopNode = true;

+ } else if (ForceBottomUp) {

+ SU = Bot.pickOnlyChoice();

+ if (!SU) {

+ SchedCandidate BotCand;

+ CandResult BotResult =

+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);

+ assert(BotResult != NoCand && "failed to find the first candidate");

+ (void)BotResult;

+ SU = BotCand.SU;

+ }

+ IsTopNode = false;

+ } else {

+ SU = pickNodeBidrectional(IsTopNode);

+ }

+ if (SU->isTopReady())

+ Top.removeReady(SU);

+ if (SU->isBottomReady())

+ Bot.removeReady(SU);

+ LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")

+ << " Scheduling instruction in cycle "

+ << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("

+ << reportPackets() << ")\n";

+ DAG->dumpNode(*SU));

+ return SU;

+/// Update the scheduler's state after scheduling a node. This is the same node

+/// that was just returned by pickNode(). However, VLIWMachineScheduler needs

+/// to update it's state based on the current cycle before MachineSchedStrategy

+/// does.

+void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {

+ if (IsTopNode) {

+ Top.bumpNode(SU);

+ SU->TopReadyCycle = Top.CurrCycle;

+ } else {

+ Bot.bumpNode(SU);

+ SU->BotReadyCycle = Bot.CurrCycle;

+ }