aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2021-12-25 22:36:56 +0000
committerDimitry Andric <dim@FreeBSD.org>2022-05-14 11:44:01 +0000
commit0eae32dcef82f6f06de6419a0d623d7def0cc8f6 (patch)
tree55b7e05be47b835fd137915bee1e64026c35e71c /contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
parent4824e7fd18a1223177218d4aec1b3c6c5c4a444e (diff)
parent77fc4c146f0870ffb09c1afb823ccbe742c5e6ff (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp1009
1 files changed, 1009 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
new file mode 100644
index 000000000000..cbc5d9ec169b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
@@ -0,0 +1,1009 @@
+//===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineScheduler schedules machine instructions after phi elimination. It
+// preserves LiveIntervals so it can be invoked before register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/VLIWMachineScheduler.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iomanip>
+#include <limits>
+#include <memory>
+#include <sstream>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-scheduler"
+
+static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false));
+
+static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,
+ cl::ZeroOrMore, cl::init(true));
+
+static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
+ cl::Hidden, cl::ZeroOrMore,
+ cl::init(1));
+
+// Check if the scheduler should penalize instructions that are available to
+// early due to a zero-latency dependence.
+static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
+ cl::ZeroOrMore, cl::init(true));
+
+// This value is used to determine if a register class is a high pressure set.
+// We compute the maximum number of registers needed and divided by the total
+// available. Then, we compare the result to this value.
+static cl::opt<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden,
+ cl::init(0.75f),
+ cl::desc("High register pressure threhold."));
+
+VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI,
+ const TargetSchedModel *SM)
+ : TII(STI.getInstrInfo()), SchedModel(SM) {
+ ResourcesModel = createPacketizer(STI);
+
+ // This hard requirement could be relaxed,
+ // but for now do not let it proceed.
+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+ Packet.reserve(SchedModel->getIssueWidth());
+ Packet.clear();
+ ResourcesModel->clearResources();
+}
+
+void VLIWResourceModel::reset() {
+ Packet.clear();
+ ResourcesModel->clearResources();
+}
+
+VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; }
+
+/// Return true if there is a dependence between SUd and SUu.
+bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) {
+ if (SUd->Succs.size() == 0)
+ return false;
+
+ for (const auto &S : SUd->Succs) {
+ // Since we do not add pseudos to packets, might as well
+ // ignore order dependencies.
+ if (S.isCtrl())
+ continue;
+
+ if (S.getSUnit() == SUu && S.getLatency() > 0)
+ return true;
+ }
+ return false;
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+/// It is _not_ precise (statefull), it is more like
+/// another heuristic. Many corner cases are figured
+/// empirically.
+bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
+ if (!SU || !SU->getInstr())
+ return false;
+
+ // First see if the pipeline could receive this instruction
+ // in the current cycle.
+ switch (SU->getInstr()->getOpcode()) {
+ default:
+ if (!ResourcesModel->canReserveResources(*SU->getInstr()))
+ return false;
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::COPY:
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR:
+ break;
+ }
+
+ // Now see if there are no other dependencies to instructions already
+ // in the packet.
+ if (IsTop) {
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ if (hasDependence(Packet[i], SU))
+ return false;
+ } else {
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ if (hasDependence(SU, Packet[i]))
+ return false;
+ }
+ return true;
+}
+
+/// Keep track of available resources.
+bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
+ bool startNewCycle = false;
+ // Artificially reset state.
+ if (!SU) {
+ reset();
+ TotalPackets++;
+ return false;
+ }
+ // If this SU does not fit in the packet or the packet is now full
+ // start a new one.
+ if (!isResourceAvailable(SU, IsTop) ||
+ Packet.size() >= SchedModel->getIssueWidth()) {
+ reset();
+ TotalPackets++;
+ startNewCycle = true;
+ }
+
+ switch (SU->getInstr()->getOpcode()) {
+ default:
+ ResourcesModel->reserveResources(*SU->getInstr());
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::CFI_INSTRUCTION:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::COPY:
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR:
+ break;
+ }
+ Packet.push_back(SU);
+
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
+ LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
+ LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
+ LLVM_DEBUG(Packet[i]->getInstr()->dump());
+ }
+#endif
+
+ return startNewCycle;
+}
+
+DFAPacketizer *
+VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const {
+ return STI.getInstrInfo()->CreateTargetScheduleState(STI);
+}
+
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
+void VLIWMachineScheduler::schedule() {
+ LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
+ << printMBBReference(*BB) << " " << BB->getName()
+ << " in_func " << BB->getParent()->getName()
+ << " at loop depth " << MLI->getLoopDepth(BB) << " \n");
+
+ buildDAGWithRegPressure();
+
+ Topo.InitDAGTopologicalSorting();
+
+ // Postprocess the DAG to add platform-specific artificial dependencies.
+ postprocessDAG();
+
+ SmallVector<SUnit *, 8> TopRoots, BotRoots;
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+
+ // Initialize the strategy before modifying the DAG.
+ SchedImpl->initialize(this);
+
+ LLVM_DEBUG({
+ unsigned maxH = 0;
+ for (const SUnit &SU : SUnits)
+ if (SU.getHeight() > maxH)
+ maxH = SU.getHeight();
+ dbgs() << "Max Height " << maxH << "\n";
+ });
+ LLVM_DEBUG({
+ unsigned maxD = 0;
+ for (const SUnit &SU : SUnits)
+ if (SU.getDepth() > maxD)
+ maxD = SU.getDepth();
+ dbgs() << "Max Depth " << maxD << "\n";
+ });
+ LLVM_DEBUG(dump());
+ if (ViewMISchedDAGs)
+ viewGraph();
+
+ initQueues(TopRoots, BotRoots);
+
+ bool IsTopNode = false;
+ while (true) {
+ LLVM_DEBUG(
+ dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
+ SUnit *SU = SchedImpl->pickNode(IsTopNode);
+ if (!SU)
+ break;
+
+ if (!checkSchedLimit())
+ break;
+
+ scheduleMI(SU, IsTopNode);
+
+ // Notify the scheduling strategy after updating the DAG.
+ SchedImpl->schedNode(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+
+ LLVM_DEBUG({
+ dbgs() << "*** Final schedule for "
+ << printMBBReference(*begin()->getParent()) << " ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
+ DAG = static_cast<VLIWMachineScheduler *>(dag);
+ SchedModel = DAG->getSchedModel();
+
+ Top.init(DAG, SchedModel);
+ Bot.init(DAG, SchedModel);
+
+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
+ // are disabled, then these HazardRecs will be disabled.
+ const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
+ const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ delete Top.HazardRec;
+ delete Bot.HazardRec;
+ Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
+ Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
+
+ delete Top.ResourceModel;
+ delete Bot.ResourceModel;
+ Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
+ Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
+
+ const std::vector<unsigned> &MaxPressure =
+ DAG->getRegPressure().MaxSetPressure;
+ HighPressureSets.assign(MaxPressure.size(), 0);
+ for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
+ unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
+ HighPressureSets[i] =
+ ((float)MaxPressure[i] > ((float)Limit * RPThreshold));
+ }
+
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+}
+
+VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(
+ const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
+ return new VLIWResourceModel(STI, SchedModel);
+}
+
+void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
+ for (const SDep &PI : SU->Preds) {
+ unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
+ unsigned MinLatency = PI.getLatency();
+#ifndef NDEBUG
+ Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
+#endif
+ if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
+ SU->TopReadyCycle = PredReadyCycle + MinLatency;
+ }
+
+ if (!SU->isScheduled)
+ Top.releaseNode(SU, SU->TopReadyCycle);
+}
+
+void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
+ assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E;
+ ++I) {
+ unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
+ unsigned MinLatency = I->getLatency();
+#ifndef NDEBUG
+ Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
+#endif
+ if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
+ SU->BotReadyCycle = SuccReadyCycle + MinLatency;
+ }
+
+ if (!SU->isScheduled)
+ Bot.releaseNode(SU, SU->BotReadyCycle);
+}
+
+ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
+ delete ResourceModel;
+ delete HazardRec;
+}
+
+/// Does this SU have a hazard within the current instruction group.
+///
+/// The scheduler supports two modes of hazard recognition. The first is the
+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
+/// supports highly complicated in-order reservation tables
+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
+///
+/// The second is a streamlined mechanism that checks for hazards based on
+/// simple counters that the scheduler itself maintains. It explicitly checks
+/// for instruction dispatch limitations, including the number of micro-ops that
+/// can dispatch per cycle.
+///
+/// TODO: Also check whether the SU must start a new group.
+bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
+ if (HazardRec->isEnabled())
+ return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
+
+ unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
+ if (IssueCount + uops > SchedModel->getIssueWidth())
+ return true;
+
+ return false;
+}
+
+void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(
+ SUnit *SU, unsigned ReadyCycle) {
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ // Check for interlocks first. For the purpose of other heuristics, an
+ // instruction that cannot issue appears as if it's not in the ReadyQueue.
+ if (ReadyCycle > CurrCycle || checkHazard(SU))
+
+ Pending.push(SU);
+ else
+ Available.push(SU);
+}
+
+/// Move the boundary of scheduled code by one cycle.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
+ unsigned Width = SchedModel->getIssueWidth();
+ IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
+
+ assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
+ "MinReadyCycle uninitialized");
+ unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
+
+ if (!HazardRec->isEnabled()) {
+ // Bypass HazardRec virtual calls.
+ CurrCycle = NextCycle;
+ } else {
+ // Bypass getHazardType calls in case of long latency.
+ for (; CurrCycle != NextCycle; ++CurrCycle) {
+ if (isTop())
+ HazardRec->AdvanceCycle();
+ else
+ HazardRec->RecedeCycle();
+ }
+ }
+ CheckPending = true;
+
+ LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
+ << CurrCycle << '\n');
+}
+
+/// Move the boundary of scheduled code by one SUnit.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
+ bool startNewCycle = false;
+
+ // Update the reservation table.
+ if (HazardRec->isEnabled()) {
+ if (!isTop() && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+ HazardRec->EmitInstruction(SU);
+ }
+
+ // Update DFA model.
+ startNewCycle = ResourceModel->reserveResources(SU, isTop());
+
+ // Check the instruction group dispatch limit.
+ // TODO: Check if this SU must end a dispatch group.
+ IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
+ if (startNewCycle) {
+ LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+ bumpCycle();
+ } else
+ LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
+ << CurrCycle << '\n');
+}
+
+/// Release pending ready nodes in to the available queue. This makes them
+/// visible to heuristics.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
+ // If the available queue is empty, it is safe to reset MinReadyCycle.
+ if (Available.empty())
+ MinReadyCycle = std::numeric_limits<unsigned>::max();
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
+ SUnit *SU = *(Pending.begin() + i);
+ unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
+
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ if (ReadyCycle > CurrCycle)
+ continue;
+
+ if (checkHazard(SU))
+ continue;
+
+ Available.push(SU);
+ Pending.remove(Pending.begin() + i);
+ --i;
+ --e;
+ }
+ CheckPending = false;
+}
+
+/// Remove SU from the ready set for this boundary.
+void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
+ if (Available.isInQueue(SU))
+ Available.remove(Available.find(SU));
+ else {
+ assert(Pending.isInQueue(SU) && "bad ready count");
+ Pending.remove(Pending.find(SU));
+ }
+}
+
+/// If this queue only has one ready candidate, return it. As a side effect,
+/// advance the cycle until at least one node is ready. If multiple instructions
+/// are ready, return NULL.
+SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
+ if (CheckPending)
+ releasePending();
+
+ auto AdvanceCycle = [this]() {
+ if (Available.empty())
+ return true;
+ if (Available.size() == 1 && Pending.size() > 0)
+ return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
+ getWeakLeft(*Available.begin(), isTop()) != 0;
+ return false;
+ };
+ for (unsigned i = 0; AdvanceCycle(); ++i) {
+ assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+ "permanent hazard");
+ (void)i;
+ ResourceModel->reserveResources(nullptr, isTop());
+ bumpCycle();
+ releasePending();
+ }
+ if (Available.size() == 1)
+ return *Available.begin();
+ return nullptr;
+}
+
+#ifndef NDEBUG
+void ConvergingVLIWScheduler::traceCandidate(const char *Label,
+ const ReadyQueue &Q, SUnit *SU,
+ int Cost, PressureChange P) {
+ dbgs() << Label << " " << Q.getName() << " ";
+ if (P.isValid())
+ dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
+ << P.getUnitInc() << " ";
+ else
+ dbgs() << " ";
+ dbgs() << "cost(" << Cost << ")\t";
+ DAG->dumpNode(*SU);
+}
+
+// Very detailed queue dump, to be used with higher verbosity levels.
+void ConvergingVLIWScheduler::readyQueueVerboseDump(
+ const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
+ ReadyQueue &Q) {
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
+
+ dbgs() << ">>> " << Q.getName() << "\n";
+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ RegPressureDelta RPDelta;
+ TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+ std::stringstream dbgstr;
+ dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
+ dbgs() << dbgstr.str();
+ SchedulingCost(Q, *I, Candidate, RPDelta, true);
+ dbgs() << "\t";
+ (*I)->getInstr()->dump();
+ }
+ dbgs() << "\n";
+}
+#endif
+
+/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
+/// of SU, return true (we may have duplicates)
+static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
+ if (SU->NumPredsLeft == 0)
+ return false;
+
+ for (auto &Pred : SU->Preds) {
+ // We found an available, but not scheduled, predecessor.
+ if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
+ return false;
+ }
+
+ return true;
+}
+
+/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
+/// of SU, return true (we may have duplicates)
+static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
+ if (SU->NumSuccsLeft == 0)
+ return false;
+
+ for (auto &Succ : SU->Succs) {
+ // We found an available, but not scheduled, successor.
+ if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
+ return false;
+ }
+ return true;
+}
+
+/// Check if the instruction changes the register pressure of a register in the
+/// high pressure set. The function returns a negative value if the pressure
+/// decreases and a positive value is the pressure increases. If the instruction
+/// doesn't use a high pressure register or doesn't change the register
+/// pressure, then return 0.
+int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
+ PressureDiff &PD = DAG->getPressureDiff(SU);
+ for (auto &P : PD) {
+ if (!P.isValid())
+ continue;
+ // The pressure differences are computed bottom-up, so the comparision for
+ // an increase is positive in the bottom direction, but negative in the
+ // top-down direction.
+ if (HighPressureSets[P.getPSet()])
+ return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
+ }
+ return 0;
+}
+
+/// Single point to compute overall scheduling cost.
+/// TODO: More heuristics will be used soon.
+int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
+ SchedCandidate &Candidate,
+ RegPressureDelta &Delta,
+ bool verbose) {
+ // Initial trivial priority.
+ int ResCount = 1;
+
+ // Do not waste time on a node that is already scheduled.
+ if (!SU || SU->isScheduled)
+ return ResCount;
+
+ LLVM_DEBUG(if (verbose) dbgs()
+ << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
+ // Forced priority is high.
+ if (SU->isScheduleHigh) {
+ ResCount += PriorityOne;
+ LLVM_DEBUG(dbgs() << "H|");
+ }
+
+ unsigned IsAvailableAmt = 0;
+ // Critical path first.
+ if (Q.getID() == TopQID) {
+ if (Top.isLatencyBound(SU)) {
+ LLVM_DEBUG(if (verbose) dbgs() << "LB|");
+ ResCount += (SU->getHeight() * ScaleTwo);
+ }
+
+ LLVM_DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
+ dbgs() << dbgstr.str();
+ });
+
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (Top.ResourceModel->isResourceAvailable(SU, true)) {
+ IsAvailableAmt = (PriorityTwo + PriorityThree);
+ ResCount += IsAvailableAmt;
+ LLVM_DEBUG(if (verbose) dbgs() << "A|");
+ } else
+ LLVM_DEBUG(if (verbose) dbgs() << " |");
+ } else {
+ if (Bot.isLatencyBound(SU)) {
+ LLVM_DEBUG(if (verbose) dbgs() << "LB|");
+ ResCount += (SU->getDepth() * ScaleTwo);
+ }
+
+ LLVM_DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
+ dbgs() << dbgstr.str();
+ });
+
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
+ IsAvailableAmt = (PriorityTwo + PriorityThree);
+ ResCount += IsAvailableAmt;
+ LLVM_DEBUG(if (verbose) dbgs() << "A|");
+ } else
+ LLVM_DEBUG(if (verbose) dbgs() << " |");
+ }
+
+ unsigned NumNodesBlocking = 0;
+ if (Q.getID() == TopQID) {
+ // How many SUs does it block from scheduling?
+ // Look at all of the successors of this node.
+ // Count the number of nodes that
+ // this node is the sole unscheduled node for.
+ if (Top.isLatencyBound(SU))
+ for (const SDep &SI : SU->Succs)
+ if (isSingleUnscheduledPred(SI.getSUnit(), SU))
+ ++NumNodesBlocking;
+ } else {
+ // How many unscheduled predecessors block this node?
+ if (Bot.isLatencyBound(SU))
+ for (const SDep &PI : SU->Preds)
+ if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
+ ++NumNodesBlocking;
+ }
+ ResCount += (NumNodesBlocking * ScaleTwo);
+
+ LLVM_DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
+ dbgs() << dbgstr.str();
+ });
+
+ // Factor in reg pressure as a heuristic.
+ if (!IgnoreBBRegPressure) {
+ // Decrease priority by the amount that register pressure exceeds the limit.
+ ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);
+ // Decrease priority if register pressure exceeds the limit.
+ ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);
+ // Decrease priority slightly if register pressure would increase over the
+ // current maximum.
+ ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);
+ // If there are register pressure issues, then we remove the value added for
+ // the instruction being available. The rationale is that we really don't
+ // want to schedule an instruction that causes a spill.
+ if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
+ (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
+ Delta.CurrentMax.getUnitInc()))
+ ResCount -= IsAvailableAmt;
+ LLVM_DEBUG(if (verbose) {
+ dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
+ << Delta.CriticalMax.getUnitInc() << "/"
+ << Delta.CurrentMax.getUnitInc() << ")|";
+ });
+ }
+
+ // Give preference to a zero latency instruction if the dependent
+ // instruction is in the current packet.
+ if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
+ for (const SDep &PI : SU->Preds) {
+ if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
+ PI.getLatency() == 0 &&
+ Top.ResourceModel->isInPacket(PI.getSUnit())) {
+ ResCount += PriorityThree;
+ LLVM_DEBUG(if (verbose) dbgs() << "Z|");
+ }
+ }
+ } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
+ for (const SDep &SI : SU->Succs) {
+ if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
+ SI.getLatency() == 0 &&
+ Bot.ResourceModel->isInPacket(SI.getSUnit())) {
+ ResCount += PriorityThree;
+ LLVM_DEBUG(if (verbose) dbgs() << "Z|");
+ }
+ }
+ }
+
+ // If the instruction has a non-zero latency dependence with an instruction in
+ // the current packet, then it should not be scheduled yet. The case occurs
+ // when the dependent instruction is scheduled in a new packet, so the
+ // scheduler updates the current cycle and pending instructions become
+ // available.
+ if (CheckEarlyAvail) {
+ if (Q.getID() == TopQID) {
+ for (const auto &PI : SU->Preds) {
+ if (PI.getLatency() > 0 &&
+ Top.ResourceModel->isInPacket(PI.getSUnit())) {
+ ResCount -= PriorityOne;
+ LLVM_DEBUG(if (verbose) dbgs() << "D|");
+ }
+ }
+ } else {
+ for (const auto &SI : SU->Succs) {
+ if (SI.getLatency() > 0 &&
+ Bot.ResourceModel->isInPacket(SI.getSUnit())) {
+ ResCount -= PriorityOne;
+ LLVM_DEBUG(if (verbose) dbgs() << "D|");
+ }
+ }
+ }
+ }
+
+ LLVM_DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "Total " << std::setw(4) << ResCount << ")";
+ dbgs() << dbgstr.str();
+ });
+
+ return ResCount;
+}
+
+/// Pick the best candidate from the top queue.
+///
+/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
+/// DAG building. To adjust for the current scheduling location we need to
+/// maintain the number of vreg uses remaining to be top-scheduled.
+ConvergingVLIWScheduler::CandResult
+ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Candidate) {
+ ReadyQueue &Q = Zone.Available;
+ LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
+ readyQueueVerboseDump(RPTracker, Candidate, Q);
+ else Q.dump(););
+
+ // getMaxPressureDelta temporarily modifies the tracker.
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
+
+ // BestSU remains NULL if no top candidates beat the best existing candidate.
+ CandResult FoundCandidate = NoCand;
+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ RegPressureDelta RPDelta;
+ TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+
+ int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);
+
+ // Initialize the candidate if needed.
+ if (!Candidate.SU) {
+ LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = NodeOrder;
+ continue;
+ }
+
+ // Choose node order for negative cost candidates. There is no good
+ // candidate in this case.
+ if (CurrentCost < 0 && Candidate.SCost < 0) {
+ if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
+ (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
+ LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = NodeOrder;
+ }
+ continue;
+ }
+
+ // Best cost.
+ if (CurrentCost > Candidate.SCost) {
+ LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = BestCost;
+ continue;
+ }
+
+ // Choose an instruction that does not depend on an artificial edge.
+ unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID));
+ unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID));
+ if (CurrWeak != CandWeak) {
+ if (CurrWeak < CandWeak) {
+ LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = Weak;
+ }
+ continue;
+ }
+
+ if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) {
+ unsigned CurrSize, CandSize;
+ if (Q.getID() == TopQID) {
+ CurrSize = (*I)->Succs.size();
+ CandSize = Candidate.SU->Succs.size();
+ } else {
+ CurrSize = (*I)->Preds.size();
+ CandSize = Candidate.SU->Preds.size();
+ }
+ if (CurrSize > CandSize) {
+ LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = BestCost;
+ }
+ // Keep the old candidate if it's a better candidate. That is, don't use
+ // the subsequent tie breaker.
+ if (CurrSize != CandSize)
+ continue;
+ }
+
+ // Tie breaker.
+ // To avoid scheduling indeterminism, we need a tie breaker
+ // for the case when cost is identical for two nodes.
+ if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
+ if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
+ (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
+ LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = NodeOrder;
+ continue;
+ }
+ }
+
+ // Fall through to original instruction order.
+ // Only consider node order if Candidate was chosen from this Q.
+ if (FoundCandidate == NoCand)
+ continue;
+ }
+ return FoundCandidate;
+}
+
+/// Pick the best candidate node from either the top or bottom queue.
+SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
+ // Schedule as far as possible in the direction of no choice. This is most
+ // efficient, but also provides the best heuristics for CriticalPSets.
+ if (SUnit *SU = Bot.pickOnlyChoice()) {
+ LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
+ IsTopNode = false;
+ return SU;
+ }
+ if (SUnit *SU = Top.pickOnlyChoice()) {
+ LLVM_DEBUG(dbgs() << "Picked only Top\n");
+ IsTopNode = true;
+ return SU;
+ }
+ SchedCandidate BotCand;
+ // Prefer bottom scheduling when heuristics are silent.
+ CandResult BotResult =
+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ assert(BotResult != NoCand && "failed to find the first candidate");
+
+ // If either Q has a single candidate that provides the least increase in
+ // Excess pressure, we can immediately schedule from that Q.
+ //
+ // RegionCriticalPSets summarizes the pressure within the scheduled region and
+ // affects picking from either Q. If scheduling in one direction must
+ // increase pressure for one of the excess PSets, then schedule in that
+ // direction first to provide more freedom in the other direction.
+ if (BotResult == SingleExcess || BotResult == SingleCritical) {
+ LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n");
+ IsTopNode = false;
+ return BotCand.SU;
+ }
+ // Check if the top Q has a better candidate.
+ SchedCandidate TopCand;
+ CandResult TopResult =
+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ assert(TopResult != NoCand && "failed to find the first candidate");
+
+ if (TopResult == SingleExcess || TopResult == SingleCritical) {
+ LLVM_DEBUG(dbgs() << "Prefered Top Node\n");
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // If either Q has a single candidate that minimizes pressure above the
+ // original region's pressure pick it.
+ if (BotResult == SingleMax) {
+ LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
+ IsTopNode = false;
+ return BotCand.SU;
+ }
+ if (TopResult == SingleMax) {
+ LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ if (TopCand.SCost > BotCand.SCost) {
+ LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n");
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // Otherwise prefer the bottom candidate in node order.
+ LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n");
+ IsTopNode = false;
+ return BotCand.SU;
+}
+
+/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
+SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom()) {
+ assert(Top.Available.empty() && Top.Pending.empty() &&
+ Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+ return nullptr;
+ }
+ SUnit *SU;
+ if (ForceTopDown) {
+ SU = Top.pickOnlyChoice();
+ if (!SU) {
+ SchedCandidate TopCand;
+ CandResult TopResult =
+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ assert(TopResult != NoCand && "failed to find the first candidate");
+ (void)TopResult;
+ SU = TopCand.SU;
+ }
+ IsTopNode = true;
+ } else if (ForceBottomUp) {
+ SU = Bot.pickOnlyChoice();
+ if (!SU) {
+ SchedCandidate BotCand;
+ CandResult BotResult =
+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ assert(BotResult != NoCand && "failed to find the first candidate");
+ (void)BotResult;
+ SU = BotCand.SU;
+ }
+ IsTopNode = false;
+ } else {
+ SU = pickNodeBidrectional(IsTopNode);
+ }
+ if (SU->isTopReady())
+ Top.removeReady(SU);
+ if (SU->isBottomReady())
+ Bot.removeReady(SU);
+
+ LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
+ << " Scheduling instruction in cycle "
+ << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
+ << reportPackets() << ")\n";
+ DAG->dumpNode(*SU));
+ return SU;
+}
+
+/// Update the scheduler's state after scheduling a node. This is the same node
+/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
+/// to update it's state based on the current cycle before MachineSchedStrategy
+/// does.
+void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+ if (IsTopNode) {
+ Top.bumpNode(SU);
+ SU->TopReadyCycle = Top.CurrCycle;
+ } else {
+ Bot.bumpNode(SU);
+ SU->BotReadyCycle = Bot.CurrCycle;
+ }
+}