Diffstat (limited to 'lib/Target/Hexagon/HexagonMachineScheduler.cpp')
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.cpp  406
1 file changed, 368 insertions, 38 deletions
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 7a52d6874c33b..6dcac0dc7ee2e 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -13,28 +13,126 @@
//===----------------------------------------------------------------------===//
#include "HexagonMachineScheduler.h"
+#include "HexagonSubtarget.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/IR/Function.h"
+#include <iomanip>
+#include <sstream>
+
+static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false));
+
+static cl::opt<bool> SchedPredsCloser("sched-preds-closer",
+ cl::Hidden, cl::ZeroOrMore, cl::init(true));
+
+static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
+ cl::Hidden, cl::ZeroOrMore, cl::init(1));
+
+static cl::opt<bool> TopUseShorterTie("top-use-shorter-tie",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false));
+
+static cl::opt<bool> BotUseShorterTie("bot-use-shorter-tie",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false));
+
+static cl::opt<bool> DisableTCTie("disable-tc-tie",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false));
+
+static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
+ cl::Hidden, cl::ZeroOrMore, cl::init(true));
+
+// Check if the scheduler should penalize instructions that are available too
+// early due to a zero-latency dependence.
+static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
+ cl::ZeroOrMore, cl::init(true));
+
using namespace llvm;
#define DEBUG_TYPE "misched"
-/// Platform-specific modifications to DAG.
-void VLIWMachineScheduler::postprocessDAG() {
+class HexagonCallMutation : public ScheduleDAGMutation {
+public:
+ void apply(ScheduleDAGInstrs *DAG) override;
+private:
+ bool shouldTFRICallBind(const HexagonInstrInfo &HII,
+ const SUnit &Inst1, const SUnit &Inst2) const;
+};
+
+// Check if a call and subsequent A2_tfrpi instructions should maintain
+// scheduling affinity. We are looking for the TFRI to be consumed in
+// the next instruction. This should help reduce the instances of
+// double register pairs being allocated and scheduled before a call
+// when not used until after the call. This situation is exacerbated
+// by the fact that we allocate the pair from the callee-saved list,
+// leading to excess spills and restores.
+bool HexagonCallMutation::shouldTFRICallBind(const HexagonInstrInfo &HII,
+ const SUnit &Inst1, const SUnit &Inst2) const {
+ if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
+ return false;
+
+ // TypeXTYPE instructions are 64-bit operations.
+ if (HII.getType(Inst2.getInstr()) == HexagonII::TypeXTYPE)
+ return true;
+ return false;
+}
+
+void HexagonCallMutation::apply(ScheduleDAGInstrs *DAG) {
SUnit* LastSequentialCall = nullptr;
+ unsigned VRegHoldingRet = 0;
+ unsigned RetRegister;
+ SUnit* LastUseOfRet = nullptr;
+ auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
+ auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+
// Currently we only catch the situation when a compare gets scheduled
// before the preceding call.
- for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+ for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
// Remember the call.
- if (SUnits[su].getInstr()->isCall())
- LastSequentialCall = &(SUnits[su]);
+ if (DAG->SUnits[su].getInstr()->isCall())
+ LastSequentialCall = &DAG->SUnits[su];
// Look for a compare that defines a predicate.
- else if (SUnits[su].getInstr()->isCompare() && LastSequentialCall)
- SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier));
+ else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
+ DAG->SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier));
+ // Look for call and tfri* instructions.
+ else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
+ shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
+ DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier));
+ // Prevent redundant register copies between two calls, which are caused by
+ // both the return value and the argument for the next call being in %R0.
+ // Example:
+ // 1: <call1>
+ // 2: %VregX = COPY %R0
+ // 3: <use of %VregX>
+ // 4: %R0 = ...
+ // 5: <call2>
+ // The scheduler would often swap 3 and 4, so an additional register is
+ // needed. This code inserts a Barrier dependence between 3 & 4 to prevent
+ // this. The same applies for %D0 and %V0/%W0, which are also handled.
+ else if (SchedRetvalOptimization) {
+ const MachineInstr *MI = DAG->SUnits[su].getInstr();
+ if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) ||
+ MI->readsRegister(Hexagon::V0, &TRI))) {
+ // %vregX = COPY %R0
+ VRegHoldingRet = MI->getOperand(0).getReg();
+ RetRegister = MI->getOperand(1).getReg();
+ LastUseOfRet = nullptr;
+ } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet))
+ // <use of %vregX>
+ LastUseOfRet = &DAG->SUnits[su];
+ else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI))
+ // %R0 = ...
+ DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier));
+ }
}
}
+
+/// Save the last formed packet
+void VLIWResourceModel::savePacket() {
+ OldPacket = Packet;
+}
+
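
OldPacket, and the isInPacket() query used by the cost heuristics further down, are declared in HexagonMachineScheduler.h, which is not part of this diff. A minimal sketch of the header-side state they imply (member names and shapes inferred from their uses in this file, so the real declarations may differ):

    class VLIWResourceModel {
    public:
      // Instructions in the packet currently being formed.
      std::vector<SUnit *> Packet;
      // Copy of the last completed packet, saved by savePacket() above and
      // inspected by the stall heuristic in SchedulingCost().
      std::vector<SUnit *> OldPacket;

      void savePacket(); // defined above in this file
      // True if SU has already been placed in the packet being formed.
      bool isInPacket(SUnit *SU) const { // needs <algorithm> for std::find
        return std::find(Packet.begin(), Packet.end(), SU) != Packet.end();
      }
      // ... remaining members elided ...
    };
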
/// Check if scheduling of this SU is possible
/// in the current packet.
/// It is _not_ precise (stateful), it is more like
@@ -48,7 +146,7 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
// in the current cycle.
switch (SU->getInstr()->getOpcode()) {
default:
- if (!ResourcesModel->canReserveResources(SU->getInstr()))
+ if (!ResourcesModel->canReserveResources(*SU->getInstr()))
return false;
case TargetOpcode::EXTRACT_SUBREG:
case TargetOpcode::INSERT_SUBREG:
@@ -60,11 +158,19 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
break;
}
+ MachineFunction &MF = *SU->getInstr()->getParent()->getParent();
+ auto &QII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+
// Now see if there are no other dependencies to instructions already
// in the packet.
for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
if (Packet[i]->Succs.size() == 0)
continue;
+
+ // Enable .cur formation.
+ if (QII.mayBeCurLoad(Packet[i]->getInstr()))
+ continue;
+
for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
E = Packet[i]->Succs.end(); I != E; ++I) {
// Since we do not add pseudos to packets, might as well
@@ -85,6 +191,7 @@ bool VLIWResourceModel::reserveResources(SUnit *SU) {
// Artificially reset state.
if (!SU) {
ResourcesModel->clearResources();
+ savePacket();
Packet.clear();
TotalPackets++;
return false;
@@ -93,6 +200,7 @@ bool VLIWResourceModel::reserveResources(SUnit *SU) {
// start a new one.
if (!isResourceAvailable(SU)) {
ResourcesModel->clearResources();
+ savePacket();
Packet.clear();
TotalPackets++;
startNewCycle = true;
@@ -100,7 +208,7 @@ bool VLIWResourceModel::reserveResources(SUnit *SU) {
switch (SU->getInstr()->getOpcode()) {
default:
- ResourcesModel->reserveResources(SU->getInstr());
+ ResourcesModel->reserveResources(*SU->getInstr());
break;
case TargetOpcode::EXTRACT_SUBREG:
case TargetOpcode::INSERT_SUBREG:
@@ -129,6 +237,7 @@ bool VLIWResourceModel::reserveResources(SUnit *SU) {
// we start fresh.
if (Packet.size() >= SchedModel->getIssueWidth()) {
ResourcesModel->clearResources();
+ savePacket();
Packet.clear();
TotalPackets++;
startNewCycle = true;
@@ -150,19 +259,12 @@ void VLIWMachineScheduler::schedule() {
buildDAGWithRegPressure();
- // Postprocess the DAG to add platform-specific artificial dependencies.
- postprocessDAG();
-
SmallVector<SUnit*, 8> TopRoots, BotRoots;
findRootsAndBiasEdges(TopRoots, BotRoots);
// Initialize the strategy before modifying the DAG.
SchedImpl->initialize(this);
- // To view Height/Depth correctly, they should be accessed at least once.
- //
- // FIXME: SUnit::dumpAll always recompute depth and height now. The max
- // depth/height could be computed directly from the roots and leaves.
DEBUG(unsigned maxH = 0;
for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
if (SUnits[su].getHeight() > maxH)
@@ -197,6 +299,13 @@ void VLIWMachineScheduler::schedule() {
assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
placeDebugValues();
+
+ DEBUG({
+ unsigned BBNum = begin()->getParent()->getNumber();
+ dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
}
void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
@@ -223,16 +332,18 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) &&
"-misched-topdown incompatible with -misched-bottomup");
+
+ DAG->addMutation(make_unique<HexagonSubtarget::HexagonDAGMutation>());
+ DAG->addMutation(make_unique<HexagonCallMutation>());
}
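
Note that this takes over from the VLIWMachineScheduler::postprocessDAG() override removed above: the call/compare ordering fix now lives in HexagonCallMutation, registered here as a DAG mutation alongside the subtarget's HexagonDAGMutation instead of being invoked explicitly from schedule().
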
void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
if (SU->isScheduled)
return;
- for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
- unsigned MinLatency = I->getLatency();
+ for (const SDep &PI : SU->Preds) {
+ unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
+ unsigned MinLatency = PI.getLatency();
#ifndef NDEBUG
Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
#endif
@@ -321,8 +432,8 @@ void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
}
CheckPending = true;
- DEBUG(dbgs() << "*** " << Available.getName() << " cycle "
- << CurrCycle << '\n');
+ DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
+ << CurrCycle << '\n');
}
/// Move the boundary of scheduled code by one SUnit.
@@ -414,16 +525,38 @@ SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
#ifndef NDEBUG
void ConvergingVLIWScheduler::traceCandidate(const char *Label,
- const ReadyQueue &Q,
- SUnit *SU, PressureChange P) {
+ const ReadyQueue &Q, SUnit *SU, int Cost, PressureChange P) {
dbgs() << Label << " " << Q.getName() << " ";
if (P.isValid())
dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
<< P.getUnitInc() << " ";
else
dbgs() << " ";
+ dbgs() << "cost(" << Cost << ")\t";
SU->dump(DAG);
}
+
+// Very detailed queue dump, to be used with higher verbosity levels.
+void ConvergingVLIWScheduler::readyQueueVerboseDump(
+ const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
+ ReadyQueue &Q) {
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
+
+ dbgs() << ">>> " << Q.getName() << "\n";
+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ RegPressureDelta RPDelta;
+ TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+ std::stringstream dbgstr;
+ dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
+ dbgs() << dbgstr.str();
+ SchedulingCost(Q, *I, Candidate, RPDelta, true);
+ dbgs() << "\t";
+ (*I)->getInstr()->dump();
+ }
+ dbgs() << "\n";
+}
#endif
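
For reference, each entry printed by this verbose dump is the SU number, followed by the per-heuristic markers that SchedulingCost emits in verbose mode (decoded after the cost function below), and finally the instruction itself.
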
/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
@@ -466,6 +599,7 @@ static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
// heuristic components for cost computation.
static const unsigned PriorityOne = 200;
static const unsigned PriorityTwo = 50;
+static const unsigned PriorityThree = 75;
static const unsigned ScaleTwo = 10;
static const unsigned FactorOne = 2;
@@ -482,25 +616,50 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
if (!SU || SU->isScheduled)
return ResCount;
+ MachineInstr *Instr = SU->getInstr();
+
+ DEBUG(if (verbose) dbgs() << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
// Forced priority is high.
- if (SU->isScheduleHigh)
+ if (SU->isScheduleHigh) {
ResCount += PriorityOne;
+ DEBUG(dbgs() << "H|");
+ }
// Critical path first.
if (Q.getID() == TopQID) {
ResCount += (SU->getHeight() * ScaleTwo);
+ DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
+ dbgs() << dbgstr.str();
+ });
+
// If resources are available for it, multiply the
// chance of scheduling.
- if (Top.ResourceModel->isResourceAvailable(SU))
+ if (Top.ResourceModel->isResourceAvailable(SU)) {
ResCount <<= FactorOne;
+ ResCount += PriorityThree;
+ DEBUG(if (verbose) dbgs() << "A|");
+ } else
+ DEBUG(if (verbose) dbgs() << " |");
} else {
ResCount += (SU->getDepth() * ScaleTwo);
+ DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
+ dbgs() << dbgstr.str();
+ });
+
// If resources are available for it, multiply the
// chance of scheduling.
- if (Bot.ResourceModel->isResourceAvailable(SU))
+ if (Bot.ResourceModel->isResourceAvailable(SU)) {
ResCount <<= FactorOne;
+ ResCount += PriorityThree;
+ DEBUG(if (verbose) dbgs() << "A|");
+ } else
+ DEBUG(if (verbose) dbgs() << " |");
}
unsigned NumNodesBlocking = 0;
@@ -509,24 +668,121 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
// Look at all of the successors of this node.
// Count the number of nodes that
// this node is the sole unscheduled node for.
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I)
- if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ for (const SDep &SI : SU->Succs)
+ if (getSingleUnscheduledPred(SI.getSUnit()) == SU)
++NumNodesBlocking;
} else {
// How many unscheduled predecessors block this node?
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I)
- if (getSingleUnscheduledSucc(I->getSUnit()) == SU)
+ for (const SDep &PI : SU->Preds)
+ if (getSingleUnscheduledSucc(PI.getSUnit()) == SU)
++NumNodesBlocking;
}
ResCount += (NumNodesBlocking * ScaleTwo);
+ DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
+ dbgs() << dbgstr.str();
+ });
+
// Factor in reg pressure as a heuristic.
- ResCount -= (Delta.Excess.getUnitInc()*PriorityTwo);
- ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityTwo);
+ if (!IgnoreBBRegPressure) {
+ // Decrease priority by the amount that register pressure exceeds the limit.
+ ResCount -= (Delta.Excess.getUnitInc()*PriorityOne);
+ // Decrease priority if register pressure exceeds the limit.
+ ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityOne);
+ // Decrease priority slightly if register pressure would increase over the
+ // current maximum.
+ ResCount -= (Delta.CurrentMax.getUnitInc()*PriorityTwo);
+ DEBUG(if (verbose) {
+ dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
+ << Delta.CriticalMax.getUnitInc() <<"/"
+ << Delta.CurrentMax.getUnitInc() << ")|";
+ });
+ }
+
+ // Give a little extra priority to a .cur instruction if there is a resource
+ // available for it.
+ auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
+ auto &QII = *QST.getInstrInfo();
+ if (SU->isInstr() && QII.mayBeCurLoad(SU->getInstr())) {
+ if (Q.getID() == TopQID && Top.ResourceModel->isResourceAvailable(SU)) {
+ ResCount += PriorityTwo;
+ DEBUG(if (verbose) dbgs() << "C|");
+ } else if (Q.getID() == BotQID &&
+ Bot.ResourceModel->isResourceAvailable(SU)) {
+ ResCount += PriorityTwo;
+ DEBUG(if (verbose) dbgs() << "C|");
+ }
+ }
+
+ // Give preference to a zero latency instruction if the dependent
+ // instruction is in the current packet.
+ if (Q.getID() == TopQID) {
+ for (const SDep &PI : SU->Preds) {
+ if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
+ PI.getLatency() == 0 &&
+ Top.ResourceModel->isInPacket(PI.getSUnit())) {
+ ResCount += PriorityThree;
+ DEBUG(if (verbose) dbgs() << "Z|");
+ }
+ }
+ } else {
+ for (const SDep &SI : SU->Succs) {
+ if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
+ SI.getLatency() == 0 &&
+ Bot.ResourceModel->isInPacket(SI.getSUnit())) {
+ ResCount += PriorityThree;
+ DEBUG(if (verbose) dbgs() << "Z|");
+ }
+ }
+ }
+
+ // Give less preference to an instruction that will cause a stall with
+ // an instruction in the previous packet.
+ if (QII.isV60VectorInstruction(Instr)) {
+ // Check for stalls in the previous packet.
+ if (Q.getID() == TopQID) {
+ for (auto J : Top.ResourceModel->OldPacket)
+ if (QII.producesStall(J->getInstr(), Instr))
+ ResCount -= PriorityOne;
+ } else {
+ for (auto J : Bot.ResourceModel->OldPacket)
+ if (QII.producesStall(Instr, J->getInstr()))
+ ResCount -= PriorityOne;
+ }
+ }
- DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")");
+ // If the instruction has a non-zero latency dependence with an instruction in
+ // the current packet, then it should not be scheduled yet. The case occurs
+ // when the dependent instruction is scheduled in a new packet, so the
+ // scheduler updates the current cycle and pending instructions become
+ // available.
+ if (CheckEarlyAvail) {
+ if (Q.getID() == TopQID) {
+ for (const auto &PI : SU->Preds) {
+ if (PI.getLatency() > 0 &&
+ Top.ResourceModel->isInPacket(PI.getSUnit())) {
+ ResCount -= PriorityOne;
+ DEBUG(if (verbose) dbgs() << "D|");
+ }
+ }
+ } else {
+ for (const auto &SI : SU->Succs) {
+ if (SI.getLatency() > 0 &&
+ Bot.ResourceModel->isInPacket(SI.getSUnit())) {
+ ResCount -= PriorityOne;
+ DEBUG(if (verbose) dbgs() << "D|");
+ }
+ }
+ }
+ }
+
+ DEBUG(if (verbose) {
+ std::stringstream dbgstr;
+ dbgstr << "Total " << std::setw(4) << ResCount << ")";
+ dbgs() << dbgstr.str();
+ });
return ResCount;
}
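
As a legend for the verbose trace assembled above: "(top|" / "(bot|" names the boundary being evaluated, "H" marks a forced high-priority node, "h"/"d" show the node's height (top) or depth (bottom), "A" means the resource model can still fit the node into the current packet, "blk" counts the nodes for which this one is the sole unscheduled predecessor or successor, "RP" lists the Excess/CriticalMax/CurrentMax register-pressure increments, "C" flags a .cur-capable load with a free resource, "Z" a zero-latency dependence on an instruction already in the packet, "D" a dependence with non-zero latency on an in-packet instruction (penalized), and "Total" is the final cost.

A rough worked example with illustrative numbers and the constants defined earlier (note that ResCount <<= FactorOne shifts by two bits, i.e. multiplies by 4, and that PriorityThree, 75, is larger than PriorityTwo, 50, despite the naming): a top-queue node of height 8 that fits in the current packet, is the sole blocker of one successor, adds no register pressure, and has one zero-latency predecessor already in the packet scores 8*10 = 80, then 80<<2 + 75 = 395 for resource availability, +10 for the blocked successor, and +75 for the zero-latency chain, for a total of 480; a single unit of Excess pressure would subtract PriorityOne (200) from that.
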
@@ -539,7 +795,9 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
SchedCandidate &Candidate) {
- DEBUG(Q.dump());
+ DEBUG(if (SchedDebugVerboseLevel > 1)
+ readyQueueVerboseDump(RPTracker, Candidate, Q);
+ else Q.dump(););
// getMaxPressureDelta temporarily modifies the tracker.
RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
@@ -556,6 +814,7 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
// Initialize the candidate if needed.
if (!Candidate.SU) {
+ DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
Candidate.SU = *I;
Candidate.RPDelta = RPDelta;
Candidate.SCost = CurrentCost;
@@ -565,7 +824,7 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
// Best cost.
if (CurrentCost > Candidate.SCost) {
- DEBUG(traceCandidate("CCAND", Q, *I));
+ DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
Candidate.SU = *I;
Candidate.RPDelta = RPDelta;
Candidate.SCost = CurrentCost;
@@ -573,6 +832,69 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
continue;
}
+ // Tie breaker using Timing Class.
+ if (!DisableTCTie) {
+ auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
+ auto &QII = *QST.getInstrInfo();
+
+ const MachineInstr *MI = (*I)->getInstr();
+ const MachineInstr *CandI = Candidate.SU->getInstr();
+ const InstrItineraryData *InstrItins = QST.getInstrItineraryData();
+
+ unsigned InstrLatency = QII.getInstrTimingClassLatency(InstrItins, MI);
+ unsigned CandLatency = QII.getInstrTimingClassLatency(InstrItins, CandI);
+ DEBUG(dbgs() << "TC Tie Breaker Cand: "
+ << CandLatency << " Instr:" << InstrLatency << "\n"
+ << *MI << *CandI << "\n");
+ if (Q.getID() == TopQID && CurrentCost == Candidate.SCost) {
+ if (InstrLatency < CandLatency && TopUseShorterTie) {
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = BestCost;
+ DEBUG(dbgs() << "Used top shorter tie breaker\n");
+ continue;
+ } else if (InstrLatency > CandLatency && !TopUseShorterTie) {
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = BestCost;
+ DEBUG(dbgs() << "Used top longer tie breaker\n");
+ continue;
+ }
+ } else if (Q.getID() == BotQID && CurrentCost == Candidate.SCost) {
+ if (InstrLatency < CandLatency && BotUseShorterTie) {
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = BestCost;
+ DEBUG(dbgs() << "Used Bot shorter tie breaker\n");
+ continue;
+ } else if (InstrLatency > CandLatency && !BotUseShorterTie) {
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = BestCost;
+ DEBUG(dbgs() << "Used Bot longer tie breaker\n");
+ continue;
+ }
+ }
+ }
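
With the default settings above (DisableTCTie, TopUseShorterTie and BotUseShorterTie all false), this tie breaker replaces the current candidate whenever a challenger of equal cost has a strictly longer timing-class latency, in both the top and bottom queues; for instance, if a 1-cycle ALU op currently holds the tie and a longer-latency load turns up with the same cost, the load becomes the candidate (illustrative latencies, not taken from the itineraries).
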
+
+ if (CurrentCost == Candidate.SCost) {
+ if ((Q.getID() == TopQID &&
+ (*I)->Succs.size() > Candidate.SU->Succs.size()) ||
+ (Q.getID() == BotQID &&
+ (*I)->Preds.size() < Candidate.SU->Preds.size())) {
+ DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = BestCost;
+ continue;
+ }
+ }
+
// Fall through to original instruction order.
// Only consider node order if Candidate was chosen from this Q.
if (FoundCandidate == NoCand)
@@ -586,10 +908,12 @@ SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
// Schedule as far as possible in the direction of no choice. This is most
// efficient, but also provides the best heuristics for CriticalPSets.
if (SUnit *SU = Bot.pickOnlyChoice()) {
+ DEBUG(dbgs() << "Picked only Bottom\n");
IsTopNode = false;
return SU;
}
if (SUnit *SU = Top.pickOnlyChoice()) {
+ DEBUG(dbgs() << "Picked only Top\n");
IsTopNode = true;
return SU;
}
@@ -607,6 +931,7 @@ SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
// increase pressure for one of the excess PSets, then schedule in that
// direction first to provide more freedom in the other direction.
if (BotResult == SingleExcess || BotResult == SingleCritical) {
+ DEBUG(dbgs() << "Preferred Bottom Node\n");
IsTopNode = false;
return BotCand.SU;
}
@@ -617,24 +942,29 @@ SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
assert(TopResult != NoCand && "failed to find the first candidate");
if (TopResult == SingleExcess || TopResult == SingleCritical) {
+ DEBUG(dbgs() << "Preferred Top Node\n");
IsTopNode = true;
return TopCand.SU;
}
// If either Q has a single candidate that minimizes pressure above the
// original region's pressure pick it.
if (BotResult == SingleMax) {
+ DEBUG(dbgs() << "Preferred Bottom Node SingleMax\n");
IsTopNode = false;
return BotCand.SU;
}
if (TopResult == SingleMax) {
+ DEBUG(dbgs() << "Preferred Top Node SingleMax\n");
IsTopNode = true;
return TopCand.SU;
}
if (TopCand.SCost > BotCand.SCost) {
+ DEBUG(dbgs() << "Preferred Top Node Cost\n");
IsTopNode = true;
return TopCand.SU;
}
// Otherwise prefer the bottom candidate in node order.
+ DEBUG(dbgs() << "Preferred Bottom in Node order\n");
IsTopNode = false;
return BotCand.SU;
}
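
For context, this scheduler is not picked up automatically; the Hexagon target wires it in through a MachineSchedRegistry entry in HexagonTargetMachine.cpp, which is not part of this diff. A rough sketch of that plumbing, assuming the usual in-tree names, so details may differ:

    // Factory hooked into the machine scheduler registry so that the target
    // (or -misched=hexagon) can select the VLIW scheduler with its
    // converging strategy.
    static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) {
      return new VLIWMachineScheduler(C, make_unique<ConvergingVLIWScheduler>());
    }

    static MachineSchedRegistry
    SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
                        createVLIWMachineSched);
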