diff options
Diffstat (limited to 'lib/Target/Hexagon/HexagonMachineScheduler.cpp')
-rw-r--r-- | lib/Target/Hexagon/HexagonMachineScheduler.cpp | 406 |
1 files changed, 368 insertions, 38 deletions
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 7a52d6874c33b..6dcac0dc7ee2e 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -13,28 +13,126 @@ //===----------------------------------------------------------------------===// #include "HexagonMachineScheduler.h" +#include "HexagonSubtarget.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/ScheduleDAGMutation.h" #include "llvm/IR/Function.h" +#include <iomanip> +#include <sstream> + +static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", + cl::Hidden, cl::ZeroOrMore, cl::init(false)); + +static cl::opt<bool> SchedPredsCloser("sched-preds-closer", + cl::Hidden, cl::ZeroOrMore, cl::init(true)); + +static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level", + cl::Hidden, cl::ZeroOrMore, cl::init(1)); + +static cl::opt<bool> TopUseShorterTie("top-use-shorter-tie", + cl::Hidden, cl::ZeroOrMore, cl::init(false)); + +static cl::opt<bool> BotUseShorterTie("bot-use-shorter-tie", + cl::Hidden, cl::ZeroOrMore, cl::init(false)); + +static cl::opt<bool> DisableTCTie("disable-tc-tie", + cl::Hidden, cl::ZeroOrMore, cl::init(false)); + +static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization", + cl::Hidden, cl::ZeroOrMore, cl::init(true)); + +// Check if the scheduler should penalize instructions that are available to +// early due to a zero-latency dependence. +static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden, + cl::ZeroOrMore, cl::init(true)); + using namespace llvm; #define DEBUG_TYPE "misched" -/// Platform-specific modifications to DAG. -void VLIWMachineScheduler::postprocessDAG() { +class HexagonCallMutation : public ScheduleDAGMutation { +public: + void apply(ScheduleDAGInstrs *DAG) override; +private: + bool shouldTFRICallBind(const HexagonInstrInfo &HII, + const SUnit &Inst1, const SUnit &Inst2) const; +}; + +// Check if a call and subsequent A2_tfrpi instructions should maintain +// scheduling affinity. We are looking for the TFRI to be consumed in +// the next instruction. This should help reduce the instances of +// double register pairs being allocated and scheduled before a call +// when not used until after the call. This situation is exacerbated +// by the fact that we allocate the pair from the callee saves list, +// leading to excess spills and restores. +bool HexagonCallMutation::shouldTFRICallBind(const HexagonInstrInfo &HII, + const SUnit &Inst1, const SUnit &Inst2) const { + if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi) + return false; + + // TypeXTYPE are 64 bit operations. + if (HII.getType(Inst2.getInstr()) == HexagonII::TypeXTYPE) + return true; + return false; +} + +void HexagonCallMutation::apply(ScheduleDAGInstrs *DAG) { SUnit* LastSequentialCall = nullptr; + unsigned VRegHoldingRet = 0; + unsigned RetRegister; + SUnit* LastUseOfRet = nullptr; + auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo(); + auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + // Currently we only catch the situation when compare gets scheduled // before preceding call. - for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { + for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) { // Remember the call. - if (SUnits[su].getInstr()->isCall()) - LastSequentialCall = &(SUnits[su]); + if (DAG->SUnits[su].getInstr()->isCall()) + LastSequentialCall = &DAG->SUnits[su]; // Look for a compare that defines a predicate. - else if (SUnits[su].getInstr()->isCompare() && LastSequentialCall) - SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier)); + else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall) + DAG->SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier)); + // Look for call and tfri* instructions. + else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 && + shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1])) + DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier)); + // Prevent redundant register copies between two calls, which are caused by + // both the return value and the argument for the next call being in %R0. + // Example: + // 1: <call1> + // 2: %VregX = COPY %R0 + // 3: <use of %VregX> + // 4: %R0 = ... + // 5: <call2> + // The scheduler would often swap 3 and 4, so an additional register is + // needed. This code inserts a Barrier dependence between 3 & 4 to prevent + // this. The same applies for %D0 and %V0/%W0, which are also handled. + else if (SchedRetvalOptimization) { + const MachineInstr *MI = DAG->SUnits[su].getInstr(); + if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) || + MI->readsRegister(Hexagon::V0, &TRI))) { + // %vregX = COPY %R0 + VRegHoldingRet = MI->getOperand(0).getReg(); + RetRegister = MI->getOperand(1).getReg(); + LastUseOfRet = nullptr; + } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet)) + // <use of %vregX> + LastUseOfRet = &DAG->SUnits[su]; + else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI)) + // %R0 = ... + DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier)); + } } } + +/// Save the last formed packet +void VLIWResourceModel::savePacket() { + OldPacket = Packet; +} + /// Check if scheduling of this SU is possible /// in the current packet. /// It is _not_ precise (statefull), it is more like @@ -48,7 +146,7 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU) { // in the current cycle. switch (SU->getInstr()->getOpcode()) { default: - if (!ResourcesModel->canReserveResources(SU->getInstr())) + if (!ResourcesModel->canReserveResources(*SU->getInstr())) return false; case TargetOpcode::EXTRACT_SUBREG: case TargetOpcode::INSERT_SUBREG: @@ -60,11 +158,19 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU) { break; } + MachineFunction &MF = *SU->getInstr()->getParent()->getParent(); + auto &QII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + // Now see if there are no other dependencies to instructions already // in the packet. for (unsigned i = 0, e = Packet.size(); i != e; ++i) { if (Packet[i]->Succs.size() == 0) continue; + + // Enable .cur formation. + if (QII.mayBeCurLoad(Packet[i]->getInstr())) + continue; + for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), E = Packet[i]->Succs.end(); I != E; ++I) { // Since we do not add pseudos to packets, might as well @@ -85,6 +191,7 @@ bool VLIWResourceModel::reserveResources(SUnit *SU) { // Artificially reset state. if (!SU) { ResourcesModel->clearResources(); + savePacket(); Packet.clear(); TotalPackets++; return false; @@ -93,6 +200,7 @@ bool VLIWResourceModel::reserveResources(SUnit *SU) { // start a new one. if (!isResourceAvailable(SU)) { ResourcesModel->clearResources(); + savePacket(); Packet.clear(); TotalPackets++; startNewCycle = true; @@ -100,7 +208,7 @@ bool VLIWResourceModel::reserveResources(SUnit *SU) { switch (SU->getInstr()->getOpcode()) { default: - ResourcesModel->reserveResources(SU->getInstr()); + ResourcesModel->reserveResources(*SU->getInstr()); break; case TargetOpcode::EXTRACT_SUBREG: case TargetOpcode::INSERT_SUBREG: @@ -129,6 +237,7 @@ bool VLIWResourceModel::reserveResources(SUnit *SU) { // we start fresh. if (Packet.size() >= SchedModel->getIssueWidth()) { ResourcesModel->clearResources(); + savePacket(); Packet.clear(); TotalPackets++; startNewCycle = true; @@ -150,19 +259,12 @@ void VLIWMachineScheduler::schedule() { buildDAGWithRegPressure(); - // Postprocess the DAG to add platform-specific artificial dependencies. - postprocessDAG(); - SmallVector<SUnit*, 8> TopRoots, BotRoots; findRootsAndBiasEdges(TopRoots, BotRoots); // Initialize the strategy before modifying the DAG. SchedImpl->initialize(this); - // To view Height/Depth correctly, they should be accessed at least once. - // - // FIXME: SUnit::dumpAll always recompute depth and height now. The max - // depth/height could be computed directly from the roots and leaves. DEBUG(unsigned maxH = 0; for (unsigned su = 0, e = SUnits.size(); su != e; ++su) if (SUnits[su].getHeight() > maxH) @@ -197,6 +299,13 @@ void VLIWMachineScheduler::schedule() { assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); placeDebugValues(); + + DEBUG({ + unsigned BBNum = begin()->getParent()->getNumber(); + dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); } void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { @@ -223,16 +332,18 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) && "-misched-topdown incompatible with -misched-bottomup"); + + DAG->addMutation(make_unique<HexagonSubtarget::HexagonDAGMutation>()); + DAG->addMutation(make_unique<HexagonCallMutation>()); } void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { if (SU->isScheduled) return; - for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned MinLatency = I->getLatency(); + for (const SDep &PI : SU->Preds) { + unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle; + unsigned MinLatency = PI.getLatency(); #ifndef NDEBUG Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); #endif @@ -321,8 +432,8 @@ void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() { } CheckPending = true; - DEBUG(dbgs() << "*** " << Available.getName() << " cycle " - << CurrCycle << '\n'); + DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle " + << CurrCycle << '\n'); } /// Move the boundary of scheduled code by one SUnit. @@ -414,16 +525,38 @@ SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() { #ifndef NDEBUG void ConvergingVLIWScheduler::traceCandidate(const char *Label, - const ReadyQueue &Q, - SUnit *SU, PressureChange P) { + const ReadyQueue &Q, SUnit *SU, int Cost, PressureChange P) { dbgs() << Label << " " << Q.getName() << " "; if (P.isValid()) dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":" << P.getUnitInc() << " "; else dbgs() << " "; + dbgs() << "cost(" << Cost << ")\t"; SU->dump(DAG); } + +// Very detailed queue dump, to be used with higher verbosity levels. +void ConvergingVLIWScheduler::readyQueueVerboseDump( + const RegPressureTracker &RPTracker, SchedCandidate &Candidate, + ReadyQueue &Q) { + RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker); + + dbgs() << ">>> " << Q.getName() << "\n"; + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + RegPressureDelta RPDelta; + TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + std::stringstream dbgstr; + dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")"; + dbgs() << dbgstr.str(); + SchedulingCost(Q, *I, Candidate, RPDelta, true); + dbgs() << "\t"; + (*I)->getInstr()->dump(); + } + dbgs() << "\n"; +} #endif /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor @@ -466,6 +599,7 @@ static SUnit *getSingleUnscheduledSucc(SUnit *SU) { // heuristic components for cost computation. static const unsigned PriorityOne = 200; static const unsigned PriorityTwo = 50; +static const unsigned PriorityThree = 75; static const unsigned ScaleTwo = 10; static const unsigned FactorOne = 2; @@ -482,25 +616,50 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, if (!SU || SU->isScheduled) return ResCount; + MachineInstr *Instr = SU->getInstr(); + + DEBUG(if (verbose) dbgs() << ((Q.getID() == TopQID) ? "(top|" : "(bot|")); // Forced priority is high. - if (SU->isScheduleHigh) + if (SU->isScheduleHigh) { ResCount += PriorityOne; + DEBUG(dbgs() << "H|"); + } // Critical path first. if (Q.getID() == TopQID) { ResCount += (SU->getHeight() * ScaleTwo); + DEBUG(if (verbose) { + std::stringstream dbgstr; + dbgstr << "h" << std::setw(3) << SU->getHeight() << "|"; + dbgs() << dbgstr.str(); + }); + // If resources are available for it, multiply the // chance of scheduling. - if (Top.ResourceModel->isResourceAvailable(SU)) + if (Top.ResourceModel->isResourceAvailable(SU)) { ResCount <<= FactorOne; + ResCount += PriorityThree; + DEBUG(if (verbose) dbgs() << "A|"); + } else + DEBUG(if (verbose) dbgs() << " |"); } else { ResCount += (SU->getDepth() * ScaleTwo); + DEBUG(if (verbose) { + std::stringstream dbgstr; + dbgstr << "d" << std::setw(3) << SU->getDepth() << "|"; + dbgs() << dbgstr.str(); + }); + // If resources are available for it, multiply the // chance of scheduling. - if (Bot.ResourceModel->isResourceAvailable(SU)) + if (Bot.ResourceModel->isResourceAvailable(SU)) { ResCount <<= FactorOne; + ResCount += PriorityThree; + DEBUG(if (verbose) dbgs() << "A|"); + } else + DEBUG(if (verbose) dbgs() << " |"); } unsigned NumNodesBlocking = 0; @@ -509,24 +668,121 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, // Look at all of the successors of this node. // Count the number of nodes that // this node is the sole unscheduled node for. - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) - if (getSingleUnscheduledPred(I->getSUnit()) == SU) + for (const SDep &SI : SU->Succs) + if (getSingleUnscheduledPred(SI.getSUnit()) == SU) ++NumNodesBlocking; } else { // How many unscheduled predecessors block this node? - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) - if (getSingleUnscheduledSucc(I->getSUnit()) == SU) + for (const SDep &PI : SU->Preds) + if (getSingleUnscheduledSucc(PI.getSUnit()) == SU) ++NumNodesBlocking; } ResCount += (NumNodesBlocking * ScaleTwo); + DEBUG(if (verbose) { + std::stringstream dbgstr; + dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|"; + dbgs() << dbgstr.str(); + }); + // Factor in reg pressure as a heuristic. - ResCount -= (Delta.Excess.getUnitInc()*PriorityTwo); - ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityTwo); + if (!IgnoreBBRegPressure) { + // Decrease priority by the amount that register pressure exceeds the limit. + ResCount -= (Delta.Excess.getUnitInc()*PriorityOne); + // Decrease priority if register pressure exceeds the limit. + ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityOne); + // Decrease priority slightly if register pressure would increase over the + // current maximum. + ResCount -= (Delta.CurrentMax.getUnitInc()*PriorityTwo); + DEBUG(if (verbose) { + dbgs() << "RP " << Delta.Excess.getUnitInc() << "/" + << Delta.CriticalMax.getUnitInc() <<"/" + << Delta.CurrentMax.getUnitInc() << ")|"; + }); + } + + // Give a little extra priority to a .cur instruction if there is a resource + // available for it. + auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>(); + auto &QII = *QST.getInstrInfo(); + if (SU->isInstr() && QII.mayBeCurLoad(SU->getInstr())) { + if (Q.getID() == TopQID && Top.ResourceModel->isResourceAvailable(SU)) { + ResCount += PriorityTwo; + DEBUG(if (verbose) dbgs() << "C|"); + } else if (Q.getID() == BotQID && + Bot.ResourceModel->isResourceAvailable(SU)) { + ResCount += PriorityTwo; + DEBUG(if (verbose) dbgs() << "C|"); + } + } + + // Give preference to a zero latency instruction if the dependent + // instruction is in the current packet. + if (Q.getID() == TopQID) { + for (const SDep &PI : SU->Preds) { + if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() && + PI.getLatency() == 0 && + Top.ResourceModel->isInPacket(PI.getSUnit())) { + ResCount += PriorityThree; + DEBUG(if (verbose) dbgs() << "Z|"); + } + } + } else { + for (const SDep &SI : SU->Succs) { + if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() && + SI.getLatency() == 0 && + Bot.ResourceModel->isInPacket(SI.getSUnit())) { + ResCount += PriorityThree; + DEBUG(if (verbose) dbgs() << "Z|"); + } + } + } + + // Give less preference to an instruction that will cause a stall with + // an instruction in the previous packet. + if (QII.isV60VectorInstruction(Instr)) { + // Check for stalls in the previous packet. + if (Q.getID() == TopQID) { + for (auto J : Top.ResourceModel->OldPacket) + if (QII.producesStall(J->getInstr(), Instr)) + ResCount -= PriorityOne; + } else { + for (auto J : Bot.ResourceModel->OldPacket) + if (QII.producesStall(Instr, J->getInstr())) + ResCount -= PriorityOne; + } + } - DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")"); + // If the instruction has a non-zero latency dependence with an instruction in + // the current packet, then it should not be scheduled yet. The case occurs + // when the dependent instruction is scheduled in a new packet, so the + // scheduler updates the current cycle and pending instructions become + // available. + if (CheckEarlyAvail) { + if (Q.getID() == TopQID) { + for (const auto &PI : SU->Preds) { + if (PI.getLatency() > 0 && + Top.ResourceModel->isInPacket(PI.getSUnit())) { + ResCount -= PriorityOne; + DEBUG(if (verbose) dbgs() << "D|"); + } + } + } else { + for (const auto &SI : SU->Succs) { + if (SI.getLatency() > 0 && + Bot.ResourceModel->isInPacket(SI.getSUnit())) { + ResCount -= PriorityOne; + DEBUG(if (verbose) dbgs() << "D|"); + } + } + } + } + + DEBUG(if (verbose) { + std::stringstream dbgstr; + dbgstr << "Total " << std::setw(4) << ResCount << ")"; + dbgs() << dbgstr.str(); + }); return ResCount; } @@ -539,7 +795,9 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler:: pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, SchedCandidate &Candidate) { - DEBUG(Q.dump()); + DEBUG(if (SchedDebugVerboseLevel > 1) + readyQueueVerboseDump(RPTracker, Candidate, Q); + else Q.dump();); // getMaxPressureDelta temporarily modifies the tracker. RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); @@ -556,6 +814,7 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, // Initialize the candidate if needed. if (!Candidate.SU) { + DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost)); Candidate.SU = *I; Candidate.RPDelta = RPDelta; Candidate.SCost = CurrentCost; @@ -565,7 +824,7 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, // Best cost. if (CurrentCost > Candidate.SCost) { - DEBUG(traceCandidate("CCAND", Q, *I)); + DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost)); Candidate.SU = *I; Candidate.RPDelta = RPDelta; Candidate.SCost = CurrentCost; @@ -573,6 +832,69 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, continue; } + // Tie breaker using Timing Class. + if (!DisableTCTie) { + auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>(); + auto &QII = *QST.getInstrInfo(); + + const MachineInstr *MI = (*I)->getInstr(); + const MachineInstr *CandI = Candidate.SU->getInstr(); + const InstrItineraryData *InstrItins = QST.getInstrItineraryData(); + + unsigned InstrLatency = QII.getInstrTimingClassLatency(InstrItins, MI); + unsigned CandLatency = QII.getInstrTimingClassLatency(InstrItins, CandI); + DEBUG(dbgs() << "TC Tie Breaker Cand: " + << CandLatency << " Instr:" << InstrLatency << "\n" + << *MI << *CandI << "\n"); + if (Q.getID() == TopQID && CurrentCost == Candidate.SCost) { + if (InstrLatency < CandLatency && TopUseShorterTie) { + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + DEBUG(dbgs() << "Used top shorter tie breaker\n"); + continue; + } else if (InstrLatency > CandLatency && !TopUseShorterTie) { + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + DEBUG(dbgs() << "Used top longer tie breaker\n"); + continue; + } + } else if (Q.getID() == BotQID && CurrentCost == Candidate.SCost) { + if (InstrLatency < CandLatency && BotUseShorterTie) { + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + DEBUG(dbgs() << "Used Bot shorter tie breaker\n"); + continue; + } else if (InstrLatency > CandLatency && !BotUseShorterTie) { + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + DEBUG(dbgs() << "Used Bot longer tie breaker\n"); + continue; + } + } + } + + if (CurrentCost == Candidate.SCost) { + if ((Q.getID() == TopQID && + (*I)->Succs.size() > Candidate.SU->Succs.size()) || + (Q.getID() == BotQID && + (*I)->Preds.size() < Candidate.SU->Preds.size())) { + DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + continue; + } + } + // Fall through to original instruction order. // Only consider node order if Candidate was chosen from this Q. if (FoundCandidate == NoCand) @@ -586,10 +908,12 @@ SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { // Schedule as far as possible in the direction of no choice. This is most // efficient, but also provides the best heuristics for CriticalPSets. if (SUnit *SU = Bot.pickOnlyChoice()) { + DEBUG(dbgs() << "Picked only Bottom\n"); IsTopNode = false; return SU; } if (SUnit *SU = Top.pickOnlyChoice()) { + DEBUG(dbgs() << "Picked only Top\n"); IsTopNode = true; return SU; } @@ -607,6 +931,7 @@ SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { // increase pressure for one of the excess PSets, then schedule in that // direction first to provide more freedom in the other direction. if (BotResult == SingleExcess || BotResult == SingleCritical) { + DEBUG(dbgs() << "Prefered Bottom Node\n"); IsTopNode = false; return BotCand.SU; } @@ -617,24 +942,29 @@ SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { assert(TopResult != NoCand && "failed to find the first candidate"); if (TopResult == SingleExcess || TopResult == SingleCritical) { + DEBUG(dbgs() << "Prefered Top Node\n"); IsTopNode = true; return TopCand.SU; } // If either Q has a single candidate that minimizes pressure above the // original region's pressure pick it. if (BotResult == SingleMax) { + DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n"); IsTopNode = false; return BotCand.SU; } if (TopResult == SingleMax) { + DEBUG(dbgs() << "Prefered Top Node SingleMax\n"); IsTopNode = true; return TopCand.SU; } if (TopCand.SCost > BotCand.SCost) { + DEBUG(dbgs() << "Prefered Top Node Cost\n"); IsTopNode = true; return TopCand.SU; } // Otherwise prefer the bottom candidate in node order. + DEBUG(dbgs() << "Prefered Bottom in Node order\n"); IsTopNode = false; return BotCand.SU; } |