author    Dimitry Andric <dim@FreeBSD.org>  2021-08-22 19:00:43 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2021-11-13 20:39:49 +0000
commit    fe6060f10f634930ff71b7c50291ddc610da2475 (patch)
tree      1483580c790bd4d27b6500a7542b5ee00534d3cc /contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
parent    b61bce17f346d79cecfd8f195a64b10f77be43b1 (diff)
parent    344a3780b2e33f6ca763666c380202b18aab72a3 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp  |  142
1 file changed, 94 insertions(+), 48 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
index 8d51bb26103a..4f42a2c8aeff 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -297,7 +297,7 @@ priorNonDebug(MachineBasicBlock::const_iterator I,
MachineBasicBlock::const_iterator Beg) {
assert(I != Beg && "reached the top of the region, cannot decrement");
while (--I != Beg) {
- if (!I->isDebugInstr())
+ if (!I->isDebugOrPseudoInstr())
break;
}
return I;
@@ -317,7 +317,7 @@ static MachineBasicBlock::const_iterator
nextIfDebug(MachineBasicBlock::const_iterator I,
MachineBasicBlock::const_iterator End) {
for(; I != End; ++I) {
- if (!I->isDebugInstr())
+ if (!I->isDebugOrPseudoInstr())
break;
}
return I;
@@ -508,7 +508,7 @@ getSchedRegions(MachineBasicBlock *MBB,
MachineInstr &MI = *std::prev(I);
if (isSchedBoundary(&MI, &*MBB, MF, TII))
break;
- if (!MI.isDebugInstr()) {
+ if (!MI.isDebugOrPseudoInstr()) {
// MBB::size() uses instr_iterator to count. Here we need a bundle to
// count as a single instruction.
++NumRegionInstrs;
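
These three hunks widen the debug-only checks from isDebugInstr() to isDebugOrPseudoInstr(), so that pseudo-probe instructions are skipped the same way DBG_VALUEs are when walking a region and counting its instructions. A minimal sketch of the distinction, mirroring how MachineInstr composes the two predicates (illustration only, not code from this file):

    #include "llvm/CodeGen/MachineInstr.h"

    // Debug values and pseudo probes occupy no issue slot, so neither should
    // count toward region size or influence scheduling decisions.
    bool isDebugOrPseudo(const llvm::MachineInstr &MI) {
      return MI.isDebugInstr() || MI.isPseudoProbe();
    }
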
@@ -927,8 +927,8 @@ void ScheduleDAGMI::placeDebugValues() {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
- for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
- if (SUnit *SU = getSUnit(&(*MI)))
+ for (MachineInstr &MI : *this) {
+ if (SUnit *SU = getSUnit(&MI))
dumpNode(*SU);
else
dbgs() << "Missing SUnit\n";
@@ -1927,17 +1927,15 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
}
LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
// Add the weak edges.
- for (SmallVectorImpl<SUnit*>::const_iterator
- I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
- LLVM_DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
+ for (SUnit *LU : LocalUses) {
+ LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
<< GlobalSU->NodeNum << ")\n");
- DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
+ DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
}
- for (SmallVectorImpl<SUnit*>::const_iterator
- I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
- LLVM_DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
+ for (SUnit *GU : GlobalUses) {
+ LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
<< FirstLocalSU->NodeNum << ")\n");
- DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
+ DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
}
}
@@ -2006,6 +2004,7 @@ void SchedBoundary::reset() {
IsResourceLimited = false;
ReservedCycles.clear();
ReservedCyclesIndex.clear();
+ ResourceGroupSubUnitMasks.clear();
#ifndef NDEBUG
// Track the maximum number of stall cycles that could arise either from the
// latency of a DAG edge or the number of cycles that a processor resource is
@@ -2047,11 +2046,18 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
ReservedCyclesIndex.resize(ResourceCount);
ExecutedResCounts.resize(ResourceCount);
+ ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
unsigned NumUnits = 0;
for (unsigned i = 0; i < ResourceCount; ++i) {
ReservedCyclesIndex[i] = NumUnits;
NumUnits += SchedModel->getProcResource(i)->NumUnits;
+ if (isUnbufferedGroup(i)) {
+ auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
+ for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
+ U != UE; ++U)
+ ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
+ }
}
ReservedCycles.resize(NumUnits, InvalidCycle);
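
ResourceGroupSubUnitMasks adds one APInt per processor-resource kind, with a bit set for each subunit of an unbuffered group; getNextResourceCycle below can then ask "does this write touch any subunit of group G?" with a single bit lookup. A standalone sketch of the bookkeeping, using made-up resource indices:

    #include "llvm/ADT/APInt.h"
    #include <vector>

    int main() {
      // Assume four resource kinds: P0, P1, P2 and a group G = {P0, P2}.
      unsigned ResourceCount = 4, G = 3;
      std::vector<llvm::APInt> SubUnitMasks(
          ResourceCount, llvm::APInt(ResourceCount, 0));
      for (unsigned SubUnit : {0u, 2u}) // stands in for SubUnitsIdxBegin
        SubUnitMasks[G].setBit(SubUnit);
      // Per write-resource entry: is the resource a subunit of G?
      bool UsesSubUnit = SubUnitMasks[G][2]; // true, P2 is in the group
      return UsesSubUnit ? 0 : 1;
    }
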
@@ -2093,7 +2099,9 @@ unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
/// scheduled. Returns the next cycle and the index of the processor resource
/// instance in the reserved cycles vector.
std::pair<unsigned, unsigned>
-SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
+SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
+ unsigned Cycles) {
+
unsigned MinNextUnreserved = InvalidCycle;
unsigned InstanceIdx = 0;
unsigned StartIndex = ReservedCyclesIndex[PIdx];
@@ -2101,6 +2109,35 @@ SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
assert(NumberOfInstances > 0 &&
"Cannot have zero instances of a ProcResource");
+ if (isUnbufferedGroup(PIdx)) {
+ // If any subunits are used by the instruction, report that the resource
+ // group is available at 0, effectively removing the group record from
+ // hazarding and basing the hazarding decisions on the subunit records.
+ // Otherwise, choose the first available instance from among the subunits.
+ // Specifications which assign cycles to both the subunits and the group or
+ // which use an unbuffered group with buffered subunits will appear to
+ // schedule strangely. In the first case, the additional cycles for the
+ // group will be ignored. In the second, the group will be ignored
+ // entirely.
+ for (const MCWriteProcResEntry &PE :
+ make_range(SchedModel->getWriteProcResBegin(SC),
+ SchedModel->getWriteProcResEnd(SC)))
+ if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
+ return std::make_pair(0u, StartIndex);
+
+ auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
+ for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
+ unsigned NextUnreserved, NextInstanceIdx;
+ std::tie(NextUnreserved, NextInstanceIdx) =
+ getNextResourceCycle(SC, SubUnits[I], Cycles);
+ if (MinNextUnreserved > NextUnreserved) {
+ InstanceIdx = NextInstanceIdx;
+ MinNextUnreserved = NextUnreserved;
+ }
+ }
+ return std::make_pair(MinNextUnreserved, InstanceIdx);
+ }
+
for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
++I) {
unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
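
When the instruction names no subunit explicitly, the group path above reduces to "earliest-free subunit wins": the recursion asks each member unit for its next free cycle and keeps the minimum. A worked stand-alone sketch with assumed availability cycles:

    #include <cstdio>
    #include <utility>
    #include <vector>

    // Stand-in for the subunit scan in getNextResourceCycle: return the
    // (cycle, index) of the member unit that becomes free earliest.
    std::pair<unsigned, unsigned>
    earliestSubUnit(const std::vector<unsigned> &FreeAt) {
      unsigned MinCycle = ~0u, MinIdx = 0;
      for (unsigned I = 0; I < FreeAt.size(); ++I)
        if (FreeAt[I] < MinCycle) {
          MinCycle = FreeAt[I];
          MinIdx = I;
        }
      return {MinCycle, MinIdx};
    }

    int main() {
      // Subunit 0 frees at cycle 5, subunit 1 at cycle 3.
      auto [Cycle, Idx] = earliestSubUnit({5, 3});
      std::printf("cycle %u on subunit %u\n", Cycle, Idx); // cycle 3 on 1
    }
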
@@ -2154,7 +2191,7 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
unsigned ResIdx = PE.ProcResourceIdx;
unsigned Cycles = PE.Cycles;
unsigned NRCycle, InstanceIdx;
- std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles);
+ std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles);
if (NRCycle > CurrCycle) {
#ifndef NDEBUG
MaxObservedStall = std::max(Cycles, MaxObservedStall);
@@ -2304,8 +2341,8 @@ void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
///
/// \return the next cycle at which the instruction may execute without
/// oversubscribing resources.
-unsigned SchedBoundary::
-countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
+unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
+ unsigned Cycles, unsigned NextCycle) {
unsigned Factor = SchedModel->getResourceFactor(PIdx);
unsigned Count = Factor * Cycles;
LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
@@ -2327,7 +2364,7 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
}
// For reserved resources, record the highest cycle using the resource.
unsigned NextAvailable, InstanceIdx;
- std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles);
+ std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(SC, PIdx, Cycles);
if (NextAvailable > CurrCycle) {
LLVM_DEBUG(dbgs() << " Resource conflict: "
<< SchedModel->getResourceName(PIdx)
@@ -2407,7 +2444,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
PI = SchedModel->getWriteProcResBegin(SC),
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
unsigned RCycle =
- countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
+ countResource(SC, PI->ProcResourceIdx, PI->Cycles, NextCycle);
if (RCycle > NextCycle)
NextCycle = RCycle;
}
@@ -2422,7 +2459,8 @@ void SchedBoundary::bumpNode(SUnit *SU) {
unsigned PIdx = PI->ProcResourceIdx;
if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
unsigned ReservedUntil, InstanceIdx;
- std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0);
+ std::tie(ReservedUntil, InstanceIdx) =
+ getNextResourceCycle(SC, PIdx, 0);
if (isTop()) {
ReservedCycles[InstanceIdx] =
std::max(ReservedUntil, NextCycle + PI->Cycles);
@@ -2780,6 +2818,8 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
namespace llvm {
/// Return true if this heuristic determines order.
+/// TODO: Consider refactoring the return type of these functions to an integer
+/// or enum, as we may need to differentiate whether TryCand is better than Cand.
bool tryLess(int TryVal, int CandVal,
GenericSchedulerBase::SchedCandidate &TryCand,
GenericSchedulerBase::SchedCandidate &Cand,
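
The TODO reads against the contract these helpers share: each one marks a Reason on whichever candidate it can prove better and returns true, or returns false on a tie so the next heuristic runs. That is why the hunks below keep returning "TryCand.Reason != NoCand": a true result with a non-NoCand reason on TryCand means TryCand beat Cand rather than the reverse. A simplified sketch of the protocol (the real tryLess additionally keeps the strongest Reason already recorded on Cand):

    // Simplified winner-marking helper; not the exact upstream body.
    bool tryLessSketch(int TryVal, int CandVal,
                       GenericSchedulerBase::SchedCandidate &TryCand,
                       GenericSchedulerBase::SchedCandidate &Cand,
                       GenericSchedulerBase::CandReason Reason) {
      if (TryVal < CandVal) {
        TryCand.Reason = Reason; // TryCand wins
        return true;
      }
      if (TryVal > CandVal) {
        Cand.Reason = Reason;    // Cand wins; TryCand.Reason stays NoCand
        return true;
      }
      return false;              // tie: defer to the next heuristic
    }
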
@@ -3138,34 +3178,35 @@ void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
/// \param Zone describes the scheduled zone that we are extending, or nullptr
-// if Cand is from a different zone than TryCand.
-void GenericScheduler::tryCandidate(SchedCandidate &Cand,
+/// if Cand is from a different zone than TryCand.
+/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
+bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand,
SchedBoundary *Zone) const {
// Initialize the candidate if needed.
if (!Cand.isValid()) {
TryCand.Reason = NodeOrder;
- return;
+ return true;
}
// Bias PhysReg Defs and copies to their uses and defined respectively.
if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
- return;
+ return TryCand.Reason != NoCand;
// Avoid exceeding the target's limit.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
Cand.RPDelta.Excess,
TryCand, Cand, RegExcess, TRI,
DAG->MF))
- return;
+ return TryCand.Reason != NoCand;
// Avoid increasing the max critical pressure in the scheduled region.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
Cand.RPDelta.CriticalMax,
TryCand, Cand, RegCritical, TRI,
DAG->MF))
- return;
+ return TryCand.Reason != NoCand;
// We only compare a subset of features when comparing nodes between
// Top and Bottom boundary. Some properties are simply incomparable, in many
@@ -3179,12 +3220,12 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
// heuristics to take precedence.
if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
tryLatency(TryCand, Cand, *Zone))
- return;
+ return TryCand.Reason != NoCand;
// Prioritize instructions that read unbuffered resources by stall cycles.
if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
- return;
+ return TryCand.Reason != NoCand;
}
// Keep clustered nodes together to encourage downstream peephole
@@ -3200,14 +3241,14 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
if (tryGreater(TryCand.SU == TryCandNextClusterSU,
Cand.SU == CandNextClusterSU,
TryCand, Cand, Cluster))
- return;
+ return TryCand.Reason != NoCand;
if (SameBoundary) {
// Weak edges are for clustering and other constraints.
if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
getWeakLeft(Cand.SU, Cand.AtTop),
TryCand, Cand, Weak))
- return;
+ return TryCand.Reason != NoCand;
}
// Avoid increasing the max pressure of the entire region.
@@ -3215,31 +3256,34 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
Cand.RPDelta.CurrentMax,
TryCand, Cand, RegMax, TRI,
DAG->MF))
- return;
+ return TryCand.Reason != NoCand;
if (SameBoundary) {
// Avoid critical resource consumption and balance the schedule.
TryCand.initResourceDelta(DAG, SchedModel);
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
TryCand, Cand, ResourceReduce))
- return;
+ return TryCand.Reason != NoCand;
if (tryGreater(TryCand.ResDelta.DemandedResources,
Cand.ResDelta.DemandedResources,
TryCand, Cand, ResourceDemand))
- return;
+ return TryCand.Reason != NoCand;
// Avoid serializing long latency dependence chains.
// For acyclic path limited loops, latency was already checked above.
if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
!Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
- return;
+ return TryCand.Reason != NoCand;
// Fall through to original instruction order.
if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
|| (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
TryCand.Reason = NodeOrder;
+ return true;
}
}
+
+ return false;
}
/// Pick the best candidate from the queue.
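
Returning bool here, presumably paired with making the hook virtual in the header, lets a target override tryCandidate, add its own tie-breakers, and still reuse the generic heuristic chain. A hypothetical subclass, purely for illustration (MyTargetScheduler and its extra tie-breaker are invented, not part of this commit):

    #include "llvm/CodeGen/MachineScheduler.h"
    using namespace llvm;

    class MyTargetScheduler : public GenericScheduler {
    public:
      using GenericScheduler::GenericScheduler;
      bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                        SchedBoundary *Zone) const override {
        // Let the generic chain decide first; true means TryCand won.
        if (GenericScheduler::tryCandidate(Cand, TryCand, Zone))
          return true;
        // Invented tie-breaker: prefer source order even across boundaries,
        // which the generic fall-through applies only within a zone.
        if (Cand.isValid() && TryCand.SU->NodeNum < Cand.SU->NodeNum) {
          TryCand.Reason = NodeOrder;
          return true;
        }
        return false;
      }
    };
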
@@ -3261,8 +3305,7 @@ void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
// Pass SchedBoundary only when comparing nodes from the same boundary.
SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
- tryCandidate(Cand, TryCand, ZoneArg);
- if (TryCand.Reason != NoCand) {
+ if (tryCandidate(Cand, TryCand, ZoneArg)) {
// Initialize resource delta if needed in case future heuristics query it.
if (TryCand.ResDelta == SchedResourceDelta())
TryCand.initResourceDelta(DAG, SchedModel);
@@ -3340,8 +3383,7 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
assert(TopCand.isValid());
SchedCandidate Cand = BotCand;
TopCand.Reason = NoCand;
- tryCandidate(Cand, TopCand, nullptr);
- if (TopCand.Reason != NoCand) {
+ if (tryCandidate(Cand, TopCand, nullptr)) {
Cand.setBest(TopCand);
LLVM_DEBUG(traceCandidate(Cand));
}
@@ -3505,42 +3547,47 @@ void PostGenericScheduler::registerRoots() {
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
-void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
+/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
+bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand) {
// Initialize the candidate if needed.
if (!Cand.isValid()) {
TryCand.Reason = NodeOrder;
- return;
+ return true;
}
// Prioritize instructions that read unbuffered resources by stall cycles.
if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
- return;
+ return TryCand.Reason != NoCand;
// Keep clustered nodes together.
if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
Cand.SU == DAG->getNextClusterSucc(),
TryCand, Cand, Cluster))
- return;
+ return TryCand.Reason != NoCand;
// Avoid critical resource consumption and balance the schedule.
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
TryCand, Cand, ResourceReduce))
- return;
+ return TryCand.Reason != NoCand;
if (tryGreater(TryCand.ResDelta.DemandedResources,
Cand.ResDelta.DemandedResources,
TryCand, Cand, ResourceDemand))
- return;
+ return TryCand.Reason != NoCand;
// Avoid serializing long latency dependence chains.
if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
- return;
+ return TryCand.Reason != NoCand;
}
// Fall through to original instruction order.
- if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
+ if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
TryCand.Reason = NodeOrder;
+ return true;
+ }
+
+ return false;
}
void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
@@ -3550,8 +3597,7 @@ void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
TryCand.SU = SU;
TryCand.AtTop = true;
TryCand.initResourceDelta(DAG, SchedModel);
- tryCandidate(Cand, TryCand);
- if (TryCand.Reason != NoCand) {
+ if (tryCandidate(Cand, TryCand)) {
Cand.setBest(TryCand);
LLVM_DEBUG(traceCandidate(Cand));
}