diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2023-12-18 20:30:12 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-04-06 20:11:55 +0000 |
commit | 5f757f3ff9144b609b3c433dfd370cc6bdc191ad (patch) | |
tree | 1b4e980b866cd26a00af34c0a653eb640bd09caf /contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp | |
parent | 3e1c8a35f741a5d114d0ba670b15191355711fe9 (diff) | |
parent | 312c0ed19cc5276a17bacf2120097bec4515b0f1 (diff) |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp | 159 |
1 file changed, 88 insertions, 71 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp index ba5432459d12..886137d86f87 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp @@ -747,9 +747,9 @@ void ScheduleDAGMI::finishBlock() { ScheduleDAGInstrs::finishBlock(); } -/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after -/// crossing a scheduling boundary. [begin, end) includes all instructions in -/// the region, including the boundary itself and single-instruction regions +/// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction +/// after crossing a scheduling boundary. [begin, end) includes all instructions +/// in the region, including the boundary itself and single-instruction regions /// that don't get scheduled. void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, @@ -793,9 +793,9 @@ bool ScheduleDAGMI::checkSchedLimit() { } /// Per-region scheduling driver, called back from -/// MachineScheduler::runOnMachineFunction. This is a simplified driver that -/// does not consider liveness or register pressure. It is useful for PostRA -/// scheduling and potentially other custom schedulers. +/// PostMachineScheduler::runOnMachineFunction. This is a simplified driver +/// that does not consider liveness or register pressure. It is useful for +/// PostRA scheduling and potentially other custom schedulers. 
void ScheduleDAGMI::schedule() { LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n"); LLVM_DEBUG(SchedImpl->dumpPolicy()); @@ -980,8 +980,8 @@ LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const { for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC), PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { - if (SU->TopReadyCycle + PI->Cycles - 1 > LastCycle) - LastCycle = SU->TopReadyCycle + PI->Cycles - 1; + if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle) + LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1; } } // Print the header with the cycles @@ -1017,19 +1017,20 @@ LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const { llvm::stable_sort(ResourcesIt, [](const MCWriteProcResEntry &LHS, const MCWriteProcResEntry &RHS) -> bool { - return LHS.StartAtCycle < RHS.StartAtCycle || - (LHS.StartAtCycle == RHS.StartAtCycle && - LHS.Cycles < RHS.Cycles); + return LHS.AcquireAtCycle < RHS.AcquireAtCycle || + (LHS.AcquireAtCycle == RHS.AcquireAtCycle && + LHS.ReleaseAtCycle < RHS.ReleaseAtCycle); }); for (const MCWriteProcResEntry &PI : ResourcesIt) { C = FirstCycle; const std::string ResName = SchedModel.getResourceName(PI.ProcResourceIdx); dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth); - for (; C < SU->TopReadyCycle + PI.StartAtCycle; ++C) { + for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) { dbgs() << llvm::left_justify("|", ColWidth); } - for (unsigned I = 0, E = PI.Cycles - PI.StartAtCycle; I != E; ++I, ++C) + for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E; + ++I, ++C) dbgs() << llvm::left_justify("| x", ColWidth); while (C++ <= LastCycle) dbgs() << llvm::left_justify("|", ColWidth); @@ -1061,8 +1062,8 @@ LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const { for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC), PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { - if ((int)SU->BotReadyCycle - 
PI->Cycles + 1 < LastCycle) - LastCycle = (int)SU->BotReadyCycle - PI->Cycles + 1; + if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle) + LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1; } } // Print the header with the cycles @@ -1097,19 +1098,20 @@ LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const { llvm::stable_sort(ResourcesIt, [](const MCWriteProcResEntry &LHS, const MCWriteProcResEntry &RHS) -> bool { - return LHS.StartAtCycle < RHS.StartAtCycle || - (LHS.StartAtCycle == RHS.StartAtCycle && - LHS.Cycles < RHS.Cycles); + return LHS.AcquireAtCycle < RHS.AcquireAtCycle || + (LHS.AcquireAtCycle == RHS.AcquireAtCycle && + LHS.ReleaseAtCycle < RHS.ReleaseAtCycle); }); for (const MCWriteProcResEntry &PI : ResourcesIt) { C = FirstCycle; const std::string ResName = SchedModel.getResourceName(PI.ProcResourceIdx); dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth); - for (; C > ((int)SU->BotReadyCycle - (int)PI.StartAtCycle); --C) { + for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) { dbgs() << llvm::left_justify("|", ColWidth); } - for (unsigned I = 0, E = PI.Cycles - PI.StartAtCycle; I != E; ++I, --C) + for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E; + ++I, --C) dbgs() << llvm::left_justify("| x", ColWidth); while (C-- >= LastCycle) dbgs() << llvm::left_justify("|", ColWidth); @@ -1696,11 +1698,12 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation { SmallVector<const MachineOperand *, 4> BaseOps; int64_t Offset; unsigned Width; + bool OffsetIsScalable; MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps, - int64_t Offset, unsigned Width) + int64_t Offset, bool OffsetIsScalable, unsigned Width) : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset), - Width(Width) {} + Width(Width), OffsetIsScalable(OffsetIsScalable) {} static bool Compare(const MachineOperand *const &A, const MachineOperand *const &B) { @@ -1829,8 +1832,10 @@ void 
BaseMemOpClusterMutation::clusterNeighboringMemOps( SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width; } - if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength, - CurrentClusterBytes)) + if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset, + MemOpa.OffsetIsScalable, MemOpb.BaseOps, + MemOpb.Offset, MemOpb.OffsetIsScalable, + ClusterLength, CurrentClusterBytes)) continue; SUnit *SUa = MemOpa.SU; @@ -1897,7 +1902,8 @@ void BaseMemOpClusterMutation::collectMemOpRecords( unsigned Width; if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, OffsetIsScalable, Width, TRI)) { - MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width)); + MemOpRecords.push_back( + MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width)); LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: " << Offset << ", OffsetIsScalable: " << OffsetIsScalable @@ -2237,8 +2243,9 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { unsigned PIdx = PI->ProcResourceIdx; unsigned Factor = SchedModel->getResourceFactor(PIdx); - assert(PI->Cycles >= PI->StartAtCycle); - RemainingCounts[PIdx] += (Factor * (PI->Cycles - PI->StartAtCycle)); + assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle); + RemainingCounts[PIdx] += + (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle)); } } } @@ -2291,15 +2298,15 @@ unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) { /// Compute the next cycle at which the given processor resource unit /// can be scheduled. 
unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx, - unsigned Cycles, - unsigned StartAtCycle) { + unsigned ReleaseAtCycle, + unsigned AcquireAtCycle) { if (SchedModel && SchedModel->enableIntervals()) { if (isTop()) return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop( - CurrCycle, StartAtCycle, Cycles); + CurrCycle, AcquireAtCycle, ReleaseAtCycle); return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom( - CurrCycle, StartAtCycle, Cycles); + CurrCycle, AcquireAtCycle, ReleaseAtCycle); } unsigned NextUnreserved = ReservedCycles[InstanceIdx]; @@ -2308,7 +2315,7 @@ unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx, return CurrCycle; // For bottom-up scheduling add the cycles needed for the current operation. if (!isTop()) - NextUnreserved = std::max(CurrCycle, NextUnreserved + Cycles); + NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle); return NextUnreserved; } @@ -2317,7 +2324,8 @@ unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx, /// instance in the reserved cycles vector. std::pair<unsigned, unsigned> SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, - unsigned Cycles, unsigned StartAtCycle) { + unsigned ReleaseAtCycle, + unsigned AcquireAtCycle) { if (MischedDetailResourceBooking) { LLVM_DEBUG(dbgs() << " Resource booking (@" << CurrCycle << "c): \n"); LLVM_DEBUG(dumpReservedCycles()); @@ -2331,26 +2339,30 @@ SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, "Cannot have zero instances of a ProcResource"); if (isUnbufferedGroup(PIdx)) { - // If any subunits are used by the instruction, report that the resource - // group is available at 0, effectively removing the group record from - // hazarding and basing the hazarding decisions on the subunit records. - // Otherwise, choose the first available instance from among the subunits. 
- // Specifications which assign cycles to both the subunits and the group or - // which use an unbuffered group with buffered subunits will appear to - // schedule strangely. In the first case, the additional cycles for the - // group will be ignored. In the second, the group will be ignored - // entirely. + // If any subunits are used by the instruction, report that the + // subunits of the resource group are available at the first cycle + // in which the unit is available, effectively removing the group + // record from hazarding and basing the hazarding decisions on the + // subunit records. Otherwise, choose the first available instance + // from among the subunits. Specifications which assign cycles to + // both the subunits and the group or which use an unbuffered + // group with buffered subunits will appear to schedule + // strangely. In the first case, the additional cycles for the + // group will be ignored. In the second, the group will be + // ignored entirely. for (const MCWriteProcResEntry &PE : make_range(SchedModel->getWriteProcResBegin(SC), SchedModel->getWriteProcResEnd(SC))) if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx]) - return std::make_pair(0u, StartIndex); + return std::make_pair(getNextResourceCycleByInstance( + StartIndex, ReleaseAtCycle, AcquireAtCycle), + StartIndex); auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin; for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) { unsigned NextUnreserved, NextInstanceIdx; std::tie(NextUnreserved, NextInstanceIdx) = - getNextResourceCycle(SC, SubUnits[I], Cycles, StartAtCycle); + getNextResourceCycle(SC, SubUnits[I], ReleaseAtCycle, AcquireAtCycle); if (MinNextUnreserved > NextUnreserved) { InstanceIdx = NextInstanceIdx; MinNextUnreserved = NextUnreserved; @@ -2362,7 +2374,7 @@ SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End; ++I) { unsigned NextUnreserved 
= - getNextResourceCycleByInstance(I, Cycles, StartAtCycle); + getNextResourceCycleByInstance(I, ReleaseAtCycle, AcquireAtCycle); if (MischedDetailResourceBooking) LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @" << NextUnreserved << "c\n"); @@ -2419,14 +2431,14 @@ bool SchedBoundary::checkHazard(SUnit *SU) { make_range(SchedModel->getWriteProcResBegin(SC), SchedModel->getWriteProcResEnd(SC))) { unsigned ResIdx = PE.ProcResourceIdx; - unsigned Cycles = PE.Cycles; - unsigned StartAtCycle = PE.StartAtCycle; + unsigned ReleaseAtCycle = PE.ReleaseAtCycle; + unsigned AcquireAtCycle = PE.AcquireAtCycle; unsigned NRCycle, InstanceIdx; std::tie(NRCycle, InstanceIdx) = - getNextResourceCycle(SC, ResIdx, Cycles, StartAtCycle); + getNextResourceCycle(SC, ResIdx, ReleaseAtCycle, AcquireAtCycle); if (NRCycle > CurrCycle) { #if LLVM_ENABLE_ABI_BREAKING_CHECKS - MaxObservedStall = std::max(Cycles, MaxObservedStall); + MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall); #endif LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " << SchedModel->getResourceName(ResIdx) @@ -2568,18 +2580,22 @@ void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) { /// Add the given processor resource to this scheduled zone. /// -/// \param Cycles indicates the number of consecutive (non-pipelined) cycles -/// during which this resource is consumed. +/// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined) +/// cycles during which this resource is released. +/// +/// \param AcquireAtCycle indicates the number of consecutive (non-pipelined) +/// cycles at which the resource is aquired after issue (assuming no stalls). /// /// \return the next cycle at which the instruction may execute without /// oversubscribing resources. 
unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx, - unsigned Cycles, unsigned NextCycle, - unsigned StartAtCycle) { + unsigned ReleaseAtCycle, + unsigned NextCycle, + unsigned AcquireAtCycle) { unsigned Factor = SchedModel->getResourceFactor(PIdx); - unsigned Count = Factor * (Cycles - StartAtCycle); + unsigned Count = Factor * (ReleaseAtCycle- AcquireAtCycle); LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +" - << Cycles << "x" << Factor << "u\n"); + << ReleaseAtCycle << "x" << Factor << "u\n"); // Update Executed resources counts. incExecutedResources(PIdx, Count); @@ -2598,7 +2614,7 @@ unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx, // For reserved resources, record the highest cycle using the resource. unsigned NextAvailable, InstanceIdx; std::tie(NextAvailable, InstanceIdx) = - getNextResourceCycle(SC, PIdx, Cycles, StartAtCycle); + getNextResourceCycle(SC, PIdx, ReleaseAtCycle, AcquireAtCycle); if (NextAvailable > CurrCycle) { LLVM_DEBUG(dbgs() << " Resource conflict: " << SchedModel->getResourceName(PIdx) @@ -2677,8 +2693,9 @@ void SchedBoundary::bumpNode(SUnit *SU) { for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { - unsigned RCycle = countResource(SC, PI->ProcResourceIdx, PI->Cycles, - NextCycle, PI->StartAtCycle); + unsigned RCycle = + countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle, + PI->AcquireAtCycle); if (RCycle > NextCycle) NextCycle = RCycle; } @@ -2695,27 +2712,27 @@ void SchedBoundary::bumpNode(SUnit *SU) { if (SchedModel && SchedModel->enableIntervals()) { unsigned ReservedUntil, InstanceIdx; - std::tie(ReservedUntil, InstanceIdx) = - getNextResourceCycle(SC, PIdx, PI->Cycles, PI->StartAtCycle); + std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle( + SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle); if (isTop()) { 
ReservedResourceSegments[InstanceIdx].add( ResourceSegments::getResourceIntervalTop( - NextCycle, PI->StartAtCycle, PI->Cycles), + NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle), MIResourceCutOff); } else { ReservedResourceSegments[InstanceIdx].add( ResourceSegments::getResourceIntervalBottom( - NextCycle, PI->StartAtCycle, PI->Cycles), + NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle), MIResourceCutOff); } } else { unsigned ReservedUntil, InstanceIdx; - std::tie(ReservedUntil, InstanceIdx) = - getNextResourceCycle(SC, PIdx, PI->Cycles, PI->StartAtCycle); + std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle( + SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle); if (isTop()) { ReservedCycles[InstanceIdx] = - std::max(ReservedUntil, NextCycle + PI->Cycles); + std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle); } else ReservedCycles[InstanceIdx] = NextCycle; } @@ -2913,9 +2930,9 @@ initResourceDelta(const ScheduleDAGMI *DAG, PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { if (PI->ProcResourceIdx == Policy.ReduceResIdx) - ResDelta.CritResources += PI->Cycles; + ResDelta.CritResources += PI->ReleaseAtCycle; if (PI->ProcResourceIdx == Policy.DemandResIdx) - ResDelta.DemandedResources += PI->Cycles; + ResDelta.DemandedResources += PI->ReleaseAtCycle; } } @@ -3956,7 +3973,7 @@ struct ILPOrder { if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB)) return ScheduledTrees->test(SchedTreeB); - // Trees with shallower connections have have lower priority. + // Trees with shallower connections have lower priority. 
if (DFSResult->getSubtreeLevel(SchedTreeA) != DFSResult->getSubtreeLevel(SchedTreeB)) { return DFSResult->getSubtreeLevel(SchedTreeA) @@ -4243,7 +4260,7 @@ static bool sortIntervals(const ResourceSegments::IntervalTy &A, } unsigned ResourceSegments::getFirstAvailableAt( - unsigned CurrCycle, unsigned StartAtCycle, unsigned Cycle, + unsigned CurrCycle, unsigned AcquireAtCycle, unsigned Cycle, std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)> IntervalBuilder) const { assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals), @@ -4251,7 +4268,7 @@ unsigned ResourceSegments::getFirstAvailableAt( "Cannot execute on an un-sorted set of intervals."); unsigned RetCycle = CurrCycle; ResourceSegments::IntervalTy NewInterval = - IntervalBuilder(RetCycle, StartAtCycle, Cycle); + IntervalBuilder(RetCycle, AcquireAtCycle, Cycle); for (auto &Interval : _Intervals) { if (!intersects(NewInterval, Interval)) continue; @@ -4261,7 +4278,7 @@ unsigned ResourceSegments::getFirstAvailableAt( assert(Interval.second > NewInterval.first && "Invalid intervals configuration."); RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first; - NewInterval = IntervalBuilder(RetCycle, StartAtCycle, Cycle); + NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, Cycle); } return RetCycle; } |