author     Dimitry Andric <dim@FreeBSD.org>   2019-01-19 10:01:25 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2019-01-19 10:01:25 +0000
commit     d8e91e46262bc44006913e6796843909f1ac7bcd (patch)
tree       7d0c143d9b38190e0fa0180805389da22cd834c5 /lib/CodeGen/MachineScheduler.cpp
parent     b7eb8e35e481a74962664b63dfb09483b200209a (diff)
Diffstat (limited to 'lib/CodeGen/MachineScheduler.cpp')
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp | 268
1 file changed, 177 insertions, 91 deletions
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 502d18f08f93..90dad9d399fe 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -41,6 +41,7 @@
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -100,8 +101,11 @@ static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
cl::desc("Only schedule this function"));
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
cl::desc("Only schedule this MBB#"));
+static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
+ cl::desc("Print schedule DAGs"));
#else
-static bool ViewMISchedDAGs = false;
+static const bool ViewMISchedDAGs = false;
+static const bool PrintDAGs = false;
#endif // NDEBUG
/// Avoid quadratic complexity in unusually large basic blocks by limiting the
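The new -misched-print-dags option follows the usual debug-only-flag pattern: a hidden cl::opt in assert-enabled builds, a compile-time false constant otherwise, so the guarded dump() calls later in the patch fold away under NDEBUG. A minimal stand-alone sketch of that pattern (the flag name, variable, and dumpEverything() routine are illustrative, not part of LLVM):

  #include "llvm/Support/CommandLine.h"

  #ifndef NDEBUG
  static llvm::cl::opt<bool> MyVerboseFlag("my-verbose-flag", llvm::cl::Hidden,
                                           llvm::cl::desc("Print extra state"));
  #else
  static const bool MyVerboseFlag = false; // lets the guard below constant-fold
  #endif

  void dumpEverything(); // hypothetical dump routine, declaration only

  void maybeDumpState() {
    if (MyVerboseFlag)  // dead code when NDEBUG is defined
      dumpEverything();
  }

Because the option is cl::Hidden it only appears under -help-hidden, but any cl-parsing tool (llc, for instance) accepts it in an assert-enabled build.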
@@ -237,7 +241,8 @@ void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-MachinePassRegistry MachineSchedRegistry::Registry;
+MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
+ MachineSchedRegistry::Registry;
/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
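With MachinePassRegistry now templated on the registered function type, MachineSchedRegistry::Registry spells out its ScheduleDAGCtor, but the registration pattern used throughout this file is unchanged. A sketch of hooking in a factory (the "my-sched" name and createMySched function are illustrative only):

  static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
    // Reuse the stock pre-RA scheduler; a real target would build its own
    // ScheduleDAGMILive and strategy here.
    return createGenericSchedLive(C);
  }

  static MachineSchedRegistry
      MySchedRegistry("my-sched", "Illustrative scheduler registration",
                      createMySched);

Names registered this way become valid arguments to -misched=<name>.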
@@ -633,7 +638,7 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- SuccSU->dump(this);
+ dumpNode(*SuccSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -670,7 +675,7 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
#ifndef NDEBUG
if (PredSU->NumSuccsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
- PredSU->dump(this);
+ dumpNode(*PredSU);
dbgs() << " has been released too many times!\n";
llvm_unreachable(nullptr);
}
@@ -764,10 +769,8 @@ void ScheduleDAGMI::schedule() {
SmallVector<SUnit*, 8> TopRoots, BotRoots;
findRootsAndBiasEdges(TopRoots, BotRoots);
- LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this);
- for (const SUnit &SU
- : SUnits) SU.dumpAll(this);
- if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this););
+ LLVM_DEBUG(dump());
+ if (PrintDAGs) dump();
if (ViewMISchedDAGs) viewGraph();
// Initialize the strategy before modifying the DAG.
@@ -920,7 +923,7 @@ void ScheduleDAGMI::placeDebugValues() {
LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
if (SUnit *SU = getSUnit(&(*MI)))
- SU->dump(this);
+ dumpNode(*SU);
else
dbgs() << "Missing SUnit\n";
}
@@ -1171,6 +1174,29 @@ void ScheduleDAGMILive::updatePressureDiffs(
}
}
+void ScheduleDAGMILive::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ if (EntrySU.getInstr() != nullptr)
+ dumpNodeAll(EntrySU);
+ for (const SUnit &SU : SUnits) {
+ dumpNodeAll(SU);
+ if (ShouldTrackPressure) {
+ dbgs() << " Pressure Diff : ";
+ getPressureDiff(&SU).dump(*TRI);
+ }
+ dbgs() << " Single Issue : ";
+ if (SchedModel.mustBeginGroup(SU.getInstr()) &&
+ SchedModel.mustEndGroup(SU.getInstr()))
+ dbgs() << "true;";
+ else
+ dbgs() << "false;";
+ dbgs() << '\n';
+ }
+ if (ExitSU.getInstr() != nullptr)
+ dumpNodeAll(ExitSU);
+#endif
+}
+
/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
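The new ScheduleDAGMILive::dump() prints, per node, the tracked pressure diff plus whether the scheduling model forces the instruction to issue alone; that last query is just the conjunction of the two group hooks. Pulled out as a stand-alone helper it would look roughly like this (the helper name is illustrative; assumes llvm/CodeGen/TargetSchedule.h):

  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/TargetSchedule.h"

  // True when MI must both begin and end an issue group, i.e. it cannot be
  // issued together with anything else -- the "Single Issue : true;" case above.
  static bool isSingleIssue(const llvm::TargetSchedModel &SchedModel,
                            const llvm::MachineInstr *MI) {
    return SchedModel.mustBeginGroup(MI) && SchedModel.mustEndGroup(MI);
  }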
@@ -1197,22 +1223,8 @@ void ScheduleDAGMILive::schedule() {
// This may initialize a DFSResult to be used for queue priority.
SchedImpl->initialize(this);
- LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this);
- for (const SUnit &SU
- : SUnits) {
- SU.dumpAll(this);
- if (ShouldTrackPressure) {
- dbgs() << " Pressure Diff : ";
- getPressureDiff(&SU).dump(*TRI);
- }
- dbgs() << " Single Issue : ";
- if (SchedModel.mustBeginGroup(SU.getInstr()) &&
- SchedModel.mustEndGroup(SU.getInstr()))
- dbgs() << "true;";
- else
- dbgs() << "false;";
- dbgs() << '\n';
- } if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this););
+ LLVM_DEBUG(dump());
+ if (PrintDAGs) dump();
if (ViewMISchedDAGs) viewGraph();
// Initialize ready queues now that the DAG and priority data are finalized.
@@ -1472,15 +1484,40 @@ namespace {
class BaseMemOpClusterMutation : public ScheduleDAGMutation {
struct MemOpInfo {
SUnit *SU;
- unsigned BaseReg;
+ MachineOperand *BaseOp;
int64_t Offset;
- MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
- : SU(su), BaseReg(reg), Offset(ofs) {}
+ MemOpInfo(SUnit *su, MachineOperand *Op, int64_t ofs)
+ : SU(su), BaseOp(Op), Offset(ofs) {}
+
+ bool operator<(const MemOpInfo &RHS) const {
+ if (BaseOp->getType() != RHS.BaseOp->getType())
+ return BaseOp->getType() < RHS.BaseOp->getType();
+
+ if (BaseOp->isReg())
+ return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
+ std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
+ RHS.SU->NodeNum);
+ if (BaseOp->isFI()) {
+ const MachineFunction &MF =
+ *BaseOp->getParent()->getParent()->getParent();
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+ bool StackGrowsDown = TFI.getStackGrowthDirection() ==
+ TargetFrameLowering::StackGrowsDown;
+ // Can't use tuple comparison here since we might need to use a
+ // different order when the stack grows down.
+ if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
+ return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
+ : BaseOp->getIndex() < RHS.BaseOp->getIndex();
+
+ if (Offset != RHS.Offset)
+ return StackGrowsDown ? Offset > RHS.Offset : Offset < RHS.Offset;
+
+ return SU->NodeNum < RHS.SU->NodeNum;
+ }
- bool operator<(const MemOpInfo&RHS) const {
- return std::tie(BaseReg, Offset, SU->NodeNum) <
- std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
+ llvm_unreachable("MemOpClusterMutation only supports register or frame "
+ "index bases.");
}
};
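The frame-index branch is why TargetFrameLowering.h is now included: the comparator asks the target which way the stack grows and, per the in-code comment, flips the comparison direction when it grows down, presumably so that accesses that are adjacent in memory stay adjacent in the sorted list the clustering loop walks. A minimal stand-alone sketch of that ordering with the LLVM types stripped out (FrameAccess and lessFrameAccess are illustrative names):

  struct FrameAccess {
    int FrameIndex;
    long long Offset;
    unsigned NodeNum; // tie-breaker keeps the sort deterministic
  };

  // Mirrors the frame-index case of MemOpInfo::operator< above: the comparison
  // direction depends on the stack growth direction.
  static bool lessFrameAccess(const FrameAccess &A, const FrameAccess &B,
                              bool StackGrowsDown) {
    if (A.FrameIndex != B.FrameIndex)
      return StackGrowsDown ? A.FrameIndex > B.FrameIndex
                            : A.FrameIndex < B.FrameIndex;
    if (A.Offset != B.Offset)
      return StackGrowsDown ? A.Offset > B.Offset : A.Offset < B.Offset;
    return A.NodeNum < B.NodeNum;
  }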
@@ -1536,21 +1573,21 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
SmallVector<MemOpInfo, 32> MemOpRecords;
for (SUnit *SU : MemOps) {
- unsigned BaseReg;
+ MachineOperand *BaseOp;
int64_t Offset;
- if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
- MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
+ if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
+ MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
}
if (MemOpRecords.size() < 2)
return;
- llvm::sort(MemOpRecords.begin(), MemOpRecords.end());
+ llvm::sort(MemOpRecords);
unsigned ClusterLength = 1;
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
SUnit *SUa = MemOpRecords[Idx].SU;
SUnit *SUb = MemOpRecords[Idx+1].SU;
- if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg,
- *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
+ if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
+ *MemOpRecords[Idx + 1].BaseOp,
ClusterLength) &&
DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
@@ -2397,6 +2434,52 @@ initResourceDelta(const ScheduleDAGMI *DAG,
}
}
+/// Compute remaining latency. We need this both to determine whether the
+/// overall schedule has become latency-limited and whether the instructions
+/// outside this zone are resource or latency limited.
+///
+/// The "dependent" latency is updated incrementally during scheduling as the
+/// max height/depth of scheduled nodes minus the cycles since it was
+/// scheduled:
+/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
+///
+/// The "independent" latency is the max ready queue depth:
+/// ILat = max N.depth for N in Available|Pending
+///
+/// RemainingLatency is the greater of independent and dependent latency.
+///
+/// These computations are expensive, especially in DAGs with many edges, so
+/// only do them if necessary.
+static unsigned computeRemLatency(SchedBoundary &CurrZone) {
+ unsigned RemLatency = CurrZone.getDependentLatency();
+ RemLatency = std::max(RemLatency,
+ CurrZone.findMaxLatency(CurrZone.Available.elements()));
+ RemLatency = std::max(RemLatency,
+ CurrZone.findMaxLatency(CurrZone.Pending.elements()));
+ return RemLatency;
+}
+
+/// Returns true if the current cycle plus remaining latency is greater than
+/// the critical path in the scheduling region.
+bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
+ SchedBoundary &CurrZone,
+ bool ComputeRemLatency,
+ unsigned &RemLatency) const {
+ // The current cycle is already greater than the critical path, so we are
+ // already latency limited and don't need to compute the remaining latency.
+ if (CurrZone.getCurrCycle() > Rem.CriticalPath)
+ return true;
+
+ // If we haven't scheduled anything yet, then we aren't latency limited.
+ if (CurrZone.getCurrCycle() == 0)
+ return false;
+
+ if (ComputeRemLatency)
+ RemLatency = computeRemLatency(CurrZone);
+
+ return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
+}
+
/// Set the CandPolicy given a scheduling zone given the current resources and
/// latencies inside and outside the zone.
void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
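Since the two maxima walk every node in the Available and Pending queues, the refactoring lets setPolicy() skip them unless the answer can actually change the policy. A small stand-alone sketch of the arithmetic with made-up numbers:

  #include <algorithm>

  // RemLatency = max(dependent latency, deepest Available node, deepest Pending node)
  static unsigned remLatency(unsigned DependentLat, unsigned MaxAvailDepth,
                             unsigned MaxPendDepth) {
    return std::max({DependentLat, MaxAvailDepth, MaxPendDepth});
  }

  // The same decision as shouldReduceLatency() above, minus the lazy computation.
  static bool latencyLimited(unsigned CurrCycle, unsigned CriticalPath,
                             unsigned RemLat) {
    if (CurrCycle > CriticalPath)
      return true;              // already past the critical path
    if (CurrCycle == 0)
      return false;             // nothing scheduled yet
    return RemLat + CurrCycle > CriticalPath;
  }

  // Example: remLatency(9, 14, 11) == 14; with CurrCycle = 8 and a 20-cycle
  // critical path, 14 + 8 = 22 > 20, so the zone switches to latency reduction.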
@@ -2406,46 +2489,32 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
// inside and outside this zone. Potential stalls should be considered before
// following this policy.
- // Compute remaining latency. We need this both to determine whether the
- // overall schedule has become latency-limited and whether the instructions
- // outside this zone are resource or latency limited.
- //
- // The "dependent" latency is updated incrementally during scheduling as the
- // max height/depth of scheduled nodes minus the cycles since it was
- // scheduled:
- // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
- //
- // The "independent" latency is the max ready queue depth:
- // ILat = max N.depth for N in Available|Pending
- //
- // RemainingLatency is the greater of independent and dependent latency.
- unsigned RemLatency = CurrZone.getDependentLatency();
- RemLatency = std::max(RemLatency,
- CurrZone.findMaxLatency(CurrZone.Available.elements()));
- RemLatency = std::max(RemLatency,
- CurrZone.findMaxLatency(CurrZone.Pending.elements()));
-
// Compute the critical resource outside the zone.
unsigned OtherCritIdx = 0;
unsigned OtherCount =
OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
bool OtherResLimited = false;
- if (SchedModel->hasInstrSchedModel())
+ unsigned RemLatency = 0;
+ bool RemLatencyComputed = false;
+ if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
+ RemLatency = computeRemLatency(CurrZone);
+ RemLatencyComputed = true;
OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
OtherCount, RemLatency);
+ }
// Schedule aggressively for latency in PostRA mode. We don't check for
// acyclic latency during PostRA, and highly out-of-order processors will
// skip PostRA scheduling.
- if (!OtherResLimited) {
- if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
- Policy.ReduceLatency |= true;
- LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
- << " RemainingLatency " << RemLatency << " + "
- << CurrZone.getCurrCycle() << "c > CritPath "
- << Rem.CriticalPath << "\n");
- }
+ if (!OtherResLimited &&
+ (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
+ RemLatency))) {
+ Policy.ReduceLatency |= true;
+ LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
+ << " RemainingLatency " << RemLatency << " + "
+ << CurrZone.getCurrCycle() << "c > CritPath "
+ << Rem.CriticalPath << "\n");
}
// If the same resource is limiting inside and outside the zone, do nothing.
if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
@@ -2473,7 +2542,7 @@ const char *GenericSchedulerBase::getReasonStr(
switch (Reason) {
case NoCand: return "NOCAND ";
case Only1: return "ONLY1 ";
- case PhysRegCopy: return "PREG-COPY ";
+ case PhysReg: return "PHYS-REG ";
case RegExcess: return "REG-EXCESS";
case RegCritical: return "REG-CRIT ";
case Stall: return "STALL ";
@@ -2809,24 +2878,41 @@ unsigned getWeakLeft(const SUnit *SU, bool isTop) {
/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
/// with the operation that produces or consumes the physreg. We'll do this when
/// regalloc has support for parallel copies.
-int biasPhysRegCopy(const SUnit *SU, bool isTop) {
+int biasPhysReg(const SUnit *SU, bool isTop) {
const MachineInstr *MI = SU->getInstr();
- if (!MI->isCopy())
- return 0;
- unsigned ScheduledOper = isTop ? 1 : 0;
- unsigned UnscheduledOper = isTop ? 0 : 1;
- // If we have already scheduled the physreg produce/consumer, immediately
- // schedule the copy.
- if (TargetRegisterInfo::isPhysicalRegister(
- MI->getOperand(ScheduledOper).getReg()))
- return 1;
- // If the physreg is at the boundary, defer it. Otherwise schedule it
- // immediately to free the dependent. We can hoist the copy later.
- bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
- if (TargetRegisterInfo::isPhysicalRegister(
- MI->getOperand(UnscheduledOper).getReg()))
- return AtBoundary ? -1 : 1;
+ if (MI->isCopy()) {
+ unsigned ScheduledOper = isTop ? 1 : 0;
+ unsigned UnscheduledOper = isTop ? 0 : 1;
+ // If we have already scheduled the physreg produce/consumer, immediately
+ // schedule the copy.
+ if (TargetRegisterInfo::isPhysicalRegister(
+ MI->getOperand(ScheduledOper).getReg()))
+ return 1;
+ // If the physreg is at the boundary, defer it. Otherwise schedule it
+ // immediately to free the dependent. We can hoist the copy later.
+ bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
+ if (TargetRegisterInfo::isPhysicalRegister(
+ MI->getOperand(UnscheduledOper).getReg()))
+ return AtBoundary ? -1 : 1;
+ }
+
+ if (MI->isMoveImmediate()) {
+ // If we have a move immediate and all successors have been assigned, bias
+ // towards scheduling this later. Make sure all register defs are to
+ // physical registers.
+ bool DoBias = true;
+ for (const MachineOperand &Op : MI->defs()) {
+ if (Op.isReg() && !TargetRegisterInfo::isPhysicalRegister(Op.getReg())) {
+ DoBias = false;
+ break;
+ }
+ }
+
+ if (DoBias)
+ return isTop ? -1 : 1;
+ }
+
return 0;
}
} // end namespace llvm
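biasPhysReg() now also covers move-immediates: when every register the instruction defines is physical, the candidate is biased toward being scheduled later, and reschedulePhysReg() below is extended to move such instructions next to the node that depends on them. The new check, pulled out as a stand-alone predicate (the helper name is illustrative; assumes the usual CodeGen headers):

  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/TargetRegisterInfo.h"

  // True for a move-immediate whose register defs are all physical registers --
  // the case biasPhysReg() above defers toward the zone boundary.
  static bool isPhysRegOnlyMoveImm(const llvm::MachineInstr &MI) {
    if (!MI.isMoveImmediate())
      return false;
    for (const llvm::MachineOperand &Op : MI.defs())
      if (Op.isReg() &&
          !llvm::TargetRegisterInfo::isPhysicalRegister(Op.getReg()))
        return false;
    return true;
  }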
@@ -2887,9 +2973,9 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
return;
}
- if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
- biasPhysRegCopy(Cand.SU, Cand.AtTop),
- TryCand, Cand, PhysRegCopy))
+  // Bias PhysReg defs and copies toward their uses and definitions, respectively.
+ if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
+ biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
return;
// Avoid exceeding the target's limit.
@@ -3136,7 +3222,7 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
return SU;
}
-void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
+void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
MachineBasicBlock::iterator InsertPos = SU->getInstr();
if (!isTop)
++InsertPos;
@@ -3151,10 +3237,10 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
continue;
MachineInstr *Copy = DepSU->getInstr();
- if (!Copy->isCopy())
+ if (!Copy->isCopy() && !Copy->isMoveImmediate())
continue;
LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
- Dep.getSUnit()->dump(DAG));
+ DAG->dumpNode(*Dep.getSUnit()));
DAG->moveInstruction(Copy, InsertPos);
}
}
@@ -3165,18 +3251,18 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
/// does.
///
/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
-/// them here. See comments in biasPhysRegCopy.
+/// them here. See comments in biasPhysReg.
void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
Top.bumpNode(SU);
if (SU->hasPhysRegUses)
- reschedulePhysRegCopies(SU, true);
+ reschedulePhysReg(SU, true);
} else {
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
Bot.bumpNode(SU);
if (SU->hasPhysRegDefs)
- reschedulePhysRegCopies(SU, false);
+ reschedulePhysReg(SU, false);
}
}