path: root/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
author    Dimitry Andric <dim@FreeBSD.org>    2019-12-20 19:53:05 +0000
committer    Dimitry Andric <dim@FreeBSD.org>    2019-12-20 19:53:05 +0000
commit    0b57cec536236d46e3dba9bd041533462f33dbb7 (patch)
tree    56229dbdbbf76d18580f72f789003db17246c8d9    /contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
parent    718ef55ec7785aae63f98f8ca05dc07ed399c16d (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp')
-rw-r--r--    contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp    463
1 file changed, 463 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
new file mode 100644
index 000000000000..e2af02227999
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -0,0 +1,463 @@
+//=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a hazard recognizer for the SystemZ scheduler.
+//
+// This class is used by the SystemZ scheduling strategy to maintain
+// the state during scheduling, and provide cost functions for
+// scheduling candidates. This includes:
+//
+// * Decoder grouping. A decoder group can hold at most 3 uops, and
+// instructions that always begin a new group should be scheduled when
+// the current decoder group is empty.
+// * Processor resources usage. It is beneficial to balance the use of
+// resources.
+//
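+// For example, three single-uop instructions can share one decoder group;
+// a cracked instruction (2 uops) begins a new group and leaves one slot for
+// a following single-uop instruction; an expanded instruction (3 or more
+// uops) always occupies whole decoder group(s) by itself.
+//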
+// A goal is to consider all instructions, including those outside of any
+// scheduling region. Such instructions are "advanced" past, and include
+// single instructions before a scheduling region, branches, etc.
+//
+// A block that has only one predecessor continues scheduling with that
+// predecessor's state (which may be updated by emitting branches).
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZHazardRecognizer.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-scheduler"
+
+// This is the limit of processor resource usage at which the
+// scheduler should try to look for other instructions (not using the
+// critical resource).
+static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
+ cl::desc("The OOO window for processor "
+ "resources during scheduling."),
+ cl::init(8));
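+// (The per-resource counters compared against this limit are increased in
+// EmitInstruction() by the number of cycles an instruction uses a resource,
+// and decremented once per completed decoder group in nextGroup().)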
+
+unsigned SystemZHazardRecognizer::
+getNumDecoderSlots(SUnit *SU) const {
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+ if (!SC->isValid())
+ return 0; // IMPLICIT_DEF / KILL -- will have no impact on the output.
+
+ assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) &&
+ "Only cracked instruction can have 2 uops.");
+ assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) &&
+ "Expanded instructions always group alone.");
+ assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) &&
+ "Expanded instructions fill the group(s).");
+
+ return SC->NumMicroOps;
+}
+
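+// Return an index in the range 0..5 that roughly models the decoder slot SU
+// would occupy within the current pair of decoder groups. For example, with
+// an odd GrpCount and CurrGroupSize == 2, an SU that fits the current group
+// decodes at index 5, while an SU that must begin a new group decodes at
+// index 0 (the start of the next group).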
+unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {
+ unsigned Idx = CurrGroupSize;
+ if (GrpCount % 2)
+ Idx += 3;
+
+ if (SU != nullptr && !fitsIntoCurrentGroup(SU)) {
+ if (Idx == 1 || Idx == 2)
+ Idx = 3;
+ else if (Idx == 4 || Idx == 5)
+ Idx = 0;
+ }
+
+ return Idx;
+}
+
+ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer::
+getHazardType(SUnit *m, int Stalls) {
+ return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard);
+}
+
+void SystemZHazardRecognizer::Reset() {
+ CurrGroupSize = 0;
+ CurrGroupHas4RegOps = false;
+ clearProcResCounters();
+ GrpCount = 0;
+ LastFPdOpCycleIdx = UINT_MAX;
+ LastEmittedMI = nullptr;
+ LLVM_DEBUG(CurGroupDbg = "";);
+}
+
+bool
+SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+ if (!SC->isValid())
+ return true;
+
+ // A cracked instruction can only be scheduled when the current
+ // decoder group is empty.
+ if (SC->BeginGroup)
+ return (CurrGroupSize == 0);
+
+ // An instruction with 4 register operands will not fit in the last slot.
+ assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
+ "Current decoder group is already full!");
+ if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+ return false;
+
+ // Since a full group is handled immediately in EmitInstruction(),
+ // SU should fit into the current group. Its number of decoder slots
+ // should be 1 or 0, since it is not a cracked or expanded instruction.
+ assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
+ "Expected normal instruction to fit in non-full group!");
+
+ return true;
+}
+
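+// Return true if MI has four or more register operands, where a use that is
+// tied to a def is not counted separately (such an instruction cannot be
+// decoded in the last slot of a group).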
+bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+ const MCInstrDesc &MID = MI->getDesc();
+ unsigned Count = 0;
+ for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
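+ // Skip operands that are not registers.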
+ const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
+ if (RC == nullptr)
+ continue;
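+ // Skip a register use that is tied to a def; it reads the same register
+ // as the def and is not counted as an extra register operand.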
+ if (OpIdx >= MID.getNumDefs() &&
+ MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ continue;
+ Count++;
+ }
+ return Count >= 4;
+}
+
+void SystemZHazardRecognizer::nextGroup() {
+ if (CurrGroupSize == 0)
+ return;
+
+ LLVM_DEBUG(dumpCurrGroup("Completed decode group"));
+ LLVM_DEBUG(CurGroupDbg = "";);
+
+ int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1);
+ assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) &&
+ "Current decoder group bad.");
+
+ // Reset counter for next group.
+ CurrGroupSize = 0;
+ CurrGroupHas4RegOps = false;
+
+ GrpCount += ((unsigned) NumGroups);
+
+ // Decrease counters for execution units.
+ for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
+ ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups)
+ ? (ProcResourceCounters[i] - NumGroups)
+ : 0);
+
+ // Clear CriticalResourceIdx if it is now below the threshold.
+ if (CriticalResourceIdx != UINT_MAX &&
+ (ProcResourceCounters[CriticalResourceIdx] <=
+ ProcResCostLim))
+ CriticalResourceIdx = UINT_MAX;
+
+ LLVM_DEBUG(dumpState(););
+}
+
+#ifndef NDEBUG // Debug output
+void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
+ OS << "SU(" << SU->NodeNum << "):";
+ OS << TII->getName(SU->getInstr()->getOpcode());
+
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+ if (!SC->isValid())
+ return;
+
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ const MCProcResourceDesc &PRD =
+ *SchedModel->getProcResource(PI->ProcResourceIdx);
+ std::string FU(PRD.Name);
+ // trim e.g. Z13_FXaUnit -> FXa
+ FU = FU.substr(FU.find("_") + 1);
+ size_t Pos = FU.find("Unit");
+ if (Pos != std::string::npos)
+ FU.resize(Pos);
+ if (FU == "LS") // LSUnit -> LSU
+ FU = "LSU";
+ OS << "/" << FU;
+
+ if (PI->Cycles > 1)
+ OS << "(" << PI->Cycles << "cyc)";
+ }
+
+ if (SC->NumMicroOps > 1)
+ OS << "/" << SC->NumMicroOps << "uops";
+ if (SC->BeginGroup && SC->EndGroup)
+ OS << "/GroupsAlone";
+ else if (SC->BeginGroup)
+ OS << "/BeginsGroup";
+ else if (SC->EndGroup)
+ OS << "/EndsGroup";
+ if (SU->isUnbuffered)
+ OS << "/Unbuffered";
+ if (has4RegOps(SU->getInstr()))
+ OS << "/4RegOps";
+}
+
+void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
+ dbgs() << "++ " << Msg;
+ dbgs() << ": ";
+
+ if (CurGroupDbg.empty())
+ dbgs() << " <empty>\n";
+ else {
+ dbgs() << "{ " << CurGroupDbg << " }";
+ dbgs() << " (" << CurrGroupSize << " decoder slot"
+ << (CurrGroupSize > 1 ? "s":"")
+ << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
+ << ")\n";
+ }
+}
+
+void SystemZHazardRecognizer::dumpProcResourceCounters() const {
+ bool any = false;
+
+ for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
+ if (ProcResourceCounters[i] > 0) {
+ any = true;
+ break;
+ }
+
+ if (!any)
+ return;
+
+ dbgs() << "++ | Resource counters: ";
+ for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
+ if (ProcResourceCounters[i] > 0)
+ dbgs() << SchedModel->getProcResource(i)->Name
+ << ":" << ProcResourceCounters[i] << " ";
+ dbgs() << "\n";
+
+ if (CriticalResourceIdx != UINT_MAX)
+ dbgs() << "++ | Critical resource: "
+ << SchedModel->getProcResource(CriticalResourceIdx)->Name
+ << "\n";
+}
+
+void SystemZHazardRecognizer::dumpState() const {
+ dumpCurrGroup("| Current decoder group");
+ dbgs() << "++ | Current cycle index: "
+ << getCurrCycleIdx() << "\n";
+ dumpProcResourceCounters();
+ if (LastFPdOpCycleIdx != UINT_MAX)
+ dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n";
+}
+
+#endif //NDEBUG
+
+void SystemZHazardRecognizer::clearProcResCounters() {
+ ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
+ CriticalResourceIdx = UINT_MAX;
+}
+
+static inline bool isBranchRetTrap(MachineInstr *MI) {
+ return (MI->isBranch() || MI->isReturn() ||
+ MI->getOpcode() == SystemZ::CondTrap);
+}
+
+// Update state with SU as the next scheduled unit.
+void SystemZHazardRecognizer::
+EmitInstruction(SUnit *SU) {
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+ LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
+ dbgs() << "\n";);
+ LLVM_DEBUG(dumpCurrGroup("Decode group before emission"););
+
+ // If scheduling an SU that must begin a new decoder group, move on
+ // to next group.
+ if (!fitsIntoCurrentGroup(SU))
+ nextGroup();
+
+ LLVM_DEBUG(raw_string_ostream cgd(CurGroupDbg);
+ if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd););
+
+ LastEmittedMI = SU->getInstr();
+
+ // After returning from a call, we don't know much about the state.
+ if (SU->isCall) {
+ LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";);
+ Reset();
+ LastEmittedMI = SU->getInstr();
+ return;
+ }
+
+ // Increase counter for execution unit(s).
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ // Don't handle FPd together with the other resources.
+ if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
+ continue;
+ int &CurrCounter =
+ ProcResourceCounters[PI->ProcResourceIdx];
+ CurrCounter += PI->Cycles;
+ // Check if this is now the new critical resource.
+ if ((CurrCounter > ProcResCostLim) &&
+ (CriticalResourceIdx == UINT_MAX ||
+ (PI->ProcResourceIdx != CriticalResourceIdx &&
+ CurrCounter >
+ ProcResourceCounters[CriticalResourceIdx]))) {
+ LLVM_DEBUG(
+ dbgs() << "++ New critical resource: "
+ << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
+ << "\n";);
+ CriticalResourceIdx = PI->ProcResourceIdx;
+ }
+ }
+
+ // Make note of an instruction that uses a blocking resource (FPd).
+ if (SU->isUnbuffered) {
+ LastFPdOpCycleIdx = getCurrCycleIdx(SU);
+ LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx
+ << "\n";);
+ }
+
+ // Insert SU into the current group by increasing the number of slots
+ // used in the current group.
+ CurrGroupSize += getNumDecoderSlots(SU);
+ CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
+ unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3);
+ assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU))
+ && "SU does not fit into decoder group!");
+
+ // Check if current group is now full/ended. If so, move on to next
+ // group to be ready to evaluate more candidates.
+ if (CurrGroupSize >= GroupLim || SC->EndGroup)
+ nextGroup();
+}
+
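+// Example costs with CurrGroupSize == 2: an SU that begins a new group costs
+// 1 (one decoder slot is wasted), a group-ending single-uop SU costs -1 (it
+// fills the last slot), and an SU with 4 register operands costs 1 (it cannot
+// decode in the last slot).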
+int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+ if (!SC->isValid())
+ return 0;
+
+ // If SU begins a new group, it either breaks the current group early,
+ // or fits naturally if the current group is empty (negative cost).
+ if (SC->BeginGroup) {
+ if (CurrGroupSize)
+ return 3 - CurrGroupSize;
+ return -1;
+ }
+
+ // Similarly, a group-ending SU may either fit well (last in group), or
+ // end the group prematurely.
+ if (SC->EndGroup) {
+ unsigned resultingGroupSize =
+ (CurrGroupSize + getNumDecoderSlots(SU));
+ if (resultingGroupSize < 3)
+ return (3 - resultingGroupSize);
+ return -1;
+ }
+
+ // An instruction with 4 register operands will not fit in the last slot.
+ if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+ return 1;
+
+ // Most instructions can be placed in any decoder slot.
+ return 0;
+}
+
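+// Example: if the last FPd op was decoded at cycle index 1, an FPd op that
+// would decode at index 4 is preferred (distance 3), while one at index 2 is
+// not (distance 1).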
+bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const {
+ assert (SU->isUnbuffered);
+ // If this is the first FPd op, it should be scheduled high.
+ if (LastFPdOpCycleIdx == UINT_MAX)
+ return true;
+ // If this is not the first FPd op, it should go into the other side
+ // of the processor to use the other FPd unit there. This should
+ // generally happen if two FPd ops are placed with 2 other
+ // instructions between them (modulo 6).
+ unsigned SUCycleIdx = getCurrCycleIdx(SU);
+ if (LastFPdOpCycleIdx > SUCycleIdx)
+ return ((LastFPdOpCycleIdx - SUCycleIdx) == 3);
+ return ((SUCycleIdx - LastFPdOpCycleIdx) == 3);
+}
+
+int SystemZHazardRecognizer::
+resourcesCost(SUnit *SU) {
+ int Cost = 0;
+
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+ if (!SC->isValid())
+ return 0;
+
+ // For an FPd op, return either the min or max value as indicated by the
+ // distance to any prior FPd op.
+ if (SU->isUnbuffered)
+ Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
+ // For other instructions, give a cost to the use of the critical resource.
+ else if (CriticalResourceIdx != UINT_MAX) {
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
+ if (PI->ProcResourceIdx == CriticalResourceIdx)
+ Cost = PI->Cycles;
+ }
+
+ return Cost;
+}
+
+void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI,
+ bool TakenBranch) {
+ // Make a temporary SUnit.
+ SUnit SU(MI, 0);
+
+ // Set interesting flags.
+ SU.isCall = MI->isCall();
+
+ const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
+ for (const MCWriteProcResEntry &PRE :
+ make_range(SchedModel->getWriteProcResBegin(SC),
+ SchedModel->getWriteProcResEnd(SC))) {
+ switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {
+ case 0:
+ SU.hasReservedResource = true;
+ break;
+ case 1:
+ SU.isUnbuffered = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ unsigned GroupSizeBeforeEmit = CurrGroupSize;
+ EmitInstruction(&SU);
+
+ if (!TakenBranch && isBranchRetTrap(MI)) {
+ // A not-taken branch decoded in the second slot ends the group.
+ if (GroupSizeBeforeEmit == 1)
+ nextGroup();
+ }
+
+ if (TakenBranch && CurrGroupSize > 0)
+ nextGroup();
+
+ assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
+ "Scheduler: unhandled terminator!");
+}
+
+void SystemZHazardRecognizer::
+copyState(SystemZHazardRecognizer *Incoming) {
+ // Current decoder group
+ CurrGroupSize = Incoming->CurrGroupSize;
+ LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;);
+
+ // Processor resources
+ ProcResourceCounters = Incoming->ProcResourceCounters;
+ CriticalResourceIdx = Incoming->CriticalResourceIdx;
+
+ // FPd
+ LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
+ GrpCount = Incoming->GrpCount;
+}