aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/tools/llvm-mca/Views
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/tools/llvm-mca/Views')
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp645
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h348
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp98
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/DispatchStatistics.h87
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp177
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.h100
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionView.cpp43
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionView.h60
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp170
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h84
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp200
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.h103
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp91
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h64
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp177
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/SchedulerStatistics.h97
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.cpp113
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.h89
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp332
-rw-r--r--contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.h188
20 files changed, 3266 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
new file mode 100644
index 000000000000..409a7010b80c
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
@@ -0,0 +1,645 @@
+//===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the functionalities used by the BottleneckAnalysis
+/// to report bottleneck info.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/BottleneckAnalysis.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MCA/Support.h"
+#include "llvm/Support/Format.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
+/// Builds the tables that correlate processor resource identifiers, resource
+/// masks, resource state indices and the per-unit "last user" slots.
+PressureTracker::PressureTracker(const MCSchedModel &Model)
+    : SM(Model),
+      ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
+      ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
+      ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
+      ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
+  computeProcResourceMasks(SM, ProcResID2Mask);
+
+  // Resource zero is the invalid resource: it gets no slot in ResourceUsers.
+  const unsigned NumKinds = Model.getNumProcResourceKinds();
+  unsigned NumUserSlots = 0;
+  for (unsigned ProcResID = 1; ProcResID < NumKinds; ++ProcResID) {
+    // Units of a resource occupy consecutive slots starting at this offset.
+    ProcResID2ResourceUsersIndex[ProcResID] = NumUserSlots;
+    NumUserSlots += SM.getProcResource(ProcResID)->NumUnits;
+
+    const unsigned StateIdx = getResourceStateIndex(ProcResID2Mask[ProcResID]);
+    ResIdx2ProcResID[StateIdx] = ProcResID;
+  }
+
+  // Every slot starts out with an all-ones (invalid) instruction identifier
+  // and a zero cycle count.
+  ResourceUsers.assign(NumUserSlots, User(~0U, 0U));
+}
+
+/// Appends to \p Users the last user of each unit of the resource identified
+/// by \p ResourceMask, provided that user is still being tracked.
+void PressureTracker::getResourceUsers(uint64_t ResourceMask,
+                                       SmallVectorImpl<User> &Users) const {
+  const unsigned ProcResID =
+      ResIdx2ProcResID[getResourceStateIndex(ResourceMask)];
+  const unsigned NumUnits = SM.getProcResource(ProcResID)->NumUnits;
+
+  for (unsigned UnitID = 0; UnitID != NumUnits; ++UnitID) {
+    const User Candidate = getResourceUser(ProcResID, UnitID);
+    // Skip units with a zero cycle count, and users that are no longer
+    // present in the IPI map.
+    if (!Candidate.second || !IPI.contains(Candidate.first))
+      continue;
+    Users.emplace_back(Candidate);
+  }
+}
+
+/// Starts tracking instruction \p IID with zeroed pressure counters. An
+/// already tracked instruction is left untouched.
+void PressureTracker::onInstructionDispatched(unsigned IID) {
+  IPI.insert({IID, InstructionPressureInfo()});
+}
+
+/// Stops tracking instruction \p IID.
+void PressureTracker::onInstructionExecuted(unsigned IID) {
+  IPI.erase(IID);
+}
+
+/// Records the issued instruction as the last user of every resource unit it
+/// consumes, together with the numerator of the cycles spent on that unit.
+void PressureTracker::handleInstructionIssuedEvent(
+    const HWInstructionIssuedEvent &Event) {
+  const unsigned IID = Event.IR.getSourceIndex();
+  for (const ResourceUse &Use : Event.UsedResources) {
+    const ResourceRef &Unit = Use.first;
+    // The slot is the resource's base offset plus the index of the lowest set
+    // bit of the unit mask.
+    const unsigned Slot = ProcResID2ResourceUsersIndex[Unit.first] +
+                          llvm::countr_zero(Unit.second);
+    ResourceUsers[Slot] = User(IID, Use.second.getNumerator());
+  }
+}
+
+/// Bumps the pressure counter of every processor resource referenced by
+/// \p CumulativeMask.
+///
+/// Bits are visited from least to most significant. When the mask of the
+/// resource identified by a bit is exactly that bit, the resource itself is
+/// charged; otherwise every resource named by the remaining bits of that mask
+/// is charged instead.
+void PressureTracker::updateResourcePressureDistribution(
+    uint64_t CumulativeMask) {
+  // `X &= X - 1` clears the lowest set bit of X.
+  for (; CumulativeMask; CumulativeMask &= CumulativeMask - 1) {
+    const uint64_t LowestBit = CumulativeMask & (-CumulativeMask);
+    const unsigned OwnerID = ResIdx2ProcResID[getResourceStateIndex(LowestBit)];
+    const uint64_t OwnerMask = ProcResID2Mask[OwnerID];
+
+    if (OwnerMask == LowestBit) {
+      ++ResourcePressureDistribution[OwnerID];
+      continue;
+    }
+
+    for (uint64_t Rest = OwnerMask ^ LowestBit; Rest; Rest &= Rest - 1) {
+      const uint64_t SubUnit = Rest & (-Rest);
+      const unsigned SubUnitID =
+          ResIdx2ProcResID[getResourceStateIndex(SubUnit)];
+      ++ResourcePressureDistribution[SubUnitID];
+    }
+  }
+}
+
+/// Updates the per-instruction pressure counters in response to a backend
+/// pressure event.
+///
+/// - RESOURCES: updates the per-resource pressure distribution, then bumps
+///   the resource pressure counter of every affected instruction whose
+///   critical resource mask intersects the event mask.
+/// - REGISTER_DEPS / MEMORY_DEPS: bumps the matching counter of every
+///   affected instruction.
+/// Any other reason is ignored.
+void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
+  assert(Event.Reason != HWPressureEvent::INVALID &&
+         "Unexpected invalid event!");
+
+  switch (Event.Reason) {
+  default:
+    break;
+
+  case HWPressureEvent::RESOURCES: {
+    const uint64_t ResourceMask = Event.ResourceMask;
+    updateResourcePressureDistribution(ResourceMask);
+
+    for (const InstRef &IR : Event.AffectedInstructions) {
+      const Instruction &IS = *IR.getInstruction();
+      // Keep the intersection 64 bits wide. Narrowing it to `unsigned` would
+      // drop every resource whose mask bit is at position 32 or above, and
+      // the instruction would be skipped by mistake.
+      const uint64_t BusyResources =
+          IS.getCriticalResourceMask() & ResourceMask;
+      if (!BusyResources)
+        continue;
+
+      unsigned IID = IR.getSourceIndex();
+      IPI[IID].ResourcePressureCycles++;
+    }
+    break;
+  }
+
+  case HWPressureEvent::REGISTER_DEPS:
+    for (const InstRef &IR : Event.AffectedInstructions) {
+      unsigned IID = IR.getSourceIndex();
+      IPI[IID].RegisterPressureCycles++;
+    }
+    break;
+
+  case HWPressureEvent::MEMORY_DEPS:
+    for (const InstRef &IR : Event.AffectedInstructions) {
+      unsigned IID = IR.getSourceIndex();
+      IPI[IID].MemoryPressureCycles++;
+    }
+    break;
+  }
+}
+
+#ifndef NDEBUG
+/// Debug helper: prints one line describing \p DepEdge (endpoints, dependency
+/// kind, and cost) to \p OS. Register names are resolved through \p MCIP.
+void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
+                                         const DependencyEdge &DepEdge,
+                                         MCInstPrinter &MCIP) const {
+  const unsigned FromIID = DepEdge.FromIID;
+  const unsigned ToIID = DepEdge.ToIID;
+  assert(FromIID < ToIID && "Graph should be acyclic!");
+
+  const DependencyEdge::Dependency &DE = DepEdge.Dep;
+  assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!");
+
+  OS << " FROM: " << FromIID << " TO: " << ToIID << " ";
+  switch (DE.Type) {
+  case DependencyEdge::DT_REGISTER:
+    OS << " - REGISTER: ";
+    MCIP.printRegName(OS, DE.ResourceOrRegID);
+    break;
+  case DependencyEdge::DT_MEMORY:
+    OS << " - MEMORY";
+    break;
+  default:
+    assert(DE.Type == DependencyEdge::DT_RESOURCE &&
+           "Unsupported dependency type!");
+    OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
+    break;
+  }
+  OS << " - COST: " << DE.Cost << '\n';
+}
+#endif // NDEBUG
+
+/// Removes from every node the outgoing edges that were observed in at most
+/// 10% of the simulated \p Iterations.
+///
+/// Each pruned edge is swapped with the last live edge, and the vector is
+/// truncated once at the end, so the relative order of surviving edges is not
+/// preserved. The predecessor count of the destination node is decremented
+/// for every pruned edge.
+void DependencyGraph::pruneEdges(unsigned Iterations) {
+  for (DGNode &N : Nodes) {
+    SmallVectorImpl<DependencyEdge> &Edges = N.OutgoingEdges;
+    unsigned I = 0;
+    unsigned E = Edges.size();
+
+    // Use a cut-off threshold to prune edges with a low frequency.
+    while (I < E) {
+      DependencyEdge &Edge = Edges[I];
+      if (Edge.Frequency == Iterations) {
+        ++I;
+        continue;
+      }
+      const double Factor = static_cast<double>(Edge.Frequency) / Iterations;
+      if (0.10 < Factor) {
+        ++I;
+        continue;
+      }
+
+      Nodes[Edge.ToIID].NumPredecessors--;
+      --E;
+      std::swap(Edge, Edges[E]);
+      // Deliberately do not advance I: slot I now holds the edge that was at
+      // the tail, and it has not been tested against the threshold yet.
+    }
+
+    if (E != Edges.size())
+      Edges.resize(E);
+  }
+}
+
+/// Collects into \p RootSet the index of every node that has no predecessors
+/// and at least one outgoing edge.
+void DependencyGraph::initializeRootSet(
+    SmallVectorImpl<unsigned> &RootSet) const {
+  unsigned IID = 0;
+  for (const DGNode &Node : Nodes) {
+    const bool IsRoot =
+        Node.NumPredecessors == 0 && !Node.OutgoingEdges.empty();
+    if (IsRoot)
+      RootSet.emplace_back(IID);
+    ++IID;
+  }
+}
+
+/// Propagates node costs from the nodes in \p RootSet to the rest of the
+/// graph. \p RootSet is consumed by the traversal and is empty on return.
+void DependencyGraph::propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet,
+                                            unsigned Iterations) {
+  // A critical sequence is the longest (most expensive) path from a node of
+  // the RootSet to a leaf node, i.e. a node with no successors. The RootSet
+  // holds nodes with at least one successor and no predecessors.
+  //
+  // Every node starts with a cost of zero. The cost is a measure of
+  // criticality: the higher the cost, the bigger the performance impact.
+  // For register and memory dependencies, the cost is a function of the write
+  // latency as well as the actual delay (in cycles) caused to users.
+  // For processor resource dependencies, the cost is a function of the
+  // resource pressure. Resource interferences with low frequency are ignored.
+  //
+  // The traversal proceeds in waves. The current wave (RootSet) pushes the
+  // cost of each of its nodes through that node's outgoing edges. Each
+  // destination counts how many of its incoming edges have been processed in
+  // field `NumVisitedPredecessors`; once that count reaches
+  // `NumPredecessors`, the destination is queued in `NextWave`.
+  //
+  // When a wave is done, `NextWave` becomes the new RootSet. The traversal
+  // stops when a wave is empty. This works under the assumption that the
+  // graph is acyclic.
+  SmallVector<unsigned, 8> NextWave;
+
+  while (!RootSet.empty()) {
+    for (unsigned FromIID : RootSet) {
+      const DGNode &From = Nodes[FromIID];
+      for (const DependencyEdge &Edge : From.OutgoingEdges) {
+        DGNode &To = Nodes[Edge.ToIID];
+        const uint64_t PathCost = From.Cost + Edge.Dep.Cost;
+
+        // Remember the most expensive incoming edge seen so far, together
+        // with the cost and depth of the path that goes through it.
+        if (PathCost > To.Cost) {
+          To.CriticalPredecessor = Edge;
+          To.Cost = PathCost;
+          To.Depth = From.Depth + 1;
+        }
+
+        ++To.NumVisitedPredecessors;
+        if (To.NumVisitedPredecessors == To.NumPredecessors)
+          NextWave.emplace_back(Edge.ToIID);
+      }
+    }
+
+    RootSet.swap(NextWave);
+    NextWave.clear();
+  }
+}
+
+/// Fills \p Seq with the edges of the critical sequence, ordered from the
+/// first edge of the sequence to the last.
+void DependencyGraph::getCriticalSequence(
+    SmallVectorImpl<const DependencyEdge *> &Seq) const {
+  // Costs have already been propagated through the edges at this point (see
+  // method `propagateThroughEdges()`).
+  //
+  // The node with the highest cost is, by construction, the last instruction
+  // of the critical sequence, and its Depth is the length of the sequence.
+  const auto MostExpensive =
+      llvm::max_element(Nodes, [](const DGNode &Lhs, const DGNode &Rhs) {
+        return Lhs.Cost < Rhs.Cost;
+      });
+  unsigned IID = std::distance(Nodes.begin(), MostExpensive);
+
+  const unsigned Length = Nodes[IID].Depth;
+  Seq.resize(Length);
+
+  // Walk the chain of critical predecessors backwards, filling the sequence
+  // from its last slot to its first.
+  for (unsigned Pos = Length; Pos != 0; --Pos) {
+    const DependencyEdge &Critical = Nodes[IID].CriticalPredecessor;
+    Seq[Pos - 1] = &Critical;
+    IID = Critical.FromIID;
+  }
+}
+
+/// Prints \p MCI at column 14 of \p FOS, in bold cyan when
+/// \p UseDifferentColor is set.
+void BottleneckAnalysis::printInstruction(formatted_raw_ostream &FOS,
+                                          const MCInst &MCI,
+                                          bool UseDifferentColor) const {
+  FOS.PadToColumn(14);
+  if (!UseDifferentColor) {
+    FOS << printInstructionString(MCI);
+    return;
+  }
+
+  FOS.changeColor(raw_ostream::CYAN, true, false);
+  FOS << printInstructionString(MCI);
+  FOS.resetColor();
+}
+
+/// Prints to \p OS the critical sequence computed by the dependency graph.
+///
+/// Nothing is printed when no stall cycles or no pressure increase cycles
+/// were observed, or when the critical sequence is empty. Otherwise the
+/// source instructions are listed in order, and every edge of the sequence is
+/// drawn as an arrow from its source to its destination, annotated with the
+/// kind of dependency (register, memory or resource interference).
+void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
+ // Early exit if no bottlenecks were found during the simulation.
+ if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
+ return;
+
+ SmallVector<const DependencyEdge *, 16> Seq;
+ DG.getCriticalSequence(Seq);
+ if (Seq.empty())
+ return;
+
+ OS << "\nCritical sequence based on the simulation:\n\n";
+
+ // Graph node identifiers span several copies of the source sequence; the
+ // modulo maps them back to indices into the source.
+ const DependencyEdge &FirstEdge = *Seq[0];
+ ArrayRef<llvm::MCInst> Source = getSource();
+ unsigned FromIID = FirstEdge.FromIID % Source.size();
+ unsigned ToIID = FirstEdge.ToIID % Source.size();
+ // The first edge is loop carried when its source does not precede its
+ // destination in the source sequence.
+ bool IsLoopCarried = FromIID >= ToIID;
+
+ formatted_raw_ostream FOS(OS);
+ FOS.PadToColumn(14);
+ FOS << "Instruction";
+ FOS.PadToColumn(58);
+ FOS << "Dependency Information";
+
+ bool HasColors = FOS.has_colors();
+
+ // CurrentIID is the index of the next source instruction to print.
+ unsigned CurrentIID = 0;
+ if (IsLoopCarried) {
+ // Print the source of the first edge on its own, followed by a loop
+ // carried marker; the listing then restarts from instruction zero.
+ FOS << "\n +----< " << FromIID << ".";
+ printInstruction(FOS, Source[FromIID], HasColors);
+ FOS << "\n |\n | < loop carried > \n |";
+ } else {
+ // Print the instructions that precede the start of the sequence.
+ while (CurrentIID < FromIID) {
+ FOS << "\n " << CurrentIID << ".";
+ printInstruction(FOS, Source[CurrentIID]);
+ CurrentIID++;
+ }
+
+ FOS << "\n +----< " << CurrentIID << ".";
+ printInstruction(FOS, Source[CurrentIID], HasColors);
+ CurrentIID++;
+ }
+
+ for (const DependencyEdge *&DE : Seq) {
+ ToIID = DE->ToIID % Source.size();
+ // When the destination is behind the current position, the listing runs
+ // to the end of the source before the loop carried marker is printed.
+ unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;
+
+ // Print the instructions between the current position and the stop point.
+ while (CurrentIID < LastIID) {
+ FOS << "\n | " << CurrentIID << ".";
+ printInstruction(FOS, Source[CurrentIID]);
+ CurrentIID++;
+ }
+
+ if (CurrentIID == ToIID) {
+ FOS << "\n +----> " << ToIID << ".";
+ printInstruction(FOS, Source[CurrentIID], HasColors);
+ } else {
+ FOS << "\n |\n | < loop carried > \n |"
+ << "\n +----> " << ToIID << ".";
+ printInstruction(FOS, Source[ToIID], HasColors);
+ }
+ FOS.PadToColumn(58);
+
+ // Describe the dependency carried by this edge.
+ const DependencyEdge::Dependency &Dep = DE->Dep;
+ if (HasColors)
+ FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
+
+ if (Dep.Type == DependencyEdge::DT_REGISTER) {
+ FOS << "## REGISTER dependency: ";
+ if (HasColors)
+ FOS.changeColor(raw_ostream::MAGENTA, true, false);
+ getInstPrinter().printRegName(FOS, Dep.ResourceOrRegID);
+ } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
+ FOS << "## MEMORY dependency.";
+ } else {
+ assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
+ "Unsupported dependency type!");
+ FOS << "## RESOURCE interference: ";
+ if (HasColors)
+ FOS.changeColor(raw_ostream::MAGENTA, true, false);
+ FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
+ if (HasColors) {
+ FOS.resetColor();
+ FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
+ }
+ // The probability is the edge frequency as an integer percentage of the
+ // number of iterations; it is only printed for resource interferences.
+ FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
+ << "% ]";
+ }
+ if (HasColors)
+ FOS.resetColor();
+ ++CurrentIID;
+ }
+
+ // Print the instructions that follow the end of the sequence.
+ while (CurrentIID < Source.size()) {
+ FOS << "\n " << CurrentIID << ".";
+ printInstruction(FOS, Source[CurrentIID]);
+ CurrentIID++;
+ }
+
+ FOS << '\n';
+ FOS.flush();
+}
+
+#ifndef NDEBUG
+/// Debug helper: dumps every edge of the graph to \p OS, grouped by kind
+/// (register, memory, resource) and in that order.
+void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
+  // Prints a section header followed by every edge of the requested kind.
+  auto DumpSection = [&](const char *Header,
+                         DependencyEdge::DependencyType Kind) {
+    OS << Header;
+    for (const DGNode &Node : Nodes)
+      for (const DependencyEdge &DE : Node.OutgoingEdges)
+        if (DE.Dep.Type == Kind)
+          dumpDependencyEdge(OS, DE, MCIP);
+  };
+
+  DumpSection("\nREG DEPS\n", DependencyEdge::DT_REGISTER);
+  DumpSection("\nMEM DEPS\n", DependencyEdge::DT_MEMORY);
+  DumpSection("\nRESOURCE DEPS\n", DependencyEdge::DT_RESOURCE);
+}
+#endif // NDEBUG
+
+/// Records a dependency of kind \p Dep from node \p From to node \p To.
+///
+/// If an edge to \p To with the same dependency type and the same
+/// resource/register identifier already exists, its cost is increased by
+/// \p Dep.Cost and its frequency is bumped. Otherwise a new edge with
+/// frequency one is appended and the predecessor count of \p To is
+/// incremented.
+void DependencyGraph::addDependency(unsigned From, unsigned To,
+                                    DependencyEdge::Dependency &&Dep) {
+  DGNode &NodeFrom = Nodes[From];
+  DGNode &NodeTo = Nodes[To];
+  SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges;
+
+  // The dependency type is part of the edge identity: a register identifier
+  // and a resource mask may share a numeric value, and such edges must not be
+  // merged into one another.
+  auto It = find_if(Vec, [To, &Dep](const DependencyEdge &DE) {
+    return DE.ToIID == To && DE.Dep.Type == Dep.Type &&
+           DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID;
+  });
+
+  if (It != Vec.end()) {
+    It->Dep.Cost += Dep.Cost;
+    It->Frequency++;
+    return;
+  }
+
+  DependencyEdge DE = {Dep, From, To, 1};
+  Vec.emplace_back(DE);
+  NodeTo.NumPredecessors++;
+}
+
+/// Creates a bottleneck analysis view for source sequence \p S, simulated for
+/// \p NumIter iterations.
+///
+/// The dependency graph is sized to three times the source length: the
+/// add*Dep() methods place edge endpoints at offsets of zero, one and two
+/// source lengths so that loop carried dependencies can be described.
+BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
+ MCInstPrinter &Printer,
+ ArrayRef<MCInst> S, unsigned NumIter)
+ : InstructionView(sti, Printer, S), Tracker(sti.getSchedModel()),
+ DG(S.size() * 3), Iterations(NumIter), TotalCycles(0),
+ PressureIncreasedBecauseOfResources(false),
+ PressureIncreasedBecauseOfRegisterDependencies(false),
+ PressureIncreasedBecauseOfMemoryDependencies(false),
+ SeenStallCycles(false), BPI() {}
+
+/// Adds a register dependency on \p RegID between source instructions
+/// \p From and \p To to the dependency graph.
+void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
+                                        unsigned RegID, unsigned Cost) {
+  const unsigned NumInsts = getSource().size();
+
+  if (From < To) {
+    // Not loop carried: both endpoints are offset by one source length.
+    DG.addRegisterDep(From + NumInsts, To + NumInsts, RegID, Cost);
+    return;
+  }
+
+  // Loop carried: add one edge whose destination is offset by one source
+  // length, and a second edge shifted one further source length.
+  DG.addRegisterDep(From, To + NumInsts, RegID, Cost);
+  DG.addRegisterDep(From + NumInsts, To + (NumInsts * 2), RegID, Cost);
+}
+
+/// Adds a memory dependency between source instructions \p From and \p To to
+/// the dependency graph.
+void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
+                                      unsigned Cost) {
+  const unsigned NumInsts = getSource().size();
+
+  if (From < To) {
+    // Not loop carried: both endpoints are offset by one source length.
+    DG.addMemoryDep(From + NumInsts, To + NumInsts, Cost);
+    return;
+  }
+
+  // Loop carried: add one edge whose destination is offset by one source
+  // length, and a second edge shifted one further source length.
+  DG.addMemoryDep(From, To + NumInsts, Cost);
+  DG.addMemoryDep(From + NumInsts, To + (NumInsts * 2), Cost);
+}
+
+/// Adds a processor resource interference on resource \p Mask between source
+/// instructions \p From and \p To to the dependency graph.
+void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
+                                        uint64_t Mask, unsigned Cost) {
+  const unsigned NumInsts = getSource().size();
+
+  if (From < To) {
+    // Not loop carried: both endpoints are offset by one source length.
+    DG.addResourceDep(From + NumInsts, To + NumInsts, Mask, Cost);
+    return;
+  }
+
+  // Loop carried: add one edge whose destination is offset by one source
+  // length, and a second edge shifted one further source length.
+  DG.addResourceDep(From, To + NumInsts, Mask, Cost);
+  DG.addResourceDep(From + NumInsts, To + (NumInsts * 2), Mask, Cost);
+}
+
+/// Handles instruction state transitions.
+///
+/// Dispatched and Executed events start and stop pressure tracking for the
+/// instruction. An Issued event adds the instruction's critical resource,
+/// register and memory dependencies to the graph, and then records it as the
+/// last user of the resources it consumes. Other events are ignored.
+void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
+  const unsigned IID = Event.IR.getSourceIndex();
+
+  switch (Event.Type) {
+  case HWInstructionEvent::Dispatched:
+    Tracker.onInstructionDispatched(IID);
+    return;
+  case HWInstructionEvent::Executed:
+    Tracker.onInstructionExecuted(IID);
+    return;
+  case HWInstructionEvent::Issued:
+    break;
+  default:
+    return;
+  }
+
+  ArrayRef<llvm::MCInst> Source = getSource();
+  const size_t NumInsts = Source.size();
+  const Instruction &Inst = *Event.IR.getInstruction();
+  const unsigned To = IID % NumInsts;
+
+  // Resource interferences: one edge from each tracked last user of every
+  // critical resource to this instruction.
+  const unsigned ResourceCost = 2 * Tracker.getResourcePressureCycles(IID);
+  SmallVector<std::pair<unsigned, unsigned>, 4> Users;
+  for (uint64_t Pending = Inst.getCriticalResourceMask(); Pending;
+       Pending &= Pending - 1) {
+    const uint64_t Current = Pending & (-Pending);
+    Tracker.getResourceUsers(Current, Users);
+    for (const std::pair<unsigned, unsigned> &U : Users)
+      addResourceDep(U.first % NumInsts, To, Current,
+                     U.second + ResourceCost);
+    Users.clear();
+  }
+
+  // Critical register dependency, if any.
+  const CriticalDependency &RegDep = Inst.getCriticalRegDep();
+  if (RegDep.Cycles) {
+    const unsigned Cost =
+        RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
+    addRegisterDep(RegDep.IID % NumInsts, To, RegDep.RegID, Cost);
+  }
+
+  // Critical memory dependency, if any.
+  const CriticalDependency &MemDep = Inst.getCriticalMemDep();
+  if (MemDep.Cycles) {
+    const unsigned Cost =
+        MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
+    addMemoryDep(MemDep.IID % NumInsts, To, Cost);
+  }
+
+  // This must come after the resource users above have been queried: it
+  // overwrites the last user of every unit consumed by this instruction.
+  Tracker.handleInstructionIssuedEvent(
+      static_cast<const HWInstructionIssuedEvent &>(Event));
+
+  // Check if this is the last simulated instruction.
+  if (IID == ((Iterations * NumInsts) - 1))
+    DG.finalizeGraph(Iterations);
+}
+
+/// Forwards a backend pressure event to the tracker and remembers, until the
+/// end of the current cycle, which kind of pressure was observed.
+void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
+  assert(Event.Reason != HWPressureEvent::INVALID &&
+         "Unexpected invalid event!");
+
+  Tracker.handlePressureEvent(Event);
+
+  if (Event.Reason == HWPressureEvent::RESOURCES)
+    PressureIncreasedBecauseOfResources = true;
+  else if (Event.Reason == HWPressureEvent::REGISTER_DEPS)
+    PressureIncreasedBecauseOfRegisterDependencies = true;
+  else if (Event.Reason == HWPressureEvent::MEMORY_DEPS)
+    PressureIncreasedBecauseOfMemoryDependencies = true;
+}
+
+/// Accounts for the cycle that just ended: bumps the total cycle count and,
+/// if pressure increased during the cycle, the matching counters in BPI. The
+/// per-cycle flags are then cleared.
+void BottleneckAnalysis::onCycleEnd() {
+  ++TotalCycles;
+
+  const bool SawResources = PressureIncreasedBecauseOfResources;
+  const bool SawRegDeps = PressureIncreasedBecauseOfRegisterDependencies;
+  const bool SawMemDeps = PressureIncreasedBecauseOfMemoryDependencies;
+  const bool SawDataDeps = SawRegDeps || SawMemDeps;
+
+  // Nothing to record; every flag is already clear in this case.
+  if (!(SawResources || SawDataDeps))
+    return;
+
+  ++BPI.PressureIncreaseCycles;
+  if (SawRegDeps)
+    ++BPI.RegisterDependencyCycles;
+  if (SawMemDeps)
+    ++BPI.MemoryDependencyCycles;
+  if (SawDataDeps)
+    ++BPI.DataDependencyCycles;
+  if (SawResources)
+    ++BPI.ResourcePressureCycles;
+
+  PressureIncreasedBecauseOfResources = false;
+  PressureIncreasedBecauseOfRegisterDependencies = false;
+  PressureIncreasedBecauseOfMemoryDependencies = false;
+}
+
+/// Prints the bottleneck summary to \p OS: the percentage of cycles with a
+/// backend pressure increase, broken down by resource pressure (overall and
+/// per resource) and by data dependencies (register and memory).
+void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
+  if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
+    OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
+    return;
+  }
+
+  // Expresses a cycle count as a percentage of the simulated cycles.
+  auto PercentOfCycles = [this](uint64_t Count) {
+    return (double)Count * 100 / TotalCycles;
+  };
+  // Formats a percentage rounded to two decimal places.
+  auto Rounded = [](double Percent) {
+    return format("%.2f", floor((Percent * 100) + 0.5) / 100);
+  };
+
+  OS << "\n\nCycles with backend pressure increase [ "
+     << Rounded(PercentOfCycles(BPI.PressureIncreaseCycles)) << "% ]";
+
+  OS << "\nThroughput Bottlenecks: "
+     << "\n Resource Pressure [ "
+     << Rounded(PercentOfCycles(BPI.ResourcePressureCycles)) << "% ]";
+
+  // PressureIncreaseCycles is known to be non-zero here (see the early exit
+  // above), so the per-resource breakdown is always printed.
+  ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
+  const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
+  for (unsigned ProcResID = 0, E = Distribution.size(); ProcResID < E;
+       ++ProcResID) {
+    const unsigned PressureCycles = Distribution[ProcResID];
+    if (!PressureCycles)
+      continue;
+    const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
+    OS << "\n - " << PRDesc.Name << " [ "
+       << Rounded(PercentOfCycles(PressureCycles)) << "% ]";
+  }
+
+  OS << "\n Data Dependencies: [ "
+     << Rounded(PercentOfCycles(BPI.DataDependencyCycles)) << "% ]";
+  OS << "\n - Register Dependencies [ "
+     << Rounded(PercentOfCycles(BPI.RegisterDependencyCycles)) << "% ]";
+  OS << "\n - Memory Dependencies [ "
+     << Rounded(PercentOfCycles(BPI.MemoryDependencyCycles)) << "% ]\n";
+}
+
+/// Prints the whole view to \p OS: the bottleneck hints first, then the
+/// critical sequence.
+void BottleneckAnalysis::printView(raw_ostream &OS) const {
+  // The hints are rendered into a string first and then written in one go.
+  std::string Hints;
+  raw_string_ostream HintStream(Hints);
+  printBottleneckHints(HintStream);
+  HintStream.flush();
+
+  OS << Hints;
+  printCriticalSequence(OS);
+}
+
+} // namespace mca.
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
new file mode 100644
index 000000000000..e709b25c3f76
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
@@ -0,0 +1,348 @@
+//===--------------------- BottleneckAnalysis.h -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the bottleneck analysis view.
+///
+/// This view internally observes backend pressure increase events in order to
+/// identify problematic data dependencies and processor resource interferences.
+///
+/// Example of bottleneck analysis report for a dot-product on X86 btver2:
+///
+/// Cycles with backend pressure increase [ 40.76% ]
+/// Throughput Bottlenecks:
+/// Resource Pressure [ 39.34% ]
+/// - JFPA [ 39.34% ]
+/// - JFPU0 [ 39.34% ]
+/// Data Dependencies: [ 1.42% ]
+/// - Register Dependencies [ 1.42% ]
+/// - Memory Dependencies [ 0.00% ]
+///
+/// According to the example, backend pressure increased during 40.76% of
+/// the simulated cycles. In particular, the major cause of backend pressure
+/// increases was the contention on floating point adder JFPA accessible from
+/// pipeline resource JFPU0.
+///
+/// At the end of each cycle, if pressure on the simulated out-of-order buffers
+/// has increased, a backend pressure event is reported.
+/// In particular, this occurs when there is a delta between the number of uOps
+/// dispatched and the number of uOps issued to the underlying pipelines.
+///
+/// The bottleneck analysis view is also responsible for identifying and
+/// printing the most "critical" sequence of dependent instructions according to
+/// the simulated run.
+///
+/// Below is the critical sequence computed for the dot-product example on
+/// btver2:
+///
+/// Instruction Dependency Information
+/// +----< 2. vhaddps %xmm3, %xmm3, %xmm4
+/// |
+/// | < loop carried >
+/// |
+/// | 0. vmulps %xmm0, %xmm0, %xmm2
+/// +----> 1. vhaddps %xmm2, %xmm2, %xmm3 ## RESOURCE interference: JFPA [ probability: 73% ]
+/// +----> 2. vhaddps %xmm3, %xmm3, %xmm4 ## REGISTER dependency: %xmm3
+/// |
+/// | < loop carried >
+/// |
+/// +----> 1. vhaddps %xmm2, %xmm2, %xmm3 ## RESOURCE interference: JFPA [ probability: 73% ]
+///
+///
+/// The algorithm that computes the critical sequence is very similar to a
+/// critical path analysis.
+///
+/// A dependency graph is used internally to track dependencies between nodes.
+/// Nodes of the graph represent instructions from the input assembly sequence,
+/// and edges of the graph represent data dependencies or processor resource
+/// interferences.
+///
+/// Edges are dynamically 'discovered' by observing instruction state
+/// transitions and backend pressure increase events. Edges are internally
+/// ranked based on their "criticality". A dependency is considered to be
+/// critical if it takes a long time to execute, and if it contributes to
+/// backend pressure increases. Criticality is internally measured in terms of
+/// cycles; it is computed for every edge in the graph as a function of the edge
+/// latency and the number of backend pressure increase cycles contributed by
+/// that edge.
+///
+/// At the end of simulation, costs are propagated to nodes through the edges of
+/// the graph, and the most expensive path connecting the root-set (a
+/// set of nodes with no predecessors) to a leaf node is reported as critical
+/// sequence.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H
+#define LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H
+
+#include "Views/InstructionView.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+/// Tracks, for the bottleneck analysis, the pressure cycles attributed to
+/// each tracked instruction, the pressure distribution across processor
+/// resources, and the last user of every processor resource unit.
+class PressureTracker {
+  const MCSchedModel &SM;
+
+  // Resource pressure distribution. There is an element for every processor
+  // resource declared by the scheduling model. Quantities are number of
+  // cycles.
+  SmallVector<unsigned, 4> ResourcePressureDistribution;
+
+  // Each processor resource is associated with a so-called processor resource
+  // mask. This vector correlates processor resource IDs with processor
+  // resource masks. There is exactly one element for each processor resource
+  // declared by the scheduling model.
+  SmallVector<uint64_t, 4> ProcResID2Mask;
+
+  // Maps processor resource state indices (as returned by calls to
+  // `getResourceStateIndex(Mask)`) to processor resource identifiers.
+  SmallVector<unsigned, 4> ResIdx2ProcResID;
+
+  // Maps processor resource identifiers to ResourceUsers indices.
+  SmallVector<unsigned, 4> ProcResID2ResourceUsersIndex;
+
+  // Identifies the last user of a processor resource unit.
+  // This vector is updated on every instruction issued event.
+  // There is one entry for every processor resource unit declared by the
+  // processor model. An all_ones value is treated like an invalid instruction
+  // identifier.
+  using User = std::pair<unsigned, unsigned>;
+  SmallVector<User, 4> ResourceUsers;
+
+  // Pressure cycles attributed to a single tracked instruction.
+  struct InstructionPressureInfo {
+    unsigned RegisterPressureCycles;
+    unsigned MemoryPressureCycles;
+    unsigned ResourcePressureCycles;
+  };
+  DenseMap<unsigned, InstructionPressureInfo> IPI;
+
+  void updateResourcePressureDistribution(uint64_t CumulativeMask);
+
+  // Returns the last user of unit UnitID of processor resource ProcResID.
+  User getResourceUser(unsigned ProcResID, unsigned UnitID) const {
+    return ResourceUsers[ProcResID2ResourceUsersIndex[ProcResID] + UnitID];
+  }
+
+  // Returns the pressure counters of instruction IID, which must be tracked.
+  const InstructionPressureInfo &getPressureInfo(unsigned IID) const {
+    assert(IPI.contains(IID) && "Instruction is not tracked!");
+    return IPI.find(IID)->second;
+  }
+
+public:
+  PressureTracker(const MCSchedModel &Model);
+
+  ArrayRef<unsigned> getResourcePressureDistribution() const {
+    return ResourcePressureDistribution;
+  }
+
+  void getResourceUsers(uint64_t ResourceMask,
+                        SmallVectorImpl<User> &Users) const;
+
+  unsigned getRegisterPressureCycles(unsigned IID) const {
+    return getPressureInfo(IID).RegisterPressureCycles;
+  }
+
+  unsigned getMemoryPressureCycles(unsigned IID) const {
+    return getPressureInfo(IID).MemoryPressureCycles;
+  }
+
+  unsigned getResourcePressureCycles(unsigned IID) const {
+    return getPressureInfo(IID).ResourcePressureCycles;
+  }
+
+  // Returns the name of the processor resource identified by ResourceMask.
+  const char *resolveResourceName(uint64_t ResourceMask) const {
+    const unsigned ProcResID =
+        ResIdx2ProcResID[getResourceStateIndex(ResourceMask)];
+    return SM.getProcResource(ProcResID)->Name;
+  }
+
+  void onInstructionDispatched(unsigned IID);
+  void onInstructionExecuted(unsigned IID);
+
+  void handlePressureEvent(const HWPressureEvent &Event);
+  void handleInstructionIssuedEvent(const HWInstructionIssuedEvent &Event);
+};
+
+// A dependency edge.
+//
+// It connects instruction FromIID to instruction ToIID, and describes the
+// kind and cost of the dependency between them. Field order matters: edges
+// and dependency descriptors are built with aggregate initialization.
+struct DependencyEdge {
+ enum DependencyType { DT_INVALID, DT_REGISTER, DT_MEMORY, DT_RESOURCE };
+
+ // Dependency edge descriptor.
+ //
+ // It specifies the dependency type, as well as the edge cost in cycles.
+ struct Dependency {
+ DependencyType Type;
+ // A register identifier for DT_REGISTER, a processor resource mask for
+ // DT_RESOURCE; unused (zero) for DT_MEMORY.
+ uint64_t ResourceOrRegID;
+ uint64_t Cost;
+ };
+ Dependency Dep;
+
+ // Identifiers of the source and destination instructions of this edge.
+ unsigned FromIID;
+ unsigned ToIID;
+
+ // Used by the bottleneck analysis to compute the interference
+ // probability for processor resources.
+ unsigned Frequency;
+};
+
+// A dependency graph used by the bottleneck analysis to describe data
+// dependencies and processor resource interferences between instructions.
+//
+// There is a node (an instance of struct DGNode) for every instruction in the
+// input assembly sequence. Edges of the graph represent dependencies between
+// instructions.
+//
+// Each edge of the graph is associated with a cost value which is used
+// internally to rank dependencies based on their impact on the runtime
+// performance (see field DependencyEdge::Dependency::Cost). In general, the
+// higher the cost of an edge, the higher the impact on performance.
+//
+// The cost of a dependency is a function of both the latency and the number of
+// cycles where the dependency has been seen as critical (i.e. contributing to
+// back-pressure increases).
+//
+// Loop carried dependencies are carefully expanded by the bottleneck analysis
+// to guarantee that the graph stays acyclic. To this end, extra nodes are
+// pre-allocated at construction time to describe instructions from "past and
+// future" iterations. The graph is kept acyclic mainly because it simplifies
+// the complexity of the algorithm that computes the critical sequence.
+class DependencyGraph {
+ struct DGNode {
+ unsigned NumPredecessors;
+ unsigned NumVisitedPredecessors;
+ uint64_t Cost;
+ unsigned Depth;
+
+ DependencyEdge CriticalPredecessor;
+ SmallVector<DependencyEdge, 8> OutgoingEdges;
+ };
+ SmallVector<DGNode, 16> Nodes;
+
+ DependencyGraph(const DependencyGraph &) = delete;
+ DependencyGraph &operator=(const DependencyGraph &) = delete;
+
+ void addDependency(unsigned From, unsigned To,
+ DependencyEdge::Dependency &&DE);
+
+ void pruneEdges(unsigned Iterations);
+ void initializeRootSet(SmallVectorImpl<unsigned> &RootSet) const;
+ void propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet,
+ unsigned Iterations);
+
+#ifndef NDEBUG
+ void dumpDependencyEdge(raw_ostream &OS, const DependencyEdge &DE,
+ MCInstPrinter &MCIP) const;
+#endif
+
+public:
+ DependencyGraph(unsigned Size) : Nodes(Size) {}
+
+ void addRegisterDep(unsigned From, unsigned To, unsigned RegID,
+ unsigned Cost) {
+ addDependency(From, To, {DependencyEdge::DT_REGISTER, RegID, Cost});
+ }
+
+ void addMemoryDep(unsigned From, unsigned To, unsigned Cost) {
+ addDependency(From, To, {DependencyEdge::DT_MEMORY, /* unused */ 0, Cost});
+ }
+
+ void addResourceDep(unsigned From, unsigned To, uint64_t Mask,
+ unsigned Cost) {
+ addDependency(From, To, {DependencyEdge::DT_RESOURCE, Mask, Cost});
+ }
+
+ // Called by the bottleneck analysis at the end of simulation to propagate
+ // costs through the edges of the graph, and compute a critical path.
+ void finalizeGraph(unsigned Iterations) {
+ SmallVector<unsigned, 16> RootSet;
+ pruneEdges(Iterations);
+ initializeRootSet(RootSet);
+ propagateThroughEdges(RootSet, Iterations);
+ }
+
+ // Returns a sequence of edges representing the critical sequence based on the
+ // simulated run. It assumes that the graph has already been finalized (i.e.
+ // method `finalizeGraph()` has already been called on this graph).
+ void getCriticalSequence(SmallVectorImpl<const DependencyEdge *> &Seq) const;
+
+#ifndef NDEBUG
+ void dump(raw_ostream &OS, MCInstPrinter &MCIP) const;
+#endif
+};
+
+/// A view that listens to stall, pressure and instruction events, feeds a
+/// PressureTracker and a DependencyGraph, and prints bottleneck hints together
+/// with a critical sequence.
+class BottleneckAnalysis : public InstructionView {
+ PressureTracker Tracker;
+ DependencyGraph DG;
+
+ // NOTE(review): presumably the number of simulated iterations of the input
+ // sequence and the number of simulated cycles -- confirm in the .cpp file.
+ unsigned Iterations;
+ unsigned TotalCycles;
+
+ bool PressureIncreasedBecauseOfResources;
+ bool PressureIncreasedBecauseOfRegisterDependencies;
+ bool PressureIncreasedBecauseOfMemoryDependencies;
+ // True if throughput was affected by dispatch stalls.
+ bool SeenStallCycles;
+
+ // Cycle counters describing why backpressure increased.
+ struct BackPressureInfo {
+ // Cycles where backpressure increased.
+ unsigned PressureIncreaseCycles;
+ // Cycles where backpressure increased because of pipeline pressure.
+ unsigned ResourcePressureCycles;
+ // Cycles where backpressure increased because of data dependencies.
+ unsigned DataDependencyCycles;
+ // Cycles where backpressure increased because of register dependencies.
+ unsigned RegisterDependencyCycles;
+ // Cycles where backpressure increased because of memory dependencies.
+ unsigned MemoryDependencyCycles;
+ };
+ BackPressureInfo BPI;
+
+ // Used to populate the dependency graph DG.
+ void addRegisterDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy);
+ void addMemoryDep(unsigned From, unsigned To, unsigned Cy);
+ void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy);
+
+ void printInstruction(formatted_raw_ostream &FOS, const MCInst &MCI,
+ bool UseDifferentColor = false) const;
+
+ // Prints a bottleneck message to OS.
+ void printBottleneckHints(raw_ostream &OS) const;
+ void printCriticalSequence(raw_ostream &OS) const;
+
+public:
+ BottleneckAnalysis(const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
+ ArrayRef<MCInst> Sequence, unsigned Iterations);
+
+ void onCycleEnd() override;
+ // Any stall event simply records that dispatch stalls were observed.
+ void onEvent(const HWStallEvent &Event) override { SeenStallCycles = true; }
+ void onEvent(const HWPressureEvent &Event) override;
+ void onEvent(const HWInstructionEvent &Event) override;
+
+ void printView(raw_ostream &OS) const override;
+ StringRef getNameAsString() const override { return "BottleneckAnalysis"; }
+ // This view reports itself as not serializable.
+ bool isSerializable() const override { return false; }
+
+#ifndef NDEBUG
+ // Debug helper: forwards to DependencyGraph::dump().
+ void dump(raw_ostream &OS, MCInstPrinter &MCIP) const { DG.dump(OS, MCIP); }
+#endif
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp
new file mode 100644
index 000000000000..3dc17c8754d8
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp
@@ -0,0 +1,98 @@
+//===--------------------- DispatchStatistics.cpp ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the DispatchStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/DispatchStatistics.h"
+#include "llvm/Support/Format.h"
+
+namespace llvm {
+namespace mca {
+
+// Bumps the counter associated with a generic stall kind. Events whose type
+// is not below HWStallEvent::LastGenericEvent have no counter and are ignored.
+void DispatchStatistics::onEvent(const HWStallEvent &Event) {
+  if (Event.Type >= HWStallEvent::LastGenericEvent)
+    return;
+  ++HWStalls[Event.Type];
+}
+
+// Accumulates the micro opcodes reported by dispatch events; every other
+// instruction event is ignored.
+void DispatchStatistics::onEvent(const HWInstructionEvent &Event) {
+  if (Event.Type == HWInstructionEvent::Dispatched) {
+    const auto &Dispatched =
+        static_cast<const HWInstructionDispatchedEvent &>(Event);
+    NumDispatched += Dispatched.MicroOpcodes;
+  }
+}
+
+// Prints one row per entry of DispatchGroupSizePerCycle: the number of micro
+// opcodes dispatched, the number of cycles where that happened, and that cycle
+// count as a percentage of NumCycles rounded to one decimal digit.
+void DispatchStatistics::printDispatchHistogram(raw_ostream &OS) const {
+  std::string Buffer;
+  raw_string_ostream TempStream(Buffer);
+
+  TempStream << "\n\nDispatch Logic - "
+             << "number of cycles where we saw N micro opcodes dispatched:\n"
+             << "[# dispatched], [# cycles]\n";
+
+  for (const auto &[GroupSize, CycleCount] : DispatchGroupSizePerCycle) {
+    const double Percentage =
+        (static_cast<double>(CycleCount) / NumCycles) * 100.0;
+    const double Rounded = floor((Percentage * 10) + 0.5) / 10;
+    TempStream << " " << GroupSize << ", " << CycleCount
+               << " (" << format("%.1f", Rounded)
+               << "%)\n";
+  }
+
+  // Everything is formatted into Buffer first and written to OS in one go.
+  TempStream.flush();
+  OS << Buffer;
+}
+
+// Writes NumStalls to OS. When it is not zero, it is followed by its ratio
+// over NumCycles, printed as a percentage rounded to one decimal digit.
+static void printStalls(raw_ostream &OS, unsigned NumStalls,
+                        unsigned NumCycles) {
+  OS << NumStalls;
+  if (NumStalls == 0)
+    return;
+
+  const double Percentage =
+      (static_cast<double>(NumStalls) / NumCycles) * 100.0;
+  const double Rounded = floor((Percentage * 10) + 0.5) / 10;
+  OS << " (" << format("%.1f", Rounded) << "%)";
+}
+
+// Prints, for every generic stall kind, a label followed by the stall counter
+// formatted by printStalls().
+void DispatchStatistics::printDispatchStalls(raw_ostream &OS) const {
+  // Label printed in front of each counter, in output order.
+  struct StallRow {
+    const char *Label;
+    unsigned Kind;
+  };
+  const StallRow Rows[] = {
+      {"RAT - Register unavailable: ", HWStallEvent::RegisterFileStall},
+      {"\nRCU - Retire tokens unavailable: ",
+       HWStallEvent::RetireControlUnitStall},
+      {"\nSCHEDQ - Scheduler full: ", HWStallEvent::SchedulerQueueFull},
+      {"\nLQ - Load queue full: ", HWStallEvent::LoadQueueFull},
+      {"\nSQ - Store queue full: ", HWStallEvent::StoreQueueFull},
+      {"\nGROUP - Static restrictions on the dispatch group: ",
+       HWStallEvent::DispatchGroupStall},
+      {"\nUSH - Uncategorised Structural Hazard: ",
+       HWStallEvent::CustomBehaviourStall},
+  };
+
+  std::string Buffer;
+  raw_string_ostream SS(Buffer);
+  SS << "\n\nDynamic Dispatch Stall Cycles:\n";
+  for (const StallRow &Row : Rows) {
+    SS << Row.Label;
+    printStalls(SS, HWStalls[Row.Kind], NumCycles);
+  }
+  SS << '\n';
+  SS.flush();
+  OS << Buffer;
+}
+
+// Serializes the generic stall counters as a JSON object keyed by the short
+// names that printDispatchStalls() uses as row prefixes.
+json::Value DispatchStatistics::toJSON() const {
+  const auto Count = [this](unsigned Kind) -> unsigned {
+    return HWStalls[Kind];
+  };
+  return json::Object(
+      {{"RAT", Count(HWStallEvent::RegisterFileStall)},
+       {"RCU", Count(HWStallEvent::RetireControlUnitStall)},
+       {"SCHEDQ", Count(HWStallEvent::SchedulerQueueFull)},
+       {"LQ", Count(HWStallEvent::LoadQueueFull)},
+       {"SQ", Count(HWStallEvent::StoreQueueFull)},
+       {"GROUP", Count(HWStallEvent::DispatchGroupStall)},
+       {"USH", Count(HWStallEvent::CustomBehaviourStall)}});
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/DispatchStatistics.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/DispatchStatistics.h
new file mode 100644
index 000000000000..cfd12691c03f
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/DispatchStatistics.h
@@ -0,0 +1,87 @@
+//===--------------------- DispatchStatistics.h -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements a view that prints a few statistics related to the
+/// dispatch logic. It collects and analyzes instruction dispatch events as
+/// well as static/dynamic dispatch stall events.
+///
+/// Example:
+/// ========
+///
+/// Dynamic Dispatch Stall Cycles:
+/// RAT - Register unavailable: 0
+/// RCU - Retire tokens unavailable: 0
+/// SCHEDQ - Scheduler full: 42
+/// LQ - Load queue full: 0
+/// SQ - Store queue full: 0
+/// GROUP - Static restrictions on the dispatch group: 0
+/// USH - Uncategorised Structural Hazard: 0
+///
+///
+/// Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+/// [# dispatched], [# cycles]
+/// 0, 15 (11.5%)
+/// 2, 4 (3.1%)
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/View.h"
+#include <map>
+
+namespace llvm {
+namespace mca {
+
+// View that gathers dispatch statistics: stall counters per generic stall
+// kind, plus a histogram of the number of micro opcodes dispatched per cycle.
+class DispatchStatistics : public View {
+  // Micro opcodes dispatched since the last call to updateHistograms().
+  unsigned NumDispatched = 0;
+  // Incremented once per onCycleBegin() notification.
+  unsigned NumCycles = 0;
+
+  // Counts dispatch stall events caused by unavailability of resources. There
+  // is one counter for every generic stall kind (see class HWStallEvent).
+  llvm::SmallVector<unsigned, 8> HWStalls;
+
+  // Maps a dispatch group size to the number of cycles where it was seen.
+  using Histogram = std::map<unsigned, unsigned>;
+  Histogram DispatchGroupSizePerCycle;
+
+  // Records the current dispatch count in the histogram, then resets it.
+  void updateHistograms() {
+    ++DispatchGroupSizePerCycle[NumDispatched];
+    NumDispatched = 0;
+  }
+
+  void printDispatchHistogram(llvm::raw_ostream &OS) const;
+
+  void printDispatchStalls(llvm::raw_ostream &OS) const;
+
+public:
+  DispatchStatistics() : HWStalls(HWStallEvent::LastGenericEvent) {}
+
+  void onEvent(const HWStallEvent &Event) override;
+
+  void onEvent(const HWInstructionEvent &Event) override;
+
+  void onCycleBegin() override { ++NumCycles; }
+
+  void onCycleEnd() override { updateHistograms(); }
+
+  // Prints the stall counters first, then the dispatch histogram.
+  void printView(llvm::raw_ostream &OS) const override {
+    printDispatchStalls(OS);
+    printDispatchHistogram(OS);
+  }
+  StringRef getNameAsString() const override { return "DispatchStatistics"; }
+  json::Value toJSON() const override;
+};
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
new file mode 100644
index 000000000000..fea0c9b8455c
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
@@ -0,0 +1,177 @@
+//===--------------------- InstructionInfoView.cpp --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the InstructionInfoView API.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/InstructionInfoView.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/JSON.h"
+
+namespace llvm {
+namespace mca {
+
+// Prints the "Instruction Info" table: a legend, a header row, and one row
+// per instruction of the input sequence. Extra columns are added when
+// PrintBarriers and/or PrintEncodings are set. Nothing is printed for an
+// empty sequence.
+void InstructionInfoView::printView(raw_ostream &OS) const {
+  ArrayRef<llvm::MCInst> Source = getSource();
+  if (Source.empty())
+    return;
+
+  std::string Buffer;
+  raw_string_ostream TempStream(Buffer);
+
+  IIVDVec IIVD(Source.size());
+  collectData(IIVD);
+
+  // Legend.
+  TempStream << "\n\nInstruction Info:\n";
+  TempStream << "[1]: #uOps\n[2]: Latency\n[3]: RThroughput\n"
+             << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n";
+  if (PrintBarriers)
+    TempStream << "[7]: LoadBarrier\n[8]: StoreBarrier\n";
+
+  // Header row; its shape depends on which optional columns are enabled.
+  if (PrintEncodings && PrintBarriers) {
+    TempStream << "[9]: Encoding Size\n";
+    TempStream << "\n[1] [2] [3] [4] [5] [6] [7] [8] "
+               << "[9] Encodings: Instructions:\n";
+  } else if (PrintEncodings) {
+    TempStream << "[7]: Encoding Size\n";
+    TempStream << "\n[1] [2] [3] [4] [5] [6] [7] "
+               << "Encodings: Instructions:\n";
+  } else if (PrintBarriers) {
+    TempStream << "\n[1] [2] [3] [4] [5] [6] [7] [8] "
+               << "Instructions:\n";
+  } else {
+    TempStream << "\n[1] [2] [3] [4] [5] [6] "
+               << "Instructions:\n";
+  }
+
+  for (const auto &[InstIdx, Data, Inst] : enumerate(IIVD, Source)) {
+    // Micro opcodes, padded according to the number of digits.
+    TempStream << ' ' << Data.NumMicroOpcodes << " ";
+    if (Data.NumMicroOpcodes < 10)
+      TempStream << " ";
+    else if (Data.NumMicroOpcodes < 100)
+      TempStream << ' ';
+
+    // Latency, padded according to the number of digits.
+    TempStream << Data.Latency << " ";
+    if (Data.Latency < 10)
+      TempStream << " ";
+    else if (Data.Latency < 100)
+      TempStream << ' ';
+
+    // Reciprocal throughput, or a dash when it is not available.
+    if (!Data.RThroughput.has_value()) {
+      TempStream << " - ";
+    } else {
+      const double RT = *Data.RThroughput;
+      TempStream << format("%.2f", RT) << ' ';
+      if (RT < 10.0)
+        TempStream << " ";
+      else if (RT < 100.0)
+        TempStream << ' ';
+    }
+
+    TempStream << (Data.mayLoad ? " * " : " ");
+    TempStream << (Data.mayStore ? " * " : " ");
+    TempStream << (Data.hasUnmodeledSideEffects ? " U " : " ");
+
+    if (PrintBarriers) {
+      const auto &Lowered = LoweredInsts[InstIdx];
+      TempStream << (Lowered->isALoadBarrier() ? " * "
+                                               : " ");
+      TempStream << (Lowered->isAStoreBarrier() ? " * "
+                                                : " ");
+    }
+
+    if (PrintEncodings) {
+      StringRef Encoding(CE.getEncoding(InstIdx));
+      const unsigned EncodingSize = Encoding.size();
+      TempStream << " " << EncodingSize
+                 << (EncodingSize < 10 ? " " : " ");
+      // Flush before layering the column-aware stream on top of TempStream.
+      TempStream.flush();
+      formatted_raw_ostream FOS(TempStream);
+      for (const char Byte : Encoding)
+        FOS << format("%02x ", static_cast<uint8_t>(Byte));
+      FOS.PadToColumn(30);
+      FOS.flush();
+    }
+
+    TempStream << printInstructionString(Inst) << '\n';
+  }
+
+  TempStream.flush();
+  OS << Buffer;
+}
+
+// Fills IIVD with one entry per source instruction: number of micro opcodes,
+// latency (including forwarding delays), reciprocal throughput, and the
+// mayLoad / mayStore / hasUnmodeledSideEffects opcode properties.
+void InstructionInfoView::collectData(
+    MutableArrayRef<InstructionInfoViewData> IIVD) const {
+  const llvm::MCSubtargetInfo &STI = getSubTargetInfo();
+  const MCSchedModel &SM = STI.getSchedModel();
+  const unsigned CPUID = SM.getProcessorID();
+
+  for (const auto &Pair : zip(getSource(), IIVD)) {
+    const MCInst &Inst = std::get<0>(Pair);
+    InstructionInfoViewData &Data = std::get<1>(Pair);
+    const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
+
+    // Start from the scheduling class of the opcode; instruments attached to
+    // this instruction, if any, take precedence.
+    unsigned SchedClassID = MCDesc.getSchedClass();
+    const auto InstrumentsIt = InstToInstruments.find(&Inst);
+    if (InstrumentsIt != InstToInstruments.end())
+      SchedClassID = IM.getSchedClassID(MCII, Inst, InstrumentsIt->second);
+
+    // Try to solve variant scheduling classes.
+    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
+      SchedClassID =
+          STI.resolveVariantSchedClass(SchedClassID, &Inst, &MCII, CPUID);
+
+    const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
+    Data.NumMicroOpcodes = SCDesc.NumMicroOps;
+    // Base latency plus the extra latency due to delays in the forwarding
+    // data paths.
+    Data.Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
+    Data.Latency += MCSchedModel::getForwardingDelayCycles(
+        STI.getReadAdvanceEntries(SCDesc));
+    Data.RThroughput = MCSchedModel::getReciprocalThroughput(STI, SCDesc);
+    Data.mayLoad = MCDesc.mayLoad();
+    Data.mayStore = MCDesc.mayStore();
+    Data.hasUnmodeledSideEffects = MCDesc.hasUnmodeledSideEffects();
+  }
+}
+
+// Builds the JSON object describing a single InstructionInfoViewData entry.
+// A missing reciprocal throughput is reported as 0.0.
+json::Object
+InstructionInfoView::toJSON(const InstructionInfoViewData &IIVD) const {
+  const double RThroughput = IIVD.RThroughput.value_or(0.0);
+
+  json::Object Result(
+      {{"NumMicroOpcodes", IIVD.NumMicroOpcodes},
+       {"Latency", IIVD.Latency},
+       {"mayLoad", IIVD.mayLoad},
+       {"mayStore", IIVD.mayStore},
+       {"hasUnmodeledSideEffects", IIVD.hasUnmodeledSideEffects}});
+  Result.try_emplace("RThroughput", RThroughput);
+  return Result;
+}
+
+// Serializes the whole view as {"InstructionList": [...]}, with one object
+// per instruction tagged with its index. An empty sequence yields the JSON
+// value 0.
+json::Value InstructionInfoView::toJSON() const {
+  ArrayRef<llvm::MCInst> Source = getSource();
+  if (Source.empty())
+    return json::Value(0);
+
+  IIVDVec IIVD(Source.size());
+  collectData(IIVD);
+
+  json::Array InstInfo;
+  for (size_t Index = 0, E = IIVD.size(); Index != E; ++Index) {
+    json::Object Entry = toJSON(IIVD[Index]);
+    Entry.try_emplace("Instruction", static_cast<unsigned>(Index));
+    InstInfo.push_back(std::move(Entry));
+  }
+  return json::Object({{"InstructionList", json::Value(std::move(InstInfo))}});
+}
+} // namespace mca.
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.h
new file mode 100644
index 000000000000..3befafda90a3
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionInfoView.h
@@ -0,0 +1,100 @@
+//===--------------------- InstructionInfoView.h ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the instruction info view.
+///
+/// The goal of the instruction info view is to print the latency and reciprocal
+/// throughput information for every instruction in the input sequence.
+/// This section also reports extra information related to the number of micro
+/// opcodes, and opcode properties (i.e. 'MayLoad', 'MayStore', and
+/// 'HasSideEffects').
+///
+/// Example:
+///
+/// Instruction Info:
+/// [1]: #uOps
+/// [2]: Latency
+/// [3]: RThroughput
+/// [4]: MayLoad
+/// [5]: MayStore
+/// [6]: HasSideEffects
+///
+/// [1] [2] [3] [4] [5] [6] Instructions:
+/// 1 2 1.00 vmulps %xmm0, %xmm1, %xmm2
+/// 1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3
+/// 1 3 1.00 vhaddps %xmm3, %xmm3, %xmm4
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONINFOVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONINFOVIEW_H
+
+#include "Views/InstructionView.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/CodeEmitter.h"
+#include "llvm/MCA/CustomBehaviour.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+/// A view that prints out generic instruction information.
+///
+/// The same data is available as text (printView) and as JSON (toJSON).
+class InstructionInfoView : public InstructionView {
+ const llvm::MCInstrInfo &MCII;
+ CodeEmitter &CE;
+ // When set, printView() adds the encoding size and encoding bytes columns.
+ bool PrintEncodings;
+ // When set, printView() adds the LoadBarrier and StoreBarrier columns.
+ bool PrintBarriers;
+ using UniqueInst = std::unique_ptr<Instruction>;
+ // Queried by printView() for load/store barrier information; indexed like
+ // the source sequence.
+ ArrayRef<UniqueInst> LoweredInsts;
+ // Used by collectData() to pick the scheduling class of instructions that
+ // have an entry in InstToInstruments.
+ const InstrumentManager &IM;
+ using InstToInstrumentsT =
+ DenseMap<const MCInst *, SmallVector<mca::Instrument *>>;
+ const InstToInstrumentsT &InstToInstruments;
+
+ // Per-instruction data gathered by collectData().
+ struct InstructionInfoViewData {
+ unsigned NumMicroOpcodes = 0;
+ unsigned Latency = 0;
+ // Printed as " - " by printView() and emitted as 0.0 by toJSON() when no
+ // value is present.
+ std::optional<double> RThroughput = 0.0;
+ bool mayLoad = false;
+ bool mayStore = false;
+ bool hasUnmodeledSideEffects = false;
+ };
+ using IIVDVec = SmallVector<InstructionInfoViewData, 16>;
+
+ /// Place the data into the array of InstructionInfoViewData IIVD.
+ void collectData(MutableArrayRef<InstructionInfoViewData> IIVD) const;
+
+public:
+ // Stores the given references and flags. Nothing is copied apart from the
+ // ArrayRef handles, so the referenced objects must outlive this view.
+ InstructionInfoView(const llvm::MCSubtargetInfo &ST,
+ const llvm::MCInstrInfo &II, CodeEmitter &C,
+ bool ShouldPrintEncodings, llvm::ArrayRef<llvm::MCInst> S,
+ llvm::MCInstPrinter &IP,
+ ArrayRef<UniqueInst> LoweredInsts,
+ bool ShouldPrintBarriers, const InstrumentManager &IM,
+ const InstToInstrumentsT &InstToInstruments)
+ : InstructionView(ST, IP, S), MCII(II), CE(C),
+ PrintEncodings(ShouldPrintEncodings),
+ PrintBarriers(ShouldPrintBarriers), LoweredInsts(LoweredInsts), IM(IM),
+ InstToInstruments(InstToInstruments) {}
+
+ void printView(llvm::raw_ostream &OS) const override;
+ StringRef getNameAsString() const override { return "InstructionInfoView"; }
+ // Serializes every instruction of the source sequence.
+ json::Value toJSON() const override;
+ // Serializes a single InstructionInfoViewData entry.
+ json::Object toJSON(const InstructionInfoViewData &IIVD) const;
+};
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionView.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionView.cpp
new file mode 100644
index 000000000000..3b174a064985
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionView.cpp
@@ -0,0 +1,43 @@
+//===----------------------- InstructionView.cpp ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the member functions of the class InstructionView.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/InstructionView.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+namespace mca {
+
+// The virtual destructor is declared in InstructionView.h and defaulted here,
+// out of line.
+InstructionView::~InstructionView() = default;
+
+// Prints MCI into the internal InstructionString buffer and returns a view of
+// it with leading whitespace removed. The returned StringRef points into that
+// buffer, so the next call overwrites its contents.
+StringRef
+InstructionView::printInstructionString(const llvm::MCInst &MCI) const {
+  InstructionString.clear();
+  MCIP.printInst(&MCI, 0, "", STI, InstrStream);
+  InstrStream.flush();
+  // Drop any tabs or spaces found at the beginning of the instruction.
+  const StringRef Printed(InstructionString);
+  return Printed.ltrim();
+}
+
+// Returns a JSON array with the printed form of every source instruction.
+json::Value InstructionView::toJSON() const {
+  json::Array SourceInfo;
+  for (const llvm::MCInst &MCI : getSource()) {
+    // Copy the text right away: the printed string lives in a buffer that the
+    // next call to printInstructionString() overwrites.
+    SourceInfo.push_back(printInstructionString(MCI).str());
+  }
+  return SourceInfo;
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionView.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionView.h
new file mode 100644
index 000000000000..ae57246fc35f
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/InstructionView.h
@@ -0,0 +1,60 @@
+//===----------------------- InstructionView.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the main interface for Views that examine and reference
+/// a sequence of machine instructions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONVIEW_H
+
+#include "llvm/MCA/View.h"
+#include "llvm/Support/JSON.h"
+
+namespace llvm {
+class MCInstPrinter;
+
+namespace mca {
+
+// The base class for views that deal with individual machine instructions.
+//
+// It stores the subtarget info, the instruction printer and the source
+// sequence, and offers a helper to print a single instruction as a string.
+class InstructionView : public View {
+ const llvm::MCSubtargetInfo &STI;
+ llvm::MCInstPrinter &MCIP;
+ llvm::ArrayRef<llvm::MCInst> Source;
+
+ // Scratch buffer and the stream that writes into it. Both are mutable so
+ // that the const method printInstructionString() can reuse them. The
+ // constructor initializes InstrStream from InstructionString, so the buffer
+ // must stay declared before the stream.
+ mutable std::string InstructionString;
+ mutable raw_string_ostream InstrStream;
+
+public:
+ // The base view prints nothing.
+ void printView(llvm::raw_ostream &) const override {}
+ InstructionView(const llvm::MCSubtargetInfo &STI,
+ llvm::MCInstPrinter &Printer, llvm::ArrayRef<llvm::MCInst> S)
+ : STI(STI), MCIP(Printer), Source(S), InstrStream(InstructionString) {}
+
+ virtual ~InstructionView();
+
+ StringRef getNameAsString() const override { return "Instructions"; }
+
+ // Return a reference to a string representing a given machine instruction.
+ // The result should be used or copied before the next call to
+ // printInstructionString() as it will overwrite the previous result.
+ StringRef printInstructionString(const llvm::MCInst &MCI) const;
+ const llvm::MCSubtargetInfo &getSubTargetInfo() const { return STI; }
+
+ llvm::MCInstPrinter &getInstPrinter() const { return MCIP; }
+ llvm::ArrayRef<llvm::MCInst> getSource() const { return Source; }
+
+ // Serializes the source sequence as a JSON array of instruction strings.
+ json::Value toJSON() const override;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp
new file mode 100644
index 000000000000..4ef8053bff41
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp
@@ -0,0 +1,170 @@
+//===--------------------- RegisterFileStatistics.cpp -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the RegisterFileStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/RegisterFileStatistics.h"
+#include "llvm/Support/Format.h"
+
+namespace llvm {
+namespace mca {
+
+// Allocates one zero-initialized RegisterFileUsage and one zero-initialized
+// MoveEliminationInfo per register file.
+RegisterFileStatistics::RegisterFileStatistics(const MCSubtargetInfo &sti)
+    : STI(sti) {
+  const MCSchedModel &SM = STI.getSchedModel();
+
+  // Without extra processor info, assume a single register file.
+  unsigned NumRegFiles = 1;
+  if (SM.hasExtraProcessorInfo()) {
+    // One entry for every user defined register file, plus the default
+    // register file which is always at index #0.
+    //
+    // There is always an "InvalidRegisterFile" entry in tablegen. That entry
+    // can be skipped. If there are no user defined register files, then a
+    // single entry is still reserved for the default register file at
+    // index #0.
+    const MCExtraProcessorInfo &PI = SM.getExtraProcessorInfo();
+    NumRegFiles = std::max(PI.NumRegisterFiles, 1U);
+  }
+
+  PRFUsage.assign(NumRegFiles, RegisterFileUsage{0, 0, 0});
+  MoveElimInfo.assign(NumRegFiles, MoveEliminationInfo{0, 0, 0, 0, 0});
+}
+
+// Adds the number of mappings in UsedPhysRegs to the counters of every
+// register file, and refreshes the maximum number of mappings in use.
+// UsedPhysRegs is indexed like PRFUsage.
+void RegisterFileStatistics::updateRegisterFileUsage(
+    ArrayRef<unsigned> UsedPhysRegs) {
+  const unsigned NumFiles = PRFUsage.size();
+  for (unsigned FileIdx = 0; FileIdx < NumFiles; ++FileIdx) {
+    const unsigned NewMappings = UsedPhysRegs[FileIdx];
+    RegisterFileUsage &Usage = PRFUsage[FileIdx];
+    Usage.TotalMappings += NewMappings;
+    Usage.CurrentlyUsedMappings += NewMappings;
+    if (Usage.CurrentlyUsedMappings > Usage.MaxUsedMappings)
+      Usage.MaxUsedMappings = Usage.CurrentlyUsedMappings;
+  }
+}
+
+// Updates the move elimination counters for an optimizable move. Instructions
+// that are not optimizable moves, or whose number of definitions differs from
+// their number of uses, are ignored.
+void RegisterFileStatistics::updateMoveElimInfo(const Instruction &Inst) {
+  const auto &Defs = Inst.getDefs();
+  const auto &Uses = Inst.getUses();
+  if (!Inst.isOptimizableMove() || Defs.size() != Uses.size())
+    return;
+
+  const size_t NumDefs = Defs.size();
+  for (size_t DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
+    // The I-th definition is paired with the I-th use counted from the end.
+    const WriteState &WS = Defs[DefIdx];
+    const ReadState &RS = Uses[NumDefs - (DefIdx + 1)];
+
+    // Counters are attributed to the register file of the first definition.
+    MoveEliminationInfo &Info = MoveElimInfo[Defs[0].getRegisterFileID()];
+    ++Info.TotalMoveEliminationCandidates;
+    if (WS.isEliminated())
+      ++Info.CurrentMovesEliminated;
+    if (WS.isWriteZero() && RS.isReadZero())
+      ++Info.TotalMovesThatPropagateZero;
+  }
+}
+
+// Retired events release the mappings freed by the instruction; dispatched
+// events account for newly used mappings and update the move elimination
+// counters. Every other event is ignored.
+void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) {
+  if (Event.Type == HWInstructionEvent::Retired) {
+    const auto &Retired = static_cast<const HWInstructionRetiredEvent &>(Event);
+    const unsigned NumFiles = PRFUsage.size();
+    for (unsigned FileIdx = 0; FileIdx < NumFiles; ++FileIdx)
+      PRFUsage[FileIdx].CurrentlyUsedMappings -=
+          Retired.FreedPhysRegs[FileIdx];
+    return;
+  }
+
+  if (Event.Type == HWInstructionEvent::Dispatched) {
+    const auto &Dispatched =
+        static_cast<const HWInstructionDispatchedEvent &>(Event);
+    updateRegisterFileUsage(Dispatched.UsedPhysRegs);
+    updateMoveElimInfo(*Dispatched.IR.getInstruction());
+  }
+}
+
+// Folds the moves eliminated during the current cycle into the per-file
+// totals and maxima, then resets the per-cycle counter.
+void RegisterFileStatistics::onCycleEnd() {
+  for (MoveEliminationInfo &MEI : MoveElimInfo) {
+    const unsigned EliminatedThisCycle = MEI.CurrentMovesEliminated;
+    if (EliminatedThisCycle > MEI.MaxMovesEliminatedPerCycle)
+      MEI.MaxMovesEliminatedPerCycle = EliminatedThisCycle;
+    MEI.TotalMovesEliminated += EliminatedThisCycle;
+    MEI.CurrentMovesEliminated = 0;
+  }
+}
+
+// Prints the register file statistics: global mapping counters taken from the
+// default register file at index #0, followed by one section for every other
+// register file that has a non-zero number of physical registers. Each
+// section reports mapping counters and, when at least one optimizable move
+// was seen, move elimination statistics.
+void RegisterFileStatistics::printView(raw_ostream &OS) const {
+  std::string Buffer;
+  raw_string_ostream TempStream(Buffer);
+
+  TempStream << "\n\nRegister File statistics:";
+  const RegisterFileUsage &GlobalUsage = PRFUsage[0];
+  TempStream << "\nTotal number of mappings created: "
+             << GlobalUsage.TotalMappings;
+  TempStream << "\nMax number of mappings used: "
+             << GlobalUsage.MaxUsedMappings << '\n';
+
+  for (unsigned I = 1, E = PRFUsage.size(); I < E; ++I) {
+    const RegisterFileUsage &RFU = PRFUsage[I];
+    // Obtain the register file descriptor from the scheduling model.
+    assert(STI.getSchedModel().hasExtraProcessorInfo() &&
+           "Unable to find register file info!");
+    const MCExtraProcessorInfo &PI =
+        STI.getSchedModel().getExtraProcessorInfo();
+    // I indexes PI.RegisterFiles, so it must be strictly below the number of
+    // register files.
+    assert(I < PI.NumRegisterFiles && "Unexpected register file index!");
+    const MCRegisterFileDesc &RFDesc = PI.RegisterFiles[I];
+    // Skip invalid register files.
+    if (!RFDesc.NumPhysRegs)
+      continue;
+
+    TempStream << "\n* Register File #" << I;
+    TempStream << " -- " << StringRef(RFDesc.Name) << ':';
+    // NumPhysRegs is known to be non-zero here because of the check above.
+    TempStream << "\n Number of physical registers: " << RFDesc.NumPhysRegs;
+    TempStream << "\n Total number of mappings created: "
+               << RFU.TotalMappings;
+    TempStream << "\n Max number of mappings used: "
+               << RFU.MaxUsedMappings << '\n';
+    const MoveEliminationInfo &MEI = MoveElimInfo[I];
+
+    if (MEI.TotalMoveEliminationCandidates) {
+      TempStream << " Number of optimizable moves: "
+                 << MEI.TotalMoveEliminationCandidates;
+      // Percentages are relative to the number of optimizable moves and are
+      // rounded to one decimal digit.
+      double EliminatedMovProportion = (double)MEI.TotalMovesEliminated /
+                                       MEI.TotalMoveEliminationCandidates *
+                                       100.0;
+      double ZeroMovProportion = (double)MEI.TotalMovesThatPropagateZero /
+                                 MEI.TotalMoveEliminationCandidates * 100.0;
+      TempStream << "\n Number of moves eliminated: "
+                 << MEI.TotalMovesEliminated << " "
+                 << format("(%.1f%%)",
+                           floor((EliminatedMovProportion * 10) + 0.5) / 10);
+      TempStream << "\n Number of zero moves: "
+                 << MEI.TotalMovesThatPropagateZero << " "
+                 << format("(%.1f%%)",
+                           floor((ZeroMovProportion * 10) + 0.5) / 10);
+      TempStream << "\n Max moves eliminated per cycle: "
+                 << MEI.MaxMovesEliminatedPerCycle << '\n';
+    }
+  }
+
+  TempStream.flush();
+  OS << Buffer;
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h
new file mode 100644
index 000000000000..3de2a22ac32d
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h
@@ -0,0 +1,84 @@
+//===--------------------- RegisterFileStatistics.h -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This view collects and prints register file usage statistics.
+///
+/// Example (-mcpu=btver2):
+/// ========================
+///
+/// Register File statistics:
+/// Total number of mappings created: 6
+/// Max number of mappings used: 3
+///
+/// * Register File #1 -- FpuPRF:
+/// Number of physical registers: 72
+/// Total number of mappings created: 0
+/// Max number of mappings used: 0
+/// Number of optimizable moves: 200
+/// Number of moves eliminated: 200 (100.0%)
+/// Number of zero moves: 200 (100.0%)
+/// Max moves eliminated per cycle: 2
+///
+/// * Register File #2 -- IntegerPRF:
+/// Number of physical registers: 64
+/// Total number of mappings created: 6
+/// Max number of mappings used: 3
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H
+#define LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/View.h"
+
+namespace llvm {
+namespace mca {
+
+class RegisterFileStatistics : public View { // View that collects and prints register file usage statistics.
+ const llvm::MCSubtargetInfo &STI; // printView() reads the register file descriptors from its scheduling model.
+
+ // Per-register-file counters of physical register mappings.
+ struct RegisterFileUsage {
+ unsigned TotalMappings; // Reported by printView() as the total number of mappings created.
+ unsigned MaxUsedMappings; // Reported by printView() as the max number of mappings used.
+ unsigned CurrentlyUsedMappings; // Presumably the mappings in use right now -- TODO confirm against the .cpp.
+ };
+
+ struct MoveEliminationInfo { // Per-register-file move elimination counters.
+ unsigned TotalMoveEliminationCandidates; // Reported as the number of optimizable moves; nothing is printed when zero.
+ unsigned TotalMovesEliminated; // Reported as a count and as a percentage of the candidates.
+ unsigned TotalMovesThatPropagateZero; // Reported as "zero moves", count and percentage of the candidates.
+ unsigned MaxMovesEliminatedPerCycle; // Reported as the max moves eliminated per cycle.
+ unsigned CurrentMovesEliminated; // Presumably the per-cycle running count -- TODO confirm against onCycleEnd().
+ };
+
+ // There is one entry for each register file implemented by the processor.
+ llvm::SmallVector<RegisterFileUsage, 4> PRFUsage;
+ llvm::SmallVector<MoveEliminationInfo, 4> MoveElimInfo; // Indexed by register file, like PRFUsage.
+
+ void updateRegisterFileUsage(ArrayRef<unsigned> UsedPhysRegs); // NOTE(review): defined in the .cpp; presumably updates PRFUsage -- confirm.
+ void updateMoveElimInfo(const Instruction &Inst); // NOTE(review): defined in the .cpp; presumably updates MoveElimInfo -- confirm.
+
+public:
+ RegisterFileStatistics(const llvm::MCSubtargetInfo &sti);
+
+ void onCycleEnd() override;
+ void onEvent(const HWInstructionEvent &Event) override;
+ void printView(llvm::raw_ostream &OS) const override; // Writes the report shown in the example at the top of this file.
+ StringRef getNameAsString() const override {
+ return "RegisterFileStatistics";
+ }
+ bool isSerializable() const override { return false; } // This view always reports itself as not serializable.
+};
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp
new file mode 100644
index 000000000000..f39350f3b458
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp
@@ -0,0 +1,200 @@
+//===--------------------- ResourcePressureView.cpp -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements methods in the ResourcePressureView interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/ResourcePressureView.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+/// Builds the view and lays out the ResourceUsage table.
+///
+/// Every processor resource that is neither a group nor unit-less is assigned
+/// a contiguous run of columns, one per unit. The table then gets one row per
+/// source instruction plus one extra row, and every cell is reset to zero.
+ResourcePressureView::ResourcePressureView(const llvm::MCSubtargetInfo &sti,
+                                           MCInstPrinter &Printer,
+                                           ArrayRef<MCInst> S)
+    : InstructionView(sti, Printer, S), LastInstructionIdx(0) {
+  const MCSchedModel &Model = getSubTargetInfo().getSchedModel();
+  const unsigned NumKinds = Model.getNumProcResourceKinds();
+
+  // First free column; advanced by the unit count of each accepted resource.
+  unsigned NextColumn = 0;
+  for (unsigned ResID = 0; ResID != NumKinds; ++ResID) {
+    const MCProcResourceDesc &Desc = *Model.getProcResource(ResID);
+    // Groups and resources with zero units do not get any column.
+    if (!Desc.SubUnitsIdxBegin && Desc.NumUnits) {
+      Resource2VecIndex.insert(
+          std::pair<unsigned, unsigned>(ResID, NextColumn));
+      NextColumn += Desc.NumUnits;
+    }
+  }
+
+  NumResourceUnits = NextColumn;
+  const size_t NumRows = getSource().size() + 1;
+  ResourceUsage.resize(NumResourceUnits * NumRows);
+  std::fill(ResourceUsage.begin(), ResourceUsage.end(), 0.0);
+}
+
+/// Records resource consumption reported by "instruction issued" events.
+///
+/// Dispatch events only update the index of the last dispatched instruction.
+/// For issue events, the cycles of every used resource unit are added both to
+/// the row of the issuing instruction and to the final (totals) row.
+void ResourcePressureView::onEvent(const HWInstructionEvent &Event) {
+  if (Event.Type == HWInstructionEvent::Issued) {
+    const auto &Issued =
+        static_cast<const HWInstructionIssuedEvent &>(Event);
+    const size_t NumInsts = getSource().size();
+    // Instructions from later iterations fold onto the row of iteration #0.
+    const unsigned InstRow = Event.IR.getSourceIndex() % NumInsts;
+    for (const std::pair<ResourceRef, ReleaseAtCycles> &Entry :
+         Issued.UsedResources) {
+      const ResourceRef &Ref = Entry.first;
+      assert(Resource2VecIndex.contains(Ref.first));
+      // Column of the resource's first unit, offset by the trailing-zero
+      // count of the unit mask.
+      const unsigned Column =
+          Resource2VecIndex[Ref.first] + llvm::countr_zero(Ref.second);
+      ResourceUsage[Column + NumResourceUnits * InstRow] += Entry.second;
+      ResourceUsage[Column + NumResourceUnits * NumInsts] += Entry.second;
+    }
+    return;
+  }
+
+  // Every other event type is ignored, except that dispatches are tracked.
+  if (Event.Type == HWInstructionEvent::Dispatched)
+    LastInstructionIdx = Event.IR.getSourceIndex();
+}
+
+/// Prints one "[N]" or "[N.U]" header per resource unit, padding each header
+/// to a column that advances by seven characters per unit.
+///
+/// N counts the resources that are actually printed; U is the unit number and
+/// is only shown for resources with more than one unit.
+static void printColumnNames(formatted_raw_ostream &OS,
+                             const MCSchedModel &SM) {
+  unsigned PadTarget = OS.getColumn();
+  unsigned Label = 0;
+  const unsigned NumKinds = SM.getNumProcResourceKinds();
+  for (unsigned ResID = 1; ResID < NumKinds; ++ResID) {
+    const MCProcResourceDesc &Desc = *SM.getProcResource(ResID);
+    const unsigned Units = Desc.NumUnits;
+    // Groups and resources with zero units have no column.
+    if (!Desc.SubUnitsIdxBegin && Units) {
+      for (unsigned Unit = 0; Unit != Units; ++Unit) {
+        PadTarget += 7;
+        OS << "[" << Label;
+        if (Units > 1)
+          OS << '.' << Unit;
+        OS << ']';
+        OS.PadToColumn(PadTarget);
+      }
+      ++Label;
+    }
+  }
+}
+
+/// Prints a single table cell and pads the stream to column \p Col.
+///
+/// Values below 0.005 (including zero) are shown as a dash; anything else is
+/// rounded to the nearest hundredth and printed with two decimals.
+static void printResourcePressure(formatted_raw_ostream &OS, double Pressure,
+                                  unsigned Col) {
+  // Zero also satisfies this comparison, so no separate zero test is needed.
+  const bool Negligible = Pressure < 0.005;
+  if (Negligible)
+    OS << " - ";
+  else
+    OS << format("%.2f", floor((Pressure * 100) + 0.5) / 100);
+  OS.PadToColumn(Col);
+}
+
+/// Prints the legend of resource units followed by the "per iteration" table.
+///
+/// The table has a single row: the cumulative usage of each resource unit
+/// divided by the number of iterations derived from the last dispatched
+/// instruction index.
+void ResourcePressureView::printResourcePressurePerIter(raw_ostream &OS) const {
+  std::string Storage;
+  raw_string_ostream StringOS(Storage);
+  formatted_raw_ostream Out(StringOS);
+
+  Out << "\n\nResources:\n";
+  const MCSchedModel &Model = getSubTargetInfo().getSchedModel();
+  const unsigned NumKinds = Model.getNumProcResourceKinds();
+  unsigned Label = 0;
+  for (unsigned ResID = 1; ResID < NumKinds; ++ResID) {
+    const MCProcResourceDesc &Desc = *Model.getProcResource(ResID);
+    const unsigned Units = Desc.NumUnits;
+    // Only non-group resources with at least one unit get a legend entry.
+    if (!Desc.SubUnitsIdxBegin && Units) {
+      for (unsigned Unit = 0; Unit != Units; ++Unit) {
+        Out << '[' << Label;
+        if (Units > 1)
+          Out << '.' << Unit;
+        Out << ']';
+        Out.PadToColumn(6);
+        Out << "- " << Desc.Name << '\n';
+      }
+      ++Label;
+    }
+  }
+
+  Out << "\n\nResource pressure per iteration:\n";
+  Out.flush();
+  printColumnNames(Out, Model);
+  Out << '\n';
+  Out.flush();
+
+  const size_t NumInsts = getSource().size();
+  const unsigned Executions = LastInstructionIdx / NumInsts + 1;
+  // The totals row comes right after the per-instruction rows.
+  const size_t TotalsBase = NumInsts * NumResourceUnits;
+  for (unsigned Col = 0; Col != NumResourceUnits; ++Col) {
+    double Total = ResourceUsage[TotalsBase + Col];
+    printResourcePressure(Out, Total / Executions, (Col + 1) * 7);
+  }
+
+  Out.flush();
+  OS << Storage;
+}
+
+/// Prints the "by instruction" table: one row per source instruction with the
+/// average usage of every resource unit, followed by the instruction text.
+///
+/// Output is forwarded to \p OS after every row; the staging string is
+/// emptied between rows.
+void ResourcePressureView::printResourcePressurePerInst(raw_ostream &OS) const {
+  std::string Chunk;
+  raw_string_ostream ChunkOS(Chunk);
+  formatted_raw_ostream Out(ChunkOS);
+
+  Out << "\n\nResource pressure by instruction:\n";
+  printColumnNames(Out, getSubTargetInfo().getSchedModel());
+  Out << "Instructions:\n";
+
+  ArrayRef<llvm::MCInst> Insts = getSource();
+  const unsigned Executions = LastInstructionIdx / Insts.size() + 1;
+  for (unsigned Row = 0, NumRows = Insts.size(); Row != NumRows; ++Row) {
+    const unsigned RowBase = Row * NumResourceUnits;
+    for (unsigned Col = 0; Col != NumResourceUnits; ++Col) {
+      double Cycles = ResourceUsage[RowBase + Col];
+      printResourcePressure(Out, Cycles / Executions, (Col + 1) * 7);
+    }
+
+    Out << printInstructionString(Insts[Row]) << '\n';
+    Out.flush();
+    OS << Chunk;
+    Chunk.clear();
+  }
+}
+
+/// Serializes the non-zero cells of the ResourceUsage table.
+///
+/// Each emitted object holds the row ("InstructionIndex"), the column
+/// ("ResourceIndex") and the cell value divided by the iteration count
+/// ("ResourceUsage"). Cells whose numerator is zero are left out.
+json::Value ResourcePressureView::toJSON() const {
+  json::Array Cells;
+
+  const unsigned Executions = LastInstructionIdx / getSource().size() + 1;
+  for (size_t Flat = 0, End = ResourceUsage.size(); Flat != End; ++Flat) {
+    const ReleaseAtCycles &Cycles = ResourceUsage[Flat];
+    // The table is sparse; skip empty cells.
+    if (Cycles.getNumerator() != 0) {
+      const unsigned InstructionIndex = Flat / NumResourceUnits;
+      const unsigned ResourceIndex = Flat % NumResourceUnits;
+      const double Usage = Cycles / Executions;
+      Cells.push_back(json::Object({{"InstructionIndex", InstructionIndex},
+                                    {"ResourceIndex", ResourceIndex},
+                                    {"ResourceUsage", Usage}}));
+    }
+  }
+
+  json::Object Result({{"ResourcePressureInfo", std::move(Cells)}});
+  return Result;
+}
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.h
new file mode 100644
index 000000000000..be8ad04102fd
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/ResourcePressureView.h
@@ -0,0 +1,103 @@
+//===--------------------- ResourcePressureView.h ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines class ResourcePressureView.
+/// Class ResourcePressureView observes hardware events generated by
+/// the Pipeline object and collects statistics related to resource usage at
+/// instruction granularity.
+/// Resource pressure information is then printed out to a stream in the
+/// form of a table like the one from the example below:
+///
+/// Resources:
+/// [0] - JALU0
+/// [1] - JALU1
+/// [2] - JDiv
+/// [3] - JFPM
+/// [4] - JFPU0
+/// [5] - JFPU1
+/// [6] - JLAGU
+/// [7] - JSAGU
+/// [8] - JSTC
+/// [9] - JVIMUL
+///
+/// Resource pressure per iteration:
+/// [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
+/// 0.00 0.00 0.00 0.00 2.00 2.00 0.00 0.00 0.00 0.00
+///
+/// Resource pressure by instruction:
+/// [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
+/// - - - - - 1.00 - - - - vpermilpd $1, %xmm0,
+/// %xmm1
+/// - - - - 1.00 - - - - - vaddps %xmm0, %xmm1,
+/// %xmm2
+/// - - - - - 1.00 - - - - vmovshdup %xmm2, %xmm3
+/// - - - - 1.00 - - - - - vaddss %xmm2, %xmm3,
+/// %xmm4
+///
+/// In this example, we have AVX code executed on AMD Jaguar (btver2).
+/// Both shuffles and vector floating point add operations on XMM registers have
+/// a reciprocal throughput of 1cy.
+/// Each add is issued to pipeline JFPU0, while each shuffle is issued to
+/// pipeline JFPU1. The overall pressure per iteration is reported by two
+/// tables: the first smaller table is the resource pressure per iteration;
+/// the second table reports resource pressure per instruction. Values are the
+/// average resource cycles consumed by an instruction.
+/// Every vector add from the example uses resource JFPU0 for an average of 1cy
+/// per iteration. Consequently, the resource pressure on JFPU0 is of 2cy per
+/// iteration.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_RESOURCEPRESSUREVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_RESOURCEPRESSUREVIEW_H
+
+#include "Views/InstructionView.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/JSON.h"
+
+namespace llvm {
+namespace mca {
+
+/// This class collects resource pressure statistics from instruction issue
+/// events and prints them as per-iteration and per-instruction tables.
+class ResourcePressureView : public InstructionView {
+ unsigned LastInstructionIdx; // Source index of the last dispatched instruction; used to derive the iteration count.
+
+ // Maps a processor resource ID to the ResourceUsage column of that
+ // resource's first unit. Groups and zero-unit resources have no entry.
+ llvm::DenseMap<unsigned, unsigned> Resource2VecIndex;
+
+ // Flat table: one row per source instruction, then one row of totals.
+ std::vector<ReleaseAtCycles> ResourceUsage;
+ unsigned NumResourceUnits; // Number of columns in ResourceUsage (one per resource unit).
+
+ void printResourcePressurePerIter(llvm::raw_ostream &OS) const; // Prints the resource legend and the totals row averaged per iteration.
+ void printResourcePressurePerInst(llvm::raw_ostream &OS) const; // Prints one averaged row per source instruction.
+
+public:
+ ResourcePressureView(const llvm::MCSubtargetInfo &sti,
+ llvm::MCInstPrinter &Printer,
+ llvm::ArrayRef<llvm::MCInst> S);
+
+ void onEvent(const HWInstructionEvent &Event) override; // Accumulates usage on Issued events; tracks the index on Dispatched events.
+ void printView(llvm::raw_ostream &OS) const override { // Prints the per-iteration table first, then the per-instruction table.
+ printResourcePressurePerIter(OS);
+ printResourcePressurePerInst(OS);
+ }
+ StringRef getNameAsString() const override { return "ResourcePressureView"; }
+ json::Value toJSON() const override; // Emits only the non-zero cells of ResourceUsage.
+};
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
new file mode 100644
index 000000000000..1c40428fb018
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
@@ -0,0 +1,91 @@
+//===--------------------- RetireControlUnitStatistics.cpp ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the RetireControlUnitStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/RetireControlUnitStatistics.h"
+#include "llvm/Support/Format.h"
+
+namespace llvm {
+namespace mca {
+
+/// Zeroes all counters and picks the reorder buffer size.
+///
+/// The size defaults to the model's MicroOpBufferSize; a non-zero
+/// ReorderBufferSize in the extra processor info takes precedence.
+RetireControlUnitStatistics::RetireControlUnitStatistics(const MCSchedModel &SM)
+    : NumRetired(0), NumCycles(0), EntriesInUse(0), MaxUsedEntries(0),
+      SumOfUsedEntries(0) {
+  TotalROBEntries = SM.MicroOpBufferSize;
+  if (!SM.hasExtraProcessorInfo())
+    return;
+
+  const MCExtraProcessorInfo &Extra = SM.getExtraProcessorInfo();
+  if (Extra.ReorderBufferSize != 0)
+    TotalROBEntries = Extra.ReorderBufferSize;
+}
+
+/// Tracks reorder buffer occupancy.
+///
+/// A dispatch adds the event's micro-opcode count to the entries in use; a
+/// retire subtracts the instruction's micro-op count and bumps the per-cycle
+/// retire counter. All other events are ignored.
+void RetireControlUnitStatistics::onEvent(const HWInstructionEvent &Event) {
+  if (Event.Type == HWInstructionEvent::Dispatched) {
+    const auto &Dispatch =
+        static_cast<const HWInstructionDispatchedEvent &>(Event);
+    EntriesInUse += Dispatch.MicroOpcodes;
+    return;
+  }
+
+  if (Event.Type != HWInstructionEvent::Retired)
+    return;
+
+  const unsigned Freed = Event.IR.getInstruction()->getDesc().NumMicroOps;
+  assert(EntriesInUse >= Freed && "Invalid internal state!");
+  EntriesInUse -= Freed;
+  ++NumRetired;
+}
+
+/// Closes the current cycle: files the retire count into the histogram,
+/// samples the reorder buffer occupancy, and counts the cycle.
+void RetireControlUnitStatistics::onCycleEnd() {
+  // One more cycle in which exactly NumRetired instructions retired.
+  ++RetiredPerCycle[NumRetired];
+  NumRetired = 0;
+
+  if (EntriesInUse > MaxUsedEntries)
+    MaxUsedEntries = EntriesInUse;
+  SumOfUsedEntries += EntriesInUse;
+
+  ++NumCycles;
+}
+
+/// Prints the retire histogram followed by a summary of reorder buffer usage.
+///
+/// Each histogram row reports how many cycles saw a given number of retired
+/// instructions, along with that count as a percentage of all cycles. The
+/// summary reports the ROB size, the peak number of entries in use, and the
+/// average number of entries in use per cycle (truncated to an integer).
+///
+/// \param OS stream that receives the fully formatted report in one write.
+void RetireControlUnitStatistics::printView(raw_ostream &OS) const {
+  std::string Buffer;
+  raw_string_ostream TempStream(Buffer);
+  TempStream << "\n\nRetire Control Unit - "
+             << "number of cycles where we saw N instructions retired:\n";
+  TempStream << "[# retired], [# cycles]\n";
+
+  // RetiredPerCycle only gains entries in onCycleEnd(), which also increments
+  // NumCycles, so NumCycles is non-zero whenever this loop body runs.
+  for (const std::pair<const unsigned, unsigned> &Entry : RetiredPerCycle) {
+    TempStream << " " << Entry.first;
+    // NOTE(review): both separators are identical as written here; presumably
+    // they differ in padding to keep the columns aligned -- confirm.
+    if (Entry.first < 10)
+      TempStream << ", ";
+    else
+      TempStream << ", ";
+    TempStream << Entry.second << " ("
+               << format("%.1f", ((double)Entry.second / NumCycles) * 100.0)
+               << "%)\n";
+  }
+
+  // Guard the division: with zero recorded cycles the quotient is NaN, and
+  // converting NaN to an unsigned integer is undefined behavior.
+  unsigned AvgUsage = 0;
+  if (NumCycles)
+    AvgUsage = (double)SumOfUsedEntries / NumCycles;
+
+  // Percentages are relative to the ROB size and rounded to one decimal. They
+  // are computed in floating point, so a zero TotalROBEntries yields a
+  // non-finite value that is printed as-is.
+  double MaxUsagePercentage =
+      ((double)MaxUsedEntries / TotalROBEntries) * 100.0;
+  double NormalizedMaxPercentage = floor((MaxUsagePercentage * 10) + 0.5) / 10;
+  double AvgUsagePercentage = ((double)AvgUsage / TotalROBEntries) * 100.0;
+  double NormalizedAvgPercentage = floor((AvgUsagePercentage * 10) + 0.5) / 10;
+
+  TempStream << "\nTotal ROB Entries: " << TotalROBEntries
+             << "\nMax Used ROB Entries: " << MaxUsedEntries
+             << format(" ( %.1f%% )", NormalizedMaxPercentage)
+             << "\nAverage Used ROB Entries per cy: " << AvgUsage
+             << format(" ( %.1f%% )\n", NormalizedAvgPercentage);
+
+  TempStream.flush();
+  OS << Buffer;
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h
new file mode 100644
index 000000000000..ed3736c64515
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h
@@ -0,0 +1,64 @@
+//===--------------------- RetireControlUnitStatistics.h --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines class RetireControlUnitStatistics: a view that knows how
+/// to print general statistics related to the retire control unit.
+///
+/// Example:
+/// ========
+///
+/// Retire Control Unit - number of cycles where we saw N instructions retired:
+/// [# retired], [# cycles]
+/// 0, 109 (17.9%)
+/// 1, 102 (16.7%)
+/// 2, 399 (65.4%)
+///
+/// Total ROB Entries: 64
+/// Max Used ROB Entries: 35 ( 54.7% )
+/// Average Used ROB Entries per cy: 32 ( 50.0% )
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H
+#define LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H
+
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/View.h"
+#include <map>
+
+namespace llvm {
+namespace mca {
+
+class RetireControlUnitStatistics : public View { // View that gathers and prints retire control unit statistics.
+ using Histogram = std::map<unsigned, unsigned>;
+ Histogram RetiredPerCycle; // Maps "instructions retired in one cycle" to the number of such cycles.
+
+ unsigned NumRetired; // Instructions retired in the current cycle; reset by onCycleEnd().
+ unsigned NumCycles; // Number of onCycleEnd() notifications received.
+ unsigned TotalROBEntries; // Reorder buffer size taken from the scheduling model.
+ unsigned EntriesInUse; // Micro-ops added on dispatch and not yet removed by a retire.
+ unsigned MaxUsedEntries; // Largest EntriesInUse value sampled at a cycle end.
+ unsigned SumOfUsedEntries; // Sum of EntriesInUse over all cycle ends; yields the per-cycle average.
+
+public:
+ RetireControlUnitStatistics(const MCSchedModel &SM);
+
+ void onEvent(const HWInstructionEvent &Event) override; // Reacts to Dispatched and Retired events only.
+ void onCycleEnd() override; // Updates the histogram and the occupancy samples.
+ void printView(llvm::raw_ostream &OS) const override; // Writes the report shown in the example at the top of this file.
+ StringRef getNameAsString() const override {
+ return "RetireControlUnitStatistics";
+ }
+ bool isSerializable() const override { return false; } // This view always reports itself as not serializable.
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp
new file mode 100644
index 000000000000..43f8b62864af
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp
@@ -0,0 +1,177 @@
+//===--------------------- SchedulerStatistics.cpp --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the SchedulerStatistics interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/SchedulerStatistics.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+
+namespace llvm {
+namespace mca {
+
+/// Sets up one zeroed usage record per processor resource kind and reads the
+/// load/store queue resource IDs from the extra processor info, if present.
+/// Both IDs stay zero when the model has no extra processor info.
+SchedulerStatistics::SchedulerStatistics(const llvm::MCSubtargetInfo &STI)
+    : SM(STI.getSchedModel()), LQResourceID(0), SQResourceID(0), NumIssued(0),
+      NumCycles(0), MostRecentLoadDispatched(~0U),
+      MostRecentStoreDispatched(~0U),
+      Usage(STI.getSchedModel().NumProcResourceKinds, {0, 0, 0}) {
+  if (!SM.hasExtraProcessorInfo())
+    return;
+
+  const MCExtraProcessorInfo &Extra = SM.getExtraProcessorInfo();
+  LQResourceID = Extra.LoadQueueID;
+  SQResourceID = Extra.StoreQueueID;
+}
+
+// FIXME: This implementation works under the assumption that load/store queue
+// entries are reserved at 'instruction dispatched' stage, and released at
+// 'instruction executed' stage. This currently matches the behavior of LSUnit.
+//
+// The current design minimizes the number of events generated by the
+// Dispatch/Execute stages, at the cost of doing extra bookkeeping in method
+// `onEvent`. However, it introduces a subtle dependency between this view and
+// how the LSUnit works.
+//
+// In future we should add a new "memory queue" event type, so that we stop
+// making assumptions on how LSUnit internally works (See PR39828).
+/// Updates the issue counter and the load/store queue occupancy.
+///
+/// - Issued: adds the instruction's micro-op count to NumIssued.
+/// - Dispatched: takes a load (store) queue slot for an instruction that may
+///   load (store), unless its source index equals that of the most recent
+///   load (store) dispatch.
+/// - Executed: gives the slot(s) back.
+/// A queue is only tracked when its resource ID is non-zero.
+void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) {
+  if (Event.Type == HWInstructionEvent::Issued) {
+    NumIssued += Event.IR.getInstruction()->getDesc().NumMicroOps;
+    return;
+  }
+
+  if (Event.Type == HWInstructionEvent::Dispatched) {
+    const Instruction &Inst = *Event.IR.getInstruction();
+    const unsigned SrcIdx = Event.IR.getSourceIndex();
+    if (LQResourceID && Inst.getMayLoad() &&
+        MostRecentLoadDispatched != SrcIdx) {
+      ++Usage[LQResourceID].SlotsInUse;
+      MostRecentLoadDispatched = SrcIdx;
+    }
+    if (SQResourceID && Inst.getMayStore() &&
+        MostRecentStoreDispatched != SrcIdx) {
+      ++Usage[SQResourceID].SlotsInUse;
+      MostRecentStoreDispatched = SrcIdx;
+    }
+    return;
+  }
+
+  if (Event.Type != HWInstructionEvent::Executed)
+    return;
+
+  const Instruction &Inst = *Event.IR.getInstruction();
+  if (LQResourceID && Inst.getMayLoad()) {
+    assert(Usage[LQResourceID].SlotsInUse);
+    --Usage[LQResourceID].SlotsInUse;
+  }
+  if (SQResourceID && Inst.getMayStore()) {
+    assert(Usage[SQResourceID].SlotsInUse);
+    --Usage[SQResourceID].SlotsInUse;
+  }
+}
+
+/// Takes one slot in every listed buffer. The load and store queue IDs are
+/// skipped here because onEvent() accounts for those queues.
+void SchedulerStatistics::onReservedBuffers(const InstRef & /* unused */,
+                                            ArrayRef<unsigned> Buffers) {
+  for (const unsigned ResID : Buffers) {
+    const bool IsMemoryQueue = ResID == LQResourceID || ResID == SQResourceID;
+    if (!IsMemoryQueue)
+      ++Usage[ResID].SlotsInUse;
+  }
+}
+
+/// Gives back one slot in every listed buffer. The load and store queue IDs
+/// are skipped here because onEvent() accounts for those queues.
+void SchedulerStatistics::onReleasedBuffers(const InstRef & /* unused */,
+                                            ArrayRef<unsigned> Buffers) {
+  for (const unsigned ResID : Buffers) {
+    const bool IsMemoryQueue = ResID == LQResourceID || ResID == SQResourceID;
+    if (!IsMemoryQueue)
+      --Usage[ResID].SlotsInUse;
+  }
+}
+
+/// Samples every buffer's occupancy (running sum and peak) and files the
+/// number of micro-ops issued since the previous call into the issue-width
+/// histogram, then resets that counter.
+void SchedulerStatistics::updateHistograms() {
+  for (BufferUsage &Slots : Usage) {
+    Slots.CumulativeNumUsedSlots += Slots.SlotsInUse;
+    if (Slots.SlotsInUse > Slots.MaxUsedSlots)
+      Slots.MaxUsedSlots = Slots.SlotsInUse;
+  }
+
+  ++IssueWidthPerCycle[NumIssued];
+  NumIssued = 0;
+}
+
+/// Prints the issue-width histogram: for every observed number of micro-ops
+/// issued in a cycle, the number of such cycles and their share of all
+/// cycles. On color-capable streams the row selected by llvm::max_element is
+/// emphasized.
+void SchedulerStatistics::printSchedulerStats(raw_ostream &OS) const {
+  OS << "\n\nSchedulers - "
+     << "number of cycles where we saw N micro opcodes issued:\n";
+  OS << "[# issued], [# cycles]\n";
+
+  const bool UseColors = OS.has_colors();
+  // Map entries compare as (key, value) pairs, so this is the entry with the
+  // largest issue width. It is only dereferenced inside the loop, i.e. when
+  // the histogram is not empty.
+  const auto Widest = llvm::max_element(IssueWidthPerCycle);
+  for (const std::pair<const unsigned, unsigned> &Row : IssueWidthPerCycle) {
+    const unsigned Width = Row.first;
+    const unsigned Cycles = Row.second;
+    if (UseColors && Width == Widest->first)
+      OS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
+
+    OS << " " << Width << ", " << Cycles << " ("
+       << format("%.1f", ((double)Cycles / NumCycles) * 100) << "%)\n";
+    if (UseColors)
+      OS.resetColor();
+  }
+}
+
+/// Prints the scheduler queue usage table: for every resource with a positive
+/// buffer size, the average and maximum number of used entries and the buffer
+/// size itself.
+///
+/// On color-capable streams the average is highlighted in yellow once it
+/// reaches four fifths of the buffer size, and the maximum in red when it
+/// equals the buffer size. If no buffer was ever used, a one-line notice is
+/// printed instead of the table.
+void SchedulerStatistics::printSchedulerUsage(raw_ostream &OS) const {
+  assert(NumCycles && "Unexpected number of cycles!");
+
+  OS << "\nScheduler's queue usage:\n";
+  const bool NothingUsed =
+      all_of(Usage, [](const BufferUsage &BU) { return !BU.MaxUsedSlots; });
+  if (NothingUsed) {
+    OS << "No scheduler resources used.\n";
+    return;
+  }
+
+  OS << "[1] Resource name.\n"
+     << "[2] Average number of used buffer entries.\n"
+     << "[3] Maximum number of used buffer entries.\n"
+     << "[4] Total number of buffer entries.\n\n"
+     << " [1] [2] [3] [4]\n";
+
+  formatted_raw_ostream Table(OS);
+  const bool UseColors = Table.has_colors();
+  const unsigned NumKinds = SM.getNumProcResourceKinds();
+  for (unsigned ResID = 0; ResID != NumKinds; ++ResID) {
+    const MCProcResourceDesc &Desc = *SM.getProcResource(ResID);
+    // Resources without a positive buffer size have no row.
+    if (Desc.BufferSize > 0) {
+      const BufferUsage &Slots = Usage[ResID];
+      double Mean = (double)Slots.CumulativeNumUsedSlots / NumCycles;
+      double NearlyFull = (double)(Desc.BufferSize * 4) / 5;
+      // Round to one decimal, then truncate to the integer that is shown.
+      unsigned ShownMean = floor((Mean * 10) + 0.5) / 10;
+      unsigned ShownLimit = floor((NearlyFull * 10) + 0.5) / 10;
+
+      Table << Desc.Name;
+      Table.PadToColumn(17);
+      if (UseColors && ShownMean >= ShownLimit)
+        Table.changeColor(raw_ostream::YELLOW, true, false);
+      Table << ShownMean;
+      if (UseColors)
+        Table.resetColor();
+      Table.PadToColumn(28);
+      if (UseColors &&
+          Slots.MaxUsedSlots == static_cast<unsigned>(Desc.BufferSize))
+        Table.changeColor(raw_ostream::RED, true, false);
+      Table << Slots.MaxUsedSlots;
+      if (UseColors)
+        Table.resetColor();
+      Table.PadToColumn(39);
+      Table << Desc.BufferSize << '\n';
+    }
+  }
+
+  Table.flush();
+}
+
+void SchedulerStatistics::printView(raw_ostream &OS) const { // Prints the two report sections in order.
+ printSchedulerStats(OS); // Issue-width histogram.
+ printSchedulerUsage(OS); // Scheduler queue usage table.
+}
+
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/SchedulerStatistics.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/SchedulerStatistics.h
new file mode 100644
index 000000000000..9d2f71c13e5a
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/SchedulerStatistics.h
@@ -0,0 +1,97 @@
+//===--------------------- SchedulerStatistics.h ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines class SchedulerStatistics. Class SchedulerStatistics is a
+/// View that listens to instruction issue events in order to print general
+/// statistics related to the hardware schedulers.
+///
+/// Example:
+/// ========
+///
+/// Schedulers - number of cycles where we saw N micro opcodes issued:
+/// [# issued], [# cycles]
+/// 0, 6 (2.9%)
+/// 1, 106 (50.7%)
+/// 2, 97 (46.4%)
+///
+/// Scheduler's queue usage:
+/// [1] Resource name.
+/// [2] Average number of used buffer entries.
+/// [3] Maximum number of used buffer entries.
+/// [4] Total number of buffer entries.
+///
+/// [1] [2] [3] [4]
+/// JALU01 0 0 20
+/// JFPU01 15 18 18
+/// JLSAGU 0 0 12
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H
+#define LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/View.h"
+#include <map>
+
+namespace llvm {
+namespace mca {
+
+class SchedulerStatistics final : public View { // View that gathers and prints hardware scheduler statistics.
+ const llvm::MCSchedModel &SM; // Scheduling model of the subtarget passed to the constructor.
+ unsigned LQResourceID; // Load queue resource ID; zero means the load queue is not tracked.
+ unsigned SQResourceID; // Store queue resource ID; zero means the store queue is not tracked.
+
+ unsigned NumIssued; // Micro-ops issued since the last histogram update.
+ unsigned NumCycles; // Incremented by every onCycleBegin().
+
+ unsigned MostRecentLoadDispatched; // Source index of the last dispatch that took a load queue slot (~0U initially).
+ unsigned MostRecentStoreDispatched; // Source index of the last dispatch that took a store queue slot (~0U initially).
+
+ // Tracks the usage of a scheduler's queue.
+ struct BufferUsage {
+ unsigned SlotsInUse; // Slots currently taken.
+ unsigned MaxUsedSlots; // Largest SlotsInUse sampled by updateHistograms().
+ uint64_t CumulativeNumUsedSlots; // Sum of the sampled SlotsInUse values; yields the average.
+ };
+
+ using Histogram = std::map<unsigned, unsigned>;
+ Histogram IssueWidthPerCycle; // Maps "micro-ops issued in one cycle" to the number of such cycles.
+
+ std::vector<BufferUsage> Usage; // One record per processor resource kind, indexed by resource ID.
+
+ void updateHistograms(); // Samples all buffers and files NumIssued into the histogram.
+ void printSchedulerStats(llvm::raw_ostream &OS) const; // Prints the issue-width histogram.
+ void printSchedulerUsage(llvm::raw_ostream &OS) const; // Prints the queue usage table.
+
+public:
+ SchedulerStatistics(const llvm::MCSubtargetInfo &STI);
+ void onEvent(const HWInstructionEvent &Event) override; // Handles Issued, Dispatched and Executed events.
+ void onCycleBegin() override { NumCycles++; }
+ void onCycleEnd() override { updateHistograms(); }
+
+ // Increases by one the number of used scheduler queue slots of every
+ // resource in the Buffers set, except the load and store queue IDs.
+ void onReservedBuffers(const InstRef &IR,
+ llvm::ArrayRef<unsigned> Buffers) override;
+
+ // Decreases by one the number of used scheduler queue slots of every
+ // resource in the Buffers set, except the load and store queue IDs.
+ void onReleasedBuffers(const InstRef &IR,
+ llvm::ArrayRef<unsigned> Buffers) override;
+
+ void printView(llvm::raw_ostream &OS) const override; // Prints the histogram, then the usage table.
+ StringRef getNameAsString() const override { return "SchedulerStatistics"; }
+ bool isSerializable() const override { return false; } // This view always reports itself as not serializable.
+};
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.cpp
new file mode 100644
index 000000000000..bf258b4c26b1
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.cpp
@@ -0,0 +1,113 @@
+//===--------------------- SummaryView.cpp ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the functionalities used by the SummaryView to print
+/// the report information.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/SummaryView.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/Support.h"
+#include "llvm/Support/Format.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
+/// Initializes the counters and builds the table that maps a resource state
+/// index back to its processor resource ID.
+///
+/// A zero \p Width selects the model's IssueWidth as the dispatch width.
+SummaryView::SummaryView(const MCSchedModel &Model, ArrayRef<MCInst> S,
+                         unsigned Width)
+    : SM(Model), Source(S), DispatchWidth(Width ? Width : Model.IssueWidth),
+      LastInstructionIdx(0), TotalCycles(0), NumMicroOps(0),
+      ProcResourceUsage(Model.getNumProcResourceKinds(), 0),
+      ProcResourceMasks(Model.getNumProcResourceKinds()),
+      ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0) {
+  computeProcResourceMasks(SM, ProcResourceMasks);
+
+  // Resource ID 0 is skipped; every other ID is filed under the state index
+  // derived from its mask.
+  const unsigned NumKinds = SM.getNumProcResourceKinds();
+  for (unsigned ProcResID = 1; ProcResID < NumKinds; ++ProcResID) {
+    const unsigned StateIdx =
+        getResourceStateIndex(ProcResourceMasks[ProcResID]);
+    ResIdx2ProcResID[StateIdx] = ProcResID;
+  }
+}
+
+/// Tracks the last dispatched instruction and, for instructions of iteration
+/// #0 that retire, accumulates micro-op and resource-cycle totals.
+void SummaryView::onEvent(const HWInstructionEvent &Event) {
+  if (Event.Type == HWInstructionEvent::Dispatched)
+    LastInstructionIdx = Event.IR.getSourceIndex();
+
+  // Only "instruction retired" events matter beyond this point ...
+  if (Event.Type != HWInstructionEvent::Retired)
+    return;
+  // ... and only for instructions that belong to iteration #0.
+  if (Event.IR.getSourceIndex() >= Source.size())
+    return;
+
+  // Add the instruction's micro-ops and, for every resource it consumes, the
+  // consumed cycles as listed in the instruction descriptor.
+  const InstrDesc &Desc = Event.IR.getInstruction()->getDesc();
+  NumMicroOps += Desc.NumMicroOps;
+  for (const std::pair<uint64_t, ResourceUsage> &Use : Desc.Resources) {
+    if (!Use.second.size())
+      continue;
+    const unsigned StateIdx = getResourceStateIndex(Use.first);
+    ProcResourceUsage[ResIdx2ProcResID[StateIdx]] += Use.second.size();
+  }
+}
+
+/// Prints the summary report: iteration, instruction, cycle and uOp totals,
+/// then the dispatch width and the derived throughput figures.
+///
+/// uOps per cycle and IPC are rounded to two decimals, the block reciprocal
+/// throughput to one.
+void SummaryView::printView(raw_ostream &OS) const {
+  DisplayValues DV;
+  collectData(DV);
+
+  // Rounds Value half-up at the decimal position selected by Scale.
+  const auto RoundAt = [](double Value, int Scale) {
+    return floor((Value * Scale) + 0.5) / Scale;
+  };
+
+  std::string Buffer;
+  raw_string_ostream TempStream(Buffer);
+  TempStream << "Iterations: " << DV.Iterations
+             << "\nInstructions: " << DV.TotalInstructions
+             << "\nTotal Cycles: " << DV.TotalCycles
+             << "\nTotal uOps: " << DV.TotalUOps << '\n'
+             << "\nDispatch Width: " << DV.DispatchWidth
+             << "\nuOps Per Cycle: "
+             << format("%.2f", RoundAt(DV.UOpsPerCycle, 100))
+             << "\nIPC: " << format("%.2f", RoundAt(DV.IPC, 100))
+             << "\nBlock RThroughput: "
+             << format("%.1f", RoundAt(DV.BlockRThroughput, 10)) << '\n';
+  TempStream.flush();
+  OS << Buffer;
+}
+
+/// Fills \p DV with the values shown by printView() and toJSON().
+///
+/// The iteration count is derived from the index of the last dispatched
+/// instruction; instruction and uOp totals scale the per-block figures by it.
+void SummaryView::collectData(DisplayValues &DV) const {
+  // Values copied straight from the view's state.
+  DV.TotalCycles = TotalCycles;
+  DV.DispatchWidth = DispatchWidth;
+
+  // Per-block figures scaled by the number of iterations.
+  DV.Instructions = Source.size();
+  DV.Iterations = LastInstructionIdx / DV.Instructions + 1;
+  DV.TotalInstructions = DV.Iterations * DV.Instructions;
+  DV.TotalUOps = DV.Iterations * NumMicroOps;
+
+  // Throughput figures.
+  DV.UOpsPerCycle = static_cast<double>(DV.TotalUOps) / TotalCycles;
+  DV.IPC = static_cast<double>(DV.TotalInstructions) / TotalCycles;
+  DV.BlockRThroughput = computeBlockRThroughput(SM, DispatchWidth, NumMicroOps,
+                                                ProcResourceUsage);
+}
+
+/// Returns the summary as a JSON object holding the same values that
+/// printView() reports. Note that "Instructions" carries the total number of
+/// instructions across all iterations.
+json::Value SummaryView::toJSON() const {
+  DisplayValues Values;
+  collectData(Values);
+  json::Object Summary({{"Iterations", Values.Iterations},
+                        {"Instructions", Values.TotalInstructions},
+                        {"TotalCycles", Values.TotalCycles},
+                        {"TotaluOps", Values.TotalUOps},
+                        {"DispatchWidth", Values.DispatchWidth},
+                        {"uOpsPerCycle", Values.UOpsPerCycle},
+                        {"IPC", Values.IPC},
+                        {"BlockRThroughput", Values.BlockRThroughput}});
+  return Summary;
+}
+} // namespace mca.
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.h
new file mode 100644
index 000000000000..c99905dd3e8b
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/SummaryView.h
@@ -0,0 +1,89 @@
+//===--------------------- SummaryView.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the summary view.
+///
+/// The goal of the summary view is to give a very quick overview of the
+/// performance throughput. Below is an example of summary view:
+///
+///
+/// Iterations: 300
+/// Instructions: 900
+/// Total Cycles: 610
+/// Total uOps: 900
+///
+/// Dispatch Width: 2
+/// uOps Per Cycle: 1.48
+/// IPC: 1.48
+/// Block RThroughput: 2.0
+///
+/// The summary view collects a few performance numbers. The two main
+/// performance indicators are 'Total Cycles' and IPC (Instructions Per Cycle).
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H
+
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/View.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+/// A view that collects and prints a few performance numbers.
+class SummaryView : public View {
+ const llvm::MCSchedModel &SM; // Passed to computeBlockRThroughput().
+ llvm::ArrayRef<llvm::MCInst> Source; // Its size is the per-iteration instruction count.
+ const unsigned DispatchWidth; // Reported as "Dispatch Width".
+ unsigned LastInstructionIdx; // Divided by the block size to derive Iterations.
+ unsigned TotalCycles; // Incremented by every onCycleEnd() notification.
+ // The total number of micro opcodes contributed by a block of instructions.
+ unsigned NumMicroOps;
+
+ struct DisplayValues { // Filled by collectData(); read by printView() and toJSON().
+ unsigned Instructions; // Number of instructions in the code block.
+ unsigned Iterations;
+ unsigned TotalInstructions; // Instructions * Iterations.
+ unsigned TotalCycles;
+ unsigned DispatchWidth;
+ unsigned TotalUOps; // NumMicroOps * Iterations.
+ double IPC; // TotalInstructions / TotalCycles.
+ double UOpsPerCycle; // TotalUOps / TotalCycles.
+ double BlockRThroughput; // Result of computeBlockRThroughput().
+ };
+
+ // For each processor resource, this vector stores the cumulative number of
+ // resource cycles consumed by the analyzed code block.
+ llvm::SmallVector<unsigned, 8> ProcResourceUsage;
+
+ // Each processor resource is associated with a so-called processor resource
+ // mask. This vector allows to correlate processor resource IDs with processor
+ // resource masks. There is exactly one element per each processor resource
+ // declared by the scheduling model.
+ llvm::SmallVector<uint64_t, 8> ProcResourceMasks;
+
+ // Used to map resource indices to actual processor resource IDs.
+ llvm::SmallVector<unsigned, 8> ResIdx2ProcResID;
+
+ /// Compute the data we want to print out in the object DV.
+ void collectData(DisplayValues &DV) const;
+
+public:
+ SummaryView(const llvm::MCSchedModel &Model, llvm::ArrayRef<llvm::MCInst> S,
+ unsigned Width);
+
+ void onCycleEnd() override { ++TotalCycles; }
+ void onEvent(const HWInstructionEvent &Event) override;
+ void printView(llvm::raw_ostream &OS) const override;
+ StringRef getNameAsString() const override { return "SummaryView"; }
+ json::Value toJSON() const override; // Same metrics as printView(), as a JSON object.
+};
+} // namespace mca
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp b/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp
new file mode 100644
index 000000000000..2eca48aadfd7
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp
@@ -0,0 +1,332 @@
+//===--------------------- TimelineView.cpp ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the TimelineView interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/TimelineView.h"
+#include <numeric>
+
+namespace llvm {
+namespace mca {
+
+TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer,
+ llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations,
+ unsigned Cycles)
+ : InstructionView(sti, Printer, S), CurrentCycle(0),
+ MaxCycle(Cycles == 0 ? std::numeric_limits<unsigned>::max() : Cycles),
+ LastCycle(0), WaitTime(S.size()), UsedBuffer(S.size()) {
+ unsigned NumInstructions = getSource().size();
+ assert(Iterations && "Invalid number of iterations specified!");
+ NumInstructions *= Iterations;
+ Timeline.resize(NumInstructions);
+ TimelineViewEntry InvalidTVEntry = {-1, 0, 0, 0, 0};
+ std::fill(Timeline.begin(), Timeline.end(), InvalidTVEntry);
+
+ WaitTimeEntry NullWTEntry = {0, 0, 0};
+ std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry);
+
+ std::pair<unsigned, int> NullUsedBufferEntry = {/* Invalid resource ID*/ 0,
+ /* unknown buffer size */ -1};
+ std::fill(UsedBuffer.begin(), UsedBuffer.end(), NullUsedBufferEntry);
+}
+
+/// Remember which buffered resource is associated with the instruction
+/// referenced by \p IR.
+///
+/// Among \p Buffers, the resource with the smallest buffer size is stored in
+/// UsedBuffer. Nothing is recorded when the source index of \p IR is not
+/// smaller than the size of the code block.
+void TimelineView::onReservedBuffers(const InstRef &IR,
+                                     ArrayRef<unsigned> Buffers) {
+  const unsigned SourceIndex = IR.getSourceIndex();
+  if (SourceIndex >= getSource().size())
+    return;
+
+  const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
+  // A resource ID of zero means that no buffer has been selected yet.
+  unsigned SelectedID = 0;
+  int SelectedSize = -1;
+  for (const unsigned ResourceID : Buffers) {
+    const MCProcResourceDesc &Desc = *SM.getProcResource(ResourceID);
+    // Keep the current selection unless this buffer is strictly smaller.
+    if (SelectedID && SelectedSize <= Desc.BufferSize)
+      continue;
+    SelectedID = ResourceID;
+    SelectedSize = Desc.BufferSize;
+  }
+
+  UsedBuffer[SourceIndex] = std::pair<unsigned, int>(SelectedID, SelectedSize);
+}
+
+/// Record the cycle at which an instruction changed state.
+///
+/// Dispatched, Ready, Issued, Executed and Retired events update the timeline
+/// entry of the instruction; a Retired event also updates the cumulative wait
+/// times of the corresponding source instruction. Any other event type, and
+/// any event whose index lies outside the timeline, is ignored.
+void TimelineView::onEvent(const HWInstructionEvent &Event) {
+  const unsigned Index = Event.IR.getSourceIndex();
+  if (Index >= Timeline.size())
+    return;
+
+  TimelineViewEntry &Entry = Timeline[Index];
+  switch (Event.Type) {
+  case HWInstructionEvent::Dispatched:
+    // There may be multiple dispatch events. Microcoded instructions that are
+    // expanded into multiple uOps may require multiple dispatch cycles. Only
+    // the first dispatch cycle is recorded.
+    if (Entry.CycleDispatched == -1)
+      Entry.CycleDispatched = static_cast<int>(CurrentCycle);
+    break;
+  case HWInstructionEvent::Ready:
+    Entry.CycleReady = CurrentCycle;
+    break;
+  case HWInstructionEvent::Issued:
+    Entry.CycleIssued = CurrentCycle;
+    break;
+  case HWInstructionEvent::Executed:
+    Entry.CycleExecuted = CurrentCycle;
+    break;
+  case HWInstructionEvent::Retired: {
+    // The retire cycle is only recorded below the cycle limit.
+    if (CurrentCycle < MaxCycle)
+      Entry.CycleRetired = CurrentCycle;
+
+    // Accumulate the wait times of the source instruction this entry is an
+    // instance of.
+    assert(Entry.CycleDispatched >= 0 && "Invalid TVEntry found!");
+    const unsigned DispatchCycle = static_cast<unsigned>(Entry.CycleDispatched);
+    WaitTimeEntry &Totals = WaitTime[Index % getSource().size()];
+    Totals.CyclesSpentInSchedulerQueue += Entry.CycleIssued - DispatchCycle;
+    assert(DispatchCycle <= Entry.CycleReady &&
+           "Instruction cannot be ready if it hasn't been dispatched yet!");
+    Totals.CyclesSpentInSQWhileReady += Entry.CycleIssued - Entry.CycleReady;
+    if (CurrentCycle > Entry.CycleExecuted)
+      Totals.CyclesSpentAfterWBAndBeforeRetire +=
+          (CurrentCycle - 1) - Entry.CycleExecuted;
+    break;
+  }
+  default:
+    return;
+  }
+
+  // Track the last cycle, below the limit, at which something happened.
+  if (CurrentCycle < MaxCycle)
+    LastCycle = std::max(LastCycle, CurrentCycle);
+}
+
+/// Select the color used to print a cumulative wait time.
+///
+/// \returns MAGENTA when cycles were accumulated and \p BufferSize is
+/// negative; RED when \p CumulativeCycles is at least BufferSize * Executions;
+/// YELLOW when twice \p CumulativeCycles is at least that product; SAVEDCOLOR
+/// otherwise.
+static raw_ostream::Colors chooseColor(unsigned CumulativeCycles,
+                                       unsigned Executions, int BufferSize) {
+  if (BufferSize < 0 && CumulativeCycles != 0)
+    return raw_ostream::MAGENTA;
+
+  // The product and the comparisons below use unsigned arithmetic.
+  const unsigned Threshold = static_cast<unsigned>(BufferSize) * Executions;
+  if (CumulativeCycles >= Threshold)
+    return raw_ostream::RED;
+  if (CumulativeCycles * 2 >= Threshold)
+    return raw_ostream::YELLOW;
+  return raw_ostream::SAVEDCOLOR;
+}
+
+/// Switch \p OS to the color selected by chooseColor() for the given values.
+///
+/// Does nothing when \p OS reports that it has no colors. When the selected
+/// color is SAVEDCOLOR the stream color is reset instead.
+static void tryChangeColor(raw_ostream &OS, unsigned Cycles,
+                           unsigned Executions, int BufferSize) {
+  if (!OS.has_colors())
+    return;
+
+  const raw_ostream::Colors Selected =
+      chooseColor(Cycles, Executions, BufferSize);
+  if (Selected != raw_ostream::SAVEDCOLOR)
+    OS.changeColor(Selected, /* bold */ true, /* BG */ false);
+  else
+    OS.resetColor();
+}
+
+/// Print one row of the "Average Wait times" table.
+///
+/// \param Entry cumulative wait times to be averaged.
+/// \param SourceIndex index of the instruction in the code block. A value
+///        equal to the size of the code block selects the totals row, which
+///        is averaged over the whole timeline and printed without colors.
+/// \param Executions value printed in column [0]; also the divisor used for
+///        the averages of a non-totals row.
+void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
+                                      const WaitTimeEntry &Entry,
+                                      unsigned SourceIndex,
+                                      unsigned Executions) const {
+  const bool IsTotalsRow = SourceIndex == getSource().size();
+  const unsigned Divisor = IsTotalsRow ? Timeline.size() : Executions;
+
+  // Print the average of Cycles over Divisor, rounded to one decimal digit.
+  // Rows that describe a single instruction are colored first.
+  auto PrintAverage = [&](unsigned Cycles, int BufferSize) {
+    if (!IsTotalsRow)
+      tryChangeColor(OS, Cycles, Divisor, BufferSize);
+    const double TenTimesAverage = static_cast<double>(Cycles * 10) / Divisor;
+    OS << format("%.1f", floor(TenTimesAverage + 0.5) / 10);
+  };
+
+  if (!IsTotalsRow)
+    OS << SourceIndex << '.';
+  OS.PadToColumn(7);
+
+  OS << Executions;
+  OS.PadToColumn(13);
+
+  const int QueueSize = IsTotalsRow ? 0 : UsedBuffer[SourceIndex].second;
+  PrintAverage(Entry.CyclesSpentInSchedulerQueue, QueueSize);
+  OS.PadToColumn(20);
+  PrintAverage(Entry.CyclesSpentInSQWhileReady, QueueSize);
+  OS.PadToColumn(27);
+  PrintAverage(Entry.CyclesSpentAfterWBAndBeforeRetire,
+               getSubTargetInfo().getSchedModel().MicroOpBufferSize);
+
+  if (OS.has_colors())
+    OS.resetColor();
+  OS.PadToColumn(34);
+}
+
+/// Print the "Average Wait times" table: one row per instruction of the code
+/// block, followed by a "<total>" row unless the block contains exactly one
+/// instruction.
+void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
+  OS << "\n\nAverage Wait times (based on the timeline view):\n"
+        "[0]: Executions\n"
+        "[1]: Average time spent waiting in a scheduler's queue\n"
+        "[2]: Average time spent waiting in a scheduler's queue while ready\n"
+        "[3]: Average time elapsed from WB until retire stage\n\n"
+        " [0] [1] [2] [3]\n";
+
+  formatted_raw_ostream FOS(OS);
+  ArrayRef<llvm::MCInst> Source = getSource();
+  const unsigned Executions = Timeline.size() / Source.size();
+
+  unsigned IID = 0;
+  for (const unsigned E = Source.size(); IID != E; ++IID) {
+    printWaitTimeEntry(FOS, WaitTime[IID], IID, Executions);
+    FOS << " " << printInstructionString(Source[IID]) << '\n';
+    FOS.flush();
+  }
+
+  // Global averages are only meaningful when there is more than one row.
+  if (Source.size() == 1)
+    return;
+
+  WaitTimeEntry Total = {0, 0, 0};
+  for (const WaitTimeEntry &WT : WaitTime) {
+    Total.CyclesSpentInSchedulerQueue += WT.CyclesSpentInSchedulerQueue;
+    Total.CyclesSpentInSQWhileReady += WT.CyclesSpentInSQWhileReady;
+    Total.CyclesSpentAfterWBAndBeforeRetire +=
+        WT.CyclesSpentAfterWBAndBeforeRetire;
+  }
+  // IID is now equal to the block size, which selects the totals row.
+  printWaitTimeEntry(FOS, Total, IID, Executions);
+  FOS << " "
+      << "<total>" << '\n';
+  FOS.flush();
+}
+
+/// Print the timeline row of a single instruction instance.
+///
+/// The row starts with the "[Iteration,SourceIndex]" label, followed by one
+/// character per cycle up to and including LastCycle. A blank line is printed
+/// ahead of the very first row.
+void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS,
+                                          const TimelineViewEntry &Entry,
+                                          unsigned Iteration,
+                                          unsigned SourceIndex) const {
+  // Emit Mark once for every cycle in the half-open range [First, End).
+  auto Repeat = [&OS](char Mark, unsigned First, unsigned End) {
+    for (unsigned Cycle = First; Cycle < End; ++Cycle)
+      OS << Mark;
+  };
+
+  if (Iteration == 0 && SourceIndex == 0)
+    OS << '\n';
+  OS << '[' << Iteration << ',' << SourceIndex << ']';
+  OS.PadToColumn(10);
+
+  assert(Entry.CycleDispatched >= 0 && "Invalid TimelineViewEntry!");
+  const unsigned Dispatched = static_cast<unsigned>(Entry.CycleDispatched);
+
+  // Cycles that precede the dispatch: a dot every fifth column.
+  for (unsigned Cycle = 0; Cycle < Dispatched; ++Cycle)
+    OS << ((Cycle % 5 == 0) ? '.' : ' ');
+  OS << DisplayChar::Dispatched;
+
+  // Zero latency instructions have the same value for CycleDispatched,
+  // CycleIssued and CycleExecuted; nothing else is printed for them here.
+  if (Dispatched != Entry.CycleExecuted) {
+    Repeat(DisplayChar::Waiting, Dispatched + 1, Entry.CycleIssued);
+    if (Entry.CycleIssued != Entry.CycleExecuted) {
+      // The issue cycle shares its column with the dispatch marker when both
+      // happened during the same cycle.
+      if (Dispatched != Entry.CycleIssued)
+        OS << DisplayChar::Executing;
+      Repeat(DisplayChar::Executing, Entry.CycleIssued + 1,
+             Entry.CycleExecuted);
+    }
+    OS << DisplayChar::Executed;
+  }
+
+  Repeat(DisplayChar::RetireLag, Entry.CycleExecuted + 1, Entry.CycleRetired);
+  if (Entry.CycleExecuted < Entry.CycleRetired)
+    OS << DisplayChar::Retired;
+
+  // Fill the remaining columns; the last one always gets a dot.
+  for (unsigned Cycle = Entry.CycleRetired + 1; Cycle <= LastCycle; ++Cycle)
+    OS << ((Cycle % 5 == 0 || Cycle == LastCycle) ? '.' : ' ');
+}
+
+/// Print the title and the cycle ruler of the timeline view.
+///
+/// The ruler covers the cycles in [0, Cycles]. Digits of even decades are
+/// printed on the "Index" row; digits of odd decades go on a row above it,
+/// which is only printed when \p Cycles is at least 10.
+static void printTimelineHeader(formatted_raw_ostream &OS, unsigned Cycles) {
+  // Print one character per cycle: the last decimal digit of the cycle when
+  // the parity of its decade matches OddDecades, a blank otherwise.
+  auto PrintDigitRow = [&OS, Cycles](bool OddDecades) {
+    for (unsigned Cycle = 0; Cycle <= Cycles; ++Cycle) {
+      const bool InOddDecade = ((Cycle / 10) & 1) != 0;
+      if (InOddDecade == OddDecades)
+        OS << Cycle % 10;
+      else
+        OS << ' ';
+    }
+    OS << '\n';
+  };
+
+  OS << "\n\nTimeline view:\n";
+  if (Cycles >= 10) {
+    OS.PadToColumn(10);
+    PrintDigitRow(/* OddDecades */ true);
+  }
+
+  OS << "Index";
+  OS.PadToColumn(10);
+  PrintDigitRow(/* OddDecades */ false);
+}
+
+/// Print the timeline table: the header followed by one row per instruction
+/// instance, in iteration order. Printing stops with a notice at the first
+/// entry that was cut off by the cycle limit.
+void TimelineView::printTimeline(raw_ostream &OS) const {
+  formatted_raw_ostream FOS(OS);
+  printTimelineHeader(FOS, LastCycle);
+  FOS.flush();
+
+  ArrayRef<llvm::MCInst> Source = getSource();
+  const size_t BlockSize = Source.size();
+  const unsigned Iterations = Timeline.size() / BlockSize;
+  for (unsigned Iteration = 0; Iteration != Iterations; ++Iteration) {
+    for (unsigned SourceIndex = 0; SourceIndex != BlockSize; ++SourceIndex) {
+      const TimelineViewEntry &Entry =
+          Timeline[Iteration * BlockSize + SourceIndex];
+      // When an instruction is retired after timeline-max-cycles, its
+      // CycleRetired is left at 0. A 0 latency instruction may legitimately
+      // retire during cycle 0, though, so CycleExecuted (which is recorded
+      // regardless of the limit) is also checked before giving up.
+      const bool Truncated =
+          Entry.CycleRetired == 0 && Entry.CycleExecuted != 0;
+      if (Truncated) {
+        FOS << "Truncated display due to cycle limit\n";
+        return;
+      }
+
+      printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex);
+      FOS << " " << printInstructionString(Source[SourceIndex]) << '\n';
+      FOS.flush();
+    }
+  }
+}
+
+/// Export the timeline as a JSON object with a single "TimelineInfo" array.
+///
+/// Each array element holds the five recorded cycles of one instruction
+/// instance. The export stops at the first entry that was cut off by the
+/// cycle limit.
+json::Value TimelineView::toJSON() const {
+  json::Array Entries;
+
+  for (const TimelineViewEntry &Entry : Timeline) {
+    // An executed entry without a retire cycle means that
+    // timeline-max-cycles has been reached.
+    if (Entry.CycleRetired == 0 && Entry.CycleExecuted != 0)
+      break;
+
+    json::Object Cycles({{"CycleDispatched", Entry.CycleDispatched},
+                         {"CycleReady", Entry.CycleReady},
+                         {"CycleIssued", Entry.CycleIssued},
+                         {"CycleExecuted", Entry.CycleExecuted},
+                         {"CycleRetired", Entry.CycleRetired}});
+    Entries.push_back(std::move(Cycles));
+  }
+  return json::Object({{"TimelineInfo", std::move(Entries)}});
+}
+} // namespace mca
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.h b/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.h
new file mode 100644
index 000000000000..81be8244b779
--- /dev/null
+++ b/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.h
@@ -0,0 +1,188 @@
+//===--------------------- TimelineView.h -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements a timeline view for the llvm-mca tool.
+///
+/// Class TimelineView observes events generated by the pipeline. For every
+/// instruction executed by the pipeline, it stores information related to
+/// state transition. It then plots that information in the form of a table
+/// as reported by the example below:
+///
+/// Timeline view:
+/// 0123456
+/// Index 0123456789
+///
+/// [0,0] DeER . . .. vmovshdup %xmm0, %xmm1
+/// [0,1] DeER . . .. vpermilpd $1, %xmm0, %xmm2
+/// [0,2] .DeER. . .. vpermilps $231, %xmm0, %xmm5
+/// [0,3] .DeeeER . .. vaddss %xmm1, %xmm0, %xmm3
+/// [0,4] . D==eeeER. .. vaddss %xmm3, %xmm2, %xmm4
+/// [0,5] . D=====eeeER .. vaddss %xmm4, %xmm5, %xmm6
+///
+/// [1,0] . DeE------R .. vmovshdup %xmm0, %xmm1
+/// [1,1] . DeE------R .. vpermilpd $1, %xmm0, %xmm2
+/// [1,2] . DeE-----R .. vpermilps $231, %xmm0, %xmm5
+/// [1,3] . D=eeeE--R .. vaddss %xmm1, %xmm0, %xmm3
+/// [1,4] . D===eeeER .. vaddss %xmm3, %xmm2, %xmm4
+/// [1,5] . D======eeeER vaddss %xmm4, %xmm5, %xmm6
+///
+/// There is an entry for every instruction in the input assembly sequence.
+/// The first field is a pair of numbers obtained from the instruction index.
+/// The first element of the pair is the iteration index, while the second
+/// element of the pair is a sequence number (i.e. a position in the assembly
+/// sequence).
+/// The second field of the table is the actual timeline information; each
+/// column is the information related to a specific cycle of execution.
+/// The timeline of an instruction is described by a sequence of characters,
+/// where each character represents the instruction state at a specific cycle.
+///
+/// Possible instruction states are:
+/// D: Instruction Dispatched
+/// e: Instruction Executing
+/// E: Instruction Executed (write-back stage)
+/// R: Instruction retired
+/// =: Instruction waiting in the Scheduler's queue
+/// -: Instruction executed, waiting to retire in order.
+///
+/// dots ('.') and empty spaces are cycles where the instruction is not
+/// in-flight.
+///
+/// The last column is the assembly instruction associated to the entry.
+///
+/// Based on the timeline view information from the example, instruction 0
+/// at iteration 0 was dispatched at cycle 0, and was retired at cycle 3.
+/// Instruction [0,1] was also dispatched at cycle 0, and it retired at
+/// the same cycle as instruction [0,0].
+/// Instruction [0,4] has been dispatched at cycle 2. However, it had to
+/// wait for two cycles before being issued. That is because operands
+/// became ready only at cycle 5.
+///
+/// This view helps further understanding bottlenecks and the impact of
+/// resource pressure on the code.
+///
+/// To better understand why instructions had to wait for multiple cycles in
+/// the scheduler's queue, class TimelineView also reports extra timing info
+/// in another table named "Average Wait times" (see example below).
+///
+///
+/// Average Wait times (based on the timeline view):
+/// [0]: Executions
+/// [1]: Average time spent waiting in a scheduler's queue
+/// [2]: Average time spent waiting in a scheduler's queue while ready
+/// [3]: Average time elapsed from WB until retire stage
+///
+/// [0] [1] [2] [3]
+/// 0. 2 1.0 1.0 3.0 vmovshdup %xmm0, %xmm1
+/// 1. 2 1.0 1.0 3.0 vpermilpd $1, %xmm0, %xmm2
+/// 2. 2 1.0 1.0 2.5 vpermilps $231, %xmm0, %xmm5
+/// 3. 2 1.5 0.5 1.0 vaddss %xmm1, %xmm0, %xmm3
+/// 4. 2 3.5 0.0 0.0 vaddss %xmm3, %xmm2, %xmm4
+/// 5. 2 6.5 0.0 0.0 vaddss %xmm4, %xmm5, %xmm6
+/// 2 2.4 0.6 1.6 <total>
+///
+/// By comparing column [2] with column [1], we get an idea about how many
+/// cycles were spent in the scheduler's queue due to data dependencies.
+///
+/// In this example, instruction 5 spent an average of ~6 cycles in the
+/// scheduler's queue. As soon as operands became ready, the instruction
+/// was immediately issued to the pipeline(s).
+/// That is expected because instruction 5 cannot transition to the "ready"
+/// state until %xmm4 is written by instruction 4.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H
+#define LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H
+
+#include "Views/InstructionView.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+/// This class listens to instruction state transition events
+/// in order to construct a timeline information.
+///
+/// For every instruction executed by the Pipeline, this class constructs
+/// a TimelineViewEntry object. TimelineViewEntry objects are then used
+/// to print the timeline information, as well as the "average wait times"
+/// for every instruction in the input assembly sequence.
+class TimelineView : public InstructionView {
+ unsigned CurrentCycle; // Incremented by every onCycleEnd() notification.
+ unsigned MaxCycle; // Retire cycles and LastCycle are only updated while CurrentCycle < MaxCycle.
+ unsigned LastCycle; // Highest cycle below MaxCycle at which a tracked event was seen.
+
+ struct TimelineViewEntry { // Cycles recorded for one instruction instance.
+ int CycleDispatched; // A negative value is an "invalid cycle".
+ unsigned CycleReady;
+ unsigned CycleIssued;
+ unsigned CycleExecuted;
+ unsigned CycleRetired; // Stays at 0 if the instruction retires at or after MaxCycle.
+ };
+ std::vector<TimelineViewEntry> Timeline; // One entry per instruction and per iteration.
+
+ struct WaitTimeEntry { // Cumulative cycles, summed over all the executions of an instruction.
+ unsigned CyclesSpentInSchedulerQueue;
+ unsigned CyclesSpentInSQWhileReady;
+ unsigned CyclesSpentAfterWBAndBeforeRetire;
+ };
+ std::vector<WaitTimeEntry> WaitTime; // One entry per instruction of the code block.
+
+ // This field is used to map instructions to buffered resources.
+ // Elements of this vector are <resourceID, BufferSize> pairs.
+ std::vector<std::pair<unsigned, int>> UsedBuffer;
+
+ void printTimelineViewEntry(llvm::formatted_raw_ostream &OS,
+ const TimelineViewEntry &E, unsigned Iteration,
+ unsigned SourceIndex) const;
+ void printWaitTimeEntry(llvm::formatted_raw_ostream &OS,
+ const WaitTimeEntry &E, unsigned Index,
+ unsigned Executions) const;
+
+ // Display characters for the TimelineView report output.
+ struct DisplayChar {
+ static const char Dispatched = 'D';
+ static const char Executed = 'E';
+ static const char Retired = 'R';
+ static const char Waiting = '='; // Instruction is waiting in the scheduler.
+ static const char Executing = 'e';
+ static const char RetireLag = '-'; // The instruction is waiting to retire.
+ };
+
+public:
+ TimelineView(const llvm::MCSubtargetInfo &sti, llvm::MCInstPrinter &Printer,
+ llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations,
+ unsigned Cycles); // A Cycles value of zero means "no cycle limit".
+
+ // Event handlers.
+ void onCycleEnd() override { ++CurrentCycle; }
+ void onEvent(const HWInstructionEvent &Event) override;
+ void onReservedBuffers(const InstRef &IR,
+ llvm::ArrayRef<unsigned> Buffers) override;
+
+ // print functionalities.
+ void printTimeline(llvm::raw_ostream &OS) const;
+ void printAverageWaitTimes(llvm::raw_ostream &OS) const;
+ void printView(llvm::raw_ostream &OS) const override { // Timeline first, then the average wait times.
+ printTimeline(OS);
+ printAverageWaitTimes(OS);
+ }
+ StringRef getNameAsString() const override { return "TimelineView"; }
+ json::Value toJSON() const override;
+};
+} // namespace mca
+} // namespace llvm
+
+#endif