diff options
Diffstat (limited to 'llvm/tools/llvm-mca/Views')
20 files changed, 3049 insertions, 0 deletions
diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp new file mode 100644 index 000000000000..feff0cd6d524 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp @@ -0,0 +1,652 @@ +//===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the functionalities used by the BottleneckAnalysis +/// to report bottleneck info. +/// +//===----------------------------------------------------------------------===// + +#include "Views/BottleneckAnalysis.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MCA/Support.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +PressureTracker::PressureTracker(const MCSchedModel &Model) + : SM(Model), + ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0), + ProcResID2Mask(Model.getNumProcResourceKinds(), 0), + ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0), + ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) { + computeProcResourceMasks(SM, ProcResID2Mask); + + // Ignore the invalid resource at index zero. + unsigned NextResourceUsersIdx = 0; + for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); + ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx; + NextResourceUsersIdx += ProcResource.NumUnits; + uint64_t ResourceMask = ProcResID2Mask[I]; + ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I; + } + + ResourceUsers.resize(NextResourceUsersIdx); + std::fill(ResourceUsers.begin(), ResourceUsers.end(), + std::make_pair<unsigned, unsigned>(~0U, 0U)); +} + +void PressureTracker::getResourceUsers(uint64_t ResourceMask, + SmallVectorImpl<User> &Users) const { + unsigned Index = getResourceStateIndex(ResourceMask); + unsigned ProcResID = ResIdx2ProcResID[Index]; + const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID); + for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) { + const User U = getResourceUser(ProcResID, I); + if (U.second && IPI.find(U.first) != IPI.end()) + Users.emplace_back(U); + } +} + +void PressureTracker::onInstructionDispatched(unsigned IID) { + IPI.insert(std::make_pair(IID, InstructionPressureInfo())); +} + +void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); } + +void PressureTracker::handleInstructionIssuedEvent( + const HWInstructionIssuedEvent &Event) { + unsigned IID = Event.IR.getSourceIndex(); + using ResourceRef = HWInstructionIssuedEvent::ResourceRef; + using ResourceUse = std::pair<ResourceRef, ResourceCycles>; + for (const ResourceUse &Use : Event.UsedResources) { + const ResourceRef &RR = Use.first; + unsigned Index = ProcResID2ResourceUsersIndex[RR.first]; + Index += countTrailingZeros(RR.second); + ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator()); + } +} + +void PressureTracker::updateResourcePressureDistribution( + uint64_t CumulativeMask) { + while (CumulativeMask) { + uint64_t Current = CumulativeMask & (-CumulativeMask); + unsigned ResIdx = getResourceStateIndex(Current); + unsigned ProcResID = ResIdx2ProcResID[ResIdx]; + uint64_t Mask = ProcResID2Mask[ProcResID]; + + if (Mask == Current) { + ResourcePressureDistribution[ProcResID]++; + CumulativeMask ^= Current; + continue; + } + + Mask ^= Current; + while (Mask) { + uint64_t SubUnit = Mask & (-Mask); + ResIdx = getResourceStateIndex(SubUnit); + ProcResID = ResIdx2ProcResID[ResIdx]; + ResourcePressureDistribution[ProcResID]++; + Mask ^= SubUnit; + } + + CumulativeMask ^= Current; + } +} + +void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) { + assert(Event.Reason != HWPressureEvent::INVALID && + "Unexpected invalid event!"); + + switch (Event.Reason) { + default: + break; + + case HWPressureEvent::RESOURCES: { + const uint64_t ResourceMask = Event.ResourceMask; + updateResourcePressureDistribution(Event.ResourceMask); + + for (const InstRef &IR : Event.AffectedInstructions) { + const Instruction &IS = *IR.getInstruction(); + unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask; + if (!BusyResources) + continue; + + unsigned IID = IR.getSourceIndex(); + IPI[IID].ResourcePressureCycles++; + } + break; + } + + case HWPressureEvent::REGISTER_DEPS: + for (const InstRef &IR : Event.AffectedInstructions) { + unsigned IID = IR.getSourceIndex(); + IPI[IID].RegisterPressureCycles++; + } + break; + + case HWPressureEvent::MEMORY_DEPS: + for (const InstRef &IR : Event.AffectedInstructions) { + unsigned IID = IR.getSourceIndex(); + IPI[IID].MemoryPressureCycles++; + } + } +} + +#ifndef NDEBUG +void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, + const DependencyEdge &DepEdge, + MCInstPrinter &MCIP) const { + unsigned FromIID = DepEdge.FromIID; + unsigned ToIID = DepEdge.ToIID; + assert(FromIID < ToIID && "Graph should be acyclic!"); + + const DependencyEdge::Dependency &DE = DepEdge.Dep; + assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!"); + + OS << " FROM: " << FromIID << " TO: " << ToIID << " "; + if (DE.Type == DependencyEdge::DT_REGISTER) { + OS << " - REGISTER: "; + MCIP.printRegName(OS, DE.ResourceOrRegID); + } else if (DE.Type == DependencyEdge::DT_MEMORY) { + OS << " - MEMORY"; + } else { + assert(DE.Type == DependencyEdge::DT_RESOURCE && + "Unsupported dependency type!"); + OS << " - RESOURCE MASK: " << DE.ResourceOrRegID; + } + OS << " - COST: " << DE.Cost << '\n'; +} +#endif // NDEBUG + +void DependencyGraph::pruneEdges(unsigned Iterations) { + for (DGNode &N : Nodes) { + unsigned NumPruned = 0; + const unsigned Size = N.OutgoingEdges.size(); + // Use a cut-off threshold to prune edges with a low frequency. + for (unsigned I = 0, E = Size; I < E; ++I) { + DependencyEdge &Edge = N.OutgoingEdges[I]; + if (Edge.Frequency == Iterations) + continue; + double Factor = (double)Edge.Frequency / Iterations; + if (0.10 < Factor) + continue; + Nodes[Edge.ToIID].NumPredecessors--; + std::swap(Edge, N.OutgoingEdges[E - 1]); + --E; + ++NumPruned; + } + + if (NumPruned) + N.OutgoingEdges.resize(Size - NumPruned); + } +} + +void DependencyGraph::initializeRootSet( + SmallVectorImpl<unsigned> &RootSet) const { + for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { + const DGNode &N = Nodes[I]; + if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty()) + RootSet.emplace_back(I); + } +} + +void DependencyGraph::propagateThroughEdges( + SmallVectorImpl<unsigned> &RootSet, unsigned Iterations) { + SmallVector<unsigned, 8> ToVisit; + + // A critical sequence is computed as the longest path from a node of the + // RootSet to a leaf node (i.e. a node with no successors). The RootSet is + // composed of nodes with at least one successor, and no predecessors. + // + // Each node of the graph starts with an initial default cost of zero. The + // cost of a node is a measure of criticality: the higher the cost, the bigger + // is the performance impact. + // For register and memory dependencies, the cost is a function of the write + // latency as well as the actual delay (in cycles) caused to users. + // For processor resource dependencies, the cost is a function of the resource + // pressure. Resource interferences with low frequency values are ignored. + // + // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of + // the inner loop selects (i.e. visits) a node N from a set of `unvisited + // nodes`, and then propagates the cost of N to all its neighbors. + // + // The `unvisited nodes` set initially contains all the nodes from the + // RootSet. A node N is added to the `unvisited nodes` if all its + // predecessors have been visited already. + // + // For simplicity, every node tracks the number of unvisited incoming edges in + // field `NumVisitedPredecessors`. When the value of that field drops to + // zero, then the corresponding node is added to a `ToVisit` set. + // + // At the end of every iteration of the outer loop, set `ToVisit` becomes our + // new `unvisited nodes` set. + // + // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet) + // is empty. This algorithm works under the assumption that the graph is + // acyclic. + do { + for (unsigned IID : RootSet) { + const DGNode &N = Nodes[IID]; + for (const DependencyEdge &DepEdge : N.OutgoingEdges) { + unsigned ToIID = DepEdge.ToIID; + DGNode &To = Nodes[ToIID]; + uint64_t Cost = N.Cost + DepEdge.Dep.Cost; + // Check if this is the most expensive incoming edge seen so far. In + // case, update the total cost of the destination node (ToIID), as well + // its field `CriticalPredecessor`. + if (Cost > To.Cost) { + To.CriticalPredecessor = DepEdge; + To.Cost = Cost; + To.Depth = N.Depth + 1; + } + To.NumVisitedPredecessors++; + if (To.NumVisitedPredecessors == To.NumPredecessors) + ToVisit.emplace_back(ToIID); + } + } + + std::swap(RootSet, ToVisit); + ToVisit.clear(); + } while (!RootSet.empty()); +} + +void DependencyGraph::getCriticalSequence( + SmallVectorImpl<const DependencyEdge *> &Seq) const { + // At this stage, nodes of the graph have been already visited, and costs have + // been propagated through the edges (see method `propagateThroughEdges()`). + + // Identify the node N with the highest cost in the graph. By construction, + // that node is the last instruction of our critical sequence. + // Field N.Depth would tell us the total length of the sequence. + // + // To obtain the sequence of critical edges, we simply follow the chain of critical + // predecessors starting from node N (field DGNode::CriticalPredecessor). + const auto It = std::max_element( + Nodes.begin(), Nodes.end(), + [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; }); + unsigned IID = std::distance(Nodes.begin(), It); + Seq.resize(Nodes[IID].Depth); + for (unsigned I = Seq.size(), E = 0; I > E; --I) { + const DGNode &N = Nodes[IID]; + Seq[I - 1] = &N.CriticalPredecessor; + IID = N.CriticalPredecessor.FromIID; + } +} + +static void printInstruction(formatted_raw_ostream &FOS, + const MCSubtargetInfo &STI, MCInstPrinter &MCIP, + const MCInst &MCI, + bool UseDifferentColor = false) { + std::string Instruction; + raw_string_ostream InstrStream(Instruction); + + FOS.PadToColumn(14); + + MCIP.printInst(&MCI, InstrStream, "", STI); + InstrStream.flush(); + + if (UseDifferentColor) + FOS.changeColor(raw_ostream::CYAN, true, false); + FOS << StringRef(Instruction).ltrim(); + if (UseDifferentColor) + FOS.resetColor(); +} + +void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const { + // Early exit if no bottlenecks were found during the simulation. + if (!SeenStallCycles || !BPI.PressureIncreaseCycles) + return; + + SmallVector<const DependencyEdge *, 16> Seq; + DG.getCriticalSequence(Seq); + if (Seq.empty()) + return; + + OS << "\nCritical sequence based on the simulation:\n\n"; + + const DependencyEdge &FirstEdge = *Seq[0]; + unsigned FromIID = FirstEdge.FromIID % Source.size(); + unsigned ToIID = FirstEdge.ToIID % Source.size(); + bool IsLoopCarried = FromIID >= ToIID; + + formatted_raw_ostream FOS(OS); + FOS.PadToColumn(14); + FOS << "Instruction"; + FOS.PadToColumn(58); + FOS << "Dependency Information"; + + bool HasColors = FOS.has_colors(); + + unsigned CurrentIID = 0; + if (IsLoopCarried) { + FOS << "\n +----< " << FromIID << "."; + printInstruction(FOS, STI, MCIP, Source[FromIID], HasColors); + FOS << "\n |\n | < loop carried > \n |"; + } else { + while (CurrentIID < FromIID) { + FOS << "\n " << CurrentIID << "."; + printInstruction(FOS, STI, MCIP, Source[CurrentIID]); + CurrentIID++; + } + + FOS << "\n +----< " << CurrentIID << "."; + printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors); + CurrentIID++; + } + + for (const DependencyEdge *&DE : Seq) { + ToIID = DE->ToIID % Source.size(); + unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID; + + while (CurrentIID < LastIID) { + FOS << "\n | " << CurrentIID << "."; + printInstruction(FOS, STI, MCIP, Source[CurrentIID]); + CurrentIID++; + } + + if (CurrentIID == ToIID) { + FOS << "\n +----> " << ToIID << "."; + printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors); + } else { + FOS << "\n |\n | < loop carried > \n |" + << "\n +----> " << ToIID << "."; + printInstruction(FOS, STI, MCIP, Source[ToIID], HasColors); + } + FOS.PadToColumn(58); + + const DependencyEdge::Dependency &Dep = DE->Dep; + if (HasColors) + FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); + + if (Dep.Type == DependencyEdge::DT_REGISTER) { + FOS << "## REGISTER dependency: "; + if (HasColors) + FOS.changeColor(raw_ostream::MAGENTA, true, false); + MCIP.printRegName(FOS, Dep.ResourceOrRegID); + } else if (Dep.Type == DependencyEdge::DT_MEMORY) { + FOS << "## MEMORY dependency."; + } else { + assert(Dep.Type == DependencyEdge::DT_RESOURCE && + "Unsupported dependency type!"); + FOS << "## RESOURCE interference: "; + if (HasColors) + FOS.changeColor(raw_ostream::MAGENTA, true, false); + FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID); + if (HasColors) { + FOS.resetColor(); + FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); + } + FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations) + << "% ]"; + } + if (HasColors) + FOS.resetColor(); + ++CurrentIID; + } + + while (CurrentIID < Source.size()) { + FOS << "\n " << CurrentIID << "."; + printInstruction(FOS, STI, MCIP, Source[CurrentIID]); + CurrentIID++; + } + + FOS << '\n'; + FOS.flush(); +} + +#ifndef NDEBUG +void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const { + OS << "\nREG DEPS\n"; + for (const DGNode &Node : Nodes) + for (const DependencyEdge &DE : Node.OutgoingEdges) + if (DE.Dep.Type == DependencyEdge::DT_REGISTER) + dumpDependencyEdge(OS, DE, MCIP); + + OS << "\nMEM DEPS\n"; + for (const DGNode &Node : Nodes) + for (const DependencyEdge &DE : Node.OutgoingEdges) + if (DE.Dep.Type == DependencyEdge::DT_MEMORY) + dumpDependencyEdge(OS, DE, MCIP); + + OS << "\nRESOURCE DEPS\n"; + for (const DGNode &Node : Nodes) + for (const DependencyEdge &DE : Node.OutgoingEdges) + if (DE.Dep.Type == DependencyEdge::DT_RESOURCE) + dumpDependencyEdge(OS, DE, MCIP); +} +#endif // NDEBUG + +void DependencyGraph::addDependency(unsigned From, unsigned To, + DependencyEdge::Dependency &&Dep) { + DGNode &NodeFrom = Nodes[From]; + DGNode &NodeTo = Nodes[To]; + SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges; + + auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) { + return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID; + }); + + if (It != Vec.end()) { + It->Dep.Cost += Dep.Cost; + It->Frequency++; + return; + } + + DependencyEdge DE = {Dep, From, To, 1}; + Vec.emplace_back(DE); + NodeTo.NumPredecessors++; +} + +BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti, + MCInstPrinter &Printer, + ArrayRef<MCInst> S, unsigned NumIter) + : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), DG(S.size() * 3), + Source(S), Iterations(NumIter), TotalCycles(0), + PressureIncreasedBecauseOfResources(false), + PressureIncreasedBecauseOfRegisterDependencies(false), + PressureIncreasedBecauseOfMemoryDependencies(false), + SeenStallCycles(false), BPI() {} + +void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To, + unsigned RegID, unsigned Cost) { + bool IsLoopCarried = From >= To; + unsigned SourceSize = Source.size(); + if (IsLoopCarried) { + DG.addRegisterDep(From, To + SourceSize, RegID, Cost); + DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost); + return; + } + DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost); +} + +void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, + unsigned Cost) { + bool IsLoopCarried = From >= To; + unsigned SourceSize = Source.size(); + if (IsLoopCarried) { + DG.addMemoryDep(From, To + SourceSize, Cost); + DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost); + return; + } + DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost); +} + +void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To, + uint64_t Mask, unsigned Cost) { + bool IsLoopCarried = From >= To; + unsigned SourceSize = Source.size(); + if (IsLoopCarried) { + DG.addResourceDep(From, To + SourceSize, Mask, Cost); + DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost); + return; + } + DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost); +} + +void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { + const unsigned IID = Event.IR.getSourceIndex(); + if (Event.Type == HWInstructionEvent::Dispatched) { + Tracker.onInstructionDispatched(IID); + return; + } + if (Event.Type == HWInstructionEvent::Executed) { + Tracker.onInstructionExecuted(IID); + return; + } + + if (Event.Type != HWInstructionEvent::Issued) + return; + + const Instruction &IS = *Event.IR.getInstruction(); + unsigned To = IID % Source.size(); + + unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID); + uint64_t ResourceMask = IS.getCriticalResourceMask(); + SmallVector<std::pair<unsigned, unsigned>, 4> Users; + while (ResourceMask) { + uint64_t Current = ResourceMask & (-ResourceMask); + Tracker.getResourceUsers(Current, Users); + for (const std::pair<unsigned, unsigned> &U : Users) + addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles); + Users.clear(); + ResourceMask ^= Current; + } + + const CriticalDependency &RegDep = IS.getCriticalRegDep(); + if (RegDep.Cycles) { + Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID); + unsigned From = RegDep.IID % Source.size(); + addRegisterDep(From, To, RegDep.RegID, Cycles); + } + + const CriticalDependency &MemDep = IS.getCriticalMemDep(); + if (MemDep.Cycles) { + Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID); + unsigned From = MemDep.IID % Source.size(); + addMemoryDep(From, To, Cycles); + } + + Tracker.handleInstructionIssuedEvent( + static_cast<const HWInstructionIssuedEvent &>(Event)); + + // Check if this is the last simulated instruction. + if (IID == ((Iterations * Source.size()) - 1)) + DG.finalizeGraph(Iterations); +} + +void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) { + assert(Event.Reason != HWPressureEvent::INVALID && + "Unexpected invalid event!"); + + Tracker.handlePressureEvent(Event); + + switch (Event.Reason) { + default: + break; + + case HWPressureEvent::RESOURCES: + PressureIncreasedBecauseOfResources = true; + break; + case HWPressureEvent::REGISTER_DEPS: + PressureIncreasedBecauseOfRegisterDependencies = true; + break; + case HWPressureEvent::MEMORY_DEPS: + PressureIncreasedBecauseOfMemoryDependencies = true; + break; + } +} + +void BottleneckAnalysis::onCycleEnd() { + ++TotalCycles; + + bool PressureIncreasedBecauseOfDataDependencies = + PressureIncreasedBecauseOfRegisterDependencies || + PressureIncreasedBecauseOfMemoryDependencies; + if (!PressureIncreasedBecauseOfResources && + !PressureIncreasedBecauseOfDataDependencies) + return; + + ++BPI.PressureIncreaseCycles; + if (PressureIncreasedBecauseOfRegisterDependencies) + ++BPI.RegisterDependencyCycles; + if (PressureIncreasedBecauseOfMemoryDependencies) + ++BPI.MemoryDependencyCycles; + if (PressureIncreasedBecauseOfDataDependencies) + ++BPI.DataDependencyCycles; + if (PressureIncreasedBecauseOfResources) + ++BPI.ResourcePressureCycles; + PressureIncreasedBecauseOfResources = false; + PressureIncreasedBecauseOfRegisterDependencies = false; + PressureIncreasedBecauseOfMemoryDependencies = false; +} + +void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const { + if (!SeenStallCycles || !BPI.PressureIncreaseCycles) { + OS << "\n\nNo resource or data dependency bottlenecks discovered.\n"; + return; + } + + double PressurePerCycle = + (double)BPI.PressureIncreaseCycles * 100 / TotalCycles; + double ResourcePressurePerCycle = + (double)BPI.ResourcePressureCycles * 100 / TotalCycles; + double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles; + double RegDepPressurePerCycle = + (double)BPI.RegisterDependencyCycles * 100 / TotalCycles; + double MemDepPressurePerCycle = + (double)BPI.MemoryDependencyCycles * 100 / TotalCycles; + + OS << "\n\nCycles with backend pressure increase [ " + << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]"; + + OS << "\nThroughput Bottlenecks: " + << "\n Resource Pressure [ " + << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100) + << "% ]"; + + if (BPI.PressureIncreaseCycles) { + ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution(); + const MCSchedModel &SM = STI.getSchedModel(); + for (unsigned I = 0, E = Distribution.size(); I < E; ++I) { + unsigned ResourceCycles = Distribution[I]; + if (ResourceCycles) { + double Frequency = (double)ResourceCycles * 100 / TotalCycles; + const MCProcResourceDesc &PRDesc = *SM.getProcResource(I); + OS << "\n - " << PRDesc.Name << " [ " + << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]"; + } + } + } + + OS << "\n Data Dependencies: [ " + << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]"; + OS << "\n - Register Dependencies [ " + << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100) + << "% ]"; + OS << "\n - Memory Dependencies [ " + << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100) + << "% ]\n"; +} + +void BottleneckAnalysis::printView(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + printBottleneckHints(TempStream); + TempStream.flush(); + OS << Buffer; + printCriticalSequence(OS); +} + +} // namespace mca. +} // namespace llvm diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h new file mode 100644 index 000000000000..9e3bd5978f09 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h @@ -0,0 +1,343 @@ +//===--------------------- BottleneckAnalysis.h -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the bottleneck analysis view. +/// +/// This view internally observes backend pressure increase events in order to +/// identify problematic data dependencies and processor resource interferences. +/// +/// Example of bottleneck analysis report for a dot-product on X86 btver2: +/// +/// Cycles with backend pressure increase [ 40.76% ] +/// Throughput Bottlenecks: +/// Resource Pressure [ 39.34% ] +/// - JFPA [ 39.34% ] +/// - JFPU0 [ 39.34% ] +/// Data Dependencies: [ 1.42% ] +/// - Register Dependencies [ 1.42% ] +/// - Memory Dependencies [ 0.00% ] +/// +/// According to the example, backend pressure increased during the 40.76% of +/// the simulated cycles. In particular, the major cause of backend pressure +/// increases was the contention on floating point adder JFPA accessible from +/// pipeline resource JFPU0. +/// +/// At the end of each cycle, if pressure on the simulated out-of-order buffers +/// has increased, a backend pressure event is reported. +/// In particular, this occurs when there is a delta between the number of uOps +/// dispatched and the number of uOps issued to the underlying pipelines. +/// +/// The bottleneck analysis view is also responsible for identifying and printing +/// the most "critical" sequence of dependent instructions according to the +/// simulated run. +/// +/// Below is the critical sequence computed for the dot-product example on +/// btver2: +/// +/// Instruction Dependency Information +/// +----< 2. vhaddps %xmm3, %xmm3, %xmm4 +/// | +/// | < loop carried > +/// | +/// | 0. vmulps %xmm0, %xmm0, %xmm2 +/// +----> 1. vhaddps %xmm2, %xmm2, %xmm3 ## RESOURCE interference: JFPA [ probability: 73% ] +/// +----> 2. vhaddps %xmm3, %xmm3, %xmm4 ## REGISTER dependency: %xmm3 +/// | +/// | < loop carried > +/// | +/// +----> 1. vhaddps %xmm2, %xmm2, %xmm3 ## RESOURCE interference: JFPA [ probability: 73% ] +/// +/// +/// The algorithm that computes the critical sequence is very similar to a +/// critical path analysis. +/// +/// A dependency graph is used internally to track dependencies between nodes. +/// Nodes of the graph represent instructions from the input assembly sequence, +/// and edges of the graph represent data dependencies or processor resource +/// interferences. +/// +/// Edges are dynamically 'discovered' by observing instruction state transitions +/// and backend pressure increase events. Edges are internally ranked based on +/// their "criticality". A dependency is considered to be critical if it takes a +/// long time to execute, and if it contributes to backend pressure increases. +/// Criticality is internally measured in terms of cycles; it is computed for +/// every edge in the graph as a function of the edge latency and the number of +/// backend pressure increase cycles contributed by that edge. +/// +/// At the end of simulation, costs are propagated to nodes through the edges of +/// the graph, and the most expensive path connecting the root-set (a +/// set of nodes with no predecessors) to a leaf node is reported as critical +/// sequence. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H +#define LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H + +#include "Views/View.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +class PressureTracker { + const MCSchedModel &SM; + + // Resource pressure distribution. There is an element for every processor + // resource declared by the scheduling model. Quantities are number of cycles. + SmallVector<unsigned, 4> ResourcePressureDistribution; + + // Each processor resource is associated with a so-called processor resource + // mask. This vector allows to correlate processor resource IDs with processor + // resource masks. There is exactly one element per each processor resource + // declared by the scheduling model. + SmallVector<uint64_t, 4> ProcResID2Mask; + + // Maps processor resource state indices (returned by calls to + // `getResourceStateIndex(Mask)` to processor resource identifiers. + SmallVector<unsigned, 4> ResIdx2ProcResID; + + // Maps Processor Resource identifiers to ResourceUsers indices. + SmallVector<unsigned, 4> ProcResID2ResourceUsersIndex; + + // Identifies the last user of a processor resource unit. + // This vector is updated on every instruction issued event. + // There is one entry for every processor resource unit declared by the + // processor model. An all_ones value is treated like an invalid instruction + // identifier. + using User = std::pair<unsigned, unsigned>; + SmallVector<User, 4> ResourceUsers; + + struct InstructionPressureInfo { + unsigned RegisterPressureCycles; + unsigned MemoryPressureCycles; + unsigned ResourcePressureCycles; + }; + DenseMap<unsigned, InstructionPressureInfo> IPI; + + void updateResourcePressureDistribution(uint64_t CumulativeMask); + + User getResourceUser(unsigned ProcResID, unsigned UnitID) const { + unsigned Index = ProcResID2ResourceUsersIndex[ProcResID]; + return ResourceUsers[Index + UnitID]; + } + +public: + PressureTracker(const MCSchedModel &Model); + + ArrayRef<unsigned> getResourcePressureDistribution() const { + return ResourcePressureDistribution; + } + + void getResourceUsers(uint64_t ResourceMask, + SmallVectorImpl<User> &Users) const; + + unsigned getRegisterPressureCycles(unsigned IID) const { + assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!"); + const InstructionPressureInfo &Info = IPI.find(IID)->second; + return Info.RegisterPressureCycles; + } + + unsigned getMemoryPressureCycles(unsigned IID) const { + assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!"); + const InstructionPressureInfo &Info = IPI.find(IID)->second; + return Info.MemoryPressureCycles; + } + + unsigned getResourcePressureCycles(unsigned IID) const { + assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!"); + const InstructionPressureInfo &Info = IPI.find(IID)->second; + return Info.ResourcePressureCycles; + } + + const char *resolveResourceName(uint64_t ResourceMask) const { + unsigned Index = getResourceStateIndex(ResourceMask); + unsigned ProcResID = ResIdx2ProcResID[Index]; + const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID); + return PRDesc.Name; + } + + void onInstructionDispatched(unsigned IID); + void onInstructionExecuted(unsigned IID); + + void handlePressureEvent(const HWPressureEvent &Event); + void handleInstructionIssuedEvent(const HWInstructionIssuedEvent &Event); +}; + +// A dependency edge. +struct DependencyEdge { + enum DependencyType { DT_INVALID, DT_REGISTER, DT_MEMORY, DT_RESOURCE }; + + // Dependency edge descriptor. + // + // It specifies the dependency type, as well as the edge cost in cycles. + struct Dependency { + DependencyType Type; + uint64_t ResourceOrRegID; + uint64_t Cost; + }; + Dependency Dep; + + unsigned FromIID; + unsigned ToIID; + + // Used by the bottleneck analysis to compute the interference + // probability for processor resources. + unsigned Frequency; +}; + +// A dependency graph used by the bottleneck analysis to describe data +// dependencies and processor resource interferences between instructions. +// +// There is a node (an instance of struct DGNode) for every instruction in the +// input assembly sequence. Edges of the graph represent dependencies between +// instructions. +// +// Each edge of the graph is associated with a cost value which is used +// internally to rank dependency based on their impact on the runtime +// performance (see field DependencyEdge::Dependency::Cost). In general, the +// higher the cost of an edge, the higher the impact on performance. +// +// The cost of a dependency is a function of both the latency and the number of +// cycles where the dependency has been seen as critical (i.e. contributing to +// back-pressure increases). +// +// Loop carried dependencies are carefully expanded by the bottleneck analysis +// to guarantee that the graph stays acyclic. To this end, extra nodes are +// pre-allocated at construction time to describe instructions from "past and +// future" iterations. The graph is kept acyclic mainly because it simplifies the +// complexity of the algorithm that computes the critical sequence. +class DependencyGraph { + struct DGNode { + unsigned NumPredecessors; + unsigned NumVisitedPredecessors; + uint64_t Cost; + unsigned Depth; + + DependencyEdge CriticalPredecessor; + SmallVector<DependencyEdge, 8> OutgoingEdges; + }; + SmallVector<DGNode, 16> Nodes; + + DependencyGraph(const DependencyGraph &) = delete; + DependencyGraph &operator=(const DependencyGraph &) = delete; + + void addDependency(unsigned From, unsigned To, + DependencyEdge::Dependency &&DE); + + void pruneEdges(unsigned Iterations); + void initializeRootSet(SmallVectorImpl<unsigned> &RootSet) const; + void propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet, unsigned Iterations); + +#ifndef NDEBUG + void dumpDependencyEdge(raw_ostream &OS, const DependencyEdge &DE, + MCInstPrinter &MCIP) const; +#endif + +public: + DependencyGraph(unsigned Size) : Nodes(Size) {} + + void addRegisterDep(unsigned From, unsigned To, unsigned RegID, + unsigned Cost) { + addDependency(From, To, {DependencyEdge::DT_REGISTER, RegID, Cost}); + } + + void addMemoryDep(unsigned From, unsigned To, unsigned Cost) { + addDependency(From, To, {DependencyEdge::DT_MEMORY, /* unused */ 0, Cost}); + } + + void addResourceDep(unsigned From, unsigned To, uint64_t Mask, + unsigned Cost) { + addDependency(From, To, {DependencyEdge::DT_RESOURCE, Mask, Cost}); + } + + // Called by the bottleneck analysis at the end of simulation to propagate + // costs through the edges of the graph, and compute a critical path. + void finalizeGraph(unsigned Iterations) { + SmallVector<unsigned, 16> RootSet; + pruneEdges(Iterations); + initializeRootSet(RootSet); + propagateThroughEdges(RootSet, Iterations); + } + + // Returns a sequence of edges representing the critical sequence based on the + // simulated run. It assumes that the graph has already been finalized (i.e. + // method `finalizeGraph()` has already been called on this graph). + void getCriticalSequence(SmallVectorImpl<const DependencyEdge *> &Seq) const; + +#ifndef NDEBUG + void dump(raw_ostream &OS, MCInstPrinter &MCIP) const; +#endif +}; + +/// A view that collects and prints a few performance numbers. +class BottleneckAnalysis : public View { + const MCSubtargetInfo &STI; + MCInstPrinter &MCIP; + PressureTracker Tracker; + DependencyGraph DG; + + ArrayRef<MCInst> Source; + unsigned Iterations; + unsigned TotalCycles; + + bool PressureIncreasedBecauseOfResources; + bool PressureIncreasedBecauseOfRegisterDependencies; + bool PressureIncreasedBecauseOfMemoryDependencies; + // True if throughput was affected by dispatch stalls. + bool SeenStallCycles; + + struct BackPressureInfo { + // Cycles where backpressure increased. + unsigned PressureIncreaseCycles; + // Cycles where backpressure increased because of pipeline pressure. + unsigned ResourcePressureCycles; + // Cycles where backpressure increased because of data dependencies. + unsigned DataDependencyCycles; + // Cycles where backpressure increased because of register dependencies. + unsigned RegisterDependencyCycles; + // Cycles where backpressure increased because of memory dependencies. + unsigned MemoryDependencyCycles; + }; + BackPressureInfo BPI; + + // Used to populate the dependency graph DG. + void addRegisterDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy); + void addMemoryDep(unsigned From, unsigned To, unsigned Cy); + void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy); + + // Prints a bottleneck message to OS. + void printBottleneckHints(raw_ostream &OS) const; + void printCriticalSequence(raw_ostream &OS) const; + +public: + BottleneckAnalysis(const MCSubtargetInfo &STI, MCInstPrinter &MCIP, + ArrayRef<MCInst> Sequence, unsigned Iterations); + + void onCycleEnd() override; + void onEvent(const HWStallEvent &Event) override { SeenStallCycles = true; } + void onEvent(const HWPressureEvent &Event) override; + void onEvent(const HWInstructionEvent &Event) override; + + void printView(raw_ostream &OS) const override; + +#ifndef NDEBUG + void dump(raw_ostream &OS, MCInstPrinter &MCIP) const { DG.dump(OS, MCIP); } +#endif +}; + +} // namespace mca +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp b/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp new file mode 100644 index 000000000000..557b8ba17b17 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/DispatchStatistics.cpp @@ -0,0 +1,85 @@ +//===--------------------- DispatchStatistics.cpp ---------------------*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the DispatchStatistics interface. +/// +//===----------------------------------------------------------------------===// + +#include "Views/DispatchStatistics.h" +#include "llvm/Support/Format.h" + +namespace llvm { +namespace mca { + +void DispatchStatistics::onEvent(const HWStallEvent &Event) { + if (Event.Type < HWStallEvent::LastGenericEvent) + HWStalls[Event.Type]++; +} + +void DispatchStatistics::onEvent(const HWInstructionEvent &Event) { + if (Event.Type != HWInstructionEvent::Dispatched) + return; + + const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event); + NumDispatched += DE.MicroOpcodes; +} + +void DispatchStatistics::printDispatchHistogram(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + TempStream << "\n\nDispatch Logic - " + << "number of cycles where we saw N micro opcodes dispatched:\n"; + TempStream << "[# dispatched], [# cycles]\n"; + for (const std::pair<unsigned, unsigned> &Entry : DispatchGroupSizePerCycle) { + double Percentage = ((double)Entry.second / NumCycles) * 100.0; + TempStream << " " << Entry.first << ", " << Entry.second + << " (" << format("%.1f", floor((Percentage * 10) + 0.5) / 10) + << "%)\n"; + } + + TempStream.flush(); + OS << Buffer; +} + +static void printStalls(raw_ostream &OS, unsigned NumStalls, + unsigned NumCycles) { + if (!NumStalls) { + OS << NumStalls; + return; + } + + double Percentage = ((double)NumStalls / NumCycles) * 100.0; + OS << NumStalls << " (" + << format("%.1f", floor((Percentage * 10) + 0.5) / 10) << "%)"; +} + +void DispatchStatistics::printDispatchStalls(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream SS(Buffer); + SS << "\n\nDynamic Dispatch Stall Cycles:\n"; + SS << "RAT - Register unavailable: "; + printStalls(SS, HWStalls[HWStallEvent::RegisterFileStall], NumCycles); + SS << "\nRCU - Retire tokens unavailable: "; + printStalls(SS, HWStalls[HWStallEvent::RetireControlUnitStall], NumCycles); + SS << "\nSCHEDQ - Scheduler full: "; + printStalls(SS, HWStalls[HWStallEvent::SchedulerQueueFull], NumCycles); + SS << "\nLQ - Load queue full: "; + printStalls(SS, HWStalls[HWStallEvent::LoadQueueFull], NumCycles); + SS << "\nSQ - Store queue full: "; + printStalls(SS, HWStalls[HWStallEvent::StoreQueueFull], NumCycles); + SS << "\nGROUP - Static restrictions on the dispatch group: "; + printStalls(SS, HWStalls[HWStallEvent::DispatchGroupStall], NumCycles); + SS << '\n'; + SS.flush(); + OS << Buffer; +} + +} // namespace mca +} // namespace llvm diff --git a/llvm/tools/llvm-mca/Views/DispatchStatistics.h b/llvm/tools/llvm-mca/Views/DispatchStatistics.h new file mode 100644 index 000000000000..07c0f5a4c68f --- /dev/null +++ b/llvm/tools/llvm-mca/Views/DispatchStatistics.h @@ -0,0 +1,85 @@ +//===--------------------- DispatchStatistics.h -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements a view that prints a few statistics related to the +/// dispatch logic. It collects and analyzes instruction dispatch events as +/// well as static/dynamic dispatch stall events. +/// +/// Example: +/// ======== +/// +/// Dynamic Dispatch Stall Cycles: +/// RAT - Register unavailable: 0 +/// RCU - Retire tokens unavailable: 0 +/// SCHEDQ - Scheduler full: 42 +/// LQ - Load queue full: 0 +/// SQ - Store queue full: 0 +/// GROUP - Static restrictions on the dispatch group: 0 +/// +/// +/// Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: +/// [# dispatched], [# cycles] +/// 0, 15 (11.5%) +/// 2, 4 (3.1%) +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H +#define LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H + +#include "Views/View.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include <map> + +namespace llvm { +namespace mca { + +class DispatchStatistics : public View { + unsigned NumDispatched; + unsigned NumCycles; + + // Counts dispatch stall events caused by unavailability of resources. There + // is one counter for every generic stall kind (see class HWStallEvent). + llvm::SmallVector<unsigned, 8> HWStalls; + + using Histogram = std::map<unsigned, unsigned>; + Histogram DispatchGroupSizePerCycle; + + void updateHistograms() { + DispatchGroupSizePerCycle[NumDispatched]++; + NumDispatched = 0; + } + + void printDispatchHistogram(llvm::raw_ostream &OS) const; + + void printDispatchStalls(llvm::raw_ostream &OS) const; + +public: + DispatchStatistics() + : NumDispatched(0), NumCycles(0), + HWStalls(HWStallEvent::LastGenericEvent) {} + + void onEvent(const HWStallEvent &Event) override; + + void onEvent(const HWInstructionEvent &Event) override; + + void onCycleBegin() override { NumCycles++; } + + void onCycleEnd() override { updateHistograms(); } + + void printView(llvm::raw_ostream &OS) const override { + printDispatchStalls(OS); + printDispatchHistogram(OS); + } +}; +} // namespace mca +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp new file mode 100644 index 000000000000..a6f9153b4945 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp @@ -0,0 +1,112 @@ +//===--------------------- InstructionInfoView.cpp --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the InstructionInfoView API. +/// +//===----------------------------------------------------------------------===// + +#include "Views/InstructionInfoView.h" +#include "llvm/Support/FormattedStream.h" + +namespace llvm { +namespace mca { + +void InstructionInfoView::printView(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + const MCSchedModel &SM = STI.getSchedModel(); + + std::string Instruction; + raw_string_ostream InstrStream(Instruction); + + TempStream << "\n\nInstruction Info:\n"; + TempStream << "[1]: #uOps\n[2]: Latency\n[3]: RThroughput\n" + << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n"; + if (PrintEncodings) { + TempStream << "[7]: Encoding Size\n"; + TempStream << "\n[1] [2] [3] [4] [5] [6] [7] " + << "Encodings: Instructions:\n"; + } else { + TempStream << "\n[1] [2] [3] [4] [5] [6] Instructions:\n"; + } + + for (unsigned I = 0, E = Source.size(); I < E; ++I) { + const MCInst &Inst = Source[I]; + const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode()); + + // Obtain the scheduling class information from the instruction. + unsigned SchedClassID = MCDesc.getSchedClass(); + unsigned CPUID = SM.getProcessorID(); + + // Try to solve variant scheduling classes. + while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant()) + SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &Inst, CPUID); + + const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); + unsigned NumMicroOpcodes = SCDesc.NumMicroOps; + unsigned Latency = MCSchedModel::computeInstrLatency(STI, SCDesc); + // Add extra latency due to delays in the forwarding data paths. + Latency += MCSchedModel::getForwardingDelayCycles( + STI.getReadAdvanceEntries(SCDesc)); + Optional<double> RThroughput = + MCSchedModel::getReciprocalThroughput(STI, SCDesc); + + TempStream << ' ' << NumMicroOpcodes << " "; + if (NumMicroOpcodes < 10) + TempStream << " "; + else if (NumMicroOpcodes < 100) + TempStream << ' '; + TempStream << Latency << " "; + if (Latency < 10) + TempStream << " "; + else if (Latency < 100) + TempStream << ' '; + + if (RThroughput.hasValue()) { + double RT = RThroughput.getValue(); + TempStream << format("%.2f", RT) << ' '; + if (RT < 10.0) + TempStream << " "; + else if (RT < 100.0) + TempStream << ' '; + } else { + TempStream << " - "; + } + TempStream << (MCDesc.mayLoad() ? " * " : " "); + TempStream << (MCDesc.mayStore() ? " * " : " "); + TempStream << (MCDesc.hasUnmodeledSideEffects() ? " U " : " "); + + if (PrintEncodings) { + StringRef Encoding(CE.getEncoding(I)); + unsigned EncodingSize = Encoding.size(); + TempStream << " " << EncodingSize + << (EncodingSize < 10 ? " " : " "); + TempStream.flush(); + formatted_raw_ostream FOS(TempStream); + for (unsigned i = 0, e = Encoding.size(); i != e; ++i) + FOS << format("%02x ", (uint8_t)Encoding[i]); + FOS.PadToColumn(30); + FOS.flush(); + } + + MCIP.printInst(&Inst, InstrStream, "", STI); + InstrStream.flush(); + + // Consume any tabs or spaces at the beginning of the string. + StringRef Str(Instruction); + Str = Str.ltrim(); + TempStream << Str << '\n'; + Instruction = ""; + } + + TempStream.flush(); + OS << Buffer; +} +} // namespace mca. +} // namespace llvm diff --git a/llvm/tools/llvm-mca/Views/InstructionInfoView.h b/llvm/tools/llvm-mca/Views/InstructionInfoView.h new file mode 100644 index 000000000000..0e948304119f --- /dev/null +++ b/llvm/tools/llvm-mca/Views/InstructionInfoView.h @@ -0,0 +1,73 @@ +//===--------------------- InstructionInfoView.h ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the instruction info view. +/// +/// The goal fo the instruction info view is to print the latency and reciprocal +/// throughput information for every instruction in the input sequence. +/// This section also reports extra information related to the number of micro +/// opcodes, and opcode properties (i.e. 'MayLoad', 'MayStore', 'HasSideEffects) +/// +/// Example: +/// +/// Instruction Info: +/// [1]: #uOps +/// [2]: Latency +/// [3]: RThroughput +/// [4]: MayLoad +/// [5]: MayStore +/// [6]: HasSideEffects +/// +/// [1] [2] [3] [4] [5] [6] Instructions: +/// 1 2 1.00 vmulps %xmm0, %xmm1, %xmm2 +/// 1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3 +/// 1 3 1.00 vhaddps %xmm3, %xmm3, %xmm4 +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONINFOVIEW_H +#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONINFOVIEW_H + +#include "Views/View.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MCA/CodeEmitter.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace llvm { +namespace mca { + +/// A view that prints out generic instruction information. +class InstructionInfoView : public View { + const llvm::MCSubtargetInfo &STI; + const llvm::MCInstrInfo &MCII; + CodeEmitter &CE; + bool PrintEncodings; + llvm::ArrayRef<llvm::MCInst> Source; + llvm::MCInstPrinter &MCIP; + +public: + InstructionInfoView(const llvm::MCSubtargetInfo &ST, + const llvm::MCInstrInfo &II, CodeEmitter &C, + bool ShouldPrintEncodings, llvm::ArrayRef<llvm::MCInst> S, + llvm::MCInstPrinter &IP) + : STI(ST), MCII(II), CE(C), PrintEncodings(ShouldPrintEncodings), + Source(S), MCIP(IP) {} + + void printView(llvm::raw_ostream &OS) const override; +}; +} // namespace mca +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp new file mode 100644 index 000000000000..58736ee0d18c --- /dev/null +++ b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp @@ -0,0 +1,167 @@ +//===--------------------- RegisterFileStatistics.cpp -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the RegisterFileStatistics interface. +/// +//===----------------------------------------------------------------------===// + +#include "Views/RegisterFileStatistics.h" +#include "llvm/Support/Format.h" + +namespace llvm { +namespace mca { + +RegisterFileStatistics::RegisterFileStatistics(const MCSubtargetInfo &sti) + : STI(sti) { + const MCSchedModel &SM = STI.getSchedModel(); + RegisterFileUsage RFUEmpty = {0, 0, 0}; + MoveEliminationInfo MEIEmpty = {0, 0, 0, 0, 0}; + if (!SM.hasExtraProcessorInfo()) { + // Assume a single register file. + PRFUsage.emplace_back(RFUEmpty); + MoveElimInfo.emplace_back(MEIEmpty); + return; + } + + // Initialize a RegisterFileUsage for every user defined register file, plus + // the default register file which is always at index #0. + const MCExtraProcessorInfo &PI = SM.getExtraProcessorInfo(); + // There is always an "InvalidRegisterFile" entry in tablegen. That entry can + // be skipped. If there are no user defined register files, then reserve a + // single entry for the default register file at index #0. + unsigned NumRegFiles = std::max(PI.NumRegisterFiles, 1U); + + PRFUsage.resize(NumRegFiles); + std::fill(PRFUsage.begin(), PRFUsage.end(), RFUEmpty); + + MoveElimInfo.resize(NumRegFiles); + std::fill(MoveElimInfo.begin(), MoveElimInfo.end(), MEIEmpty); +} + +void RegisterFileStatistics::updateRegisterFileUsage( + ArrayRef<unsigned> UsedPhysRegs) { + for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I) { + RegisterFileUsage &RFU = PRFUsage[I]; + unsigned NumUsedPhysRegs = UsedPhysRegs[I]; + RFU.CurrentlyUsedMappings += NumUsedPhysRegs; + RFU.TotalMappings += NumUsedPhysRegs; + RFU.MaxUsedMappings = + std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings); + } +} + +void RegisterFileStatistics::updateMoveElimInfo(const Instruction &Inst) { + if (!Inst.isOptimizableMove()) + return; + + assert(Inst.getDefs().size() == 1 && "Expected a single definition!"); + assert(Inst.getUses().size() == 1 && "Expected a single register use!"); + const WriteState &WS = Inst.getDefs()[0]; + const ReadState &RS = Inst.getUses()[0]; + + MoveEliminationInfo &Info = + MoveElimInfo[Inst.getDefs()[0].getRegisterFileID()]; + Info.TotalMoveEliminationCandidates++; + if (WS.isEliminated()) + Info.CurrentMovesEliminated++; + if (WS.isWriteZero() && RS.isReadZero()) + Info.TotalMovesThatPropagateZero++; +} + +void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) { + switch (Event.Type) { + default: + break; + case HWInstructionEvent::Retired: { + const auto &RE = static_cast<const HWInstructionRetiredEvent &>(Event); + for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I) + PRFUsage[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I]; + break; + } + case HWInstructionEvent::Dispatched: { + const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event); + updateRegisterFileUsage(DE.UsedPhysRegs); + updateMoveElimInfo(*DE.IR.getInstruction()); + } + } +} + +void RegisterFileStatistics::onCycleEnd() { + for (MoveEliminationInfo &MEI : MoveElimInfo) { + unsigned &CurrentMax = MEI.MaxMovesEliminatedPerCycle; + CurrentMax = std::max(CurrentMax, MEI.CurrentMovesEliminated); + MEI.TotalMovesEliminated += MEI.CurrentMovesEliminated; + MEI.CurrentMovesEliminated = 0; + } +} + +void RegisterFileStatistics::printView(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + + TempStream << "\n\nRegister File statistics:"; + const RegisterFileUsage &GlobalUsage = PRFUsage[0]; + TempStream << "\nTotal number of mappings created: " + << GlobalUsage.TotalMappings; + TempStream << "\nMax number of mappings used: " + << GlobalUsage.MaxUsedMappings << '\n'; + + for (unsigned I = 1, E = PRFUsage.size(); I < E; ++I) { + const RegisterFileUsage &RFU = PRFUsage[I]; + // Obtain the register file descriptor from the scheduling model. + assert(STI.getSchedModel().hasExtraProcessorInfo() && + "Unable to find register file info!"); + const MCExtraProcessorInfo &PI = + STI.getSchedModel().getExtraProcessorInfo(); + assert(I <= PI.NumRegisterFiles && "Unexpected register file index!"); + const MCRegisterFileDesc &RFDesc = PI.RegisterFiles[I]; + // Skip invalid register files. + if (!RFDesc.NumPhysRegs) + continue; + + TempStream << "\n* Register File #" << I; + TempStream << " -- " << StringRef(RFDesc.Name) << ':'; + TempStream << "\n Number of physical registers: "; + if (!RFDesc.NumPhysRegs) + TempStream << "unbounded"; + else + TempStream << RFDesc.NumPhysRegs; + TempStream << "\n Total number of mappings created: " + << RFU.TotalMappings; + TempStream << "\n Max number of mappings used: " + << RFU.MaxUsedMappings << '\n'; + const MoveEliminationInfo &MEI = MoveElimInfo[I]; + + if (MEI.TotalMoveEliminationCandidates) { + TempStream << " Number of optimizable moves: " + << MEI.TotalMoveEliminationCandidates; + double EliminatedMovProportion = (double)MEI.TotalMovesEliminated / + MEI.TotalMoveEliminationCandidates * + 100.0; + double ZeroMovProportion = (double)MEI.TotalMovesThatPropagateZero / + MEI.TotalMoveEliminationCandidates * 100.0; + TempStream << "\n Number of moves eliminated: " + << MEI.TotalMovesEliminated << " " + << format("(%.1f%%)", + floor((EliminatedMovProportion * 10) + 0.5) / 10); + TempStream << "\n Number of zero moves: " + << MEI.TotalMovesThatPropagateZero << " " + << format("(%.1f%%)", + floor((ZeroMovProportion * 10) + 0.5) / 10); + TempStream << "\n Max moves eliminated per cycle: " + << MEI.MaxMovesEliminatedPerCycle << '\n'; + } + } + + TempStream.flush(); + OS << Buffer; +} + +} // namespace mca +} // namespace llvm diff --git a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h new file mode 100644 index 000000000000..a2273dd48b22 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h @@ -0,0 +1,80 @@ +//===--------------------- RegisterFileStatistics.h -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This view collects and prints register file usage statistics. +/// +/// Example (-mcpu=btver2): +/// ======================== +/// +/// Register File statistics: +/// Total number of mappings created: 6 +/// Max number of mappings used: 3 +/// +/// * Register File #1 -- FpuPRF: +/// Number of physical registers: 72 +/// Total number of mappings created: 0 +/// Max number of mappings used: 0 +/// Number of optimizable moves: 200 +/// Number of moves eliminated: 200 (100.0%) +/// Number of zero moves: 200 (100.0%) +/// Max moves eliminated per cycle: 2 +/// +/// * Register File #2 -- IntegerPRF: +/// Number of physical registers: 64 +/// Total number of mappings created: 6 +/// Max number of mappings used: 3 +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H +#define LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H + +#include "Views/View.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSubtargetInfo.h" + +namespace llvm { +namespace mca { + +class RegisterFileStatistics : public View { + const llvm::MCSubtargetInfo &STI; + + // Used to track the number of physical registers used in a register file. + struct RegisterFileUsage { + unsigned TotalMappings; + unsigned MaxUsedMappings; + unsigned CurrentlyUsedMappings; + }; + + struct MoveEliminationInfo { + unsigned TotalMoveEliminationCandidates; + unsigned TotalMovesEliminated; + unsigned TotalMovesThatPropagateZero; + unsigned MaxMovesEliminatedPerCycle; + unsigned CurrentMovesEliminated; + }; + + // There is one entry for each register file implemented by the processor. + llvm::SmallVector<RegisterFileUsage, 4> PRFUsage; + llvm::SmallVector<MoveEliminationInfo, 4> MoveElimInfo; + + void updateRegisterFileUsage(ArrayRef<unsigned> UsedPhysRegs); + void updateMoveElimInfo(const Instruction &Inst); + +public: + RegisterFileStatistics(const llvm::MCSubtargetInfo &sti); + + void onCycleEnd() override; + void onEvent(const HWInstructionEvent &Event) override; + void printView(llvm::raw_ostream &OS) const override; +}; +} // namespace mca +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp b/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp new file mode 100644 index 000000000000..38a2478cf4fe --- /dev/null +++ b/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp @@ -0,0 +1,184 @@ +//===--------------------- ResourcePressureView.cpp -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements methods in the ResourcePressureView interface. +/// +//===----------------------------------------------------------------------===// + +#include "Views/ResourcePressureView.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +ResourcePressureView::ResourcePressureView(const llvm::MCSubtargetInfo &sti, + MCInstPrinter &Printer, + ArrayRef<MCInst> S) + : STI(sti), MCIP(Printer), Source(S), LastInstructionIdx(0) { + // Populate the map of resource descriptors. + unsigned R2VIndex = 0; + const MCSchedModel &SM = STI.getSchedModel(); + for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); + unsigned NumUnits = ProcResource.NumUnits; + // Skip groups and invalid resources with zero units. + if (ProcResource.SubUnitsIdxBegin || !NumUnits) + continue; + + Resource2VecIndex.insert(std::pair<unsigned, unsigned>(I, R2VIndex)); + R2VIndex += ProcResource.NumUnits; + } + + NumResourceUnits = R2VIndex; + ResourceUsage.resize(NumResourceUnits * (Source.size() + 1)); + std::fill(ResourceUsage.begin(), ResourceUsage.end(), 0.0); +} + +void ResourcePressureView::onEvent(const HWInstructionEvent &Event) { + if (Event.Type == HWInstructionEvent::Dispatched) { + LastInstructionIdx = Event.IR.getSourceIndex(); + return; + } + + // We're only interested in Issue events. + if (Event.Type != HWInstructionEvent::Issued) + return; + + const auto &IssueEvent = static_cast<const HWInstructionIssuedEvent &>(Event); + const unsigned SourceIdx = Event.IR.getSourceIndex() % Source.size(); + for (const std::pair<ResourceRef, ResourceCycles> &Use : + IssueEvent.UsedResources) { + const ResourceRef &RR = Use.first; + assert(Resource2VecIndex.find(RR.first) != Resource2VecIndex.end()); + unsigned R2VIndex = Resource2VecIndex[RR.first]; + R2VIndex += countTrailingZeros(RR.second); + ResourceUsage[R2VIndex + NumResourceUnits * SourceIdx] += Use.second; + ResourceUsage[R2VIndex + NumResourceUnits * Source.size()] += Use.second; + } +} + +static void printColumnNames(formatted_raw_ostream &OS, + const MCSchedModel &SM) { + unsigned Column = OS.getColumn(); + for (unsigned I = 1, ResourceIndex = 0, E = SM.getNumProcResourceKinds(); + I < E; ++I) { + const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); + unsigned NumUnits = ProcResource.NumUnits; + // Skip groups and invalid resources with zero units. + if (ProcResource.SubUnitsIdxBegin || !NumUnits) + continue; + + for (unsigned J = 0; J < NumUnits; ++J) { + Column += 7; + OS << "[" << ResourceIndex; + if (NumUnits > 1) + OS << '.' << J; + OS << ']'; + OS.PadToColumn(Column); + } + + ResourceIndex++; + } +} + +static void printResourcePressure(formatted_raw_ostream &OS, double Pressure, + unsigned Col) { + if (!Pressure || Pressure < 0.005) { + OS << " - "; + } else { + // Round to the value to the nearest hundredth and then print it. + OS << format("%.2f", floor((Pressure * 100) + 0.5) / 100); + } + OS.PadToColumn(Col); +} + +void ResourcePressureView::printResourcePressurePerIter(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + formatted_raw_ostream FOS(TempStream); + + FOS << "\n\nResources:\n"; + const MCSchedModel &SM = STI.getSchedModel(); + for (unsigned I = 1, ResourceIndex = 0, E = SM.getNumProcResourceKinds(); + I < E; ++I) { + const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); + unsigned NumUnits = ProcResource.NumUnits; + // Skip groups and invalid resources with zero units. + if (ProcResource.SubUnitsIdxBegin || !NumUnits) + continue; + + for (unsigned J = 0; J < NumUnits; ++J) { + FOS << '[' << ResourceIndex; + if (NumUnits > 1) + FOS << '.' << J; + FOS << ']'; + FOS.PadToColumn(6); + FOS << "- " << ProcResource.Name << '\n'; + } + + ResourceIndex++; + } + + FOS << "\n\nResource pressure per iteration:\n"; + FOS.flush(); + printColumnNames(FOS, SM); + FOS << '\n'; + FOS.flush(); + + const unsigned Executions = LastInstructionIdx / Source.size() + 1; + for (unsigned I = 0, E = NumResourceUnits; I < E; ++I) { + double Usage = ResourceUsage[I + Source.size() * E]; + printResourcePressure(FOS, Usage / Executions, (I + 1) * 7); + } + + FOS.flush(); + OS << Buffer; +} + +void ResourcePressureView::printResourcePressurePerInst(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + formatted_raw_ostream FOS(TempStream); + + FOS << "\n\nResource pressure by instruction:\n"; + printColumnNames(FOS, STI.getSchedModel()); + FOS << "Instructions:\n"; + + std::string Instruction; + raw_string_ostream InstrStream(Instruction); + + unsigned InstrIndex = 0; + const unsigned Executions = LastInstructionIdx / Source.size() + 1; + for (const MCInst &MCI : Source) { + unsigned BaseEltIdx = InstrIndex * NumResourceUnits; + for (unsigned J = 0; J < NumResourceUnits; ++J) { + double Usage = ResourceUsage[J + BaseEltIdx]; + printResourcePressure(FOS, Usage / Executions, (J + 1) * 7); + } + + MCIP.printInst(&MCI, InstrStream, "", STI); + InstrStream.flush(); + StringRef Str(Instruction); + + // Remove any tabs or spaces at the beginning of the instruction. + Str = Str.ltrim(); + + FOS << Str << '\n'; + Instruction = ""; + + FOS.flush(); + OS << Buffer; + Buffer = ""; + + ++InstrIndex; + } +} +} // namespace mca +} // namespace llvm diff --git a/llvm/tools/llvm-mca/Views/ResourcePressureView.h b/llvm/tools/llvm-mca/Views/ResourcePressureView.h new file mode 100644 index 000000000000..0fa0b9a36aa3 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/ResourcePressureView.h @@ -0,0 +1,103 @@ +//===--------------------- ResourcePressureView.h ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file define class ResourcePressureView. +/// Class ResourcePressureView observes hardware events generated by +/// the Pipeline object and collects statistics related to resource usage at +/// instruction granularity. +/// Resource pressure information is then printed out to a stream in the +/// form of a table like the one from the example below: +/// +/// Resources: +/// [0] - JALU0 +/// [1] - JALU1 +/// [2] - JDiv +/// [3] - JFPM +/// [4] - JFPU0 +/// [5] - JFPU1 +/// [6] - JLAGU +/// [7] - JSAGU +/// [8] - JSTC +/// [9] - JVIMUL +/// +/// Resource pressure per iteration: +/// [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] +/// 0.00 0.00 0.00 0.00 2.00 2.00 0.00 0.00 0.00 0.00 +/// +/// Resource pressure by instruction: +/// [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: +/// - - - - - 1.00 - - - - vpermilpd $1, %xmm0, +/// %xmm1 +/// - - - - 1.00 - - - - - vaddps %xmm0, %xmm1, +/// %xmm2 +/// - - - - - 1.00 - - - - vmovshdup %xmm2, %xmm3 +/// - - - - 1.00 - - - - - vaddss %xmm2, %xmm3, +/// %xmm4 +/// +/// In this example, we have AVX code executed on AMD Jaguar (btver2). +/// Both shuffles and vector floating point add operations on XMM registers have +/// a reciprocal throughput of 1cy. +/// Each add is issued to pipeline JFPU0, while each shuffle is issued to +/// pipeline JFPU1. The overall pressure per iteration is reported by two +/// tables: the first smaller table is the resource pressure per iteration; +/// the second table reports resource pressure per instruction. Values are the +/// average resource cycles consumed by an instruction. +/// Every vector add from the example uses resource JFPU0 for an average of 1cy +/// per iteration. Consequently, the resource pressure on JFPU0 is of 2cy per +/// iteration. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_RESOURCEPRESSUREVIEW_H +#define LLVM_TOOLS_LLVM_MCA_RESOURCEPRESSUREVIEW_H + +#include "Views/View.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSubtargetInfo.h" + +namespace llvm { +namespace mca { + +/// This class collects resource pressure statistics and it is able to print +/// out all the collected information as a table to an output stream. +class ResourcePressureView : public View { + const llvm::MCSubtargetInfo &STI; + llvm::MCInstPrinter &MCIP; + llvm::ArrayRef<llvm::MCInst> Source; + unsigned LastInstructionIdx; + + // Map to quickly obtain the ResourceUsage column index from a processor + // resource ID. + llvm::DenseMap<unsigned, unsigned> Resource2VecIndex; + + // Table of resources used by instructions. + std::vector<ResourceCycles> ResourceUsage; + unsigned NumResourceUnits; + + void printResourcePressurePerIter(llvm::raw_ostream &OS) const; + void printResourcePressurePerInst(llvm::raw_ostream &OS) const; + +public: + ResourcePressureView(const llvm::MCSubtargetInfo &sti, + llvm::MCInstPrinter &Printer, + llvm::ArrayRef<llvm::MCInst> S); + + void onEvent(const HWInstructionEvent &Event) override; + void printView(llvm::raw_ostream &OS) const override { + printResourcePressurePerIter(OS); + printResourcePressurePerInst(OS); + } +}; +} // namespace mca +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp new file mode 100644 index 000000000000..cb4fbae78039 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp @@ -0,0 +1,90 @@ +//===--------------------- RetireControlUnitStatistics.cpp ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the RetireControlUnitStatistics interface. +/// +//===----------------------------------------------------------------------===// + +#include "Views/RetireControlUnitStatistics.h" +#include "llvm/Support/Format.h" + +namespace llvm { +namespace mca { + +RetireControlUnitStatistics::RetireControlUnitStatistics(const MCSchedModel &SM) + : NumRetired(0), NumCycles(0), EntriesInUse(0), MaxUsedEntries(0), + SumOfUsedEntries(0) { + TotalROBEntries = SM.MicroOpBufferSize; + if (SM.hasExtraProcessorInfo()) { + const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); + if (EPI.ReorderBufferSize) + TotalROBEntries = EPI.ReorderBufferSize; + } +} + +void RetireControlUnitStatistics::onEvent(const HWInstructionEvent &Event) { + if (Event.Type == HWInstructionEvent::Dispatched) { + unsigned NumEntries = + static_cast<const HWInstructionDispatchedEvent &>(Event).MicroOpcodes; + EntriesInUse += NumEntries; + } + + if (Event.Type == HWInstructionEvent::Retired) { + unsigned ReleasedEntries = Event.IR.getInstruction()->getDesc().NumMicroOps; + assert(EntriesInUse >= ReleasedEntries && "Invalid internal state!"); + EntriesInUse -= ReleasedEntries; + ++NumRetired; + } +} + +void RetireControlUnitStatistics::onCycleEnd() { + // Update histogram + RetiredPerCycle[NumRetired]++; + NumRetired = 0; + ++NumCycles; + MaxUsedEntries = std::max(MaxUsedEntries, EntriesInUse); + SumOfUsedEntries += EntriesInUse; +} + +void RetireControlUnitStatistics::printView(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + TempStream << "\n\nRetire Control Unit - " + << "number of cycles where we saw N instructions retired:\n"; + TempStream << "[# retired], [# cycles]\n"; + + for (const std::pair<unsigned, unsigned> &Entry : RetiredPerCycle) { + TempStream << " " << Entry.first; + if (Entry.first < 10) + TempStream << ", "; + else + TempStream << ", "; + TempStream << Entry.second << " (" + << format("%.1f", ((double)Entry.second / NumCycles) * 100.0) + << "%)\n"; + } + + unsigned AvgUsage = (double)SumOfUsedEntries / NumCycles; + double MaxUsagePercentage = ((double)MaxUsedEntries / TotalROBEntries) * 100.0; + double NormalizedMaxPercentage = floor((MaxUsagePercentage * 10) + 0.5) / 10; + double AvgUsagePercentage = ((double)AvgUsage / TotalROBEntries) * 100.0; + double NormalizedAvgPercentage = floor((AvgUsagePercentage * 10) + 0.5) / 10; + + TempStream << "\nTotal ROB Entries: " << TotalROBEntries + << "\nMax Used ROB Entries: " << MaxUsedEntries + << format(" ( %.1f%% )", NormalizedMaxPercentage) + << "\nAverage Used ROB Entries per cy: " << AvgUsage + << format(" ( %.1f%% )\n", NormalizedAvgPercentage); + + TempStream.flush(); + OS << Buffer; +} + +} // namespace mca +} // namespace llvm diff --git a/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h new file mode 100644 index 000000000000..1a4d3dec5c56 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h @@ -0,0 +1,60 @@ +//===--------------------- RetireControlUnitStatistics.h --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines class RetireControlUnitStatistics: a view that knows how +/// to print general statistics related to the retire control unit. +/// +/// Example: +/// ======== +/// +/// Retire Control Unit - number of cycles where we saw N instructions retired: +/// [# retired], [# cycles] +/// 0, 109 (17.9%) +/// 1, 102 (16.7%) +/// 2, 399 (65.4%) +/// +/// Total ROB Entries: 64 +/// Max Used ROB Entries: 35 ( 54.7% ) +/// Average Used ROB Entries per cy: 32 ( 50.0% ) +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H +#define LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H + +#include "Views/View.h" +#include "llvm/MC/MCSchedule.h" +#include <map> + +namespace llvm { +namespace mca { + +class RetireControlUnitStatistics : public View { + using Histogram = std::map<unsigned, unsigned>; + Histogram RetiredPerCycle; + + unsigned NumRetired; + unsigned NumCycles; + unsigned TotalROBEntries; + unsigned EntriesInUse; + unsigned MaxUsedEntries; + unsigned SumOfUsedEntries; + +public: + RetireControlUnitStatistics(const MCSchedModel &SM); + + void onEvent(const HWInstructionEvent &Event) override; + void onCycleEnd() override; + void printView(llvm::raw_ostream &OS) const override; +}; + +} // namespace mca +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp b/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp new file mode 100644 index 000000000000..bd0ba350ab68 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp @@ -0,0 +1,178 @@ +//===--------------------- SchedulerStatistics.cpp --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the SchedulerStatistics interface. +/// +//===----------------------------------------------------------------------===// + +#include "Views/SchedulerStatistics.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" + +namespace llvm { +namespace mca { + +SchedulerStatistics::SchedulerStatistics(const llvm::MCSubtargetInfo &STI) + : SM(STI.getSchedModel()), LQResourceID(0), SQResourceID(0), NumIssued(0), + NumCycles(0), MostRecentLoadDispatched(~0U), + MostRecentStoreDispatched(~0U), + Usage(STI.getSchedModel().NumProcResourceKinds, {0, 0, 0}) { + if (SM.hasExtraProcessorInfo()) { + const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); + LQResourceID = EPI.LoadQueueID; + SQResourceID = EPI.StoreQueueID; + } +} + +// FIXME: This implementation works under the assumption that load/store queue +// entries are reserved at 'instruction dispatched' stage, and released at +// 'instruction executed' stage. This currently matches the behavior of LSUnit. +// +// The current design minimizes the number of events generated by the +// Dispatch/Execute stages, at the cost of doing extra bookkeeping in method +// `onEvent`. However, it introduces a subtle dependency between this view and +// how the LSUnit works. +// +// In future we should add a new "memory queue" event type, so that we stop +// making assumptions on how LSUnit internally works (See PR39828). +void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) { + if (Event.Type == HWInstructionEvent::Issued) { + const Instruction &Inst = *Event.IR.getInstruction(); + NumIssued += Inst.getDesc().NumMicroOps; + } else if (Event.Type == HWInstructionEvent::Dispatched) { + const Instruction &Inst = *Event.IR.getInstruction(); + const unsigned Index = Event.IR.getSourceIndex(); + if (LQResourceID && Inst.getDesc().MayLoad && + MostRecentLoadDispatched != Index) { + Usage[LQResourceID].SlotsInUse++; + MostRecentLoadDispatched = Index; + } + if (SQResourceID && Inst.getDesc().MayStore && + MostRecentStoreDispatched != Index) { + Usage[SQResourceID].SlotsInUse++; + MostRecentStoreDispatched = Index; + } + } else if (Event.Type == HWInstructionEvent::Executed) { + const Instruction &Inst = *Event.IR.getInstruction(); + if (LQResourceID && Inst.getDesc().MayLoad) { + assert(Usage[LQResourceID].SlotsInUse); + Usage[LQResourceID].SlotsInUse--; + } + if (SQResourceID && Inst.getDesc().MayStore) { + assert(Usage[SQResourceID].SlotsInUse); + Usage[SQResourceID].SlotsInUse--; + } + } +} + +void SchedulerStatistics::onReservedBuffers(const InstRef & /* unused */, + ArrayRef<unsigned> Buffers) { + for (const unsigned Buffer : Buffers) { + if (Buffer == LQResourceID || Buffer == SQResourceID) + continue; + Usage[Buffer].SlotsInUse++; + } +} + +void SchedulerStatistics::onReleasedBuffers(const InstRef & /* unused */, + ArrayRef<unsigned> Buffers) { + for (const unsigned Buffer : Buffers) { + if (Buffer == LQResourceID || Buffer == SQResourceID) + continue; + Usage[Buffer].SlotsInUse--; + } +} + +void SchedulerStatistics::updateHistograms() { + for (BufferUsage &BU : Usage) { + BU.CumulativeNumUsedSlots += BU.SlotsInUse; + BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse); + } + + IssueWidthPerCycle[NumIssued]++; + NumIssued = 0; +} + +void SchedulerStatistics::printSchedulerStats(raw_ostream &OS) const { + OS << "\n\nSchedulers - " + << "number of cycles where we saw N micro opcodes issued:\n"; + OS << "[# issued], [# cycles]\n"; + + bool HasColors = OS.has_colors(); + const auto It = + std::max_element(IssueWidthPerCycle.begin(), IssueWidthPerCycle.end()); + for (const std::pair<unsigned, unsigned> &Entry : IssueWidthPerCycle) { + unsigned NumIssued = Entry.first; + if (NumIssued == It->first && HasColors) + OS.changeColor(raw_ostream::SAVEDCOLOR, true, false); + + unsigned IPC = Entry.second; + OS << " " << NumIssued << ", " << IPC << " (" + << format("%.1f", ((double)IPC / NumCycles) * 100) << "%)\n"; + if (HasColors) + OS.resetColor(); + } +} + +void SchedulerStatistics::printSchedulerUsage(raw_ostream &OS) const { + assert(NumCycles && "Unexpected number of cycles!"); + + OS << "\nScheduler's queue usage:\n"; + if (all_of(Usage, [](const BufferUsage &BU) { return !BU.MaxUsedSlots; })) { + OS << "No scheduler resources used.\n"; + return; + } + + OS << "[1] Resource name.\n" + << "[2] Average number of used buffer entries.\n" + << "[3] Maximum number of used buffer entries.\n" + << "[4] Total number of buffer entries.\n\n" + << " [1] [2] [3] [4]\n"; + + formatted_raw_ostream FOS(OS); + bool HasColors = FOS.has_colors(); + for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); + if (ProcResource.BufferSize <= 0) + continue; + + const BufferUsage &BU = Usage[I]; + double AvgUsage = (double)BU.CumulativeNumUsedSlots / NumCycles; + double AlmostFullThreshold = (double)(ProcResource.BufferSize * 4) / 5; + unsigned NormalizedAvg = floor((AvgUsage * 10) + 0.5) / 10; + unsigned NormalizedThreshold = floor((AlmostFullThreshold * 10) + 0.5) / 10; + + FOS << ProcResource.Name; + FOS.PadToColumn(17); + if (HasColors && NormalizedAvg >= NormalizedThreshold) + FOS.changeColor(raw_ostream::YELLOW, true, false); + FOS << NormalizedAvg; + if (HasColors) + FOS.resetColor(); + FOS.PadToColumn(28); + if (HasColors && + BU.MaxUsedSlots == static_cast<unsigned>(ProcResource.BufferSize)) + FOS.changeColor(raw_ostream::RED, true, false); + FOS << BU.MaxUsedSlots; + if (HasColors) + FOS.resetColor(); + FOS.PadToColumn(39); + FOS << ProcResource.BufferSize << '\n'; + } + + FOS.flush(); +} + +void SchedulerStatistics::printView(raw_ostream &OS) const { + printSchedulerStats(OS); + printSchedulerUsage(OS); +} + +} // namespace mca +} // namespace llvm diff --git a/llvm/tools/llvm-mca/Views/SchedulerStatistics.h b/llvm/tools/llvm-mca/Views/SchedulerStatistics.h new file mode 100644 index 000000000000..32711b4483b4 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/SchedulerStatistics.h @@ -0,0 +1,95 @@ +//===--------------------- SchedulerStatistics.h ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines class SchedulerStatistics. Class SchedulerStatistics is a +/// View that listens to instruction issue events in order to print general +/// statistics related to the hardware schedulers. +/// +/// Example: +/// ======== +/// +/// Schedulers - number of cycles where we saw N instructions issued: +/// [# issued], [# cycles] +/// 0, 6 (2.9%) +/// 1, 106 (50.7%) +/// 2, 97 (46.4%) +/// +/// Scheduler's queue usage: +/// [1] Resource name. +/// [2] Average number of used buffer entries. +/// [3] Maximum number of used buffer entries. +/// [4] Total number of buffer entries. +/// +/// [1] [2] [3] [4] +/// JALU01 0 0 20 +/// JFPU01 15 18 18 +/// JLSAGU 0 0 12 +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H +#define LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H + +#include "Views/View.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include <map> + +namespace llvm { +namespace mca { + +class SchedulerStatistics final : public View { + const llvm::MCSchedModel &SM; + unsigned LQResourceID; + unsigned SQResourceID; + + unsigned NumIssued; + unsigned NumCycles; + + unsigned MostRecentLoadDispatched; + unsigned MostRecentStoreDispatched; + + // Tracks the usage of a scheduler's queue. + struct BufferUsage { + unsigned SlotsInUse; + unsigned MaxUsedSlots; + uint64_t CumulativeNumUsedSlots; + }; + + using Histogram = std::map<unsigned, unsigned>; + Histogram IssueWidthPerCycle; + + std::vector<BufferUsage> Usage; + + void updateHistograms(); + void printSchedulerStats(llvm::raw_ostream &OS) const; + void printSchedulerUsage(llvm::raw_ostream &OS) const; + +public: + SchedulerStatistics(const llvm::MCSubtargetInfo &STI); + void onEvent(const HWInstructionEvent &Event) override; + void onCycleBegin() override { NumCycles++; } + void onCycleEnd() override { updateHistograms(); } + + // Increases the number of used scheduler queue slots of every buffered + // resource in the Buffers set. + void onReservedBuffers(const InstRef &IR, + llvm::ArrayRef<unsigned> Buffers) override; + + // Decreases by one the number of used scheduler queue slots of every + // buffered resource in the Buffers set. + void onReleasedBuffers(const InstRef &IR, + llvm::ArrayRef<unsigned> Buffers) override; + + void printView(llvm::raw_ostream &OS) const override; +}; +} // namespace mca +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-mca/Views/SummaryView.cpp b/llvm/tools/llvm-mca/Views/SummaryView.cpp new file mode 100644 index 000000000000..ef5550048f4c --- /dev/null +++ b/llvm/tools/llvm-mca/Views/SummaryView.cpp @@ -0,0 +1,94 @@ +//===--------------------- SummaryView.cpp -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the functionalities used by the SummaryView to print +/// the report information. +/// +//===----------------------------------------------------------------------===// + +#include "Views/SummaryView.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MCA/Support.h" +#include "llvm/Support/Format.h" + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +SummaryView::SummaryView(const MCSchedModel &Model, ArrayRef<MCInst> S, + unsigned Width) + : SM(Model), Source(S), DispatchWidth(Width?Width: Model.IssueWidth), + LastInstructionIdx(0), + TotalCycles(0), NumMicroOps(0), + ProcResourceUsage(Model.getNumProcResourceKinds(), 0), + ProcResourceMasks(Model.getNumProcResourceKinds()), + ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0) { + computeProcResourceMasks(SM, ProcResourceMasks); + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { + unsigned Index = getResourceStateIndex(ProcResourceMasks[I]); + ResIdx2ProcResID[Index] = I; + } +} + +void SummaryView::onEvent(const HWInstructionEvent &Event) { + if (Event.Type == HWInstructionEvent::Dispatched) + LastInstructionIdx = Event.IR.getSourceIndex(); + + // We are only interested in the "instruction retired" events generated by + // the retire stage for instructions that are part of iteration #0. + if (Event.Type != HWInstructionEvent::Retired || + Event.IR.getSourceIndex() >= Source.size()) + return; + + // Update the cumulative number of resource cycles based on the processor + // resource usage information available from the instruction descriptor. We + // need to compute the cumulative number of resource cycles for every + // processor resource which is consumed by an instruction of the block. + const Instruction &Inst = *Event.IR.getInstruction(); + const InstrDesc &Desc = Inst.getDesc(); + NumMicroOps += Desc.NumMicroOps; + for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) { + if (RU.second.size()) { + unsigned ProcResID = ResIdx2ProcResID[getResourceStateIndex(RU.first)]; + ProcResourceUsage[ProcResID] += RU.second.size(); + } + } +} + +void SummaryView::printView(raw_ostream &OS) const { + unsigned Instructions = Source.size(); + unsigned Iterations = (LastInstructionIdx / Instructions) + 1; + unsigned TotalInstructions = Instructions * Iterations; + unsigned TotalUOps = NumMicroOps * Iterations; + double IPC = (double)TotalInstructions / TotalCycles; + double UOpsPerCycle = (double)TotalUOps / TotalCycles; + double BlockRThroughput = computeBlockRThroughput( + SM, DispatchWidth, NumMicroOps, ProcResourceUsage); + + std::string Buffer; + raw_string_ostream TempStream(Buffer); + TempStream << "Iterations: " << Iterations; + TempStream << "\nInstructions: " << TotalInstructions; + TempStream << "\nTotal Cycles: " << TotalCycles; + TempStream << "\nTotal uOps: " << TotalUOps << '\n'; + TempStream << "\nDispatch Width: " << DispatchWidth; + TempStream << "\nuOps Per Cycle: " + << format("%.2f", floor((UOpsPerCycle * 100) + 0.5) / 100); + TempStream << "\nIPC: " + << format("%.2f", floor((IPC * 100) + 0.5) / 100); + TempStream << "\nBlock RThroughput: " + << format("%.1f", floor((BlockRThroughput * 10) + 0.5) / 10) + << '\n'; + TempStream.flush(); + OS << Buffer; +} + +} // namespace mca. +} // namespace llvm diff --git a/llvm/tools/llvm-mca/Views/SummaryView.h b/llvm/tools/llvm-mca/Views/SummaryView.h new file mode 100644 index 000000000000..9be31b7d51bd --- /dev/null +++ b/llvm/tools/llvm-mca/Views/SummaryView.h @@ -0,0 +1,80 @@ +//===--------------------- SummaryView.h ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the summary view. +/// +/// The goal of the summary view is to give a very quick overview of the +/// performance throughput. Below is an example of summary view: +/// +/// +/// Iterations: 300 +/// Instructions: 900 +/// Total Cycles: 610 +/// Dispatch Width: 2 +/// IPC: 1.48 +/// Block RThroughput: 2.0 +/// +/// The summary view collects a few performance numbers. The two main +/// performance indicators are 'Total Cycles' and IPC (Instructions Per Cycle). +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H +#define LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H + +#include "Views/View.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +/// A view that collects and prints a few performance numbers. +class SummaryView : public View { + const llvm::MCSchedModel &SM; + llvm::ArrayRef<llvm::MCInst> Source; + const unsigned DispatchWidth; + unsigned LastInstructionIdx; + unsigned TotalCycles; + // The total number of micro opcodes contributed by a block of instructions. + unsigned NumMicroOps; + + // For each processor resource, this vector stores the cumulative number of + // resource cycles consumed by the analyzed code block. + llvm::SmallVector<unsigned, 8> ProcResourceUsage; + + // Each processor resource is associated with a so-called processor resource + // mask. This vector allows to correlate processor resource IDs with processor + // resource masks. There is exactly one element per each processor resource + // declared by the scheduling model. + llvm::SmallVector<uint64_t, 8> ProcResourceMasks; + + // Used to map resource indices to actual processor resource IDs. + llvm::SmallVector<unsigned, 8> ResIdx2ProcResID; + + // Compute the reciprocal throughput for the analyzed code block. + // The reciprocal block throughput is computed as the MAX between: + // - NumMicroOps / DispatchWidth + // - Total Resource Cycles / #Units (for every resource consumed). + double getBlockRThroughput() const; + +public: + SummaryView(const llvm::MCSchedModel &Model, llvm::ArrayRef<llvm::MCInst> S, + unsigned Width); + + void onCycleEnd() override { ++TotalCycles; } + void onEvent(const HWInstructionEvent &Event) override; + void printView(llvm::raw_ostream &OS) const override; +}; + +} // namespace mca +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-mca/Views/TimelineView.cpp b/llvm/tools/llvm-mca/Views/TimelineView.cpp new file mode 100644 index 000000000000..1e7caa297ac6 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/TimelineView.cpp @@ -0,0 +1,325 @@ +//===--------------------- TimelineView.cpp ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \brief +/// +/// This file implements the TimelineView interface. +/// +//===----------------------------------------------------------------------===// + +#include "Views/TimelineView.h" +#include <numeric> + +namespace llvm { +namespace mca { + +TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer, + llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations, + unsigned Cycles) + : STI(sti), MCIP(Printer), Source(S), CurrentCycle(0), + MaxCycle(Cycles == 0 ? 80 : Cycles), LastCycle(0), WaitTime(S.size()), + UsedBuffer(S.size()) { + unsigned NumInstructions = Source.size(); + assert(Iterations && "Invalid number of iterations specified!"); + NumInstructions *= Iterations; + Timeline.resize(NumInstructions); + TimelineViewEntry InvalidTVEntry = {-1, 0, 0, 0, 0}; + std::fill(Timeline.begin(), Timeline.end(), InvalidTVEntry); + + WaitTimeEntry NullWTEntry = {0, 0, 0}; + std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry); + + std::pair<unsigned, int> NullUsedBufferEntry = {/* Invalid resource ID*/ 0, + /* unknown buffer size */ -1}; + std::fill(UsedBuffer.begin(), UsedBuffer.end(), NullUsedBufferEntry); +} + +void TimelineView::onReservedBuffers(const InstRef &IR, + ArrayRef<unsigned> Buffers) { + if (IR.getSourceIndex() >= Source.size()) + return; + + const MCSchedModel &SM = STI.getSchedModel(); + std::pair<unsigned, int> BufferInfo = {0, -1}; + for (const unsigned Buffer : Buffers) { + const MCProcResourceDesc &MCDesc = *SM.getProcResource(Buffer); + if (!BufferInfo.first || BufferInfo.second > MCDesc.BufferSize) { + BufferInfo.first = Buffer; + BufferInfo.second = MCDesc.BufferSize; + } + } + + UsedBuffer[IR.getSourceIndex()] = BufferInfo; +} + +void TimelineView::onEvent(const HWInstructionEvent &Event) { + const unsigned Index = Event.IR.getSourceIndex(); + if (Index >= Timeline.size()) + return; + + switch (Event.Type) { + case HWInstructionEvent::Retired: { + TimelineViewEntry &TVEntry = Timeline[Index]; + if (CurrentCycle < MaxCycle) + TVEntry.CycleRetired = CurrentCycle; + + // Update the WaitTime entry which corresponds to this Index. + assert(TVEntry.CycleDispatched >= 0 && "Invalid TVEntry found!"); + unsigned CycleDispatched = static_cast<unsigned>(TVEntry.CycleDispatched); + WaitTimeEntry &WTEntry = WaitTime[Index % Source.size()]; + WTEntry.CyclesSpentInSchedulerQueue += + TVEntry.CycleIssued - CycleDispatched; + assert(CycleDispatched <= TVEntry.CycleReady && + "Instruction cannot be ready if it hasn't been dispatched yet!"); + WTEntry.CyclesSpentInSQWhileReady += + TVEntry.CycleIssued - TVEntry.CycleReady; + WTEntry.CyclesSpentAfterWBAndBeforeRetire += + (CurrentCycle - 1) - TVEntry.CycleExecuted; + break; + } + case HWInstructionEvent::Ready: + Timeline[Index].CycleReady = CurrentCycle; + break; + case HWInstructionEvent::Issued: + Timeline[Index].CycleIssued = CurrentCycle; + break; + case HWInstructionEvent::Executed: + Timeline[Index].CycleExecuted = CurrentCycle; + break; + case HWInstructionEvent::Dispatched: + // There may be multiple dispatch events. Microcoded instructions that are + // expanded into multiple uOps may require multiple dispatch cycles. Here, + // we want to capture the first dispatch cycle. + if (Timeline[Index].CycleDispatched == -1) + Timeline[Index].CycleDispatched = static_cast<int>(CurrentCycle); + break; + default: + return; + } + if (CurrentCycle < MaxCycle) + LastCycle = std::max(LastCycle, CurrentCycle); +} + +static raw_ostream::Colors chooseColor(unsigned CumulativeCycles, + unsigned Executions, int BufferSize) { + if (CumulativeCycles && BufferSize < 0) + return raw_ostream::MAGENTA; + unsigned Size = static_cast<unsigned>(BufferSize); + if (CumulativeCycles >= Size * Executions) + return raw_ostream::RED; + if ((CumulativeCycles * 2) >= Size * Executions) + return raw_ostream::YELLOW; + return raw_ostream::SAVEDCOLOR; +} + +static void tryChangeColor(raw_ostream &OS, unsigned Cycles, + unsigned Executions, int BufferSize) { + if (!OS.has_colors()) + return; + + raw_ostream::Colors Color = chooseColor(Cycles, Executions, BufferSize); + if (Color == raw_ostream::SAVEDCOLOR) { + OS.resetColor(); + return; + } + OS.changeColor(Color, /* bold */ true, /* BG */ false); +} + +void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS, + const WaitTimeEntry &Entry, + unsigned SourceIndex, + unsigned Executions) const { + bool PrintingTotals = SourceIndex == Source.size(); + unsigned CumulativeExecutions = PrintingTotals ? Timeline.size() : Executions; + + if (!PrintingTotals) + OS << SourceIndex << '.'; + + OS.PadToColumn(7); + + double AverageTime1, AverageTime2, AverageTime3; + AverageTime1 = + (double)Entry.CyclesSpentInSchedulerQueue / CumulativeExecutions; + AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / CumulativeExecutions; + AverageTime3 = + (double)Entry.CyclesSpentAfterWBAndBeforeRetire / CumulativeExecutions; + + OS << Executions; + OS.PadToColumn(13); + + int BufferSize = PrintingTotals ? 0 : UsedBuffer[SourceIndex].second; + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions, + BufferSize); + OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10); + OS.PadToColumn(20); + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions, + BufferSize); + OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10); + OS.PadToColumn(27); + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, + CumulativeExecutions, STI.getSchedModel().MicroOpBufferSize); + OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10); + + if (OS.has_colors()) + OS.resetColor(); + OS.PadToColumn(34); +} + +void TimelineView::printAverageWaitTimes(raw_ostream &OS) const { + std::string Header = + "\n\nAverage Wait times (based on the timeline view):\n" + "[0]: Executions\n" + "[1]: Average time spent waiting in a scheduler's queue\n" + "[2]: Average time spent waiting in a scheduler's queue while ready\n" + "[3]: Average time elapsed from WB until retire stage\n\n" + " [0] [1] [2] [3]\n"; + OS << Header; + + // Use a different string stream for printing instructions. + std::string Instruction; + raw_string_ostream InstrStream(Instruction); + + formatted_raw_ostream FOS(OS); + unsigned Executions = Timeline.size() / Source.size(); + unsigned IID = 0; + for (const MCInst &Inst : Source) { + printWaitTimeEntry(FOS, WaitTime[IID], IID, Executions); + // Append the instruction info at the end of the line. + MCIP.printInst(&Inst, InstrStream, "", STI); + InstrStream.flush(); + + // Consume any tabs or spaces at the beginning of the string. + StringRef Str(Instruction); + Str = Str.ltrim(); + FOS << " " << Str << '\n'; + FOS.flush(); + Instruction = ""; + + ++IID; + } + + // If the timeline contains more than one instruction, + // let's also print global averages. + if (Source.size() != 1) { + WaitTimeEntry TotalWaitTime = std::accumulate( + WaitTime.begin(), WaitTime.end(), WaitTimeEntry{0, 0, 0}, + [](const WaitTimeEntry &A, const WaitTimeEntry &B) { + return WaitTimeEntry{ + A.CyclesSpentInSchedulerQueue + B.CyclesSpentInSchedulerQueue, + A.CyclesSpentInSQWhileReady + B.CyclesSpentInSQWhileReady, + A.CyclesSpentAfterWBAndBeforeRetire + + B.CyclesSpentAfterWBAndBeforeRetire}; + }); + printWaitTimeEntry(FOS, TotalWaitTime, IID, Executions); + FOS << " " + << "<total>" << '\n'; + InstrStream.flush(); + } +} + +void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS, + const TimelineViewEntry &Entry, + unsigned Iteration, + unsigned SourceIndex) const { + if (Iteration == 0 && SourceIndex == 0) + OS << '\n'; + OS << '[' << Iteration << ',' << SourceIndex << ']'; + OS.PadToColumn(10); + assert(Entry.CycleDispatched >= 0 && "Invalid TimelineViewEntry!"); + unsigned CycleDispatched = static_cast<unsigned>(Entry.CycleDispatched); + for (unsigned I = 0, E = CycleDispatched; I < E; ++I) + OS << ((I % 5 == 0) ? '.' : ' '); + OS << TimelineView::DisplayChar::Dispatched; + if (CycleDispatched != Entry.CycleExecuted) { + // Zero latency instructions have the same value for CycleDispatched, + // CycleIssued and CycleExecuted. + for (unsigned I = CycleDispatched + 1, E = Entry.CycleIssued; I < E; ++I) + OS << TimelineView::DisplayChar::Waiting; + if (Entry.CycleIssued == Entry.CycleExecuted) + OS << TimelineView::DisplayChar::DisplayChar::Executed; + else { + if (CycleDispatched != Entry.CycleIssued) + OS << TimelineView::DisplayChar::Executing; + for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E; + ++I) + OS << TimelineView::DisplayChar::Executing; + OS << TimelineView::DisplayChar::Executed; + } + } + + for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I) + OS << TimelineView::DisplayChar::RetireLag; + OS << TimelineView::DisplayChar::Retired; + + // Skip other columns. + for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I) + OS << ((I % 5 == 0 || I == LastCycle) ? '.' : ' '); +} + +static void printTimelineHeader(formatted_raw_ostream &OS, unsigned Cycles) { + OS << "\n\nTimeline view:\n"; + if (Cycles >= 10) { + OS.PadToColumn(10); + for (unsigned I = 0; I <= Cycles; ++I) { + if (((I / 10) & 1) == 0) + OS << ' '; + else + OS << I % 10; + } + OS << '\n'; + } + + OS << "Index"; + OS.PadToColumn(10); + for (unsigned I = 0; I <= Cycles; ++I) { + if (((I / 10) & 1) == 0) + OS << I % 10; + else + OS << ' '; + } + OS << '\n'; +} + +void TimelineView::printTimeline(raw_ostream &OS) const { + formatted_raw_ostream FOS(OS); + printTimelineHeader(FOS, LastCycle); + FOS.flush(); + + // Use a different string stream for the instruction. + std::string Instruction; + raw_string_ostream InstrStream(Instruction); + + unsigned IID = 0; + const unsigned Iterations = Timeline.size() / Source.size(); + for (unsigned Iteration = 0; Iteration < Iterations; ++Iteration) { + for (const MCInst &Inst : Source) { + const TimelineViewEntry &Entry = Timeline[IID]; + if (Entry.CycleRetired == 0) + return; + + unsigned SourceIndex = IID % Source.size(); + printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex); + // Append the instruction info at the end of the line. + MCIP.printInst(&Inst, InstrStream, "", STI); + InstrStream.flush(); + + // Consume any tabs or spaces at the beginning of the string. + StringRef Str(Instruction); + Str = Str.ltrim(); + FOS << " " << Str << '\n'; + FOS.flush(); + Instruction = ""; + + ++IID; + } + } +} +} // namespace mca +} // namespace llvm diff --git a/llvm/tools/llvm-mca/Views/TimelineView.h b/llvm/tools/llvm-mca/Views/TimelineView.h new file mode 100644 index 000000000000..9bec3b87db45 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/TimelineView.h @@ -0,0 +1,189 @@ +//===--------------------- TimelineView.h -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \brief +/// +/// This file implements a timeline view for the llvm-mca tool. +/// +/// Class TimelineView observes events generated by the pipeline. For every +/// instruction executed by the pipeline, it stores information related to +/// state transition. It then plots that information in the form of a table +/// as reported by the example below: +/// +/// Timeline view: +/// 0123456 +/// Index 0123456789 +/// +/// [0,0] DeER . . .. vmovshdup %xmm0, %xmm1 +/// [0,1] DeER . . .. vpermilpd $1, %xmm0, %xmm2 +/// [0,2] .DeER. . .. vpermilps $231, %xmm0, %xmm5 +/// [0,3] .DeeeER . .. vaddss %xmm1, %xmm0, %xmm3 +/// [0,4] . D==eeeER. .. vaddss %xmm3, %xmm2, %xmm4 +/// [0,5] . D=====eeeER .. vaddss %xmm4, %xmm5, %xmm6 +/// +/// [1,0] . DeE------R .. vmovshdup %xmm0, %xmm1 +/// [1,1] . DeE------R .. vpermilpd $1, %xmm0, %xmm2 +/// [1,2] . DeE-----R .. vpermilps $231, %xmm0, %xmm5 +/// [1,3] . D=eeeE--R .. vaddss %xmm1, %xmm0, %xmm3 +/// [1,4] . D===eeeER .. vaddss %xmm3, %xmm2, %xmm4 +/// [1,5] . D======eeeER vaddss %xmm4, %xmm5, %xmm6 +/// +/// There is an entry for every instruction in the input assembly sequence. +/// The first field is a pair of numbers obtained from the instruction index. +/// The first element of the pair is the iteration index, while the second +/// element of the pair is a sequence number (i.e. a position in the assembly +/// sequence). +/// The second field of the table is the actual timeline information; each +/// column is the information related to a specific cycle of execution. +/// The timeline of an instruction is described by a sequence of character +/// where each character represents the instruction state at a specific cycle. +/// +/// Possible instruction states are: +/// D: Instruction Dispatched +/// e: Instruction Executing +/// E: Instruction Executed (write-back stage) +/// R: Instruction retired +/// =: Instruction waiting in the Scheduler's queue +/// -: Instruction executed, waiting to retire in order. +/// +/// dots ('.') and empty spaces are cycles where the instruction is not +/// in-flight. +/// +/// The last column is the assembly instruction associated to the entry. +/// +/// Based on the timeline view information from the example, instruction 0 +/// at iteration 0 was dispatched at cycle 0, and was retired at cycle 3. +/// Instruction [0,1] was also dispatched at cycle 0, and it retired at +/// the same cycle than instruction [0,0]. +/// Instruction [0,4] has been dispatched at cycle 2. However, it had to +/// wait for two cycles before being issued. That is because operands +/// became ready only at cycle 5. +/// +/// This view helps further understanding bottlenecks and the impact of +/// resource pressure on the code. +/// +/// To better understand why instructions had to wait for multiple cycles in +/// the scheduler's queue, class TimelineView also reports extra timing info +/// in another table named "Average Wait times" (see example below). +/// +/// +/// Average Wait times (based on the timeline view): +/// [0]: Executions +/// [1]: Average time spent waiting in a scheduler's queue +/// [2]: Average time spent waiting in a scheduler's queue while ready +/// [3]: Average time elapsed from WB until retire stage +/// +/// [0] [1] [2] [3] +/// 0. 2 1.0 1.0 3.0 vmovshdup %xmm0, %xmm1 +/// 1. 2 1.0 1.0 3.0 vpermilpd $1, %xmm0, %xmm2 +/// 2. 2 1.0 1.0 2.5 vpermilps $231, %xmm0, %xmm5 +/// 3. 2 1.5 0.5 1.0 vaddss %xmm1, %xmm0, %xmm3 +/// 4. 2 3.5 0.0 0.0 vaddss %xmm3, %xmm2, %xmm4 +/// 5. 2 6.5 0.0 0.0 vaddss %xmm4, %xmm5, %xmm6 +/// 2 2.4 0.6 1.6 <total> +/// +/// By comparing column [2] with column [1], we get an idea about how many +/// cycles were spent in the scheduler's queue due to data dependencies. +/// +/// In this example, instruction 5 spent an average of ~6 cycles in the +/// scheduler's queue. As soon as operands became ready, the instruction +/// was immediately issued to the pipeline(s). +/// That is expected because instruction 5 cannot transition to the "ready" +/// state until %xmm4 is written by instruction 4. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H +#define LLVM_TOOLS_LLVM_MCA_TIMELINEVIEW_H + +#include "Views/View.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +/// This class listens to instruction state transition events +/// in order to construct a timeline information. +/// +/// For every instruction executed by the Pipeline, this class constructs +/// a TimelineViewEntry object. TimelineViewEntry objects are then used +/// to print the timeline information, as well as the "average wait times" +/// for every instruction in the input assembly sequence. +class TimelineView : public View { + const llvm::MCSubtargetInfo &STI; + llvm::MCInstPrinter &MCIP; + llvm::ArrayRef<llvm::MCInst> Source; + + unsigned CurrentCycle; + unsigned MaxCycle; + unsigned LastCycle; + + struct TimelineViewEntry { + int CycleDispatched; // A negative value is an "invalid cycle". + unsigned CycleReady; + unsigned CycleIssued; + unsigned CycleExecuted; + unsigned CycleRetired; + }; + std::vector<TimelineViewEntry> Timeline; + + struct WaitTimeEntry { + unsigned CyclesSpentInSchedulerQueue; + unsigned CyclesSpentInSQWhileReady; + unsigned CyclesSpentAfterWBAndBeforeRetire; + }; + std::vector<WaitTimeEntry> WaitTime; + + // This field is used to map instructions to buffered resources. + // Elements of this vector are <resourceID, BufferSizer> pairs. + std::vector<std::pair<unsigned, int>> UsedBuffer; + + void printTimelineViewEntry(llvm::formatted_raw_ostream &OS, + const TimelineViewEntry &E, unsigned Iteration, + unsigned SourceIndex) const; + void printWaitTimeEntry(llvm::formatted_raw_ostream &OS, + const WaitTimeEntry &E, unsigned Index, + unsigned Executions) const; + + // Display characters for the TimelineView report output. + struct DisplayChar { + static const char Dispatched = 'D'; + static const char Executed = 'E'; + static const char Retired = 'R'; + static const char Waiting = '='; // Instruction is waiting in the scheduler. + static const char Executing = 'e'; + static const char RetireLag = '-'; // The instruction is waiting to retire. + }; + +public: + TimelineView(const llvm::MCSubtargetInfo &sti, llvm::MCInstPrinter &Printer, + llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations, + unsigned Cycles); + + // Event handlers. + void onCycleEnd() override { ++CurrentCycle; } + void onEvent(const HWInstructionEvent &Event) override; + void onReservedBuffers(const InstRef &IR, + llvm::ArrayRef<unsigned> Buffers) override; + + // print functionalities. + void printTimeline(llvm::raw_ostream &OS) const; + void printAverageWaitTimes(llvm::raw_ostream &OS) const; + void printView(llvm::raw_ostream &OS) const override { + printTimeline(OS); + printAverageWaitTimes(OS); + } +}; +} // namespace mca +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-mca/Views/View.cpp b/llvm/tools/llvm-mca/Views/View.cpp new file mode 100644 index 000000000000..8e5c34d2d5c2 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/View.cpp @@ -0,0 +1,21 @@ +//===----------------------- View.cpp ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the virtual anchor method in View.h to pin the vtable. +/// +//===----------------------------------------------------------------------===// + +#include "Views/View.h" + +namespace llvm { +namespace mca { + +void View::anchor() {} +} // namespace mca +} // namespace llvm diff --git a/llvm/tools/llvm-mca/Views/View.h b/llvm/tools/llvm-mca/Views/View.h new file mode 100644 index 000000000000..3b52511b4d29 --- /dev/null +++ b/llvm/tools/llvm-mca/Views/View.h @@ -0,0 +1,33 @@ +//===----------------------- View.h -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the main interface for Views. Each view contributes a +/// portion of the final report generated by the tool. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_VIEW_H +#define LLVM_TOOLS_LLVM_MCA_VIEW_H + +#include "llvm/MCA/HWEventListener.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +class View : public HWEventListener { +public: + virtual void printView(llvm::raw_ostream &OS) const = 0; + virtual ~View() = default; + void anchor() override; +}; +} // namespace mca +} // namespace llvm + +#endif |
