diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-10-23 17:51:42 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-10-23 17:51:42 +0000 |
commit | 1d5ae1026e831016fc29fd927877c86af904481f (patch) | |
tree | 2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /tools/llvm-mca/Views | |
parent | e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff) |
Notes
Diffstat (limited to 'tools/llvm-mca/Views')
-rw-r--r-- | tools/llvm-mca/Views/BottleneckAnalysis.cpp | 40 | ||||
-rw-r--r-- | tools/llvm-mca/Views/BottleneckAnalysis.h | 8 | ||||
-rw-r--r-- | tools/llvm-mca/Views/InstructionInfoView.cpp | 31 | ||||
-rw-r--r-- | tools/llvm-mca/Views/InstructionInfoView.h | 13 | ||||
-rw-r--r-- | tools/llvm-mca/Views/TimelineView.cpp | 50 | ||||
-rw-r--r-- | tools/llvm-mca/Views/TimelineView.h | 1 |
6 files changed, 116 insertions, 27 deletions
diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/tools/llvm-mca/Views/BottleneckAnalysis.cpp index 560c6c6e8a33..feff0cd6d524 100644 --- a/tools/llvm-mca/Views/BottleneckAnalysis.cpp +++ b/tools/llvm-mca/Views/BottleneckAnalysis.cpp @@ -165,10 +165,33 @@ void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, "Unsupported dependency type!"); OS << " - RESOURCE MASK: " << DE.ResourceOrRegID; } - OS << " - CYCLES: " << DE.Cost << '\n'; + OS << " - COST: " << DE.Cost << '\n'; } #endif // NDEBUG +void DependencyGraph::pruneEdges(unsigned Iterations) { + for (DGNode &N : Nodes) { + unsigned NumPruned = 0; + const unsigned Size = N.OutgoingEdges.size(); + // Use a cut-off threshold to prune edges with a low frequency. + for (unsigned I = 0, E = Size; I < E; ++I) { + DependencyEdge &Edge = N.OutgoingEdges[I]; + if (Edge.Frequency == Iterations) + continue; + double Factor = (double)Edge.Frequency / Iterations; + if (0.10 < Factor) + continue; + Nodes[Edge.ToIID].NumPredecessors--; + std::swap(Edge, N.OutgoingEdges[E - 1]); + --E; + ++NumPruned; + } + + if (NumPruned) + N.OutgoingEdges.resize(Size - NumPruned); + } +} + void DependencyGraph::initializeRootSet( SmallVectorImpl<unsigned> &RootSet) const { for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { @@ -179,7 +202,7 @@ void DependencyGraph::initializeRootSet( } void DependencyGraph::propagateThroughEdges( - SmallVectorImpl<unsigned> &RootSet) { + SmallVectorImpl<unsigned> &RootSet, unsigned Iterations) { SmallVector<unsigned, 8> ToVisit; // A critical sequence is computed as the longest path from a node of the @@ -189,6 +212,10 @@ void DependencyGraph::propagateThroughEdges( // Each node of the graph starts with an initial default cost of zero. The // cost of a node is a measure of criticality: the higher the cost, the bigger // is the performance impact. + // For register and memory dependencies, the cost is a function of the write + // latency as well as the actual delay (in cycles) caused to users. + // For processor resource dependencies, the cost is a function of the resource + // pressure. Resource interferences with low frequency values are ignored. // // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of // the inner loop selects (i.e. visits) a node N from a set of `unvisited @@ -277,6 +304,10 @@ static void printInstruction(formatted_raw_ostream &FOS, } void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const { + // Early exit if no bottlenecks were found during the simulation. + if (!SeenStallCycles || !BPI.PressureIncreaseCycles) + return; + SmallVector<const DependencyEdge *, 16> Seq; DG.getCriticalSequence(Seq); if (Seq.empty()) @@ -432,7 +463,6 @@ void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To, bool IsLoopCarried = From >= To; unsigned SourceSize = Source.size(); if (IsLoopCarried) { - Cost *= Iterations / 2; DG.addRegisterDep(From, To + SourceSize, RegID, Cost); DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost); return; @@ -445,7 +475,6 @@ void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, bool IsLoopCarried = From >= To; unsigned SourceSize = Source.size(); if (IsLoopCarried) { - Cost *= Iterations / 2; DG.addMemoryDep(From, To + SourceSize, Cost); DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost); return; @@ -458,7 +487,6 @@ void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To, bool IsLoopCarried = From >= To; unsigned SourceSize = Source.size(); if (IsLoopCarried) { - Cost *= Iterations / 2; DG.addResourceDep(From, To + SourceSize, Mask, Cost); DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost); return; @@ -514,7 +542,7 @@ void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { // Check if this is the last simulated instruction. if (IID == ((Iterations * Source.size()) - 1)) - DG.finalizeGraph(); + DG.finalizeGraph(Iterations); } void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) { diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.h b/tools/llvm-mca/Views/BottleneckAnalysis.h index 7564b1a48206..9e3bd5978f09 100644 --- a/tools/llvm-mca/Views/BottleneckAnalysis.h +++ b/tools/llvm-mca/Views/BottleneckAnalysis.h @@ -236,8 +236,9 @@ class DependencyGraph { void addDependency(unsigned From, unsigned To, DependencyEdge::Dependency &&DE); + void pruneEdges(unsigned Iterations); void initializeRootSet(SmallVectorImpl<unsigned> &RootSet) const; - void propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet); + void propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet, unsigned Iterations); #ifndef NDEBUG void dumpDependencyEdge(raw_ostream &OS, const DependencyEdge &DE, @@ -263,10 +264,11 @@ public: // Called by the bottleneck analysis at the end of simulation to propagate // costs through the edges of the graph, and compute a critical path. - void finalizeGraph() { + void finalizeGraph(unsigned Iterations) { SmallVector<unsigned, 16> RootSet; + pruneEdges(Iterations); initializeRootSet(RootSet); - propagateThroughEdges(RootSet); + propagateThroughEdges(RootSet, Iterations); } // Returns a sequence of edges representing the critical sequence based on the diff --git a/tools/llvm-mca/Views/InstructionInfoView.cpp b/tools/llvm-mca/Views/InstructionInfoView.cpp index 1fbffa3e5b69..a6f9153b4945 100644 --- a/tools/llvm-mca/Views/InstructionInfoView.cpp +++ b/tools/llvm-mca/Views/InstructionInfoView.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "Views/InstructionInfoView.h" +#include "llvm/Support/FormattedStream.h" namespace llvm { namespace mca { @@ -26,10 +27,17 @@ void InstructionInfoView::printView(raw_ostream &OS) const { TempStream << "\n\nInstruction Info:\n"; TempStream << "[1]: #uOps\n[2]: Latency\n[3]: RThroughput\n" - << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n\n"; + << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n"; + if (PrintEncodings) { + TempStream << "[7]: Encoding Size\n"; + TempStream << "\n[1] [2] [3] [4] [5] [6] [7] " + << "Encodings: Instructions:\n"; + } else { + TempStream << "\n[1] [2] [3] [4] [5] [6] Instructions:\n"; + } - TempStream << "[1] [2] [3] [4] [5] [6] Instructions:\n"; - for (const MCInst &Inst : Source) { + for (unsigned I = 0, E = Source.size(); I < E; ++I) { + const MCInst &Inst = Source[I]; const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode()); // Obtain the scheduling class information from the instruction. @@ -72,7 +80,20 @@ void InstructionInfoView::printView(raw_ostream &OS) const { } TempStream << (MCDesc.mayLoad() ? " * " : " "); TempStream << (MCDesc.mayStore() ? " * " : " "); - TempStream << (MCDesc.hasUnmodeledSideEffects() ? " U " : " "); + TempStream << (MCDesc.hasUnmodeledSideEffects() ? " U " : " "); + + if (PrintEncodings) { + StringRef Encoding(CE.getEncoding(I)); + unsigned EncodingSize = Encoding.size(); + TempStream << " " << EncodingSize + << (EncodingSize < 10 ? " " : " "); + TempStream.flush(); + formatted_raw_ostream FOS(TempStream); + for (unsigned i = 0, e = Encoding.size(); i != e; ++i) + FOS << format("%02x ", (uint8_t)Encoding[i]); + FOS.PadToColumn(30); + FOS.flush(); + } MCIP.printInst(&Inst, InstrStream, "", STI); InstrStream.flush(); @@ -80,7 +101,7 @@ void InstructionInfoView::printView(raw_ostream &OS) const { // Consume any tabs or spaces at the beginning of the string. StringRef Str(Instruction); Str = Str.ltrim(); - TempStream << " " << Str << '\n'; + TempStream << Str << '\n'; Instruction = ""; } diff --git a/tools/llvm-mca/Views/InstructionInfoView.h b/tools/llvm-mca/Views/InstructionInfoView.h index 640d87383436..0e948304119f 100644 --- a/tools/llvm-mca/Views/InstructionInfoView.h +++ b/tools/llvm-mca/Views/InstructionInfoView.h @@ -40,6 +40,7 @@ #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MCA/CodeEmitter.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "llvm-mca" @@ -51,14 +52,18 @@ namespace mca { class InstructionInfoView : public View { const llvm::MCSubtargetInfo &STI; const llvm::MCInstrInfo &MCII; + CodeEmitter &CE; + bool PrintEncodings; llvm::ArrayRef<llvm::MCInst> Source; llvm::MCInstPrinter &MCIP; public: - InstructionInfoView(const llvm::MCSubtargetInfo &sti, - const llvm::MCInstrInfo &mcii, - llvm::ArrayRef<llvm::MCInst> S, llvm::MCInstPrinter &IP) - : STI(sti), MCII(mcii), Source(S), MCIP(IP) {} + InstructionInfoView(const llvm::MCSubtargetInfo &ST, + const llvm::MCInstrInfo &II, CodeEmitter &C, + bool ShouldPrintEncodings, llvm::ArrayRef<llvm::MCInst> S, + llvm::MCInstPrinter &IP) + : STI(ST), MCII(II), CE(C), PrintEncodings(ShouldPrintEncodings), + Source(S), MCIP(IP) {} void printView(llvm::raw_ostream &OS) const override; }; diff --git a/tools/llvm-mca/Views/TimelineView.cpp b/tools/llvm-mca/Views/TimelineView.cpp index fe3f16ba344c..1e7caa297ac6 100644 --- a/tools/llvm-mca/Views/TimelineView.cpp +++ b/tools/llvm-mca/Views/TimelineView.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "Views/TimelineView.h" +#include <numeric> namespace llvm { namespace mca { @@ -132,25 +133,38 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS, const WaitTimeEntry &Entry, unsigned SourceIndex, unsigned Executions) const { - OS << SourceIndex << '.'; + bool PrintingTotals = SourceIndex == Source.size(); + unsigned CumulativeExecutions = PrintingTotals ? Timeline.size() : Executions; + + if (!PrintingTotals) + OS << SourceIndex << '.'; + OS.PadToColumn(7); double AverageTime1, AverageTime2, AverageTime3; - AverageTime1 = (double)Entry.CyclesSpentInSchedulerQueue / Executions; - AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / Executions; - AverageTime3 = (double)Entry.CyclesSpentAfterWBAndBeforeRetire / Executions; + AverageTime1 = + (double)Entry.CyclesSpentInSchedulerQueue / CumulativeExecutions; + AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / CumulativeExecutions; + AverageTime3 = + (double)Entry.CyclesSpentAfterWBAndBeforeRetire / CumulativeExecutions; OS << Executions; OS.PadToColumn(13); - int BufferSize = UsedBuffer[SourceIndex].second; - tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, Executions, BufferSize); + + int BufferSize = PrintingTotals ? 0 : UsedBuffer[SourceIndex].second; + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions, + BufferSize); OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10); OS.PadToColumn(20); - tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, Executions, BufferSize); + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions, + BufferSize); OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10); OS.PadToColumn(27); - tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, Executions, - STI.getSchedModel().MicroOpBufferSize); + if (!PrintingTotals) + tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, + CumulativeExecutions, STI.getSchedModel().MicroOpBufferSize); OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10); if (OS.has_colors()) @@ -190,6 +204,24 @@ void TimelineView::printAverageWaitTimes(raw_ostream &OS) const { ++IID; } + + // If the timeline contains more than one instruction, + // let's also print global averages. + if (Source.size() != 1) { + WaitTimeEntry TotalWaitTime = std::accumulate( + WaitTime.begin(), WaitTime.end(), WaitTimeEntry{0, 0, 0}, + [](const WaitTimeEntry &A, const WaitTimeEntry &B) { + return WaitTimeEntry{ + A.CyclesSpentInSchedulerQueue + B.CyclesSpentInSchedulerQueue, + A.CyclesSpentInSQWhileReady + B.CyclesSpentInSQWhileReady, + A.CyclesSpentAfterWBAndBeforeRetire + + B.CyclesSpentAfterWBAndBeforeRetire}; + }); + printWaitTimeEntry(FOS, TotalWaitTime, IID, Executions); + FOS << " " + << "<total>" << '\n'; + InstrStream.flush(); + } } void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS, diff --git a/tools/llvm-mca/Views/TimelineView.h b/tools/llvm-mca/Views/TimelineView.h index b63b234293cd..9bec3b87db45 100644 --- a/tools/llvm-mca/Views/TimelineView.h +++ b/tools/llvm-mca/Views/TimelineView.h @@ -84,6 +84,7 @@ /// 3. 2 1.5 0.5 1.0 vaddss %xmm1, %xmm0, %xmm3 /// 4. 2 3.5 0.0 0.0 vaddss %xmm3, %xmm2, %xmm4 /// 5. 2 6.5 0.0 0.0 vaddss %xmm4, %xmm5, %xmm6 +/// 2 2.4 0.6 1.6 <total> /// /// By comparing column [2] with column [1], we get an idea about how many /// cycles were spent in the scheduler's queue due to data dependencies. |