aboutsummaryrefslogtreecommitdiff
path: root/tools/llvm-mca/Views
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2019-10-23 17:51:42 +0000
committerDimitry Andric <dim@FreeBSD.org>2019-10-23 17:51:42 +0000
commit1d5ae1026e831016fc29fd927877c86af904481f (patch)
tree2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /tools/llvm-mca/Views
parente6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff)
Notes
Diffstat (limited to 'tools/llvm-mca/Views')
-rw-r--r--tools/llvm-mca/Views/BottleneckAnalysis.cpp40
-rw-r--r--tools/llvm-mca/Views/BottleneckAnalysis.h8
-rw-r--r--tools/llvm-mca/Views/InstructionInfoView.cpp31
-rw-r--r--tools/llvm-mca/Views/InstructionInfoView.h13
-rw-r--r--tools/llvm-mca/Views/TimelineView.cpp50
-rw-r--r--tools/llvm-mca/Views/TimelineView.h1
6 files changed, 116 insertions, 27 deletions
diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/tools/llvm-mca/Views/BottleneckAnalysis.cpp
index 560c6c6e8a33..feff0cd6d524 100644
--- a/tools/llvm-mca/Views/BottleneckAnalysis.cpp
+++ b/tools/llvm-mca/Views/BottleneckAnalysis.cpp
@@ -165,10 +165,33 @@ void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
"Unsupported dependency type!");
OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
}
- OS << " - CYCLES: " << DE.Cost << '\n';
+ OS << " - COST: " << DE.Cost << '\n';
}
#endif // NDEBUG
+void DependencyGraph::pruneEdges(unsigned Iterations) {
+ for (DGNode &N : Nodes) {
+ unsigned NumPruned = 0;
+ const unsigned Size = N.OutgoingEdges.size();
+ // Use a cut-off threshold to prune edges with a low frequency.
+ for (unsigned I = 0, E = Size; I < E; ++I) {
+ DependencyEdge &Edge = N.OutgoingEdges[I];
+ if (Edge.Frequency == Iterations)
+ continue;
+ double Factor = (double)Edge.Frequency / Iterations;
+ if (0.10 < Factor)
+ continue;
+ Nodes[Edge.ToIID].NumPredecessors--;
+ std::swap(Edge, N.OutgoingEdges[E - 1]);
+ --E;
+ ++NumPruned;
+ }
+
+ if (NumPruned)
+ N.OutgoingEdges.resize(Size - NumPruned);
+ }
+}
+
void DependencyGraph::initializeRootSet(
SmallVectorImpl<unsigned> &RootSet) const {
for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
@@ -179,7 +202,7 @@ void DependencyGraph::initializeRootSet(
}
void DependencyGraph::propagateThroughEdges(
- SmallVectorImpl<unsigned> &RootSet) {
+ SmallVectorImpl<unsigned> &RootSet, unsigned Iterations) {
SmallVector<unsigned, 8> ToVisit;
// A critical sequence is computed as the longest path from a node of the
@@ -189,6 +212,10 @@ void DependencyGraph::propagateThroughEdges(
// Each node of the graph starts with an initial default cost of zero. The
// cost of a node is a measure of criticality: the higher the cost, the bigger
// is the performance impact.
+ // For register and memory dependencies, the cost is a function of the write
+ // latency as well as the actual delay (in cycles) caused to users.
+ // For processor resource dependencies, the cost is a function of the resource
+ // pressure. Resource interferences with low frequency values are ignored.
//
// This algorithm is very similar to a (reverse) Dijkstra. Every iteration of
// the inner loop selects (i.e. visits) a node N from a set of `unvisited
@@ -277,6 +304,10 @@ static void printInstruction(formatted_raw_ostream &FOS,
}
void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
+ // Early exit if no bottlenecks were found during the simulation.
+ if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
+ return;
+
SmallVector<const DependencyEdge *, 16> Seq;
DG.getCriticalSequence(Seq);
if (Seq.empty())
@@ -432,7 +463,6 @@ void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
bool IsLoopCarried = From >= To;
unsigned SourceSize = Source.size();
if (IsLoopCarried) {
- Cost *= Iterations / 2;
DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
return;
@@ -445,7 +475,6 @@ void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
bool IsLoopCarried = From >= To;
unsigned SourceSize = Source.size();
if (IsLoopCarried) {
- Cost *= Iterations / 2;
DG.addMemoryDep(From, To + SourceSize, Cost);
DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
return;
@@ -458,7 +487,6 @@ void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
bool IsLoopCarried = From >= To;
unsigned SourceSize = Source.size();
if (IsLoopCarried) {
- Cost *= Iterations / 2;
DG.addResourceDep(From, To + SourceSize, Mask, Cost);
DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
return;
@@ -514,7 +542,7 @@ void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
// Check if this is the last simulated instruction.
if (IID == ((Iterations * Source.size()) - 1))
- DG.finalizeGraph();
+ DG.finalizeGraph(Iterations);
}
void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.h b/tools/llvm-mca/Views/BottleneckAnalysis.h
index 7564b1a48206..9e3bd5978f09 100644
--- a/tools/llvm-mca/Views/BottleneckAnalysis.h
+++ b/tools/llvm-mca/Views/BottleneckAnalysis.h
@@ -236,8 +236,9 @@ class DependencyGraph {
void addDependency(unsigned From, unsigned To,
DependencyEdge::Dependency &&DE);
+ void pruneEdges(unsigned Iterations);
void initializeRootSet(SmallVectorImpl<unsigned> &RootSet) const;
- void propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet);
+ void propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet, unsigned Iterations);
#ifndef NDEBUG
void dumpDependencyEdge(raw_ostream &OS, const DependencyEdge &DE,
@@ -263,10 +264,11 @@ public:
// Called by the bottleneck analysis at the end of simulation to propagate
// costs through the edges of the graph, and compute a critical path.
- void finalizeGraph() {
+ void finalizeGraph(unsigned Iterations) {
SmallVector<unsigned, 16> RootSet;
+ pruneEdges(Iterations);
initializeRootSet(RootSet);
- propagateThroughEdges(RootSet);
+ propagateThroughEdges(RootSet, Iterations);
}
// Returns a sequence of edges representing the critical sequence based on the
diff --git a/tools/llvm-mca/Views/InstructionInfoView.cpp b/tools/llvm-mca/Views/InstructionInfoView.cpp
index 1fbffa3e5b69..a6f9153b4945 100644
--- a/tools/llvm-mca/Views/InstructionInfoView.cpp
+++ b/tools/llvm-mca/Views/InstructionInfoView.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "Views/InstructionInfoView.h"
+#include "llvm/Support/FormattedStream.h"
namespace llvm {
namespace mca {
@@ -26,10 +27,17 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
TempStream << "\n\nInstruction Info:\n";
TempStream << "[1]: #uOps\n[2]: Latency\n[3]: RThroughput\n"
- << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n\n";
+ << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n";
+ if (PrintEncodings) {
+ TempStream << "[7]: Encoding Size\n";
+ TempStream << "\n[1] [2] [3] [4] [5] [6] [7] "
+ << "Encodings: Instructions:\n";
+ } else {
+ TempStream << "\n[1] [2] [3] [4] [5] [6] Instructions:\n";
+ }
- TempStream << "[1] [2] [3] [4] [5] [6] Instructions:\n";
- for (const MCInst &Inst : Source) {
+ for (unsigned I = 0, E = Source.size(); I < E; ++I) {
+ const MCInst &Inst = Source[I];
const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
// Obtain the scheduling class information from the instruction.
@@ -72,7 +80,20 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
}
TempStream << (MCDesc.mayLoad() ? " * " : " ");
TempStream << (MCDesc.mayStore() ? " * " : " ");
- TempStream << (MCDesc.hasUnmodeledSideEffects() ? " U " : " ");
+ TempStream << (MCDesc.hasUnmodeledSideEffects() ? " U " : " ");
+
+ if (PrintEncodings) {
+ StringRef Encoding(CE.getEncoding(I));
+ unsigned EncodingSize = Encoding.size();
+ TempStream << " " << EncodingSize
+ << (EncodingSize < 10 ? " " : " ");
+ TempStream.flush();
+ formatted_raw_ostream FOS(TempStream);
+ for (unsigned i = 0, e = Encoding.size(); i != e; ++i)
+ FOS << format("%02x ", (uint8_t)Encoding[i]);
+ FOS.PadToColumn(30);
+ FOS.flush();
+ }
MCIP.printInst(&Inst, InstrStream, "", STI);
InstrStream.flush();
@@ -80,7 +101,7 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
// Consume any tabs or spaces at the beginning of the string.
StringRef Str(Instruction);
Str = Str.ltrim();
- TempStream << " " << Str << '\n';
+ TempStream << Str << '\n';
Instruction = "";
}
diff --git a/tools/llvm-mca/Views/InstructionInfoView.h b/tools/llvm-mca/Views/InstructionInfoView.h
index 640d87383436..0e948304119f 100644
--- a/tools/llvm-mca/Views/InstructionInfoView.h
+++ b/tools/llvm-mca/Views/InstructionInfoView.h
@@ -40,6 +40,7 @@
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/CodeEmitter.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "llvm-mca"
@@ -51,14 +52,18 @@ namespace mca {
class InstructionInfoView : public View {
const llvm::MCSubtargetInfo &STI;
const llvm::MCInstrInfo &MCII;
+ CodeEmitter &CE;
+ bool PrintEncodings;
llvm::ArrayRef<llvm::MCInst> Source;
llvm::MCInstPrinter &MCIP;
public:
- InstructionInfoView(const llvm::MCSubtargetInfo &sti,
- const llvm::MCInstrInfo &mcii,
- llvm::ArrayRef<llvm::MCInst> S, llvm::MCInstPrinter &IP)
- : STI(sti), MCII(mcii), Source(S), MCIP(IP) {}
+ InstructionInfoView(const llvm::MCSubtargetInfo &ST,
+ const llvm::MCInstrInfo &II, CodeEmitter &C,
+ bool ShouldPrintEncodings, llvm::ArrayRef<llvm::MCInst> S,
+ llvm::MCInstPrinter &IP)
+ : STI(ST), MCII(II), CE(C), PrintEncodings(ShouldPrintEncodings),
+ Source(S), MCIP(IP) {}
void printView(llvm::raw_ostream &OS) const override;
};
diff --git a/tools/llvm-mca/Views/TimelineView.cpp b/tools/llvm-mca/Views/TimelineView.cpp
index fe3f16ba344c..1e7caa297ac6 100644
--- a/tools/llvm-mca/Views/TimelineView.cpp
+++ b/tools/llvm-mca/Views/TimelineView.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "Views/TimelineView.h"
+#include <numeric>
namespace llvm {
namespace mca {
@@ -132,25 +133,38 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
const WaitTimeEntry &Entry,
unsigned SourceIndex,
unsigned Executions) const {
- OS << SourceIndex << '.';
+ bool PrintingTotals = SourceIndex == Source.size();
+ unsigned CumulativeExecutions = PrintingTotals ? Timeline.size() : Executions;
+
+ if (!PrintingTotals)
+ OS << SourceIndex << '.';
+
OS.PadToColumn(7);
double AverageTime1, AverageTime2, AverageTime3;
- AverageTime1 = (double)Entry.CyclesSpentInSchedulerQueue / Executions;
- AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / Executions;
- AverageTime3 = (double)Entry.CyclesSpentAfterWBAndBeforeRetire / Executions;
+ AverageTime1 =
+ (double)Entry.CyclesSpentInSchedulerQueue / CumulativeExecutions;
+ AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / CumulativeExecutions;
+ AverageTime3 =
+ (double)Entry.CyclesSpentAfterWBAndBeforeRetire / CumulativeExecutions;
OS << Executions;
OS.PadToColumn(13);
- int BufferSize = UsedBuffer[SourceIndex].second;
- tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, Executions, BufferSize);
+
+ int BufferSize = PrintingTotals ? 0 : UsedBuffer[SourceIndex].second;
+ if (!PrintingTotals)
+ tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions,
+ BufferSize);
OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10);
OS.PadToColumn(20);
- tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, Executions, BufferSize);
+ if (!PrintingTotals)
+ tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions,
+ BufferSize);
OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10);
OS.PadToColumn(27);
- tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, Executions,
- STI.getSchedModel().MicroOpBufferSize);
+ if (!PrintingTotals)
+ tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire,
+ CumulativeExecutions, STI.getSchedModel().MicroOpBufferSize);
OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10);
if (OS.has_colors())
@@ -190,6 +204,24 @@ void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
++IID;
}
+
+ // If the timeline contains more than one instruction,
+ // let's also print global averages.
+ if (Source.size() != 1) {
+ WaitTimeEntry TotalWaitTime = std::accumulate(
+ WaitTime.begin(), WaitTime.end(), WaitTimeEntry{0, 0, 0},
+ [](const WaitTimeEntry &A, const WaitTimeEntry &B) {
+ return WaitTimeEntry{
+ A.CyclesSpentInSchedulerQueue + B.CyclesSpentInSchedulerQueue,
+ A.CyclesSpentInSQWhileReady + B.CyclesSpentInSQWhileReady,
+ A.CyclesSpentAfterWBAndBeforeRetire +
+ B.CyclesSpentAfterWBAndBeforeRetire};
+ });
+ printWaitTimeEntry(FOS, TotalWaitTime, IID, Executions);
+ FOS << " "
+ << "<total>" << '\n';
+ InstrStream.flush();
+ }
}
void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS,
diff --git a/tools/llvm-mca/Views/TimelineView.h b/tools/llvm-mca/Views/TimelineView.h
index b63b234293cd..9bec3b87db45 100644
--- a/tools/llvm-mca/Views/TimelineView.h
+++ b/tools/llvm-mca/Views/TimelineView.h
@@ -84,6 +84,7 @@
/// 3. 2 1.5 0.5 1.0 vaddss %xmm1, %xmm0, %xmm3
/// 4. 2 3.5 0.0 0.0 vaddss %xmm3, %xmm2, %xmm4
/// 5. 2 6.5 0.0 0.0 vaddss %xmm4, %xmm5, %xmm6
+/// 2 2.4 0.6 1.6 <total>
///
/// By comparing column [2] with column [1], we get an idea about how many
/// cycles were spent in the scheduler's queue due to data dependencies.