summaryrefslogtreecommitdiff
path: root/tools/llvm-exegesis/lib/Analysis.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tools/llvm-exegesis/lib/Analysis.cpp')
-rw-r--r--tools/llvm-exegesis/lib/Analysis.cpp246
1 files changed, 153 insertions, 93 deletions
diff --git a/tools/llvm-exegesis/lib/Analysis.cpp b/tools/llvm-exegesis/lib/Analysis.cpp
index bb5118080967..0a91679fe1d1 100644
--- a/tools/llvm-exegesis/lib/Analysis.cpp
+++ b/tools/llvm-exegesis/lib/Analysis.cpp
@@ -12,13 +12,25 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/FormatVariadic.h"
+#include <limits>
#include <unordered_set>
#include <vector>
+namespace llvm {
namespace exegesis {
static const char kCsvSep = ',';
+static unsigned resolveSchedClassId(const llvm::MCSubtargetInfo &STI,
+ unsigned SchedClassId,
+ const llvm::MCInst &MCI) {
+ const auto &SM = STI.getSchedModel();
+ while (SchedClassId && SM.getSchedClassDesc(SchedClassId)->isVariant())
+ SchedClassId =
+ STI.resolveVariantSchedClass(SchedClassId, &MCI, SM.getProcessorID());
+ return SchedClassId;
+}
+
namespace {
enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString };
@@ -84,7 +96,21 @@ writeClusterId(llvm::raw_ostream &OS,
template <EscapeTag Tag>
static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) {
- writeEscaped<Tag>(OS, llvm::formatv("{0:F}", Value).str());
+ // Given Value, if we wanted to serialize it to a string,
+ // how many base-10 digits will we need to store, max?
+ static constexpr auto MaxDigitCount =
+ std::numeric_limits<decltype(Value)>::max_digits10;
+ // Also, we will need a decimal separator.
+ static constexpr auto DecimalSeparatorLen = 1; // '.' e.g.
+ // So how long of a string will the serialization produce, max?
+ static constexpr auto SerializationLen = MaxDigitCount + DecimalSeparatorLen;
+
+ // WARNING: when changing the format, also adjust the small-size estimate ^.
+ static constexpr StringLiteral SimpleFloatFormat = StringLiteral("{0:F}");
+
+ writeEscaped<Tag>(
+ OS,
+ llvm::formatv(SimpleFloatFormat.data(), Value).sstr<SerializationLen>());
}
template <typename EscapeTag, EscapeTag Tag>
@@ -103,13 +129,11 @@ void Analysis::writeSnippet(llvm::raw_ostream &OS,
writeEscaped<Tag>(OS, "[error decoding asm snippet]");
return;
}
- Lines.emplace_back();
- std::string &Line = Lines.back();
- llvm::raw_string_ostream OSS(Line);
+ llvm::SmallString<128> InstPrinterStr; // FIXME: magic number.
+ llvm::raw_svector_ostream OSS(InstPrinterStr);
InstPrinter_->printInst(&MI, OSS, "", *SubtargetInfo_);
Bytes = Bytes.drop_front(MISize);
- OSS.flush();
- Line = llvm::StringRef(Line).trim().str();
+ Lines.emplace_back(llvm::StringRef(InstPrinterStr).trim());
}
writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
}
@@ -126,20 +150,20 @@ void Analysis::printInstructionRowCsv(const size_t PointId,
writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
OS << kCsvSep;
assert(!Point.Key.Instructions.empty());
- // FIXME: Resolve variant classes.
- const unsigned SchedClassId =
- InstrInfo_->get(Point.Key.Instructions[0].getOpcode()).getSchedClass();
+ const llvm::MCInst &MCI = Point.Key.Instructions[0];
+ const unsigned SchedClassId = resolveSchedClassId(
+ *SubtargetInfo_, InstrInfo_->get(MCI.getOpcode()).getSchedClass(), MCI);
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- const auto &SchedModel = SubtargetInfo_->getSchedModel();
const llvm::MCSchedClassDesc *const SCDesc =
- SchedModel.getSchedClassDesc(SchedClassId);
+ SubtargetInfo_->getSchedModel().getSchedClassDesc(SchedClassId);
writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
#else
OS << SchedClassId;
#endif
for (const auto &Measurement : Point.Measurements) {
OS << kCsvSep;
- writeMeasurementValue<kEscapeCsv>(OS, Measurement.Value);
+ writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue);
}
OS << "\n";
}
@@ -193,21 +217,43 @@ Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
return llvm::Error::success();
}
-std::unordered_map<unsigned, std::vector<size_t>>
+Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints(
+ ResolvedSchedClass &&RSC)
+ : RSC(std::move(RSC)) {}
+
+std::vector<Analysis::ResolvedSchedClassAndPoints>
Analysis::makePointsPerSchedClass() const {
- std::unordered_map<unsigned, std::vector<size_t>> PointsPerSchedClass;
+ std::vector<ResolvedSchedClassAndPoints> Entries;
+ // Maps SchedClassIds to index in result.
+ std::unordered_map<unsigned, size_t> SchedClassIdToIndex;
const auto &Points = Clustering_.getPoints();
for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
const InstructionBenchmark &Point = Points[PointId];
if (!Point.Error.empty())
continue;
assert(!Point.Key.Instructions.empty());
- const auto Opcode = Point.Key.Instructions[0].getOpcode();
- // FIXME: Resolve variant classes.
- PointsPerSchedClass[InstrInfo_->get(Opcode).getSchedClass()].push_back(
- PointId);
+ // FIXME: we should be using the tuple of classes for instructions in the
+ // snippet as key.
+ const llvm::MCInst &MCI = Point.Key.Instructions[0];
+ unsigned SchedClassId = InstrInfo_->get(MCI.getOpcode()).getSchedClass();
+ const bool WasVariant = SchedClassId && SubtargetInfo_->getSchedModel()
+ .getSchedClassDesc(SchedClassId)
+ ->isVariant();
+ SchedClassId = resolveSchedClassId(*SubtargetInfo_, SchedClassId, MCI);
+ const auto IndexIt = SchedClassIdToIndex.find(SchedClassId);
+ if (IndexIt == SchedClassIdToIndex.end()) {
+ // Create a new entry.
+ SchedClassIdToIndex.emplace(SchedClassId, Entries.size());
+ ResolvedSchedClassAndPoints Entry(
+ ResolvedSchedClass(*SubtargetInfo_, SchedClassId, WasVariant));
+ Entry.PointIds.push_back(PointId);
+ Entries.push_back(std::move(Entry));
+ } else {
+ // Append to the existing entry.
+ Entries[IndexIt->second].PointIds.push_back(PointId);
+ }
}
- return PointsPerSchedClass;
+ return Entries;
}
// Uops repeat the same opcode over again. Just show this opcode and show the
@@ -239,8 +285,8 @@ writeLatencySnippetHtml(llvm::raw_ostream &OS,
}
void Analysis::printSchedClassClustersHtml(
- const std::vector<SchedClassCluster> &Clusters, const SchedClass &SC,
- llvm::raw_ostream &OS) const {
+ const std::vector<SchedClassCluster> &Clusters,
+ const ResolvedSchedClass &RSC, llvm::raw_ostream &OS) const {
const auto &Points = Clustering_.getPoints();
OS << "<table class=\"sched-class-clusters\">";
OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
@@ -248,16 +294,13 @@ void Analysis::printSchedClassClustersHtml(
for (const auto &Measurement :
Points[Clusters[0].getPointIds()[0]].Measurements) {
OS << "<th>";
- if (Measurement.DebugString.empty())
- writeEscaped<kEscapeHtml>(OS, Measurement.Key);
- else
- writeEscaped<kEscapeHtml>(OS, Measurement.DebugString);
+ writeEscaped<kEscapeHtml>(OS, Measurement.Key);
OS << "</th>";
}
OS << "</tr>";
for (const SchedClassCluster &Cluster : Clusters) {
OS << "<tr class=\""
- << (Cluster.measurementsMatch(*SubtargetInfo_, SC, Clustering_)
+ << (Cluster.measurementsMatch(*SubtargetInfo_, RSC, Clustering_)
? "good-cluster"
: "bad-cluster")
<< "\"><td>";
@@ -372,12 +415,17 @@ getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
return Result;
}
-Analysis::SchedClass::SchedClass(const llvm::MCSchedClassDesc &SD,
- const llvm::MCSubtargetInfo &STI)
- : SCDesc(&SD),
- NonRedundantWriteProcRes(getNonRedundantWriteProcRes(SD, STI)),
+Analysis::ResolvedSchedClass::ResolvedSchedClass(
+ const llvm::MCSubtargetInfo &STI, unsigned ResolvedSchedClassId,
+ bool WasVariant)
+ : SchedClassId(ResolvedSchedClassId), SCDesc(STI.getSchedModel().getSchedClassDesc(ResolvedSchedClassId)),
+ WasVariant(WasVariant),
+ NonRedundantWriteProcRes(getNonRedundantWriteProcRes(*SCDesc, STI)),
IdealizedProcResPressure(computeIdealizedProcResPressure(
- STI.getSchedModel(), NonRedundantWriteProcRes)) {}
+ STI.getSchedModel(), NonRedundantWriteProcRes)) {
+ assert((SCDesc == nullptr || !SCDesc->isVariant()) &&
+ "ResolvedSchedClass should never be variant");
+}
void Analysis::SchedClassCluster::addPoint(
size_t PointId, const InstructionBenchmarkClustering &Clustering) {
@@ -393,8 +441,24 @@ void Analysis::SchedClassCluster::addPoint(
assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
}
+// Returns a ProcResIdx by id or name.
+static unsigned findProcResIdx(const llvm::MCSubtargetInfo &STI,
+ const llvm::StringRef NameOrId) {
+ // Interpret the key as a ProcResIdx.
+ unsigned ProcResIdx = 0;
+ if (llvm::to_integer(NameOrId, ProcResIdx, 10))
+ return ProcResIdx;
+ // Interpret the key as a ProcRes name.
+ const auto &SchedModel = STI.getSchedModel();
+ for (int I = 0, E = SchedModel.getNumProcResourceKinds(); I < E; ++I) {
+ if (NameOrId == SchedModel.getProcResource(I)->Name)
+ return I;
+ }
+ return 0;
+}
+
bool Analysis::SchedClassCluster::measurementsMatch(
- const llvm::MCSubtargetInfo &STI, const SchedClass &SC,
+ const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &RSC,
const InstructionBenchmarkClustering &Clustering) const {
const size_t NumMeasurements = Representative.size();
std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
@@ -410,34 +474,39 @@ bool Analysis::SchedClassCluster::measurementsMatch(
return false;
}
// Find the latency.
- SchedClassPoint[0].Value = 0.0;
- for (unsigned I = 0; I < SC.SCDesc->NumWriteLatencyEntries; ++I) {
+ SchedClassPoint[0].PerInstructionValue = 0.0;
+ for (unsigned I = 0; I < RSC.SCDesc->NumWriteLatencyEntries; ++I) {
const llvm::MCWriteLatencyEntry *const WLE =
- STI.getWriteLatencyEntry(SC.SCDesc, I);
- SchedClassPoint[0].Value =
- std::max<double>(SchedClassPoint[0].Value, WLE->Cycles);
+ STI.getWriteLatencyEntry(RSC.SCDesc, I);
+ SchedClassPoint[0].PerInstructionValue =
+ std::max<double>(SchedClassPoint[0].PerInstructionValue, WLE->Cycles);
}
- ClusterCenterPoint[0].Value = Representative[0].avg();
+ ClusterCenterPoint[0].PerInstructionValue = Representative[0].avg();
} else if (Mode == InstructionBenchmark::Uops) {
for (int I = 0, E = Representative.size(); I < E; ++I) {
- // Find the pressure on ProcResIdx `Key`.
- uint16_t ProcResIdx = 0;
- if (!llvm::to_integer(Representative[I].key(), ProcResIdx, 10)) {
- llvm::errs() << "expected ProcResIdx key, got "
- << Representative[I].key() << "\n";
+ const auto Key = Representative[I].key();
+ uint16_t ProcResIdx = findProcResIdx(STI, Key);
+ if (ProcResIdx > 0) {
+ // Find the pressure on ProcResIdx `Key`.
+ const auto ProcResPressureIt =
+ std::find_if(RSC.IdealizedProcResPressure.begin(),
+ RSC.IdealizedProcResPressure.end(),
+ [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
+ return WPR.first == ProcResIdx;
+ });
+ SchedClassPoint[I].PerInstructionValue =
+ ProcResPressureIt == RSC.IdealizedProcResPressure.end()
+ ? 0.0
+ : ProcResPressureIt->second;
+ } else if (Key == "NumMicroOps") {
+ SchedClassPoint[I].PerInstructionValue = RSC.SCDesc->NumMicroOps;
+ } else {
+ llvm::errs() << "expected `key` to be either a ProcResIdx or a ProcRes "
+ "name, got "
+ << Key << "\n";
return false;
}
- const auto ProcResPressureIt =
- std::find_if(SC.IdealizedProcResPressure.begin(),
- SC.IdealizedProcResPressure.end(),
- [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
- return WPR.first == ProcResIdx;
- });
- SchedClassPoint[I].Value =
- ProcResPressureIt == SC.IdealizedProcResPressure.end()
- ? 0.0
- : ProcResPressureIt->second;
- ClusterCenterPoint[I].Value = Representative[I].avg();
+ ClusterCenterPoint[I].PerInstructionValue = Representative[I].avg();
}
} else {
llvm::errs() << "unimplemented measurement matching for mode " << Mode
@@ -447,26 +516,25 @@ bool Analysis::SchedClassCluster::measurementsMatch(
return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
}
-void Analysis::printSchedClassDescHtml(const SchedClass &SC,
+void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC,
llvm::raw_ostream &OS) const {
OS << "<table class=\"sched-class-desc\">";
- OS << "<tr><th>Valid</th><th>Variant</th><th>uOps</th><th>Latency</"
+ OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</"
"th><th>WriteProcRes</th><th title=\"This is the idealized unit "
"resource (port) pressure assuming ideal distribution\">Idealized "
"Resource Pressure</th></tr>";
- if (SC.SCDesc->isValid()) {
+ if (RSC.SCDesc->isValid()) {
const auto &SM = SubtargetInfo_->getSchedModel();
OS << "<tr><td>&#10004;</td>";
- OS << "<td>" << (SC.SCDesc->isVariant() ? "&#10004;" : "&#10005;")
- << "</td>";
- OS << "<td>" << SC.SCDesc->NumMicroOps << "</td>";
+ OS << "<td>" << (RSC.WasVariant ? "&#10004;" : "&#10005;") << "</td>";
+ OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>";
// Latencies.
OS << "<td><ul>";
- for (int I = 0, E = SC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
+ for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
const auto *const Entry =
- SubtargetInfo_->getWriteLatencyEntry(SC.SCDesc, I);
+ SubtargetInfo_->getWriteLatencyEntry(RSC.SCDesc, I);
OS << "<li>" << Entry->Cycles;
- if (SC.SCDesc->NumWriteLatencyEntries > 1) {
+ if (RSC.SCDesc->NumWriteLatencyEntries > 1) {
// Dismabiguate if more than 1 latency.
OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
}
@@ -475,7 +543,7 @@ void Analysis::printSchedClassDescHtml(const SchedClass &SC,
OS << "</ul></td>";
// WriteProcRes.
OS << "<td><ul>";
- for (const auto &WPR : SC.NonRedundantWriteProcRes) {
+ for (const auto &WPR : RSC.NonRedundantWriteProcRes) {
OS << "<li><span class=\"mono\">";
writeEscaped<kEscapeHtml>(OS,
SM.getProcResource(WPR.ProcResourceIdx)->Name);
@@ -484,7 +552,7 @@ void Analysis::printSchedClassDescHtml(const SchedClass &SC,
OS << "</ul></td>";
// Idealized port pressure.
OS << "<td><ul>";
- for (const auto &Pressure : SC.IdealizedProcResPressure) {
+ for (const auto &Pressure : RSC.IdealizedProcResPressure) {
OS << "<li><span class=\"mono\">";
writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
.getProcResource(Pressure.first)
@@ -580,19 +648,12 @@ llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
OS << "</span></h3>";
- for (const auto &SchedClassAndPoints : makePointsPerSchedClass()) {
- const auto SchedClassId = SchedClassAndPoints.first;
- const std::vector<size_t> &SchedClassPoints = SchedClassAndPoints.second;
- const auto &SchedModel = SubtargetInfo_->getSchedModel();
- const llvm::MCSchedClassDesc *const SCDesc =
- SchedModel.getSchedClassDesc(SchedClassId);
- if (!SCDesc)
+ for (const auto &RSCAndPoints : makePointsPerSchedClass()) {
+ if (!RSCAndPoints.RSC.SCDesc)
continue;
- const SchedClass SC(*SCDesc, *SubtargetInfo_);
-
// Bucket sched class points into sched class clusters.
std::vector<SchedClassCluster> SchedClassClusters;
- for (const size_t PointId : SchedClassPoints) {
+ for (const size_t PointId : RSCAndPoints.PointIds) {
const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
if (!ClusterId.isValid())
continue; // Ignore noise and errors. FIXME: take noise into account ?
@@ -610,25 +671,25 @@ llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
// Print any scheduling class that has at least one cluster that does not
// match the checked-in data.
- if (std::all_of(SchedClassClusters.begin(), SchedClassClusters.end(),
- [this, &SC](const SchedClassCluster &C) {
- return C.measurementsMatch(*SubtargetInfo_, SC,
- Clustering_);
- }))
+ if (llvm::all_of(SchedClassClusters,
+ [this, &RSCAndPoints](const SchedClassCluster &C) {
+ return C.measurementsMatch(
+ *SubtargetInfo_, RSCAndPoints.RSC, Clustering_);
+ }))
continue; // Nothing weird.
OS << "<div class=\"inconsistency\"><p>Sched Class <span "
"class=\"sched-class-name\">";
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- writeEscaped<kEscapeHtml>(OS, SCDesc->Name);
+ writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name);
#else
- OS << SchedClassId;
+ OS << RSCAndPoints.RSC.SchedClassId;
#endif
OS << "</span> contains instructions whose performance characteristics do"
" not match that of LLVM:</p>";
- printSchedClassClustersHtml(SchedClassClusters, SC, OS);
+ printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS);
OS << "<p>llvm SchedModel data:</p>";
- printSchedClassDescHtml(SC, OS);
+ printSchedClassDescHtml(RSCAndPoints.RSC, OS);
OS << "</div>";
}
@@ -671,10 +732,9 @@ void distributePressure(float RemainingPressure,
llvm::SmallVector<float, 32> &DensePressure) {
// Find the number of subunits with minimal pressure (they are at the
// front).
- llvm::sort(Subunits.begin(), Subunits.end(),
- [&DensePressure](const uint16_t A, const uint16_t B) {
- return DensePressure[A] < DensePressure[B];
- });
+ llvm::sort(Subunits, [&DensePressure](const uint16_t A, const uint16_t B) {
+ return DensePressure[A] < DensePressure[B];
+ });
const auto getPressureForSubunit = [&DensePressure,
&Subunits](size_t I) -> float & {
return DensePressure[Subunits[I]];
@@ -721,11 +781,10 @@ std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) {
// DensePressure[I] is the port pressure for Proc Resource I.
llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds());
- llvm::sort(WPRS.begin(), WPRS.end(),
- [](const llvm::MCWriteProcResEntry &A,
- const llvm::MCWriteProcResEntry &B) {
- return A.ProcResourceIdx < B.ProcResourceIdx;
- });
+ llvm::sort(WPRS, [](const llvm::MCWriteProcResEntry &A,
+ const llvm::MCWriteProcResEntry &B) {
+ return A.ProcResourceIdx < B.ProcResourceIdx;
+ });
for (const llvm::MCWriteProcResEntry &WPR : WPRS) {
// Get units for the entry.
const llvm::MCProcResourceDesc *const ProcResDesc =
@@ -751,3 +810,4 @@ std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
}
} // namespace exegesis
+} // namespace llvm