diff options
Diffstat (limited to 'tools/llvm-exegesis/lib/Analysis.cpp')
| -rw-r--r-- | tools/llvm-exegesis/lib/Analysis.cpp | 246 |
1 files changed, 153 insertions, 93 deletions
diff --git a/tools/llvm-exegesis/lib/Analysis.cpp b/tools/llvm-exegesis/lib/Analysis.cpp index bb5118080967..0a91679fe1d1 100644 --- a/tools/llvm-exegesis/lib/Analysis.cpp +++ b/tools/llvm-exegesis/lib/Analysis.cpp @@ -12,13 +12,25 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/FormatVariadic.h" +#include <limits> #include <unordered_set> #include <vector> +namespace llvm { namespace exegesis { static const char kCsvSep = ','; +static unsigned resolveSchedClassId(const llvm::MCSubtargetInfo &STI, + unsigned SchedClassId, + const llvm::MCInst &MCI) { + const auto &SM = STI.getSchedModel(); + while (SchedClassId && SM.getSchedClassDesc(SchedClassId)->isVariant()) + SchedClassId = + STI.resolveVariantSchedClass(SchedClassId, &MCI, SM.getProcessorID()); + return SchedClassId; +} + namespace { enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString }; @@ -84,7 +96,21 @@ writeClusterId(llvm::raw_ostream &OS, template <EscapeTag Tag> static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) { - writeEscaped<Tag>(OS, llvm::formatv("{0:F}", Value).str()); + // Given Value, if we wanted to serialize it to a string, + // how many base-10 digits will we need to store, max? + static constexpr auto MaxDigitCount = + std::numeric_limits<decltype(Value)>::max_digits10; + // Also, we will need a decimal separator. + static constexpr auto DecimalSeparatorLen = 1; // '.' e.g. + // So how long of a string will the serialization produce, max? + static constexpr auto SerializationLen = MaxDigitCount + DecimalSeparatorLen; + + // WARNING: when changing the format, also adjust the small-size estimate ^. + static constexpr StringLiteral SimpleFloatFormat = StringLiteral("{0:F}"); + + writeEscaped<Tag>( + OS, + llvm::formatv(SimpleFloatFormat.data(), Value).sstr<SerializationLen>()); } template <typename EscapeTag, EscapeTag Tag> @@ -103,13 +129,11 @@ void Analysis::writeSnippet(llvm::raw_ostream &OS, writeEscaped<Tag>(OS, "[error decoding asm snippet]"); return; } - Lines.emplace_back(); - std::string &Line = Lines.back(); - llvm::raw_string_ostream OSS(Line); + llvm::SmallString<128> InstPrinterStr; // FIXME: magic number. + llvm::raw_svector_ostream OSS(InstPrinterStr); InstPrinter_->printInst(&MI, OSS, "", *SubtargetInfo_); Bytes = Bytes.drop_front(MISize); - OSS.flush(); - Line = llvm::StringRef(Line).trim().str(); + Lines.emplace_back(llvm::StringRef(InstPrinterStr).trim()); } writeEscaped<Tag>(OS, llvm::join(Lines, Separator)); } @@ -126,20 +150,20 @@ void Analysis::printInstructionRowCsv(const size_t PointId, writeEscaped<kEscapeCsv>(OS, Point.Key.Config); OS << kCsvSep; assert(!Point.Key.Instructions.empty()); - // FIXME: Resolve variant classes. - const unsigned SchedClassId = - InstrInfo_->get(Point.Key.Instructions[0].getOpcode()).getSchedClass(); + const llvm::MCInst &MCI = Point.Key.Instructions[0]; + const unsigned SchedClassId = resolveSchedClassId( + *SubtargetInfo_, InstrInfo_->get(MCI.getOpcode()).getSchedClass(), MCI); + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - const auto &SchedModel = SubtargetInfo_->getSchedModel(); const llvm::MCSchedClassDesc *const SCDesc = - SchedModel.getSchedClassDesc(SchedClassId); + SubtargetInfo_->getSchedModel().getSchedClassDesc(SchedClassId); writeEscaped<kEscapeCsv>(OS, SCDesc->Name); #else OS << SchedClassId; #endif for (const auto &Measurement : Point.Measurements) { OS << kCsvSep; - writeMeasurementValue<kEscapeCsv>(OS, Measurement.Value); + writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue); } OS << "\n"; } @@ -193,21 +217,43 @@ Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const { return llvm::Error::success(); } -std::unordered_map<unsigned, std::vector<size_t>> +Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints( + ResolvedSchedClass &&RSC) + : RSC(std::move(RSC)) {} + +std::vector<Analysis::ResolvedSchedClassAndPoints> Analysis::makePointsPerSchedClass() const { - std::unordered_map<unsigned, std::vector<size_t>> PointsPerSchedClass; + std::vector<ResolvedSchedClassAndPoints> Entries; + // Maps SchedClassIds to index in result. + std::unordered_map<unsigned, size_t> SchedClassIdToIndex; const auto &Points = Clustering_.getPoints(); for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) { const InstructionBenchmark &Point = Points[PointId]; if (!Point.Error.empty()) continue; assert(!Point.Key.Instructions.empty()); - const auto Opcode = Point.Key.Instructions[0].getOpcode(); - // FIXME: Resolve variant classes. - PointsPerSchedClass[InstrInfo_->get(Opcode).getSchedClass()].push_back( - PointId); + // FIXME: we should be using the tuple of classes for instructions in the + // snippet as key. + const llvm::MCInst &MCI = Point.Key.Instructions[0]; + unsigned SchedClassId = InstrInfo_->get(MCI.getOpcode()).getSchedClass(); + const bool WasVariant = SchedClassId && SubtargetInfo_->getSchedModel() + .getSchedClassDesc(SchedClassId) + ->isVariant(); + SchedClassId = resolveSchedClassId(*SubtargetInfo_, SchedClassId, MCI); + const auto IndexIt = SchedClassIdToIndex.find(SchedClassId); + if (IndexIt == SchedClassIdToIndex.end()) { + // Create a new entry. + SchedClassIdToIndex.emplace(SchedClassId, Entries.size()); + ResolvedSchedClassAndPoints Entry( + ResolvedSchedClass(*SubtargetInfo_, SchedClassId, WasVariant)); + Entry.PointIds.push_back(PointId); + Entries.push_back(std::move(Entry)); + } else { + // Append to the existing entry. + Entries[IndexIt->second].PointIds.push_back(PointId); + } } - return PointsPerSchedClass; + return Entries; } // Uops repeat the same opcode over again. Just show this opcode and show the @@ -239,8 +285,8 @@ writeLatencySnippetHtml(llvm::raw_ostream &OS, } void Analysis::printSchedClassClustersHtml( - const std::vector<SchedClassCluster> &Clusters, const SchedClass &SC, - llvm::raw_ostream &OS) const { + const std::vector<SchedClassCluster> &Clusters, + const ResolvedSchedClass &RSC, llvm::raw_ostream &OS) const { const auto &Points = Clustering_.getPoints(); OS << "<table class=\"sched-class-clusters\">"; OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>"; @@ -248,16 +294,13 @@ void Analysis::printSchedClassClustersHtml( for (const auto &Measurement : Points[Clusters[0].getPointIds()[0]].Measurements) { OS << "<th>"; - if (Measurement.DebugString.empty()) - writeEscaped<kEscapeHtml>(OS, Measurement.Key); - else - writeEscaped<kEscapeHtml>(OS, Measurement.DebugString); + writeEscaped<kEscapeHtml>(OS, Measurement.Key); OS << "</th>"; } OS << "</tr>"; for (const SchedClassCluster &Cluster : Clusters) { OS << "<tr class=\"" - << (Cluster.measurementsMatch(*SubtargetInfo_, SC, Clustering_) + << (Cluster.measurementsMatch(*SubtargetInfo_, RSC, Clustering_) ? "good-cluster" : "bad-cluster") << "\"><td>"; @@ -372,12 +415,17 @@ getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc, return Result; } -Analysis::SchedClass::SchedClass(const llvm::MCSchedClassDesc &SD, - const llvm::MCSubtargetInfo &STI) - : SCDesc(&SD), - NonRedundantWriteProcRes(getNonRedundantWriteProcRes(SD, STI)), +Analysis::ResolvedSchedClass::ResolvedSchedClass( + const llvm::MCSubtargetInfo &STI, unsigned ResolvedSchedClassId, + bool WasVariant) + : SchedClassId(ResolvedSchedClassId), SCDesc(STI.getSchedModel().getSchedClassDesc(ResolvedSchedClassId)), + WasVariant(WasVariant), + NonRedundantWriteProcRes(getNonRedundantWriteProcRes(*SCDesc, STI)), IdealizedProcResPressure(computeIdealizedProcResPressure( - STI.getSchedModel(), NonRedundantWriteProcRes)) {} + STI.getSchedModel(), NonRedundantWriteProcRes)) { + assert((SCDesc == nullptr || !SCDesc->isVariant()) && + "ResolvedSchedClass should never be variant"); +} void Analysis::SchedClassCluster::addPoint( size_t PointId, const InstructionBenchmarkClustering &Clustering) { @@ -393,8 +441,24 @@ void Analysis::SchedClassCluster::addPoint( assert(ClusterId == Clustering.getClusterIdForPoint(PointId)); } +// Returns a ProxResIdx by id or name. +static unsigned findProcResIdx(const llvm::MCSubtargetInfo &STI, + const llvm::StringRef NameOrId) { + // Interpret the key as an ProcResIdx. + unsigned ProcResIdx = 0; + if (llvm::to_integer(NameOrId, ProcResIdx, 10)) + return ProcResIdx; + // Interpret the key as a ProcRes name. + const auto &SchedModel = STI.getSchedModel(); + for (int I = 0, E = SchedModel.getNumProcResourceKinds(); I < E; ++I) { + if (NameOrId == SchedModel.getProcResource(I)->Name) + return I; + } + return 0; +} + bool Analysis::SchedClassCluster::measurementsMatch( - const llvm::MCSubtargetInfo &STI, const SchedClass &SC, + const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &RSC, const InstructionBenchmarkClustering &Clustering) const { const size_t NumMeasurements = Representative.size(); std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements); @@ -410,34 +474,39 @@ bool Analysis::SchedClassCluster::measurementsMatch( return false; } // Find the latency. - SchedClassPoint[0].Value = 0.0; - for (unsigned I = 0; I < SC.SCDesc->NumWriteLatencyEntries; ++I) { + SchedClassPoint[0].PerInstructionValue = 0.0; + for (unsigned I = 0; I < RSC.SCDesc->NumWriteLatencyEntries; ++I) { const llvm::MCWriteLatencyEntry *const WLE = - STI.getWriteLatencyEntry(SC.SCDesc, I); - SchedClassPoint[0].Value = - std::max<double>(SchedClassPoint[0].Value, WLE->Cycles); + STI.getWriteLatencyEntry(RSC.SCDesc, I); + SchedClassPoint[0].PerInstructionValue = + std::max<double>(SchedClassPoint[0].PerInstructionValue, WLE->Cycles); } - ClusterCenterPoint[0].Value = Representative[0].avg(); + ClusterCenterPoint[0].PerInstructionValue = Representative[0].avg(); } else if (Mode == InstructionBenchmark::Uops) { for (int I = 0, E = Representative.size(); I < E; ++I) { - // Find the pressure on ProcResIdx `Key`. - uint16_t ProcResIdx = 0; - if (!llvm::to_integer(Representative[I].key(), ProcResIdx, 10)) { - llvm::errs() << "expected ProcResIdx key, got " - << Representative[I].key() << "\n"; + const auto Key = Representative[I].key(); + uint16_t ProcResIdx = findProcResIdx(STI, Key); + if (ProcResIdx > 0) { + // Find the pressure on ProcResIdx `Key`. + const auto ProcResPressureIt = + std::find_if(RSC.IdealizedProcResPressure.begin(), + RSC.IdealizedProcResPressure.end(), + [ProcResIdx](const std::pair<uint16_t, float> &WPR) { + return WPR.first == ProcResIdx; + }); + SchedClassPoint[I].PerInstructionValue = + ProcResPressureIt == RSC.IdealizedProcResPressure.end() + ? 0.0 + : ProcResPressureIt->second; + } else if (Key == "NumMicroOps") { + SchedClassPoint[I].PerInstructionValue = RSC.SCDesc->NumMicroOps; + } else { + llvm::errs() << "expected `key` to be either a ProcResIdx or a ProcRes " + "name, got " + << Key << "\n"; return false; } - const auto ProcResPressureIt = - std::find_if(SC.IdealizedProcResPressure.begin(), - SC.IdealizedProcResPressure.end(), - [ProcResIdx](const std::pair<uint16_t, float> &WPR) { - return WPR.first == ProcResIdx; - }); - SchedClassPoint[I].Value = - ProcResPressureIt == SC.IdealizedProcResPressure.end() - ? 0.0 - : ProcResPressureIt->second; - ClusterCenterPoint[I].Value = Representative[I].avg(); + ClusterCenterPoint[I].PerInstructionValue = Representative[I].avg(); } } else { llvm::errs() << "unimplemented measurement matching for mode " << Mode @@ -447,26 +516,25 @@ bool Analysis::SchedClassCluster::measurementsMatch( return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint); } -void Analysis::printSchedClassDescHtml(const SchedClass &SC, +void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC, llvm::raw_ostream &OS) const { OS << "<table class=\"sched-class-desc\">"; - OS << "<tr><th>Valid</th><th>Variant</th><th>uOps</th><th>Latency</" + OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</" "th><th>WriteProcRes</th><th title=\"This is the idealized unit " "resource (port) pressure assuming ideal distribution\">Idealized " "Resource Pressure</th></tr>"; - if (SC.SCDesc->isValid()) { + if (RSC.SCDesc->isValid()) { const auto &SM = SubtargetInfo_->getSchedModel(); OS << "<tr><td>✔</td>"; - OS << "<td>" << (SC.SCDesc->isVariant() ? "✔" : "✕") - << "</td>"; - OS << "<td>" << SC.SCDesc->NumMicroOps << "</td>"; + OS << "<td>" << (RSC.WasVariant ? "✔" : "✕") << "</td>"; + OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>"; // Latencies. OS << "<td><ul>"; - for (int I = 0, E = SC.SCDesc->NumWriteLatencyEntries; I < E; ++I) { + for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) { const auto *const Entry = - SubtargetInfo_->getWriteLatencyEntry(SC.SCDesc, I); + SubtargetInfo_->getWriteLatencyEntry(RSC.SCDesc, I); OS << "<li>" << Entry->Cycles; - if (SC.SCDesc->NumWriteLatencyEntries > 1) { + if (RSC.SCDesc->NumWriteLatencyEntries > 1) { // Dismabiguate if more than 1 latency. OS << " (WriteResourceID " << Entry->WriteResourceID << ")"; } @@ -475,7 +543,7 @@ void Analysis::printSchedClassDescHtml(const SchedClass &SC, OS << "</ul></td>"; // WriteProcRes. OS << "<td><ul>"; - for (const auto &WPR : SC.NonRedundantWriteProcRes) { + for (const auto &WPR : RSC.NonRedundantWriteProcRes) { OS << "<li><span class=\"mono\">"; writeEscaped<kEscapeHtml>(OS, SM.getProcResource(WPR.ProcResourceIdx)->Name); @@ -484,7 +552,7 @@ void Analysis::printSchedClassDescHtml(const SchedClass &SC, OS << "</ul></td>"; // Idealized port pressure. OS << "<td><ul>"; - for (const auto &Pressure : SC.IdealizedProcResPressure) { + for (const auto &Pressure : RSC.IdealizedProcResPressure) { OS << "<li><span class=\"mono\">"; writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel() .getProcResource(Pressure.first) @@ -580,19 +648,12 @@ llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>( writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName); OS << "</span></h3>"; - for (const auto &SchedClassAndPoints : makePointsPerSchedClass()) { - const auto SchedClassId = SchedClassAndPoints.first; - const std::vector<size_t> &SchedClassPoints = SchedClassAndPoints.second; - const auto &SchedModel = SubtargetInfo_->getSchedModel(); - const llvm::MCSchedClassDesc *const SCDesc = - SchedModel.getSchedClassDesc(SchedClassId); - if (!SCDesc) + for (const auto &RSCAndPoints : makePointsPerSchedClass()) { + if (!RSCAndPoints.RSC.SCDesc) continue; - const SchedClass SC(*SCDesc, *SubtargetInfo_); - // Bucket sched class points into sched class clusters. std::vector<SchedClassCluster> SchedClassClusters; - for (const size_t PointId : SchedClassPoints) { + for (const size_t PointId : RSCAndPoints.PointIds) { const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId); if (!ClusterId.isValid()) continue; // Ignore noise and errors. FIXME: take noise into account ? @@ -610,25 +671,25 @@ llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>( // Print any scheduling class that has at least one cluster that does not // match the checked-in data. - if (std::all_of(SchedClassClusters.begin(), SchedClassClusters.end(), - [this, &SC](const SchedClassCluster &C) { - return C.measurementsMatch(*SubtargetInfo_, SC, - Clustering_); - })) + if (llvm::all_of(SchedClassClusters, + [this, &RSCAndPoints](const SchedClassCluster &C) { + return C.measurementsMatch( + *SubtargetInfo_, RSCAndPoints.RSC, Clustering_); + })) continue; // Nothing weird. OS << "<div class=\"inconsistency\"><p>Sched Class <span " "class=\"sched-class-name\">"; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - writeEscaped<kEscapeHtml>(OS, SCDesc->Name); + writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name); #else - OS << SchedClassId; + OS << RSCAndPoints.RSC.SchedClassId; #endif OS << "</span> contains instructions whose performance characteristics do" " not match that of LLVM:</p>"; - printSchedClassClustersHtml(SchedClassClusters, SC, OS); + printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS); OS << "<p>llvm SchedModel data:</p>"; - printSchedClassDescHtml(SC, OS); + printSchedClassDescHtml(RSCAndPoints.RSC, OS); OS << "</div>"; } @@ -671,10 +732,9 @@ void distributePressure(float RemainingPressure, llvm::SmallVector<float, 32> &DensePressure) { // Find the number of subunits with minimal pressure (they are at the // front). - llvm::sort(Subunits.begin(), Subunits.end(), - [&DensePressure](const uint16_t A, const uint16_t B) { - return DensePressure[A] < DensePressure[B]; - }); + llvm::sort(Subunits, [&DensePressure](const uint16_t A, const uint16_t B) { + return DensePressure[A] < DensePressure[B]; + }); const auto getPressureForSubunit = [&DensePressure, &Subunits](size_t I) -> float & { return DensePressure[Subunits[I]]; @@ -721,11 +781,10 @@ std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure( llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) { // DensePressure[I] is the port pressure for Proc Resource I. llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds()); - llvm::sort(WPRS.begin(), WPRS.end(), - [](const llvm::MCWriteProcResEntry &A, - const llvm::MCWriteProcResEntry &B) { - return A.ProcResourceIdx < B.ProcResourceIdx; - }); + llvm::sort(WPRS, [](const llvm::MCWriteProcResEntry &A, + const llvm::MCWriteProcResEntry &B) { + return A.ProcResourceIdx < B.ProcResourceIdx; + }); for (const llvm::MCWriteProcResEntry &WPR : WPRS) { // Get units for the entry. const llvm::MCProcResourceDesc *const ProcResDesc = @@ -751,3 +810,4 @@ std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure( } } // namespace exegesis +} // namespace llvm |
