diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000 |
commit | e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (patch) | |
tree | 599ab169a01f1c86eda9adc774edaedde2f2db5b /tools/llvm-mca | |
parent | 1a56a5ead7a2e84bee8240f5f6b033b5f1707154 (diff) |
Notes
Diffstat (limited to 'tools/llvm-mca')
27 files changed, 1215 insertions, 187 deletions
diff --git a/tools/llvm-mca/CodeRegion.cpp b/tools/llvm-mca/CodeRegion.cpp index 29a27c50c171..bf592f67245e 100644 --- a/tools/llvm-mca/CodeRegion.cpp +++ b/tools/llvm-mca/CodeRegion.cpp @@ -1,9 +1,8 @@ //===-------------------------- CodeRegion.cpp -----------------*- C++ -* -===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -17,7 +16,12 @@ namespace llvm { namespace mca { -bool CodeRegion::isLocInRange(llvm::SMLoc Loc) const { +CodeRegions::CodeRegions(llvm::SourceMgr &S) : SM(S), FoundErrors(false) { + // Create a default region for the input code sequence. + Regions.emplace_back(make_unique<CodeRegion>("", SMLoc())); +} + +bool CodeRegion::isLocInRange(SMLoc Loc) const { if (RangeEnd.isValid() && Loc.getPointer() > RangeEnd.getPointer()) return false; if (RangeStart.isValid() && Loc.getPointer() < RangeStart.getPointer()) @@ -25,42 +29,88 @@ bool CodeRegion::isLocInRange(llvm::SMLoc Loc) const { return true; } -void CodeRegions::beginRegion(llvm::StringRef Description, llvm::SMLoc Loc) { - assert(!Regions.empty() && "Missing Default region"); - const CodeRegion &CurrentRegion = *Regions.back(); - if (CurrentRegion.startLoc().isValid() && !CurrentRegion.endLoc().isValid()) { - SM.PrintMessage(Loc, llvm::SourceMgr::DK_Warning, - "Ignoring invalid region start"); - return; +void CodeRegions::beginRegion(StringRef Description, SMLoc Loc) { + if (ActiveRegions.empty()) { + // Remove the default region if there is at least one user defined region. + // By construction, only the default region has an invalid start location. + if (Regions.size() == 1 && !Regions[0]->startLoc().isValid() && + !Regions[0]->endLoc().isValid()) { + ActiveRegions[Description] = 0; + Regions[0] = make_unique<CodeRegion>(Description, Loc); + return; + } + } else { + auto It = ActiveRegions.find(Description); + if (It != ActiveRegions.end()) { + const CodeRegion &R = *Regions[It->second]; + if (Description.empty()) { + SM.PrintMessage(Loc, SourceMgr::DK_Error, + "found multiple overlapping anonymous regions"); + SM.PrintMessage(R.startLoc(), SourceMgr::DK_Note, + "Previous anonymous region was defined here"); + FoundErrors = true; + return; + } + + SM.PrintMessage(Loc, SourceMgr::DK_Error, + "overlapping regions cannot have the same name"); + SM.PrintMessage(R.startLoc(), SourceMgr::DK_Note, + "region " + Description + " was previously defined here"); + FoundErrors = true; + return; + } } - // Remove the default region if there are user defined regions. - if (!CurrentRegion.startLoc().isValid()) - Regions.erase(Regions.begin()); - addRegion(Description, Loc); + ActiveRegions[Description] = Regions.size(); + Regions.emplace_back(make_unique<CodeRegion>(Description, Loc)); + return; } -void CodeRegions::endRegion(llvm::SMLoc Loc) { - assert(!Regions.empty() && "Missing Default region"); - CodeRegion &CurrentRegion = *Regions.back(); - if (CurrentRegion.endLoc().isValid()) { - SM.PrintMessage(Loc, llvm::SourceMgr::DK_Warning, - "Ignoring invalid region end"); +void CodeRegions::endRegion(StringRef Description, SMLoc Loc) { + if (Description.empty()) { + // Special case where there is only one user defined region, + // and this LLVM-MCA-END directive doesn't provide a region name. + // In this case, we assume that the user simply wanted to just terminate + // the only active region. + if (ActiveRegions.size() == 1) { + auto It = ActiveRegions.begin(); + Regions[It->second]->setEndLocation(Loc); + ActiveRegions.erase(It); + return; + } + + // Special case where the region end marker applies to the default region. + if (ActiveRegions.empty() && Regions.size() == 1 && + !Regions[0]->startLoc().isValid() && !Regions[0]->endLoc().isValid()) { + Regions[0]->setEndLocation(Loc); + return; + } + } + + auto It = ActiveRegions.find(Description); + if (It != ActiveRegions.end()) { + Regions[It->second]->setEndLocation(Loc); + ActiveRegions.erase(It); return; } - CurrentRegion.setEndLocation(Loc); + FoundErrors = true; + SM.PrintMessage(Loc, SourceMgr::DK_Error, + "found an invalid region end directive"); + if (!Description.empty()) { + SM.PrintMessage(Loc, SourceMgr::DK_Note, + "unable to find an active region named " + Description); + } else { + SM.PrintMessage(Loc, SourceMgr::DK_Note, + "unable to find an active anonymous region"); + } } -void CodeRegions::addInstruction(const llvm::MCInst &Instruction) { - const llvm::SMLoc &Loc = Instruction.getLoc(); - const auto It = - std::find_if(Regions.rbegin(), Regions.rend(), - [Loc](const std::unique_ptr<CodeRegion> &Region) { - return Region->isLocInRange(Loc); - }); - if (It != Regions.rend()) - (*It)->addInstruction(Instruction); +void CodeRegions::addInstruction(const MCInst &Instruction) { + SMLoc Loc = Instruction.getLoc(); + for (UniqueCodeRegion &Region : Regions) + if (Region->isLocInRange(Loc)) + Region->addInstruction(Instruction); } } // namespace mca diff --git a/tools/llvm-mca/CodeRegion.h b/tools/llvm-mca/CodeRegion.h index 867aa18bb4fe..cabb4a5d4484 100644 --- a/tools/llvm-mca/CodeRegion.h +++ b/tools/llvm-mca/CodeRegion.h @@ -1,9 +1,8 @@ //===-------------------------- CodeRegion.h -------------------*- C++ -* -===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -35,6 +34,7 @@ #define LLVM_TOOLS_LLVM_MCA_CODEREGION_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/SMLoc.h" @@ -51,7 +51,7 @@ class CodeRegion { // An optional descriptor for this region. llvm::StringRef Description; // Instructions that form this region. - std::vector<llvm::MCInst> Instructions; + llvm::SmallVector<llvm::MCInst, 8> Instructions; // Source location range. llvm::SMLoc RangeStart; llvm::SMLoc RangeEnd; @@ -79,24 +79,25 @@ public: llvm::StringRef getDescription() const { return Description; } }; +class CodeRegionParseError final : public Error {}; + class CodeRegions { // A source manager. Used by the tool to generate meaningful warnings. llvm::SourceMgr &SM; - std::vector<std::unique_ptr<CodeRegion>> Regions; - - // Construct a new region of code guarded by LLVM-MCA comments. - void addRegion(llvm::StringRef Description, llvm::SMLoc Loc) { - Regions.emplace_back(llvm::make_unique<CodeRegion>(Description, Loc)); - } + using UniqueCodeRegion = std::unique_ptr<CodeRegion>; + std::vector<UniqueCodeRegion> Regions; + llvm::StringMap<unsigned> ActiveRegions; + bool FoundErrors; CodeRegions(const CodeRegions &) = delete; CodeRegions &operator=(const CodeRegions &) = delete; public: - typedef std::vector<std::unique_ptr<CodeRegion>>::iterator iterator; - typedef std::vector<std::unique_ptr<CodeRegion>>::const_iterator - const_iterator; + CodeRegions(llvm::SourceMgr &S); + + typedef std::vector<UniqueCodeRegion>::iterator iterator; + typedef std::vector<UniqueCodeRegion>::const_iterator const_iterator; iterator begin() { return Regions.begin(); } iterator end() { return Regions.end(); } @@ -104,24 +105,21 @@ public: const_iterator end() const { return Regions.cend(); } void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc); - void endRegion(llvm::SMLoc Loc); + void endRegion(llvm::StringRef Description, llvm::SMLoc Loc); void addInstruction(const llvm::MCInst &Instruction); llvm::SourceMgr &getSourceMgr() const { return SM; } - CodeRegions(llvm::SourceMgr &S) : SM(S) { - // Create a default region for the input code sequence. - addRegion("Default", llvm::SMLoc()); - } - llvm::ArrayRef<llvm::MCInst> getInstructionSequence(unsigned Idx) const { return Regions[Idx]->getInstructions(); } bool empty() const { - return llvm::all_of(Regions, [](const std::unique_ptr<CodeRegion> &Region) { + return llvm::all_of(Regions, [](const UniqueCodeRegion &Region) { return Region->empty(); }); } + + bool isValid() const { return !FoundErrors; } }; } // namespace mca diff --git a/tools/llvm-mca/CodeRegionGenerator.cpp b/tools/llvm-mca/CodeRegionGenerator.cpp index 5bd37adeeae9..c793169e64e0 100644 --- a/tools/llvm-mca/CodeRegionGenerator.cpp +++ b/tools/llvm-mca/CodeRegionGenerator.cpp @@ -1,9 +1,8 @@ //===----------------------- CodeRegionGenerator.cpp ------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -49,8 +48,7 @@ public: // We only want to intercept the emission of new instructions. virtual void EmitInstruction(const MCInst &Inst, - const MCSubtargetInfo & /* unused */, - bool /* unused */) override { + const MCSubtargetInfo &/* unused */) override { Regions.addInstruction(Inst); } @@ -88,7 +86,11 @@ void MCACommentConsumer::HandleComment(SMLoc Loc, StringRef CommentText) { Comment = Comment.drop_front(Position); if (Comment.consume_front("LLVM-MCA-END")) { - Regions.endRegion(Loc); + // Skip spaces and tabs. + Position = Comment.find_first_not_of(" \t"); + if (Position < Comment.size()) + Comment = Comment.drop_front(Position); + Regions.endRegion(Comment, Loc); return; } @@ -117,7 +119,6 @@ Expected<const CodeRegions &> AsmCodeRegionGenerator::parseCodeRegions() { MCACommentConsumer CC(Regions); Lexer.setCommentConsumer(&CC); - // Create a target-specific parser and perform the parse. std::unique_ptr<MCTargetAsmParser> TAP( TheTarget.createMCAsmParser(STI, *Parser, MCII, Opts)); if (!TAP) @@ -127,7 +128,7 @@ Expected<const CodeRegions &> AsmCodeRegionGenerator::parseCodeRegions() { Parser->setTargetParser(*TAP); Parser->Run(false); - // Get the assembler dialect from the input. llvm-mca will use this as the + // Set the assembler dialect from the input. llvm-mca will use this as the // default dialect when printing reports. AssemblerDialect = Parser->getAssemblerDialect(); return Regions; diff --git a/tools/llvm-mca/CodeRegionGenerator.h b/tools/llvm-mca/CodeRegionGenerator.h index 892cafb92686..9a10aa2c148b 100644 --- a/tools/llvm-mca/CodeRegionGenerator.h +++ b/tools/llvm-mca/CodeRegionGenerator.h @@ -1,9 +1,8 @@ //===----------------------- CodeRegionGenerator.h --------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file diff --git a/tools/llvm-mca/PipelinePrinter.cpp b/tools/llvm-mca/PipelinePrinter.cpp index 18ef45fc2a65..90d468075996 100644 --- a/tools/llvm-mca/PipelinePrinter.cpp +++ b/tools/llvm-mca/PipelinePrinter.cpp @@ -1,9 +1,8 @@ //===--------------------- PipelinePrinter.cpp ------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file diff --git a/tools/llvm-mca/PipelinePrinter.h b/tools/llvm-mca/PipelinePrinter.h index 456026e12df3..004309cd7b8e 100644 --- a/tools/llvm-mca/PipelinePrinter.h +++ b/tools/llvm-mca/PipelinePrinter.h @@ -1,9 +1,8 @@ //===--------------------- PipelinePrinter.h --------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/tools/llvm-mca/Views/BottleneckAnalysis.cpp new file mode 100644 index 000000000000..560c6c6e8a33 --- /dev/null +++ b/tools/llvm-mca/Views/BottleneckAnalysis.cpp @@ -0,0 +1,624 @@ +//===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the functionalities used by the BottleneckAnalysis +/// to report bottleneck info. +/// +//===----------------------------------------------------------------------===// + +#include "Views/BottleneckAnalysis.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MCA/Support.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +PressureTracker::PressureTracker(const MCSchedModel &Model) + : SM(Model), + ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0), + ProcResID2Mask(Model.getNumProcResourceKinds(), 0), + ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0), + ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) { + computeProcResourceMasks(SM, ProcResID2Mask); + + // Ignore the invalid resource at index zero. + unsigned NextResourceUsersIdx = 0; + for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) { + const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); + ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx; + NextResourceUsersIdx += ProcResource.NumUnits; + uint64_t ResourceMask = ProcResID2Mask[I]; + ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I; + } + + ResourceUsers.resize(NextResourceUsersIdx); + std::fill(ResourceUsers.begin(), ResourceUsers.end(), + std::make_pair<unsigned, unsigned>(~0U, 0U)); +} + +void PressureTracker::getResourceUsers(uint64_t ResourceMask, + SmallVectorImpl<User> &Users) const { + unsigned Index = getResourceStateIndex(ResourceMask); + unsigned ProcResID = ResIdx2ProcResID[Index]; + const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID); + for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) { + const User U = getResourceUser(ProcResID, I); + if (U.second && IPI.find(U.first) != IPI.end()) + Users.emplace_back(U); + } +} + +void PressureTracker::onInstructionDispatched(unsigned IID) { + IPI.insert(std::make_pair(IID, InstructionPressureInfo())); +} + +void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); } + +void PressureTracker::handleInstructionIssuedEvent( + const HWInstructionIssuedEvent &Event) { + unsigned IID = Event.IR.getSourceIndex(); + using ResourceRef = HWInstructionIssuedEvent::ResourceRef; + using ResourceUse = std::pair<ResourceRef, ResourceCycles>; + for (const ResourceUse &Use : Event.UsedResources) { + const ResourceRef &RR = Use.first; + unsigned Index = ProcResID2ResourceUsersIndex[RR.first]; + Index += countTrailingZeros(RR.second); + ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator()); + } +} + +void PressureTracker::updateResourcePressureDistribution( + uint64_t CumulativeMask) { + while (CumulativeMask) { + uint64_t Current = CumulativeMask & (-CumulativeMask); + unsigned ResIdx = getResourceStateIndex(Current); + unsigned ProcResID = ResIdx2ProcResID[ResIdx]; + uint64_t Mask = ProcResID2Mask[ProcResID]; + + if (Mask == Current) { + ResourcePressureDistribution[ProcResID]++; + CumulativeMask ^= Current; + continue; + } + + Mask ^= Current; + while (Mask) { + uint64_t SubUnit = Mask & (-Mask); + ResIdx = getResourceStateIndex(SubUnit); + ProcResID = ResIdx2ProcResID[ResIdx]; + ResourcePressureDistribution[ProcResID]++; + Mask ^= SubUnit; + } + + CumulativeMask ^= Current; + } +} + +void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) { + assert(Event.Reason != HWPressureEvent::INVALID && + "Unexpected invalid event!"); + + switch (Event.Reason) { + default: + break; + + case HWPressureEvent::RESOURCES: { + const uint64_t ResourceMask = Event.ResourceMask; + updateResourcePressureDistribution(Event.ResourceMask); + + for (const InstRef &IR : Event.AffectedInstructions) { + const Instruction &IS = *IR.getInstruction(); + unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask; + if (!BusyResources) + continue; + + unsigned IID = IR.getSourceIndex(); + IPI[IID].ResourcePressureCycles++; + } + break; + } + + case HWPressureEvent::REGISTER_DEPS: + for (const InstRef &IR : Event.AffectedInstructions) { + unsigned IID = IR.getSourceIndex(); + IPI[IID].RegisterPressureCycles++; + } + break; + + case HWPressureEvent::MEMORY_DEPS: + for (const InstRef &IR : Event.AffectedInstructions) { + unsigned IID = IR.getSourceIndex(); + IPI[IID].MemoryPressureCycles++; + } + } +} + +#ifndef NDEBUG +void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, + const DependencyEdge &DepEdge, + MCInstPrinter &MCIP) const { + unsigned FromIID = DepEdge.FromIID; + unsigned ToIID = DepEdge.ToIID; + assert(FromIID < ToIID && "Graph should be acyclic!"); + + const DependencyEdge::Dependency &DE = DepEdge.Dep; + assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!"); + + OS << " FROM: " << FromIID << " TO: " << ToIID << " "; + if (DE.Type == DependencyEdge::DT_REGISTER) { + OS << " - REGISTER: "; + MCIP.printRegName(OS, DE.ResourceOrRegID); + } else if (DE.Type == DependencyEdge::DT_MEMORY) { + OS << " - MEMORY"; + } else { + assert(DE.Type == DependencyEdge::DT_RESOURCE && + "Unsupported dependency type!"); + OS << " - RESOURCE MASK: " << DE.ResourceOrRegID; + } + OS << " - CYCLES: " << DE.Cost << '\n'; +} +#endif // NDEBUG + +void DependencyGraph::initializeRootSet( + SmallVectorImpl<unsigned> &RootSet) const { + for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { + const DGNode &N = Nodes[I]; + if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty()) + RootSet.emplace_back(I); + } +} + +void DependencyGraph::propagateThroughEdges( + SmallVectorImpl<unsigned> &RootSet) { + SmallVector<unsigned, 8> ToVisit; + + // A critical sequence is computed as the longest path from a node of the + // RootSet to a leaf node (i.e. a node with no successors). The RootSet is + // composed of nodes with at least one successor, and no predecessors. + // + // Each node of the graph starts with an initial default cost of zero. The + // cost of a node is a measure of criticality: the higher the cost, the bigger + // is the performance impact. + // + // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of + // the inner loop selects (i.e. visits) a node N from a set of `unvisited + // nodes`, and then propagates the cost of N to all its neighbors. + // + // The `unvisited nodes` set initially contains all the nodes from the + // RootSet. A node N is added to the `unvisited nodes` if all its + // predecessors have been visited already. + // + // For simplicity, every node tracks the number of unvisited incoming edges in + // field `NumVisitedPredecessors`. When the value of that field drops to + // zero, then the corresponding node is added to a `ToVisit` set. + // + // At the end of every iteration of the outer loop, set `ToVisit` becomes our + // new `unvisited nodes` set. + // + // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet) + // is empty. This algorithm works under the assumption that the graph is + // acyclic. + do { + for (unsigned IID : RootSet) { + const DGNode &N = Nodes[IID]; + for (const DependencyEdge &DepEdge : N.OutgoingEdges) { + unsigned ToIID = DepEdge.ToIID; + DGNode &To = Nodes[ToIID]; + uint64_t Cost = N.Cost + DepEdge.Dep.Cost; + // Check if this is the most expensive incoming edge seen so far. In + // case, update the total cost of the destination node (ToIID), as well + // its field `CriticalPredecessor`. + if (Cost > To.Cost) { + To.CriticalPredecessor = DepEdge; + To.Cost = Cost; + To.Depth = N.Depth + 1; + } + To.NumVisitedPredecessors++; + if (To.NumVisitedPredecessors == To.NumPredecessors) + ToVisit.emplace_back(ToIID); + } + } + + std::swap(RootSet, ToVisit); + ToVisit.clear(); + } while (!RootSet.empty()); +} + +void DependencyGraph::getCriticalSequence( + SmallVectorImpl<const DependencyEdge *> &Seq) const { + // At this stage, nodes of the graph have been already visited, and costs have + // been propagated through the edges (see method `propagateThroughEdges()`). + + // Identify the node N with the highest cost in the graph. By construction, + // that node is the last instruction of our critical sequence. + // Field N.Depth would tell us the total length of the sequence. + // + // To obtain the sequence of critical edges, we simply follow the chain of critical + // predecessors starting from node N (field DGNode::CriticalPredecessor). + const auto It = std::max_element( + Nodes.begin(), Nodes.end(), + [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; }); + unsigned IID = std::distance(Nodes.begin(), It); + Seq.resize(Nodes[IID].Depth); + for (unsigned I = Seq.size(), E = 0; I > E; --I) { + const DGNode &N = Nodes[IID]; + Seq[I - 1] = &N.CriticalPredecessor; + IID = N.CriticalPredecessor.FromIID; + } +} + +static void printInstruction(formatted_raw_ostream &FOS, + const MCSubtargetInfo &STI, MCInstPrinter &MCIP, + const MCInst &MCI, + bool UseDifferentColor = false) { + std::string Instruction; + raw_string_ostream InstrStream(Instruction); + + FOS.PadToColumn(14); + + MCIP.printInst(&MCI, InstrStream, "", STI); + InstrStream.flush(); + + if (UseDifferentColor) + FOS.changeColor(raw_ostream::CYAN, true, false); + FOS << StringRef(Instruction).ltrim(); + if (UseDifferentColor) + FOS.resetColor(); +} + +void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const { + SmallVector<const DependencyEdge *, 16> Seq; + DG.getCriticalSequence(Seq); + if (Seq.empty()) + return; + + OS << "\nCritical sequence based on the simulation:\n\n"; + + const DependencyEdge &FirstEdge = *Seq[0]; + unsigned FromIID = FirstEdge.FromIID % Source.size(); + unsigned ToIID = FirstEdge.ToIID % Source.size(); + bool IsLoopCarried = FromIID >= ToIID; + + formatted_raw_ostream FOS(OS); + FOS.PadToColumn(14); + FOS << "Instruction"; + FOS.PadToColumn(58); + FOS << "Dependency Information"; + + bool HasColors = FOS.has_colors(); + + unsigned CurrentIID = 0; + if (IsLoopCarried) { + FOS << "\n +----< " << FromIID << "."; + printInstruction(FOS, STI, MCIP, Source[FromIID], HasColors); + FOS << "\n |\n | < loop carried > \n |"; + } else { + while (CurrentIID < FromIID) { + FOS << "\n " << CurrentIID << "."; + printInstruction(FOS, STI, MCIP, Source[CurrentIID]); + CurrentIID++; + } + + FOS << "\n +----< " << CurrentIID << "."; + printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors); + CurrentIID++; + } + + for (const DependencyEdge *&DE : Seq) { + ToIID = DE->ToIID % Source.size(); + unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID; + + while (CurrentIID < LastIID) { + FOS << "\n | " << CurrentIID << "."; + printInstruction(FOS, STI, MCIP, Source[CurrentIID]); + CurrentIID++; + } + + if (CurrentIID == ToIID) { + FOS << "\n +----> " << ToIID << "."; + printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors); + } else { + FOS << "\n |\n | < loop carried > \n |" + << "\n +----> " << ToIID << "."; + printInstruction(FOS, STI, MCIP, Source[ToIID], HasColors); + } + FOS.PadToColumn(58); + + const DependencyEdge::Dependency &Dep = DE->Dep; + if (HasColors) + FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); + + if (Dep.Type == DependencyEdge::DT_REGISTER) { + FOS << "## REGISTER dependency: "; + if (HasColors) + FOS.changeColor(raw_ostream::MAGENTA, true, false); + MCIP.printRegName(FOS, Dep.ResourceOrRegID); + } else if (Dep.Type == DependencyEdge::DT_MEMORY) { + FOS << "## MEMORY dependency."; + } else { + assert(Dep.Type == DependencyEdge::DT_RESOURCE && + "Unsupported dependency type!"); + FOS << "## RESOURCE interference: "; + if (HasColors) + FOS.changeColor(raw_ostream::MAGENTA, true, false); + FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID); + if (HasColors) { + FOS.resetColor(); + FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); + } + FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations) + << "% ]"; + } + if (HasColors) + FOS.resetColor(); + ++CurrentIID; + } + + while (CurrentIID < Source.size()) { + FOS << "\n " << CurrentIID << "."; + printInstruction(FOS, STI, MCIP, Source[CurrentIID]); + CurrentIID++; + } + + FOS << '\n'; + FOS.flush(); +} + +#ifndef NDEBUG +void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const { + OS << "\nREG DEPS\n"; + for (const DGNode &Node : Nodes) + for (const DependencyEdge &DE : Node.OutgoingEdges) + if (DE.Dep.Type == DependencyEdge::DT_REGISTER) + dumpDependencyEdge(OS, DE, MCIP); + + OS << "\nMEM DEPS\n"; + for (const DGNode &Node : Nodes) + for (const DependencyEdge &DE : Node.OutgoingEdges) + if (DE.Dep.Type == DependencyEdge::DT_MEMORY) + dumpDependencyEdge(OS, DE, MCIP); + + OS << "\nRESOURCE DEPS\n"; + for (const DGNode &Node : Nodes) + for (const DependencyEdge &DE : Node.OutgoingEdges) + if (DE.Dep.Type == DependencyEdge::DT_RESOURCE) + dumpDependencyEdge(OS, DE, MCIP); +} +#endif // NDEBUG + +void DependencyGraph::addDependency(unsigned From, unsigned To, + DependencyEdge::Dependency &&Dep) { + DGNode &NodeFrom = Nodes[From]; + DGNode &NodeTo = Nodes[To]; + SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges; + + auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) { + return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID; + }); + + if (It != Vec.end()) { + It->Dep.Cost += Dep.Cost; + It->Frequency++; + return; + } + + DependencyEdge DE = {Dep, From, To, 1}; + Vec.emplace_back(DE); + NodeTo.NumPredecessors++; +} + +BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti, + MCInstPrinter &Printer, + ArrayRef<MCInst> S, unsigned NumIter) + : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), DG(S.size() * 3), + Source(S), Iterations(NumIter), TotalCycles(0), + PressureIncreasedBecauseOfResources(false), + PressureIncreasedBecauseOfRegisterDependencies(false), + PressureIncreasedBecauseOfMemoryDependencies(false), + SeenStallCycles(false), BPI() {} + +void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To, + unsigned RegID, unsigned Cost) { + bool IsLoopCarried = From >= To; + unsigned SourceSize = Source.size(); + if (IsLoopCarried) { + Cost *= Iterations / 2; + DG.addRegisterDep(From, To + SourceSize, RegID, Cost); + DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost); + return; + } + DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost); +} + +void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, + unsigned Cost) { + bool IsLoopCarried = From >= To; + unsigned SourceSize = Source.size(); + if (IsLoopCarried) { + Cost *= Iterations / 2; + DG.addMemoryDep(From, To + SourceSize, Cost); + DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost); + return; + } + DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost); +} + +void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To, + uint64_t Mask, unsigned Cost) { + bool IsLoopCarried = From >= To; + unsigned SourceSize = Source.size(); + if (IsLoopCarried) { + Cost *= Iterations / 2; + DG.addResourceDep(From, To + SourceSize, Mask, Cost); + DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost); + return; + } + DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost); +} + +void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { + const unsigned IID = Event.IR.getSourceIndex(); + if (Event.Type == HWInstructionEvent::Dispatched) { + Tracker.onInstructionDispatched(IID); + return; + } + if (Event.Type == HWInstructionEvent::Executed) { + Tracker.onInstructionExecuted(IID); + return; + } + + if (Event.Type != HWInstructionEvent::Issued) + return; + + const Instruction &IS = *Event.IR.getInstruction(); + unsigned To = IID % Source.size(); + + unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID); + uint64_t ResourceMask = IS.getCriticalResourceMask(); + SmallVector<std::pair<unsigned, unsigned>, 4> Users; + while (ResourceMask) { + uint64_t Current = ResourceMask & (-ResourceMask); + Tracker.getResourceUsers(Current, Users); + for (const std::pair<unsigned, unsigned> &U : Users) + addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles); + Users.clear(); + ResourceMask ^= Current; + } + + const CriticalDependency &RegDep = IS.getCriticalRegDep(); + if (RegDep.Cycles) { + Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID); + unsigned From = RegDep.IID % Source.size(); + addRegisterDep(From, To, RegDep.RegID, Cycles); + } + + const CriticalDependency &MemDep = IS.getCriticalMemDep(); + if (MemDep.Cycles) { + Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID); + unsigned From = MemDep.IID % Source.size(); + addMemoryDep(From, To, Cycles); + } + + Tracker.handleInstructionIssuedEvent( + static_cast<const HWInstructionIssuedEvent &>(Event)); + + // Check if this is the last simulated instruction. + if (IID == ((Iterations * Source.size()) - 1)) + DG.finalizeGraph(); +} + +void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) { + assert(Event.Reason != HWPressureEvent::INVALID && + "Unexpected invalid event!"); + + Tracker.handlePressureEvent(Event); + + switch (Event.Reason) { + default: + break; + + case HWPressureEvent::RESOURCES: + PressureIncreasedBecauseOfResources = true; + break; + case HWPressureEvent::REGISTER_DEPS: + PressureIncreasedBecauseOfRegisterDependencies = true; + break; + case HWPressureEvent::MEMORY_DEPS: + PressureIncreasedBecauseOfMemoryDependencies = true; + break; + } +} + +void BottleneckAnalysis::onCycleEnd() { + ++TotalCycles; + + bool PressureIncreasedBecauseOfDataDependencies = + PressureIncreasedBecauseOfRegisterDependencies || + PressureIncreasedBecauseOfMemoryDependencies; + if (!PressureIncreasedBecauseOfResources && + !PressureIncreasedBecauseOfDataDependencies) + return; + + ++BPI.PressureIncreaseCycles; + if (PressureIncreasedBecauseOfRegisterDependencies) + ++BPI.RegisterDependencyCycles; + if (PressureIncreasedBecauseOfMemoryDependencies) + ++BPI.MemoryDependencyCycles; + if (PressureIncreasedBecauseOfDataDependencies) + ++BPI.DataDependencyCycles; + if (PressureIncreasedBecauseOfResources) + ++BPI.ResourcePressureCycles; + PressureIncreasedBecauseOfResources = false; + PressureIncreasedBecauseOfRegisterDependencies = false; + PressureIncreasedBecauseOfMemoryDependencies = false; +} + +void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const { + if (!SeenStallCycles || !BPI.PressureIncreaseCycles) { + OS << "\n\nNo resource or data dependency bottlenecks discovered.\n"; + return; + } + + double PressurePerCycle = + (double)BPI.PressureIncreaseCycles * 100 / TotalCycles; + double ResourcePressurePerCycle = + (double)BPI.ResourcePressureCycles * 100 / TotalCycles; + double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles; + double RegDepPressurePerCycle = + (double)BPI.RegisterDependencyCycles * 100 / TotalCycles; + double MemDepPressurePerCycle = + (double)BPI.MemoryDependencyCycles * 100 / TotalCycles; + + OS << "\n\nCycles with backend pressure increase [ " + << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]"; + + OS << "\nThroughput Bottlenecks: " + << "\n Resource Pressure [ " + << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100) + << "% ]"; + + if (BPI.PressureIncreaseCycles) { + ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution(); + const MCSchedModel &SM = STI.getSchedModel(); + for (unsigned I = 0, E = Distribution.size(); I < E; ++I) { + unsigned ResourceCycles = Distribution[I]; + if (ResourceCycles) { + double Frequency = (double)ResourceCycles * 100 / TotalCycles; + const MCProcResourceDesc &PRDesc = *SM.getProcResource(I); + OS << "\n - " << PRDesc.Name << " [ " + << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]"; + } + } + } + + OS << "\n Data Dependencies: [ " + << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]"; + OS << "\n - Register Dependencies [ " + << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100) + << "% ]"; + OS << "\n - Memory Dependencies [ " + << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100) + << "% ]\n"; +} + +void BottleneckAnalysis::printView(raw_ostream &OS) const { + std::string Buffer; + raw_string_ostream TempStream(Buffer); + printBottleneckHints(TempStream); + TempStream.flush(); + OS << Buffer; + printCriticalSequence(OS); +} + +} // namespace mca. +} // namespace llvm diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.h b/tools/llvm-mca/Views/BottleneckAnalysis.h new file mode 100644 index 000000000000..7564b1a48206 --- /dev/null +++ b/tools/llvm-mca/Views/BottleneckAnalysis.h @@ -0,0 +1,341 @@ +//===--------------------- BottleneckAnalysis.h -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the bottleneck analysis view. +/// +/// This view internally observes backend pressure increase events in order to +/// identify problematic data dependencies and processor resource interferences. +/// +/// Example of bottleneck analysis report for a dot-product on X86 btver2: +/// +/// Cycles with backend pressure increase [ 40.76% ] +/// Throughput Bottlenecks: +/// Resource Pressure [ 39.34% ] +/// - JFPA [ 39.34% ] +/// - JFPU0 [ 39.34% ] +/// Data Dependencies: [ 1.42% ] +/// - Register Dependencies [ 1.42% ] +/// - Memory Dependencies [ 0.00% ] +/// +/// According to the example, backend pressure increased during the 40.76% of +/// the simulated cycles. In particular, the major cause of backend pressure +/// increases was the contention on floating point adder JFPA accessible from +/// pipeline resource JFPU0. +/// +/// At the end of each cycle, if pressure on the simulated out-of-order buffers +/// has increased, a backend pressure event is reported. +/// In particular, this occurs when there is a delta between the number of uOps +/// dispatched and the number of uOps issued to the underlying pipelines. +/// +/// The bottleneck analysis view is also responsible for identifying and printing +/// the most "critical" sequence of dependent instructions according to the +/// simulated run. +/// +/// Below is the critical sequence computed for the dot-product example on +/// btver2: +/// +/// Instruction Dependency Information +/// +----< 2. vhaddps %xmm3, %xmm3, %xmm4 +/// | +/// | < loop carried > +/// | +/// | 0. vmulps %xmm0, %xmm0, %xmm2 +/// +----> 1. vhaddps %xmm2, %xmm2, %xmm3 ## RESOURCE interference: JFPA [ probability: 73% ] +/// +----> 2. vhaddps %xmm3, %xmm3, %xmm4 ## REGISTER dependency: %xmm3 +/// | +/// | < loop carried > +/// | +/// +----> 1. vhaddps %xmm2, %xmm2, %xmm3 ## RESOURCE interference: JFPA [ probability: 73% ] +/// +/// +/// The algorithm that computes the critical sequence is very similar to a +/// critical path analysis. +/// +/// A dependency graph is used internally to track dependencies between nodes. +/// Nodes of the graph represent instructions from the input assembly sequence, +/// and edges of the graph represent data dependencies or processor resource +/// interferences. +/// +/// Edges are dynamically 'discovered' by observing instruction state transitions +/// and backend pressure increase events. Edges are internally ranked based on +/// their "criticality". A dependency is considered to be critical if it takes a +/// long time to execute, and if it contributes to backend pressure increases. +/// Criticality is internally measured in terms of cycles; it is computed for +/// every edge in the graph as a function of the edge latency and the number of +/// backend pressure increase cycles contributed by that edge. +/// +/// At the end of simulation, costs are propagated to nodes through the edges of +/// the graph, and the most expensive path connecting the root-set (a +/// set of nodes with no predecessors) to a leaf node is reported as critical +/// sequence. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H +#define LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H + +#include "Views/View.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +class PressureTracker { + const MCSchedModel &SM; + + // Resource pressure distribution. There is an element for every processor + // resource declared by the scheduling model. Quantities are number of cycles. + SmallVector<unsigned, 4> ResourcePressureDistribution; + + // Each processor resource is associated with a so-called processor resource + // mask. This vector allows to correlate processor resource IDs with processor + // resource masks. There is exactly one element per each processor resource + // declared by the scheduling model. + SmallVector<uint64_t, 4> ProcResID2Mask; + + // Maps processor resource state indices (returned by calls to + // `getResourceStateIndex(Mask)` to processor resource identifiers. + SmallVector<unsigned, 4> ResIdx2ProcResID; + + // Maps Processor Resource identifiers to ResourceUsers indices. + SmallVector<unsigned, 4> ProcResID2ResourceUsersIndex; + + // Identifies the last user of a processor resource unit. + // This vector is updated on every instruction issued event. + // There is one entry for every processor resource unit declared by the + // processor model. An all_ones value is treated like an invalid instruction + // identifier. + using User = std::pair<unsigned, unsigned>; + SmallVector<User, 4> ResourceUsers; + + struct InstructionPressureInfo { + unsigned RegisterPressureCycles; + unsigned MemoryPressureCycles; + unsigned ResourcePressureCycles; + }; + DenseMap<unsigned, InstructionPressureInfo> IPI; + + void updateResourcePressureDistribution(uint64_t CumulativeMask); + + User getResourceUser(unsigned ProcResID, unsigned UnitID) const { + unsigned Index = ProcResID2ResourceUsersIndex[ProcResID]; + return ResourceUsers[Index + UnitID]; + } + +public: + PressureTracker(const MCSchedModel &Model); + + ArrayRef<unsigned> getResourcePressureDistribution() const { + return ResourcePressureDistribution; + } + + void getResourceUsers(uint64_t ResourceMask, + SmallVectorImpl<User> &Users) const; + + unsigned getRegisterPressureCycles(unsigned IID) const { + assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!"); + const InstructionPressureInfo &Info = IPI.find(IID)->second; + return Info.RegisterPressureCycles; + } + + unsigned getMemoryPressureCycles(unsigned IID) const { + assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!"); + const InstructionPressureInfo &Info = IPI.find(IID)->second; + return Info.MemoryPressureCycles; + } + + unsigned getResourcePressureCycles(unsigned IID) const { + assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!"); + const InstructionPressureInfo &Info = IPI.find(IID)->second; + return Info.ResourcePressureCycles; + } + + const char *resolveResourceName(uint64_t ResourceMask) const { + unsigned Index = getResourceStateIndex(ResourceMask); + unsigned ProcResID = ResIdx2ProcResID[Index]; + const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID); + return PRDesc.Name; + } + + void onInstructionDispatched(unsigned IID); + void onInstructionExecuted(unsigned IID); + + void handlePressureEvent(const HWPressureEvent &Event); + void handleInstructionIssuedEvent(const HWInstructionIssuedEvent &Event); +}; + +// A dependency edge. +struct DependencyEdge { + enum DependencyType { DT_INVALID, DT_REGISTER, DT_MEMORY, DT_RESOURCE }; + + // Dependency edge descriptor. + // + // It specifies the dependency type, as well as the edge cost in cycles. + struct Dependency { + DependencyType Type; + uint64_t ResourceOrRegID; + uint64_t Cost; + }; + Dependency Dep; + + unsigned FromIID; + unsigned ToIID; + + // Used by the bottleneck analysis to compute the interference + // probability for processor resources. + unsigned Frequency; +}; + +// A dependency graph used by the bottleneck analysis to describe data +// dependencies and processor resource interferences between instructions. +// +// There is a node (an instance of struct DGNode) for every instruction in the +// input assembly sequence. Edges of the graph represent dependencies between +// instructions. +// +// Each edge of the graph is associated with a cost value which is used +// internally to rank dependency based on their impact on the runtime +// performance (see field DependencyEdge::Dependency::Cost). In general, the +// higher the cost of an edge, the higher the impact on performance. +// +// The cost of a dependency is a function of both the latency and the number of +// cycles where the dependency has been seen as critical (i.e. contributing to +// back-pressure increases). +// +// Loop carried dependencies are carefully expanded by the bottleneck analysis +// to guarantee that the graph stays acyclic. To this end, extra nodes are +// pre-allocated at construction time to describe instructions from "past and +// future" iterations. The graph is kept acyclic mainly because it simplifies the +// complexity of the algorithm that computes the critical sequence. +class DependencyGraph { + struct DGNode { + unsigned NumPredecessors; + unsigned NumVisitedPredecessors; + uint64_t Cost; + unsigned Depth; + + DependencyEdge CriticalPredecessor; + SmallVector<DependencyEdge, 8> OutgoingEdges; + }; + SmallVector<DGNode, 16> Nodes; + + DependencyGraph(const DependencyGraph &) = delete; + DependencyGraph &operator=(const DependencyGraph &) = delete; + + void addDependency(unsigned From, unsigned To, + DependencyEdge::Dependency &&DE); + + void initializeRootSet(SmallVectorImpl<unsigned> &RootSet) const; + void propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet); + +#ifndef NDEBUG + void dumpDependencyEdge(raw_ostream &OS, const DependencyEdge &DE, + MCInstPrinter &MCIP) const; +#endif + +public: + DependencyGraph(unsigned Size) : Nodes(Size) {} + + void addRegisterDep(unsigned From, unsigned To, unsigned RegID, + unsigned Cost) { + addDependency(From, To, {DependencyEdge::DT_REGISTER, RegID, Cost}); + } + + void addMemoryDep(unsigned From, unsigned To, unsigned Cost) { + addDependency(From, To, {DependencyEdge::DT_MEMORY, /* unused */ 0, Cost}); + } + + void addResourceDep(unsigned From, unsigned To, uint64_t Mask, + unsigned Cost) { + addDependency(From, To, {DependencyEdge::DT_RESOURCE, Mask, Cost}); + } + + // Called by the bottleneck analysis at the end of simulation to propagate + // costs through the edges of the graph, and compute a critical path. + void finalizeGraph() { + SmallVector<unsigned, 16> RootSet; + initializeRootSet(RootSet); + propagateThroughEdges(RootSet); + } + + // Returns a sequence of edges representing the critical sequence based on the + // simulated run. It assumes that the graph has already been finalized (i.e. + // method `finalizeGraph()` has already been called on this graph). + void getCriticalSequence(SmallVectorImpl<const DependencyEdge *> &Seq) const; + +#ifndef NDEBUG + void dump(raw_ostream &OS, MCInstPrinter &MCIP) const; +#endif +}; + +/// A view that collects and prints a few performance numbers. +class BottleneckAnalysis : public View { + const MCSubtargetInfo &STI; + MCInstPrinter &MCIP; + PressureTracker Tracker; + DependencyGraph DG; + + ArrayRef<MCInst> Source; + unsigned Iterations; + unsigned TotalCycles; + + bool PressureIncreasedBecauseOfResources; + bool PressureIncreasedBecauseOfRegisterDependencies; + bool PressureIncreasedBecauseOfMemoryDependencies; + // True if throughput was affected by dispatch stalls. + bool SeenStallCycles; + + struct BackPressureInfo { + // Cycles where backpressure increased. + unsigned PressureIncreaseCycles; + // Cycles where backpressure increased because of pipeline pressure. + unsigned ResourcePressureCycles; + // Cycles where backpressure increased because of data dependencies. + unsigned DataDependencyCycles; + // Cycles where backpressure increased because of register dependencies. + unsigned RegisterDependencyCycles; + // Cycles where backpressure increased because of memory dependencies. + unsigned MemoryDependencyCycles; + }; + BackPressureInfo BPI; + + // Used to populate the dependency graph DG. + void addRegisterDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy); + void addMemoryDep(unsigned From, unsigned To, unsigned Cy); + void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy); + + // Prints a bottleneck message to OS. + void printBottleneckHints(raw_ostream &OS) const; + void printCriticalSequence(raw_ostream &OS) const; + +public: + BottleneckAnalysis(const MCSubtargetInfo &STI, MCInstPrinter &MCIP, + ArrayRef<MCInst> Sequence, unsigned Iterations); + + void onCycleEnd() override; + void onEvent(const HWStallEvent &Event) override { SeenStallCycles = true; } + void onEvent(const HWPressureEvent &Event) override; + void onEvent(const HWInstructionEvent &Event) override; + + void printView(raw_ostream &OS) const override; + +#ifndef NDEBUG + void dump(raw_ostream &OS, MCInstPrinter &MCIP) const { DG.dump(OS, MCIP); } +#endif +}; + +} // namespace mca +} // namespace llvm + +#endif diff --git a/tools/llvm-mca/Views/DispatchStatistics.cpp b/tools/llvm-mca/Views/DispatchStatistics.cpp index 2562c82407bf..557b8ba17b17 100644 --- a/tools/llvm-mca/Views/DispatchStatistics.cpp +++ b/tools/llvm-mca/Views/DispatchStatistics.cpp @@ -1,10 +1,9 @@ //===--------------------- DispatchStatistics.cpp ---------------------*- C++ //-*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file diff --git a/tools/llvm-mca/Views/DispatchStatistics.h b/tools/llvm-mca/Views/DispatchStatistics.h index 6679c81efe95..07c0f5a4c68f 100644 --- a/tools/llvm-mca/Views/DispatchStatistics.h +++ b/tools/llvm-mca/Views/DispatchStatistics.h @@ -1,9 +1,8 @@ //===--------------------- DispatchStatistics.h -----------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file diff --git a/tools/llvm-mca/Views/InstructionInfoView.cpp b/tools/llvm-mca/Views/InstructionInfoView.cpp index 5016afb49e44..1fbffa3e5b69 100644 --- a/tools/llvm-mca/Views/InstructionInfoView.cpp +++ b/tools/llvm-mca/Views/InstructionInfoView.cpp @@ -1,9 +1,8 @@ //===--------------------- InstructionInfoView.cpp --------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -44,6 +43,9 @@ void InstructionInfoView::printView(raw_ostream &OS) const { const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); unsigned NumMicroOpcodes = SCDesc.NumMicroOps; unsigned Latency = MCSchedModel::computeInstrLatency(STI, SCDesc); + // Add extra latency due to delays in the forwarding data paths. + Latency += MCSchedModel::getForwardingDelayCycles( + STI.getReadAdvanceEntries(SCDesc)); Optional<double> RThroughput = MCSchedModel::getReciprocalThroughput(STI, SCDesc); diff --git a/tools/llvm-mca/Views/InstructionInfoView.h b/tools/llvm-mca/Views/InstructionInfoView.h index 3ef95d474490..640d87383436 100644 --- a/tools/llvm-mca/Views/InstructionInfoView.h +++ b/tools/llvm-mca/Views/InstructionInfoView.h @@ -1,9 +1,8 @@ //===--------------------- InstructionInfoView.h ----------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file diff --git a/tools/llvm-mca/Views/RegisterFileStatistics.cpp b/tools/llvm-mca/Views/RegisterFileStatistics.cpp index 06202bc41421..58736ee0d18c 100644 --- a/tools/llvm-mca/Views/RegisterFileStatistics.cpp +++ b/tools/llvm-mca/Views/RegisterFileStatistics.cpp @@ -1,9 +1,8 @@ //===--------------------- RegisterFileStatistics.cpp -----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file diff --git a/tools/llvm-mca/Views/RegisterFileStatistics.h b/tools/llvm-mca/Views/RegisterFileStatistics.h index a2c52a668dae..a2273dd48b22 100644 --- a/tools/llvm-mca/Views/RegisterFileStatistics.h +++ b/tools/llvm-mca/Views/RegisterFileStatistics.h @@ -1,9 +1,8 @@ //===--------------------- RegisterFileStatistics.h -------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file diff --git a/tools/llvm-mca/Views/ResourcePressureView.cpp b/tools/llvm-mca/Views/ResourcePressureView.cpp index 6df61840437d..38a2478cf4fe 100644 --- a/tools/llvm-mca/Views/ResourcePressureView.cpp +++ b/tools/llvm-mca/Views/ResourcePressureView.cpp @@ -1,9 +1,8 @@ //===--------------------- ResourcePressureView.cpp -------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file diff --git a/tools/llvm-mca/Views/ResourcePressureView.h b/tools/llvm-mca/Views/ResourcePressureView.h index 572ce6fe6b70..0fa0b9a36aa3 100644 --- a/tools/llvm-mca/Views/ResourcePressureView.h +++ b/tools/llvm-mca/Views/ResourcePressureView.h @@ -1,9 +1,8 @@ //===--------------------- ResourcePressureView.h ---------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file diff --git a/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp b/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp index 54eb28f1add9..cb4fbae78039 100644 --- a/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp +++ b/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp @@ -1,9 +1,8 @@ //===--------------------- RetireControlUnitStatistics.cpp ------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file diff --git a/tools/llvm-mca/Views/RetireControlUnitStatistics.h b/tools/llvm-mca/Views/RetireControlUnitStatistics.h index 02aa13bc444a..1a4d3dec5c56 100644 --- a/tools/llvm-mca/Views/RetireControlUnitStatistics.h +++ b/tools/llvm-mca/Views/RetireControlUnitStatistics.h @@ -1,9 +1,8 @@ //===--------------------- RetireControlUnitStatistics.h --------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file diff --git a/tools/llvm-mca/Views/SchedulerStatistics.cpp b/tools/llvm-mca/Views/SchedulerStatistics.cpp index 670f90127f18..bd0ba350ab68 100644 --- a/tools/llvm-mca/Views/SchedulerStatistics.cpp +++ b/tools/llvm-mca/Views/SchedulerStatistics.cpp @@ -1,9 +1,8 @@ //===--------------------- SchedulerStatistics.cpp --------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -23,7 +22,6 @@ SchedulerStatistics::SchedulerStatistics(const llvm::MCSubtargetInfo &STI) : SM(STI.getSchedModel()), LQResourceID(0), SQResourceID(0), NumIssued(0), NumCycles(0), MostRecentLoadDispatched(~0U), MostRecentStoreDispatched(~0U), - IssuedPerCycle(STI.getSchedModel().NumProcResourceKinds, 0), Usage(STI.getSchedModel().NumProcResourceKinds, {0, 0, 0}) { if (SM.hasExtraProcessorInfo()) { const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); @@ -44,9 +42,10 @@ SchedulerStatistics::SchedulerStatistics(const llvm::MCSubtargetInfo &STI) // In future we should add a new "memory queue" event type, so that we stop // making assumptions on how LSUnit internally works (See PR39828). void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) { - if (Event.Type == HWInstructionEvent::Issued) - ++NumIssued; - else if (Event.Type == HWInstructionEvent::Dispatched) { + if (Event.Type == HWInstructionEvent::Issued) { + const Instruction &Inst = *Event.IR.getInstruction(); + NumIssued += Inst.getDesc().NumMicroOps; + } else if (Event.Type == HWInstructionEvent::Dispatched) { const Instruction &Inst = *Event.IR.getInstruction(); const unsigned Index = Event.IR.getSourceIndex(); if (LQResourceID && Inst.getDesc().MayLoad && @@ -96,29 +95,25 @@ void SchedulerStatistics::updateHistograms() { BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse); } - IssuedPerCycle[NumIssued]++; + IssueWidthPerCycle[NumIssued]++; NumIssued = 0; } void SchedulerStatistics::printSchedulerStats(raw_ostream &OS) const { OS << "\n\nSchedulers - " - << "number of cycles where we saw N instructions issued:\n"; + << "number of cycles where we saw N micro opcodes issued:\n"; OS << "[# issued], [# cycles]\n"; - const auto It = - std::max_element(IssuedPerCycle.begin(), IssuedPerCycle.end()); - unsigned Index = std::distance(IssuedPerCycle.begin(), It); - bool HasColors = OS.has_colors(); - for (unsigned I = 0, E = IssuedPerCycle.size(); I < E; ++I) { - unsigned IPC = IssuedPerCycle[I]; - if (!IPC) - continue; - - if (I == Index && HasColors) + const auto It = + std::max_element(IssueWidthPerCycle.begin(), IssueWidthPerCycle.end()); + for (const std::pair<unsigned, unsigned> &Entry : IssueWidthPerCycle) { + unsigned NumIssued = Entry.first; + if (NumIssued == It->first && HasColors) OS.changeColor(raw_ostream::SAVEDCOLOR, true, false); - OS << " " << I << ", " << IPC << " (" + unsigned IPC = Entry.second; + OS << " " << NumIssued << ", " << IPC << " (" << format("%.1f", ((double)IPC / NumCycles) * 100) << "%)\n"; if (HasColors) OS.resetColor(); diff --git a/tools/llvm-mca/Views/SchedulerStatistics.h b/tools/llvm-mca/Views/SchedulerStatistics.h index d99a395a726d..32711b4483b4 100644 --- a/tools/llvm-mca/Views/SchedulerStatistics.h +++ b/tools/llvm-mca/Views/SchedulerStatistics.h @@ -1,9 +1,8 @@ //===--------------------- SchedulerStatistics.h ----------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -63,7 +62,9 @@ class SchedulerStatistics final : public View { uint64_t CumulativeNumUsedSlots; }; - std::vector<unsigned> IssuedPerCycle; + using Histogram = std::map<unsigned, unsigned>; + Histogram IssueWidthPerCycle; + std::vector<BufferUsage> Usage; void updateHistograms(); diff --git a/tools/llvm-mca/Views/SummaryView.cpp b/tools/llvm-mca/Views/SummaryView.cpp index d8ac709e784d..ef5550048f4c 100644 --- a/tools/llvm-mca/Views/SummaryView.cpp +++ b/tools/llvm-mca/Views/SummaryView.cpp @@ -1,9 +1,8 @@ //===--------------------- SummaryView.cpp -------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -25,11 +24,17 @@ namespace mca { SummaryView::SummaryView(const MCSchedModel &Model, ArrayRef<MCInst> S, unsigned Width) - : SM(Model), Source(S), DispatchWidth(Width), LastInstructionIdx(0), + : SM(Model), Source(S), DispatchWidth(Width?Width: Model.IssueWidth), + LastInstructionIdx(0), TotalCycles(0), NumMicroOps(0), ProcResourceUsage(Model.getNumProcResourceKinds(), 0), - ProcResourceMasks(Model.getNumProcResourceKinds()) { + ProcResourceMasks(Model.getNumProcResourceKinds()), + ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0) { computeProcResourceMasks(SM, ProcResourceMasks); + for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { + unsigned Index = getResourceStateIndex(ProcResourceMasks[I]); + ResIdx2ProcResID[Index] = I; + } } void SummaryView::onEvent(const HWInstructionEvent &Event) { @@ -51,11 +56,8 @@ void SummaryView::onEvent(const HWInstructionEvent &Event) { NumMicroOps += Desc.NumMicroOps; for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) { if (RU.second.size()) { - const auto It = find(ProcResourceMasks, RU.first); - assert(It != ProcResourceMasks.end() && - "Invalid processor resource mask!"); - ProcResourceUsage[std::distance(ProcResourceMasks.begin(), It)] += - RU.second.size(); + unsigned ProcResID = ResIdx2ProcResID[getResourceStateIndex(RU.first)]; + ProcResourceUsage[ProcResID] += RU.second.size(); } } } @@ -87,5 +89,6 @@ void SummaryView::printView(raw_ostream &OS) const { TempStream.flush(); OS << Buffer; } + } // namespace mca. } // namespace llvm diff --git a/tools/llvm-mca/Views/SummaryView.h b/tools/llvm-mca/Views/SummaryView.h index f59fd4233fbe..9be31b7d51bd 100644 --- a/tools/llvm-mca/Views/SummaryView.h +++ b/tools/llvm-mca/Views/SummaryView.h @@ -1,9 +1,8 @@ //===--------------------- SummaryView.h ---------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file @@ -46,6 +45,7 @@ class SummaryView : public View { unsigned TotalCycles; // The total number of micro opcodes contributed by a block of instructions. unsigned NumMicroOps; + // For each processor resource, this vector stores the cumulative number of // resource cycles consumed by the analyzed code block. llvm::SmallVector<unsigned, 8> ProcResourceUsage; @@ -56,6 +56,9 @@ class SummaryView : public View { // declared by the scheduling model. llvm::SmallVector<uint64_t, 8> ProcResourceMasks; + // Used to map resource indices to actual processor resource IDs. + llvm::SmallVector<unsigned, 8> ResIdx2ProcResID; + // Compute the reciprocal throughput for the analyzed code block. // The reciprocal block throughput is computed as the MAX between: // - NumMicroOps / DispatchWidth @@ -68,9 +71,9 @@ public: void onCycleEnd() override { ++TotalCycles; } void onEvent(const HWInstructionEvent &Event) override; - void printView(llvm::raw_ostream &OS) const override; }; + } // namespace mca } // namespace llvm diff --git a/tools/llvm-mca/Views/TimelineView.cpp b/tools/llvm-mca/Views/TimelineView.cpp index 7d55bbc99c73..fe3f16ba344c 100644 --- a/tools/llvm-mca/Views/TimelineView.cpp +++ b/tools/llvm-mca/Views/TimelineView.cpp @@ -1,9 +1,8 @@ //===--------------------- TimelineView.cpp ---------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \brief diff --git a/tools/llvm-mca/Views/TimelineView.h b/tools/llvm-mca/Views/TimelineView.h index ee981800161c..b63b234293cd 100644 --- a/tools/llvm-mca/Views/TimelineView.h +++ b/tools/llvm-mca/Views/TimelineView.h @@ -1,9 +1,8 @@ //===--------------------- TimelineView.h -----------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \brief diff --git a/tools/llvm-mca/Views/View.cpp b/tools/llvm-mca/Views/View.cpp index 6cfb9dd9f394..8e5c34d2d5c2 100644 --- a/tools/llvm-mca/Views/View.cpp +++ b/tools/llvm-mca/Views/View.cpp @@ -1,9 +1,8 @@ //===----------------------- View.cpp ---------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file diff --git a/tools/llvm-mca/Views/View.h b/tools/llvm-mca/Views/View.h index 4b82b0da0d27..3b52511b4d29 100644 --- a/tools/llvm-mca/Views/View.h +++ b/tools/llvm-mca/Views/View.h @@ -1,9 +1,8 @@ //===----------------------- View.h -----------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file diff --git a/tools/llvm-mca/llvm-mca.cpp b/tools/llvm-mca/llvm-mca.cpp index 68d63db599d7..b3590b5910ec 100644 --- a/tools/llvm-mca/llvm-mca.cpp +++ b/tools/llvm-mca/llvm-mca.cpp @@ -1,9 +1,8 @@ //===-- llvm-mca.cpp - Machine Code Analyzer -------------------*- C++ -* -===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -24,6 +23,7 @@ #include "CodeRegion.h" #include "CodeRegionGenerator.h" #include "PipelinePrinter.h" +#include "Views/BottleneckAnalysis.h" #include "Views/DispatchStatistics.h" #include "Views/InstructionInfoView.h" #include "Views/RegisterFileStatistics.h" @@ -68,8 +68,9 @@ static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename")); static cl::opt<std::string> - ArchName("march", cl::desc("Target architecture. " - "See -version for available targets"), + ArchName("march", + cl::desc("Target architecture. " + "See -version for available targets"), cl::cat(ToolOptions)); static cl::opt<std::string> @@ -101,6 +102,17 @@ static cl::opt<unsigned> "be used for register mappings"), cl::cat(ToolOptions), cl::init(0)); +static cl::opt<unsigned> + MicroOpQueue("micro-op-queue-size", cl::Hidden, + cl::desc("Number of entries in the micro-op queue"), + cl::cat(ToolOptions), cl::init(0)); + +static cl::opt<unsigned> + DecoderThroughput("decoder-throughput", cl::Hidden, + cl::desc("Maximum throughput from the decoders " + "(instructions per cycle)"), + cl::cat(ToolOptions), cl::init(0)); + static cl::opt<bool> PrintRegisterFileStats("register-file-stats", cl::desc("Print register file statistics"), @@ -176,6 +188,11 @@ static cl::opt<bool> cl::desc("Print all views including hardware statistics"), cl::cat(ViewOptions), cl::init(false)); +static cl::opt<bool> EnableBottleneckAnalysis( + "bottleneck-analysis", + cl::desc("Enable bottleneck analysis (disabled by default)"), + cl::cat(ViewOptions), cl::init(false)); + namespace { const Target *getTarget(const char *ProgName) { @@ -220,6 +237,7 @@ static void processViewOptions() { if (EnableAllViews.getNumOccurrences()) { processOptionImpl(PrintSummaryView, EnableAllViews); + processOptionImpl(EnableBottleneckAnalysis, EnableAllViews); processOptionImpl(PrintResourcePressureView, EnableAllViews); processOptionImpl(PrintTimelineView, EnableAllViews); processOptionImpl(PrintInstructionInfoView, EnableAllViews); @@ -348,6 +366,11 @@ int main(int argc, char **argv) { return 1; } const mca::CodeRegions &Regions = *RegionsOrErr; + + // Early exit if errors were found by the code region parsing logic. + if (!Regions.isValid()) + return 1; + if (Regions.empty()) { WithColor::error() << "no assembly instructions found.\n"; return 1; @@ -377,18 +400,15 @@ int main(int argc, char **argv) { const MCSchedModel &SM = STI->getSchedModel(); - unsigned Width = SM.IssueWidth; - if (DispatchWidth) - Width = DispatchWidth; - // Create an instruction builder. mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get()); // Create a context to control ownership of the pipeline hardware. mca::Context MCA(*MRI, *STI); - mca::PipelineOptions PO(Width, RegisterFileSize, LoadQueueSize, - StoreQueueSize, AssumeNoAlias); + mca::PipelineOptions PO(MicroOpQueue, DecoderThroughput, DispatchWidth, + RegisterFileSize, LoadQueueSize, StoreQueueSize, + AssumeNoAlias, EnableBottleneckAnalysis); // Number each region in the sequence. unsigned RegionIdx = 0; @@ -423,8 +443,8 @@ int main(int argc, char **argv) { WithColor::error() << IE.Message << '\n'; IP->printInst(&IE.Inst, SS, "", *STI); SS.flush(); - WithColor::note() << "instruction: " << InstructionStr - << '\n'; + WithColor::note() + << "instruction: " << InstructionStr << '\n'; })) { // Default case. WithColor::error() << toString(std::move(NewE)); @@ -464,7 +484,13 @@ int main(int argc, char **argv) { mca::PipelinePrinter Printer(*P); if (PrintSummaryView) - Printer.addView(llvm::make_unique<mca::SummaryView>(SM, Insts, Width)); + Printer.addView( + llvm::make_unique<mca::SummaryView>(SM, Insts, DispatchWidth)); + + if (EnableBottleneckAnalysis) { + Printer.addView(llvm::make_unique<mca::BottleneckAnalysis>( + *STI, *IP, Insts, S.getNumIterations())); + } if (PrintInstructionInfoView) Printer.addView( |