diff options
Diffstat (limited to 'include/llvm/ProfileData')
-rw-r--r-- | include/llvm/ProfileData/Coverage/CoverageMapping.h | 346 | ||||
-rw-r--r-- | include/llvm/ProfileData/Coverage/CoverageMappingReader.h | 32 | ||||
-rw-r--r-- | include/llvm/ProfileData/GCOV.h | 460 | ||||
-rw-r--r-- | include/llvm/ProfileData/InstrProf.h | 7 | ||||
-rw-r--r-- | include/llvm/ProfileData/InstrProfData.inc | 4 | ||||
-rw-r--r-- | include/llvm/ProfileData/InstrProfReader.h | 4 | ||||
-rw-r--r-- | include/llvm/ProfileData/SampleProf.h | 63 | ||||
-rw-r--r-- | include/llvm/ProfileData/SampleProfReader.h | 2 |
8 files changed, 808 insertions, 110 deletions
diff --git a/include/llvm/ProfileData/Coverage/CoverageMapping.h b/include/llvm/ProfileData/Coverage/CoverageMapping.h index fa9a87aed6806..5a4098cf666c4 100644 --- a/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -85,7 +85,7 @@ private: coveragemap_error Err; }; -/// \brief A Counter is an abstract value that describes how to compute the +/// A Counter is an abstract value that describes how to compute the /// execution count for a region of code using the collected profile count data. struct Counter { enum CounterKind { Zero, CounterValueReference, Expression }; @@ -125,23 +125,23 @@ public: return std::tie(LHS.Kind, LHS.ID) < std::tie(RHS.Kind, RHS.ID); } - /// \brief Return the counter that represents the number zero. + /// Return the counter that represents the number zero. static Counter getZero() { return Counter(); } - /// \brief Return the counter that corresponds to a specific profile counter. + /// Return the counter that corresponds to a specific profile counter. static Counter getCounter(unsigned CounterId) { return Counter(CounterValueReference, CounterId); } - /// \brief Return the counter that corresponds to a specific - /// addition counter expression. + /// Return the counter that corresponds to a specific addition counter + /// expression. static Counter getExpression(unsigned ExpressionId) { return Counter(Expression, ExpressionId); } }; -/// \brief A Counter expression is a value that represents an arithmetic -/// operation with two counters. +/// A Counter expression is a value that represents an arithmetic operation +/// with two counters. struct CounterExpression { enum ExprKind { Subtract, Add }; ExprKind Kind; @@ -151,17 +151,16 @@ struct CounterExpression { : Kind(Kind), LHS(LHS), RHS(RHS) {} }; -/// \brief A Counter expression builder is used to construct the -/// counter expressions. It avoids unnecessary duplication -/// and simplifies algebraic expressions. 
+/// A Counter expression builder is used to construct the counter expressions. +/// It avoids unnecessary duplication and simplifies algebraic expressions. class CounterExpressionBuilder { - /// \brief A list of all the counter expressions + /// A list of all the counter expressions std::vector<CounterExpression> Expressions; - /// \brief A lookup table for the index of a given expression. + /// A lookup table for the index of a given expression. DenseMap<CounterExpression, unsigned> ExpressionIndices; - /// \brief Return the counter which corresponds to the given expression. + /// Return the counter which corresponds to the given expression. /// /// If the given expression is already stored in the builder, a counter /// that references that expression is returned. Otherwise, the given @@ -177,44 +176,48 @@ class CounterExpressionBuilder { : CounterID(CounterID), Factor(Factor) {} }; - /// \brief Gather the terms of the expression tree for processing. + /// Gather the terms of the expression tree for processing. /// /// This collects each addition and subtraction referenced by the counter into /// a sequence that can be sorted and combined to build a simplified counter /// expression. void extractTerms(Counter C, int Sign, SmallVectorImpl<Term> &Terms); - /// \brief Simplifies the given expression tree + /// Simplifies the given expression tree /// by getting rid of algebraically redundant operations. Counter simplify(Counter ExpressionTree); public: ArrayRef<CounterExpression> getExpressions() const { return Expressions; } - /// \brief Return a counter that represents the expression - /// that adds LHS and RHS. + /// Return a counter that represents the expression that adds LHS and RHS. Counter add(Counter LHS, Counter RHS); - /// \brief Return a counter that represents the expression - /// that subtracts RHS from LHS. + /// Return a counter that represents the expression that subtracts RHS from + /// LHS. 
Counter subtract(Counter LHS, Counter RHS); }; -/// \brief A Counter mapping region associates a source range with -/// a specific counter. +using LineColPair = std::pair<unsigned, unsigned>; + +/// A Counter mapping region associates a source range with a specific counter. struct CounterMappingRegion { enum RegionKind { - /// \brief A CodeRegion associates some code with a counter + /// A CodeRegion associates some code with a counter CodeRegion, - /// \brief An ExpansionRegion represents a file expansion region that - /// associates a source range with the expansion of a virtual source file, - /// such as for a macro instantiation or #include file. + /// An ExpansionRegion represents a file expansion region that associates + /// a source range with the expansion of a virtual source file, such as + /// for a macro instantiation or #include file. ExpansionRegion, - /// \brief A SkippedRegion represents a source range with code that - /// was skipped by a preprocessor or similar means. - SkippedRegion + /// A SkippedRegion represents a source range with code that was skipped + /// by a preprocessor or similar means. + SkippedRegion, + + /// A GapRegion is like a CodeRegion, but its count is only set as the + /// line execution count when its the only region in the line. 
+ GapRegion }; Counter Count; @@ -251,16 +254,21 @@ struct CounterMappingRegion { LineEnd, ColumnEnd, SkippedRegion); } - inline std::pair<unsigned, unsigned> startLoc() const { - return std::pair<unsigned, unsigned>(LineStart, ColumnStart); + static CounterMappingRegion + makeGapRegion(Counter Count, unsigned FileID, unsigned LineStart, + unsigned ColumnStart, unsigned LineEnd, unsigned ColumnEnd) { + return CounterMappingRegion(Count, FileID, 0, LineStart, ColumnStart, + LineEnd, (1U << 31) | ColumnEnd, GapRegion); } - inline std::pair<unsigned, unsigned> endLoc() const { - return std::pair<unsigned, unsigned>(LineEnd, ColumnEnd); + inline LineColPair startLoc() const { + return LineColPair(LineStart, ColumnStart); } + + inline LineColPair endLoc() const { return LineColPair(LineEnd, ColumnEnd); } }; -/// \brief Associates a source range with an execution count. +/// Associates a source range with an execution count. struct CountedRegion : public CounterMappingRegion { uint64_t ExecutionCount; @@ -268,8 +276,8 @@ struct CountedRegion : public CounterMappingRegion { : CounterMappingRegion(R), ExecutionCount(ExecutionCount) {} }; -/// \brief A Counter mapping context is used to connect the counters, -/// expressions and the obtained counter values. +/// A Counter mapping context is used to connect the counters, expressions +/// and the obtained counter values. class CounterMappingContext { ArrayRef<CounterExpression> Expressions; ArrayRef<uint64_t> CounterValues; @@ -284,20 +292,20 @@ public: void dump(const Counter &C, raw_ostream &OS) const; void dump(const Counter &C) const { dump(C, dbgs()); } - /// \brief Return the number of times that a region of code associated with - /// this counter was executed. + /// Return the number of times that a region of code associated with this + /// counter was executed. Expected<int64_t> evaluate(const Counter &C) const; }; -/// \brief Code coverage information for a single function. 
+/// Code coverage information for a single function. struct FunctionRecord { - /// \brief Raw function name. + /// Raw function name. std::string Name; - /// \brief Associated files. + /// Associated files. std::vector<std::string> Filenames; - /// \brief Regions in the function along with their counts. + /// Regions in the function along with their counts. std::vector<CountedRegion> CountedRegions; - /// \brief The number of times this function was executed. + /// The number of times this function was executed. uint64_t ExecutionCount; FunctionRecord(StringRef Name, ArrayRef<StringRef> Filenames) @@ -313,7 +321,7 @@ struct FunctionRecord { } }; -/// \brief Iterator over Functions, optionally filtered to a single file. +/// Iterator over Functions, optionally filtered to a single file. class FunctionRecordIterator : public iterator_facade_base<FunctionRecordIterator, std::forward_iterator_tag, FunctionRecord> { @@ -321,7 +329,7 @@ class FunctionRecordIterator ArrayRef<FunctionRecord>::iterator Current; StringRef Filename; - /// \brief Skip records whose primary file is not \c Filename. + /// Skip records whose primary file is not \c Filename. void skipOtherFiles(); public: @@ -347,17 +355,17 @@ public: } }; -/// \brief Coverage information for a macro expansion or #included file. +/// Coverage information for a macro expansion or #included file. /// /// When covered code has pieces that can be expanded for more detail, such as a /// preprocessor macro use and its definition, these are represented as /// expansions whose coverage can be looked up independently. struct ExpansionRecord { - /// \brief The abstract file this expansion covers. + /// The abstract file this expansion covers. unsigned FileID; - /// \brief The region that expands to this record. + /// The region that expands to this record. const CountedRegion &Region; - /// \brief Coverage for the expansion. + /// Coverage for the expansion. 
const FunctionRecord &Function; ExpansionRecord(const CountedRegion &Region, @@ -365,38 +373,99 @@ struct ExpansionRecord { : FileID(Region.ExpandedFileID), Region(Region), Function(Function) {} }; -/// \brief The execution count information starting at a point in a file. +/// The execution count information starting at a point in a file. /// /// A sequence of CoverageSegments gives execution counts for a file in format /// that's simple to iterate through for processing. struct CoverageSegment { - /// \brief The line where this segment begins. + /// The line where this segment begins. unsigned Line; - /// \brief The column where this segment begins. + /// The column where this segment begins. unsigned Col; - /// \brief The execution count, or zero if no count was recorded. + /// The execution count, or zero if no count was recorded. uint64_t Count; - /// \brief When false, the segment was uninstrumented or skipped. + /// When false, the segment was uninstrumented or skipped. bool HasCount; - /// \brief Whether this enters a new region or returns to a previous count. + /// Whether this enters a new region or returns to a previous count. bool IsRegionEntry; + /// Whether this enters a gap region. 
+ bool IsGapRegion; CoverageSegment(unsigned Line, unsigned Col, bool IsRegionEntry) : Line(Line), Col(Col), Count(0), HasCount(false), - IsRegionEntry(IsRegionEntry) {} + IsRegionEntry(IsRegionEntry), IsGapRegion(false) {} CoverageSegment(unsigned Line, unsigned Col, uint64_t Count, - bool IsRegionEntry) + bool IsRegionEntry, bool IsGapRegion = false) : Line(Line), Col(Col), Count(Count), HasCount(true), - IsRegionEntry(IsRegionEntry) {} + IsRegionEntry(IsRegionEntry), IsGapRegion(IsGapRegion) {} friend bool operator==(const CoverageSegment &L, const CoverageSegment &R) { - return std::tie(L.Line, L.Col, L.Count, L.HasCount, L.IsRegionEntry) == - std::tie(R.Line, R.Col, R.Count, R.HasCount, R.IsRegionEntry); + return std::tie(L.Line, L.Col, L.Count, L.HasCount, L.IsRegionEntry, + L.IsGapRegion) == std::tie(R.Line, R.Col, R.Count, + R.HasCount, R.IsRegionEntry, + R.IsGapRegion); } }; -/// \brief Coverage information to be processed or displayed. +/// An instantiation group contains a \c FunctionRecord list, such that each +/// record corresponds to a distinct instantiation of the same function. +/// +/// Note that it's possible for a function to have more than one instantiation +/// (consider C++ template specializations or static inline functions). +class InstantiationGroup { + friend class CoverageMapping; + + unsigned Line; + unsigned Col; + std::vector<const FunctionRecord *> Instantiations; + + InstantiationGroup(unsigned Line, unsigned Col, + std::vector<const FunctionRecord *> Instantiations) + : Line(Line), Col(Col), Instantiations(std::move(Instantiations)) {} + +public: + InstantiationGroup(const InstantiationGroup &) = delete; + InstantiationGroup(InstantiationGroup &&) = default; + + /// Get the number of instantiations in this group. + size_t size() const { return Instantiations.size(); } + + /// Get the line where the common function was defined. + unsigned getLine() const { return Line; } + + /// Get the column where the common function was defined. 
+ unsigned getColumn() const { return Col; } + + /// Check if the instantiations in this group have a common mangled name. + bool hasName() const { + for (unsigned I = 1, E = Instantiations.size(); I < E; ++I) + if (Instantiations[I]->Name != Instantiations[0]->Name) + return false; + return true; + } + + /// Get the common mangled name for instantiations in this group. + StringRef getName() const { + assert(hasName() && "Instantiations don't have a shared name"); + return Instantiations[0]->Name; + } + + /// Get the total execution count of all instantiations in this group. + uint64_t getTotalExecutionCount() const { + uint64_t Count = 0; + for (const FunctionRecord *F : Instantiations) + Count += F->ExecutionCount; + return Count; + } + + /// Get the instantiations in this group. + ArrayRef<const FunctionRecord *> getInstantiations() const { + return Instantiations; + } +}; + +/// Coverage information to be processed or displayed. /// /// This represents the coverage of an entire file, expansion, or function. It /// provides a sequence of CoverageSegments to iterate through, as well as the @@ -413,9 +482,11 @@ public: CoverageData(StringRef Filename) : Filename(Filename) {} - /// \brief Get the name of the file this data covers. + /// Get the name of the file this data covers. StringRef getFilename() const { return Filename; } + /// Get an iterator over the coverage segments for this object. The segments + /// are guaranteed to be uniqued and sorted by location. std::vector<CoverageSegment>::const_iterator begin() const { return Segments.begin(); } @@ -426,22 +497,23 @@ public: bool empty() const { return Segments.empty(); } - /// \brief Expansions that can be further processed. + /// Expansions that can be further processed. ArrayRef<ExpansionRecord> getExpansions() const { return Expansions; } }; -/// \brief The mapping of profile information to coverage data. +/// The mapping of profile information to coverage data. 
/// /// This is the main interface to get coverage information, using a profile to /// fill out execution counts. class CoverageMapping { StringSet<> FunctionNames; std::vector<FunctionRecord> Functions; - unsigned MismatchedFunctionCount = 0; + std::vector<std::pair<std::string, uint64_t>> FuncHashMismatches; + std::vector<std::pair<std::string, uint64_t>> FuncCounterMismatches; CoverageMapping() = default; - /// \brief Add a function record corresponding to \p Record. + /// Add a function record corresponding to \p Record. Error loadFunctionRecord(const CoverageMappingRecord &Record, IndexedInstrProfReader &ProfileReader); @@ -449,59 +521,162 @@ public: CoverageMapping(const CoverageMapping &) = delete; CoverageMapping &operator=(const CoverageMapping &) = delete; - /// \brief Load the coverage mapping using the given readers. + /// Load the coverage mapping using the given readers. static Expected<std::unique_ptr<CoverageMapping>> load(ArrayRef<std::unique_ptr<CoverageMappingReader>> CoverageReaders, IndexedInstrProfReader &ProfileReader); + /// Load the coverage mapping from the given object files and profile. If + /// \p Arches is non-empty, it must specify an architecture for each object. static Expected<std::unique_ptr<CoverageMapping>> load(ArrayRef<StringRef> ObjectFilenames, StringRef ProfileFilename, - StringRef Arch = StringRef()); + ArrayRef<StringRef> Arches = None); - /// \brief The number of functions that couldn't have their profiles mapped. + /// The number of functions that couldn't have their profiles mapped. /// /// This is a count of functions whose profile is out of date or otherwise /// can't be associated with any coverage information. 
- unsigned getMismatchedCount() { return MismatchedFunctionCount; } + unsigned getMismatchedCount() const { + return FuncHashMismatches.size() + FuncCounterMismatches.size(); + } + + /// A hash mismatch occurs when a profile record for a symbol does not have + /// the same hash as a coverage mapping record for the same symbol. This + /// returns a list of hash mismatches, where each mismatch is a pair of the + /// symbol name and its coverage mapping hash. + ArrayRef<std::pair<std::string, uint64_t>> getHashMismatches() const { + return FuncHashMismatches; + } - /// \brief Returns a lexicographically sorted, unique list of files that are + /// A counter mismatch occurs when there is an error when evaluating the + /// counter expressions in a coverage mapping record. This returns a list of + /// counter mismatches, where each mismatch is a pair of the symbol name and + /// the number of valid evaluated counter expressions. + ArrayRef<std::pair<std::string, uint64_t>> getCounterMismatches() const { + return FuncCounterMismatches; + } + + /// Returns a lexicographically sorted, unique list of files that are /// covered. std::vector<StringRef> getUniqueSourceFiles() const; - /// \brief Get the coverage for a particular file. + /// Get the coverage for a particular file. /// /// The given filename must be the name as recorded in the coverage /// information. That is, only names returned from getUniqueSourceFiles will /// yield a result. CoverageData getCoverageForFile(StringRef Filename) const; - /// \brief Gets all of the functions covered by this profile. + /// Get the coverage for a particular function. + CoverageData getCoverageForFunction(const FunctionRecord &Function) const; + + /// Get the coverage for an expansion within a coverage set. + CoverageData getCoverageForExpansion(const ExpansionRecord &Expansion) const; + + /// Gets all of the functions covered by this profile. 
iterator_range<FunctionRecordIterator> getCoveredFunctions() const { return make_range(FunctionRecordIterator(Functions), FunctionRecordIterator()); } - /// \brief Gets all of the functions in a particular file. + /// Gets all of the functions in a particular file. iterator_range<FunctionRecordIterator> getCoveredFunctions(StringRef Filename) const { return make_range(FunctionRecordIterator(Functions, Filename), FunctionRecordIterator()); } - /// \brief Get the list of function instantiations in the file. + /// Get the list of function instantiation groups in a particular file. /// - /// Functions that are instantiated more than once, such as C++ template - /// specializations, have distinct coverage records for each instantiation. - std::vector<const FunctionRecord *> - getInstantiations(StringRef Filename) const; + /// Every instantiation group in a program is attributed to exactly one file: + /// the file in which the definition for the common function begins. + std::vector<InstantiationGroup> + getInstantiationGroups(StringRef Filename) const; +}; - /// \brief Get the coverage for a particular function. - CoverageData getCoverageForFunction(const FunctionRecord &Function) const; +/// Coverage statistics for a single line. +class LineCoverageStats { + uint64_t ExecutionCount; + bool HasMultipleRegions; + bool Mapped; + unsigned Line; + ArrayRef<const CoverageSegment *> LineSegments; + const CoverageSegment *WrappedSegment; - /// \brief Get the coverage for an expansion within a coverage set. 
- CoverageData getCoverageForExpansion(const ExpansionRecord &Expansion) const; + friend class LineCoverageIterator; + LineCoverageStats() = default; + +public: + LineCoverageStats(ArrayRef<const CoverageSegment *> LineSegments, + const CoverageSegment *WrappedSegment, unsigned Line); + + uint64_t getExecutionCount() const { return ExecutionCount; } + + bool hasMultipleRegions() const { return HasMultipleRegions; } + + bool isMapped() const { return Mapped; } + + unsigned getLine() const { return Line; } + + ArrayRef<const CoverageSegment *> getLineSegments() const { + return LineSegments; + } + + const CoverageSegment *getWrappedSegment() const { return WrappedSegment; } +}; + +/// An iterator over the \c LineCoverageStats objects for lines described by +/// a \c CoverageData instance. +class LineCoverageIterator + : public iterator_facade_base< + LineCoverageIterator, std::forward_iterator_tag, LineCoverageStats> { +public: + LineCoverageIterator(const CoverageData &CD) + : LineCoverageIterator(CD, CD.begin()->Line) {} + + LineCoverageIterator(const CoverageData &CD, unsigned Line) + : CD(CD), WrappedSegment(nullptr), Next(CD.begin()), Ended(false), + Line(Line), Segments(), Stats() { + this->operator++(); + } + + LineCoverageIterator &operator=(const LineCoverageIterator &R) = default; + + bool operator==(const LineCoverageIterator &R) const { + return &CD == &R.CD && Next == R.Next && Ended == R.Ended; + } + + const LineCoverageStats &operator*() const { return Stats; } + + LineCoverageStats &operator*() { return Stats; } + + LineCoverageIterator &operator++(); + + LineCoverageIterator getEnd() const { + auto EndIt = *this; + EndIt.Next = CD.end(); + EndIt.Ended = true; + return EndIt; + } + +private: + const CoverageData &CD; + const CoverageSegment *WrappedSegment; + std::vector<CoverageSegment>::const_iterator Next; + bool Ended; + unsigned Line; + SmallVector<const CoverageSegment *, 4> Segments; + LineCoverageStats Stats; }; +/// Get a \c 
LineCoverageIterator range for the lines described by \p CD. +static inline iterator_range<LineCoverageIterator> +getLineCoverageStats(const coverage::CoverageData &CD) { + auto Begin = LineCoverageIterator(CD); + auto End = Begin.getEnd(); + return make_range(Begin, End); +} + // Profile coverage map has the following layout: // [CoverageMapFileHeader] // [ArrayStart] @@ -602,7 +777,10 @@ enum CovMapVersion { // name string pointer to MD5 to support name section compression. Name // section is also compressed. Version2 = 1, - // The current version is Version2 + // A new interpretation of the columnEnd field is added in order to mark + // regions as gap areas. + Version3 = 2, + // The current version is Version3 CurrentVersion = INSTR_PROF_COVMAP_VERSION }; @@ -618,7 +796,7 @@ template <class IntPtrT> struct CovMapTraits<CovMapVersion::Version1, IntPtrT> { } // end namespace coverage -/// \brief Provide DenseMapInfo for CounterExpression +/// Provide DenseMapInfo for CounterExpression template<> struct DenseMapInfo<coverage::CounterExpression> { static inline coverage::CounterExpression getEmptyKey() { using namespace coverage; diff --git a/include/llvm/ProfileData/Coverage/CoverageMappingReader.h b/include/llvm/ProfileData/Coverage/CoverageMappingReader.h index 5b372252a9ac9..633e51565cd2a 100644 --- a/include/llvm/ProfileData/Coverage/CoverageMappingReader.h +++ b/include/llvm/ProfileData/Coverage/CoverageMappingReader.h @@ -44,18 +44,26 @@ struct CoverageMappingRecord { /// \brief A file format agnostic iterator over coverage mapping data. 
class CoverageMappingIterator : public std::iterator<std::input_iterator_tag, CoverageMappingRecord> { - CoverageMappingReader *Reader = nullptr; + CoverageMappingReader *Reader; CoverageMappingRecord Record; + coveragemap_error ReadErr; void increment(); public: - CoverageMappingIterator() = default; + CoverageMappingIterator() + : Reader(nullptr), Record(), ReadErr(coveragemap_error::success) {} - CoverageMappingIterator(CoverageMappingReader *Reader) : Reader(Reader) { + CoverageMappingIterator(CoverageMappingReader *Reader) + : Reader(Reader), Record(), ReadErr(coveragemap_error::success) { increment(); } + ~CoverageMappingIterator() { + if (ReadErr != coveragemap_error::success) + llvm_unreachable("Unexpected error in coverage mapping iterator"); + } + CoverageMappingIterator &operator++() { increment(); return *this; @@ -66,8 +74,22 @@ public: bool operator!=(const CoverageMappingIterator &RHS) { return Reader != RHS.Reader; } - CoverageMappingRecord &operator*() { return Record; } - CoverageMappingRecord *operator->() { return &Record; } + Expected<CoverageMappingRecord &> operator*() { + if (ReadErr != coveragemap_error::success) { + auto E = make_error<CoverageMapError>(ReadErr); + ReadErr = coveragemap_error::success; + return std::move(E); + } + return Record; + } + Expected<CoverageMappingRecord *> operator->() { + if (ReadErr != coveragemap_error::success) { + auto E = make_error<CoverageMapError>(ReadErr); + ReadErr = coveragemap_error::success; + return std::move(E); + } + return &Record; + } }; class CoverageMappingReader { diff --git a/include/llvm/ProfileData/GCOV.h b/include/llvm/ProfileData/GCOV.h new file mode 100644 index 0000000000000..497f80b87b267 --- /dev/null +++ b/include/llvm/ProfileData/GCOV.h @@ -0,0 +1,460 @@ +//===- GCOV.h - LLVM coverage tool ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header provides the interface to read and write coverage files that +// use 'gcov' format. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PROFILEDATA_GCOV_H +#define LLVM_PROFILEDATA_GCOV_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <memory> +#include <string> +#include <utility> + +namespace llvm { + +class GCOVFunction; +class GCOVBlock; +class FileInfo; + +namespace GCOV { + +enum GCOVVersion { V402, V404, V704 }; + +/// \brief A struct for passing gcov options between functions. +struct Options { + Options(bool A, bool B, bool C, bool F, bool P, bool U, bool L, bool N) + : AllBlocks(A), BranchInfo(B), BranchCount(C), FuncCoverage(F), + PreservePaths(P), UncondBranch(U), LongFileNames(L), NoOutput(N) {} + + bool AllBlocks; + bool BranchInfo; + bool BranchCount; + bool FuncCoverage; + bool PreservePaths; + bool UncondBranch; + bool LongFileNames; + bool NoOutput; +}; + +} // end namespace GCOV + +/// GCOVBuffer - A wrapper around MemoryBuffer to provide GCOV specific +/// read operations. +class GCOVBuffer { +public: + GCOVBuffer(MemoryBuffer *B) : Buffer(B) {} + + /// readGCNOFormat - Check GCNO signature is valid at the beginning of buffer. + bool readGCNOFormat() { + StringRef File = Buffer->getBuffer().slice(0, 4); + if (File != "oncg") { + errs() << "Unexpected file type: " << File << ".\n"; + return false; + } + Cursor = 4; + return true; + } + + /// readGCDAFormat - Check GCDA signature is valid at the beginning of buffer. 
+  bool readGCDAFormat() {
+    // The magic is compared exactly as it is laid out in the buffer.
+    StringRef File = Buffer->getBuffer().slice(0, 4);
+    if (File != "adcg") {
+      errs() << "Unexpected file type: " << File << ".\n";
+      return false;
+    }
+    // Position the cursor just past the 4-byte magic.
+    Cursor = 4;
+    return true;
+  }
+
+  /// readGCOVVersion - Read GCOV version.
+  ///
+  /// Recognizes the version strings "*204", "*404" and "*704". On a match the
+  /// cursor advances by four bytes, \p Version is set and true is returned.
+  /// Otherwise a diagnostic is printed, the cursor is left where it was and
+  /// false is returned.
+  bool readGCOVVersion(GCOV::GCOVVersion &Version) {
+    StringRef VersionStr = Buffer->getBuffer().slice(Cursor, Cursor + 4);
+    if (VersionStr == "*204") {
+      Cursor += 4;
+      Version = GCOV::V402;
+      return true;
+    }
+    if (VersionStr == "*404") {
+      Cursor += 4;
+      Version = GCOV::V404;
+      return true;
+    }
+    if (VersionStr == "*704") {
+      Cursor += 4;
+      Version = GCOV::V704;
+      return true;
+    }
+    errs() << "Unexpected version: " << VersionStr << ".\n";
+    return false;
+  }
+
+  /// readFunctionTag - If cursor points to a function tag then increment the
+  /// cursor and return true otherwise return false.
+  bool readFunctionTag() {
+    StringRef Tag = Buffer->getBuffer().slice(Cursor, Cursor + 4);
+    // slice() clamps to the end of the buffer, so fewer than four bytes may
+    // come back; require all four before indexing them.
+    if (Tag.size() < 4 || Tag[0] != '\0' || Tag[1] != '\0' || Tag[2] != '\0' ||
+        Tag[3] != '\1') {
+      return false;
+    }
+    Cursor += 4;
+    return true;
+  }
+
+  /// readBlockTag - If cursor points to a block tag then increment the
+  /// cursor and return true otherwise return false.
+  bool readBlockTag() {
+    StringRef Tag = Buffer->getBuffer().slice(Cursor, Cursor + 4);
+    // A truncated tail (1-3 bytes) is not a tag; check the length first.
+    if (Tag.size() < 4 || Tag[0] != '\0' || Tag[1] != '\0' ||
+        Tag[2] != '\x41' || Tag[3] != '\x01') {
+      return false;
+    }
+    Cursor += 4;
+    return true;
+  }
+
+  /// readEdgeTag - If cursor points to an edge tag then increment the
+  /// cursor and return true otherwise return false.
+  bool readEdgeTag() {
+    StringRef Tag = Buffer->getBuffer().slice(Cursor, Cursor + 4);
+    // A truncated tail (1-3 bytes) is not a tag; check the length first.
+    if (Tag.size() < 4 || Tag[0] != '\0' || Tag[1] != '\0' ||
+        Tag[2] != '\x43' || Tag[3] != '\x01') {
+      return false;
+    }
+    Cursor += 4;
+    return true;
+  }
+
+  /// readLineTag - If cursor points to a line tag then increment the
+  /// cursor and return true otherwise return false.
+  bool readLineTag() {
+    StringRef Tag = Buffer->getBuffer().slice(Cursor, Cursor + 4);
+    // slice() clamps to the end of the buffer, so fewer than four bytes may
+    // come back; require all four before indexing them.
+    if (Tag.size() < 4 || Tag[0] != '\0' || Tag[1] != '\0' ||
+        Tag[2] != '\x45' || Tag[3] != '\x01') {
+      return false;
+    }
+    Cursor += 4;
+    return true;
+  }
+
+  /// readArcTag - If cursor points to an gcda arc tag then increment the
+  /// cursor and return true otherwise return false.
+  bool readArcTag() {
+    StringRef Tag = Buffer->getBuffer().slice(Cursor, Cursor + 4);
+    // A truncated tail (1-3 bytes) is not a tag; check the length first.
+    if (Tag.size() < 4 || Tag[0] != '\0' || Tag[1] != '\0' ||
+        Tag[2] != '\xa1' || Tag[3] != '\1') {
+      return false;
+    }
+    Cursor += 4;
+    return true;
+  }
+
+  /// readObjectTag - If cursor points to an object summary tag then increment
+  /// the cursor and return true otherwise return false.
+  bool readObjectTag() {
+    StringRef Tag = Buffer->getBuffer().slice(Cursor, Cursor + 4);
+    // A truncated tail (1-3 bytes) is not a tag; check the length first.
+    if (Tag.size() < 4 || Tag[0] != '\0' || Tag[1] != '\0' || Tag[2] != '\0' ||
+        Tag[3] != '\xa1') {
+      return false;
+    }
+    Cursor += 4;
+    return true;
+  }
+
+  /// readProgramTag - If cursor points to a program summary tag then increment
+  /// the cursor and return true otherwise return false.
+  bool readProgramTag() {
+    StringRef Tag = Buffer->getBuffer().slice(Cursor, Cursor + 4);
+    // A truncated tail (1-3 bytes) is not a tag; check the length first.
+    if (Tag.size() < 4 || Tag[0] != '\0' || Tag[1] != '\0' || Tag[2] != '\0' ||
+        Tag[3] != '\xa3') {
+      return false;
+    }
+    Cursor += 4;
+    return true;
+  }
+
+  /// readInt - Read the next four bytes into \p Val and advance the cursor.
+  /// Prints a diagnostic and returns false (cursor unchanged) when fewer than
+  /// four bytes remain.
+  bool readInt(uint32_t &Val) {
+    if (Buffer->getBuffer().size() < Cursor + 4) {
+      errs() << "Unexpected end of memory buffer: " << Cursor + 4 << ".\n";
+      return false;
+    }
+    StringRef Str = Buffer->getBuffer().slice(Cursor, Cursor + 4);
+    Cursor += 4;
+    // The word is loaded in host byte order.
+    // NOTE(review): the load goes through a pointer cast, which presumes the
+    // data is suitably aligned for uint32_t -- confirm for every buffer source.
+    Val = *(const uint32_t *)(Str.data());
+    return true;
+  }
+
+  /// readInt64 - Read two consecutive 32-bit words, low word first, and
+  /// combine them into \p Val. Returns false if either read fails.
+  bool readInt64(uint64_t &Val) {
+    uint32_t Lo, Hi;
+    if (!readInt(Lo) || !readInt(Hi))
+      return false;
+    Val = (static_cast<uint64_t>(Hi) << 32) | Lo;
+    return true;
+  }
+
+  /// readString - Read a length-prefixed string. The length is stored as a
+  /// count of 4-byte words; \p Str is set to the bytes up to the first NUL
+  /// within that span and the cursor skips the whole span. Returns false when
+  /// the buffer ends early.
+  bool readString(StringRef &Str) {
+    uint32_t Len = 0;
+    // Keep reading until we find a non-zero length. This emulates gcov's
+    // behaviour, which appears to do the same.
+    while (Len == 0)
+      if (!readInt(Len))
+        return false;
+    // Widen before scaling: a 32-bit multiply would wrap for large word
+    // counts and let a bogus length slip past the bounds check below.
+    const uint64_t ByteLen = static_cast<uint64_t>(Len) * 4;
+    if (Buffer->getBuffer().size() < Cursor + ByteLen) {
+      errs() << "Unexpected end of memory buffer: " << Cursor + ByteLen
+             << ".\n";
+      return false;
+    }
+    Str =
+        Buffer->getBuffer().slice(Cursor, Cursor + ByteLen).split('\0').first;
+    Cursor += ByteLen;
+    return true;
+  }
+
+  /// Current read offset, in bytes, from the start of the buffer.
+  uint64_t getCursor() const { return Cursor; }
+
+  /// Skip \p n 4-byte words. The product is formed in 64 bits so that a large
+  /// \p n cannot wrap before it is added to the cursor.
+  void advanceCursor(uint32_t n) { Cursor += static_cast<uint64_t>(n) * 4; }
+
+private:
+  MemoryBuffer *Buffer;
+  uint64_t Cursor = 0;
+};
+
+/// GCOVFile - Collects coverage information for one pair of coverage file
+/// (.gcno and .gcda).
+class GCOVFile {
+public:
+  GCOVFile() = default;
+
+  bool readGCNO(GCOVBuffer &Buffer);
+  bool readGCDA(GCOVBuffer &Buffer);
+  uint32_t getChecksum() const { return Checksum; }
+  void print(raw_ostream &OS) const;
+  void dump() const;
+  void collectLineCounts(FileInfo &FI);
+
+private:
+  bool GCNOInitialized = false;
+  // Given a definite value (the first enumerator) so the member is never read
+  // while indeterminate.
+  GCOV::GCOVVersion Version = GCOV::V402;
+  uint32_t Checksum = 0;
+  SmallVector<std::unique_ptr<GCOVFunction>, 16> Functions;
+  uint32_t RunCount = 0;
+  uint32_t ProgramCount = 0;
+};
+
+/// GCOVEdge - Collects edge information.
+struct GCOVEdge {
+  GCOVEdge(GCOVBlock &S, GCOVBlock &D) : Src(S), Dst(D) {}
+
+  GCOVBlock &Src;
+  GCOVBlock &Dst;
+  uint64_t Count = 0;
+};
+
+/// GCOVFunction - Collects function information.
+class GCOVFunction {
+public:
+  /// Iterates the owned blocks, yielding GCOVBlock references rather than the
+  /// unique_ptr elements that store them.
+  using BlockIterator = pointee_iterator<SmallVectorImpl<
+      std::unique_ptr<GCOVBlock>>::const_iterator>;
+
+  /// \p P is held by reference for the lifetime of this object.
+  GCOVFunction(GCOVFile &P) : Parent(P) {}
+
+  bool readGCNO(GCOVBuffer &Buffer, GCOV::GCOVVersion Version);
+  bool readGCDA(GCOVBuffer &Buffer, GCOV::GCOVVersion Version);
+  StringRef getName() const { return Name; }
+  StringRef getFilename() const { return Filename; }
+  size_t getNumBlocks() const { return Blocks.size(); }
+  uint64_t getEntryCount() const;
+  uint64_t getExitCount() const;
+
+  BlockIterator block_begin() const { return Blocks.begin(); }
+  BlockIterator block_end() const { return Blocks.end(); }
+  iterator_range<BlockIterator> blocks() const {
+    return make_range(block_begin(), block_end());
+  }
+
+  void print(raw_ostream &OS) const;
+  void dump() const;
+  void collectLineCounts(FileInfo &FI);
+
+private:
+  GCOVFile &Parent;
+  uint32_t Ident = 0;
+  // Zero-initialized like the neighbouring scalar members so it is never read
+  // while indeterminate.
+  uint32_t Checksum = 0;
+  uint32_t LineNumber = 0;
+  StringRef Name;
+  StringRef Filename;
+  SmallVector<std::unique_ptr<GCOVBlock>, 16> Blocks;
+  SmallVector<std::unique_ptr<GCOVEdge>, 16> Edges;
+};
+
+/// GCOVBlock - Collects block information.
+class GCOVBlock {
+  struct EdgeWeight {
+    EdgeWeight(GCOVBlock *D) : Dst(D) {}
+
+    GCOVBlock *Dst;
+    uint64_t Count = 0;
+  };
+
+  /// Orders edges by the number of their destination block.
+  struct SortDstEdgesFunctor {
+    // const-qualified so the comparator can also be invoked through a const
+    // object.
+    bool operator()(const GCOVEdge *E1, const GCOVEdge *E2) const {
+      return E1->Dst.Number < E2->Dst.Number;
+    }
+  };
+
+public:
+  using EdgeIterator = SmallVectorImpl<GCOVEdge *>::const_iterator;
+
+  GCOVBlock(GCOVFunction &P, uint32_t N) : Parent(P), Number(N) {}
+  ~GCOVBlock();
+
+  const GCOVFunction &getParent() const { return Parent; }
+  void addLine(uint32_t N) { Lines.push_back(N); }
+  // NOTE(review): calls back() unconditionally -- presumably callers only ask
+  // after at least one addLine(); confirm.
+  uint32_t getLastLine() const { return Lines.back(); }
+  void addCount(size_t DstEdgeNo, uint64_t N);
+  uint64_t getCount() const { return Counter; }
+
+  /// Record an incoming edge; \p Edge must have this block as destination.
+  void addSrcEdge(GCOVEdge *Edge) {
+    assert(&Edge->Dst == this); // up to caller to ensure edge is valid
+    SrcEdges.push_back(Edge);
+  }
+
+  /// Record an outgoing edge; \p Edge must have this block as source.
+  void addDstEdge(GCOVEdge *Edge) {
+    assert(&Edge->Src == this); // up to caller to ensure edge is valid
+    // Check if adding this edge causes list to become unsorted.
+    if (!DstEdges.empty() && DstEdges.back()->Dst.Number > Edge->Dst.Number)
+      DstEdgesAreSorted = false;
+    DstEdges.push_back(Edge);
+  }
+
+  size_t getNumSrcEdges() const { return SrcEdges.size(); }
+  size_t getNumDstEdges() const { return DstEdges.size(); }
+  void sortDstEdges();
+
+  EdgeIterator src_begin() const { return SrcEdges.begin(); }
+  EdgeIterator src_end() const { return SrcEdges.end(); }
+  iterator_range<EdgeIterator> srcs() const {
+    return make_range(src_begin(), src_end());
+  }
+
+  EdgeIterator dst_begin() const { return DstEdges.begin(); }
+  EdgeIterator dst_end() const { return DstEdges.end(); }
+  iterator_range<EdgeIterator> dsts() const {
+    return make_range(dst_begin(), dst_end());
+  }
+
+  void print(raw_ostream &OS) const;
+  void dump() const;
+  void collectLineCounts(FileInfo &FI);
+
+private:
+  GCOVFunction &Parent;
+  uint32_t Number;
+  uint64_t Counter = 0;
+  bool DstEdgesAreSorted = true;
+  SmallVector<GCOVEdge *, 16> SrcEdges;
+  SmallVector<GCOVEdge *, 16> DstEdges;
+  SmallVector<uint32_t, 16> Lines;
+};
+
+class FileInfo {
+  // It is unlikely--but possible--for multiple functions to be on the same
+  // line. Therefore this alias allows LineData.Functions to store multiple
+  // functions per instance. This is rare, however, so optimize for the common
+  // case.
+  using FunctionVector = SmallVector<const GCOVFunction *, 1>;
+  using FunctionLines = DenseMap<uint32_t, FunctionVector>;
+  using BlockVector = SmallVector<const GCOVBlock *, 4>;
+  using BlockLines = DenseMap<uint32_t, BlockVector>;
+
+  // Per-file bookkeeping: blocks and functions keyed by (Line - 1), plus the
+  // highest line number recorded so far.
+  struct LineData {
+    LineData() = default;
+
+    BlockLines Blocks;
+    FunctionLines Functions;
+    uint32_t LastLine = 0;
+  };
+
+  struct GCOVCoverage {
+    GCOVCoverage(StringRef Name) : Name(Name) {}
+
+    StringRef Name;
+
+    uint32_t LogicalLines = 0;
+    uint32_t LinesExec = 0;
+
+    uint32_t Branches = 0;
+    uint32_t BranchesExec = 0;
+    uint32_t BranchesTaken = 0;
+  };
+
+public:
+  /// \p Options is stored by reference, so it must outlive this object.
+  FileInfo(const GCOV::Options &Options) : Options(Options) {}
+
+  /// Associate \p Block with \p Line of \p Filename and raise the file's
+  /// LastLine if needed.
+  void addBlockLine(StringRef Filename, uint32_t Line, const GCOVBlock *Block) {
+    // One map lookup instead of three.
+    LineData &Data = LineInfo[Filename];
+    if (Line > Data.LastLine)
+      Data.LastLine = Line;
+    // NOTE(review): Line is presumably 1-based; Line == 0 would wrap the key
+    // to UINT32_MAX -- confirm callers never pass 0.
+    Data.Blocks[Line - 1].push_back(Block);
+  }
+
+  /// Associate \p Function with \p Line of \p Filename and raise the file's
+  /// LastLine if needed.
+  void addFunctionLine(StringRef Filename, uint32_t Line,
+                       const GCOVFunction *Function) {
+    // One map lookup instead of three.
+    LineData &Data = LineInfo[Filename];
+    if (Line > Data.LastLine)
+      Data.LastLine = Line;
+    // NOTE(review): same 1-based assumption as addBlockLine() -- confirm.
+    Data.Functions[Line - 1].push_back(Function);
+  }
+
+  void setRunCount(uint32_t Runs) { RunCount = Runs; }
+  void setProgramCount(uint32_t Programs) { ProgramCount = Programs; }
+  void print(raw_ostream &OS, StringRef MainFilename, StringRef GCNOFile,
+             StringRef GCDAFile);
+
+private:
+  std::string getCoveragePath(StringRef Filename, StringRef MainFilename);
+  std::unique_ptr<raw_ostream> openCoveragePath(StringRef CoveragePath);
+  void printFunctionSummary(raw_ostream &OS, const FunctionVector &Funcs) const;
+  void printBlockInfo(raw_ostream &OS, const GCOVBlock &Block,
+                      uint32_t LineIndex, uint32_t &BlockNo) const;
+  void printBranchInfo(raw_ostream &OS, const GCOVBlock &Block,
+                       GCOVCoverage &Coverage, uint32_t &EdgeNo);
+  void printUncondBranchInfo(raw_ostream &OS, uint32_t &EdgeNo,
+                             uint64_t Count) const;
+
+  void printCoverage(raw_ostream &OS, const GCOVCoverage &Coverage) const;
+  void printFuncCoverage(raw_ostream &OS) const;
+  void printFileCoverage(raw_ostream &OS) const;
+
+  const GCOV::Options &Options;
+  StringMap<LineData> LineInfo;
+  uint32_t RunCount = 0;
+  uint32_t ProgramCount = 0;
+
+  using FileCoverageList = SmallVector<std::pair<std::string, GCOVCoverage>, 4>;
+  using FuncCoverageMap = MapVector<const GCOVFunction *, GCOVCoverage>;
+
+  FileCoverageList FileCoverages;
+  FuncCoverageMap FuncCoverages;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_PROFILEDATA_GCOV_H
diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h index 772187f70153c..b08b78cd593cc 100644 --- a/include/llvm/ProfileData/InstrProf.h +++ b/include/llvm/ProfileData/InstrProf.h @@ -295,7 +295,8 @@ enum class instrprof_error { value_site_count_mismatch, compress_failed, uncompress_failed, - empty_raw_profile + empty_raw_profile, + zlib_unavailable }; inline std::error_code make_error_code(instrprof_error E) { @@ -858,7 +859,9 @@ enum ProfVersion { // In this version, profile summary data \c IndexedInstrProf::Summary is // stored after the profile header. Version4 = 4, - // The current version is 4. + // In this version, the frontend PGO stable hash algorithm defaults to V2. + Version5 = 5, + // The current version is 5. CurrentVersion = INSTR_PROF_INDEX_VERSION }; const uint64_t Version = ProfVersion::CurrentVersion; diff --git a/include/llvm/ProfileData/InstrProfData.inc b/include/llvm/ProfileData/InstrProfData.inc index be0dd4ad04bf6..6a98dc7b9b853 100644 --- a/include/llvm/ProfileData/InstrProfData.inc +++ b/include/llvm/ProfileData/InstrProfData.inc @@ -628,9 +628,9 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, /* Raw profile format version (start from 1). */ #define INSTR_PROF_RAW_VERSION 4 /* Indexed profile format version (start from 1). */ -#define INSTR_PROF_INDEX_VERSION 4 +#define INSTR_PROF_INDEX_VERSION 5 /* Coverage mapping format vresion (start from 0). 
*/ -#define INSTR_PROF_COVMAP_VERSION 1 +#define INSTR_PROF_COVMAP_VERSION 2 /* Profile version is always of type uint64_t. Reserve the upper 8 bits in the * version for other variants of profile. We set the lowest bit of the upper 8 diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h index 424360e0f7655..aa58ead1eda19 100644 --- a/include/llvm/ProfileData/InstrProfReader.h +++ b/include/llvm/ProfileData/InstrProfReader.h @@ -397,6 +397,8 @@ private: std::unique_ptr<InstrProfReaderIndexBase> Index; /// Profile summary data. std::unique_ptr<ProfileSummary> Summary; + // Index to the current record in the record array. + unsigned RecordIndex; // Read the profile summary. Return a pointer pointing to one byte past the // end of the summary data if it exists or the input \c Cur. @@ -405,7 +407,7 @@ private: public: IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer) - : DataBuffer(std::move(DataBuffer)) {} + : DataBuffer(std::move(DataBuffer)), RecordIndex(0) {} IndexedInstrProfReader(const IndexedInstrProfReader &) = delete; IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; diff --git a/include/llvm/ProfileData/SampleProf.h b/include/llvm/ProfileData/SampleProf.h index 7fc258831be88..9eccafc65f3a9 100644 --- a/include/llvm/ProfileData/SampleProf.h +++ b/include/llvm/ProfileData/SampleProf.h @@ -185,7 +185,9 @@ raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample); class FunctionSamples; using BodySampleMap = std::map<LineLocation, SampleRecord>; -using FunctionSamplesMap = StringMap<FunctionSamples>; +// NOTE: Using a StringMap here makes parsed profiles consume around 17% more +// memory, which is *very* significant for large profiles. +using FunctionSamplesMap = std::map<std::string, FunctionSamples>; using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>; /// Representation of the samples collected for a function. 
@@ -278,7 +280,7 @@ public: return nullptr; auto FS = iter->second.find(CalleeName); if (FS != iter->second.end()) - return &FS->getValue(); + return &FS->second; // If we cannot find exact match of the callee name, return the FS with // the max total count. uint64_t MaxTotalSamples = 0; @@ -296,10 +298,33 @@ public: /// Return the total number of samples collected inside the function. uint64_t getTotalSamples() const { return TotalSamples; } - /// Return the total number of samples collected at the head of the - /// function. + /// Return the total number of branch samples that have the function as the + /// branch target. This should be equivalent to the sample of the first + /// instruction of the symbol. But as we directly get this info for raw + /// profile without referring to potentially inaccurate debug info, this + /// gives more accurate profile data and is preferred for standalone symbols. uint64_t getHeadSamples() const { return TotalHeadSamples; } + /// Return the sample count of the first instruction of the function. + /// The function can be either a standalone symbol or an inlined function. + uint64_t getEntrySamples() const { + // Use either BodySamples or CallsiteSamples which ever has the smaller + // lineno. + if (!BodySamples.empty() && + (CallsiteSamples.empty() || + BodySamples.begin()->first < CallsiteSamples.begin()->first)) + return BodySamples.begin()->second.getSamples(); + if (!CallsiteSamples.empty()) { + uint64_t T = 0; + // An indirect callsite may be promoted to several inlined direct calls. + // We need to get the sum of them. + for (const auto &N_FS : CallsiteSamples.begin()->second) + T += N_FS.second.getEntrySamples(); + return T; + } + return 0; + } + /// Return all the samples collected in the body of the function. 
const BodySampleMap &getBodySamples() const { return BodySamples; } @@ -324,24 +349,32 @@ public: const LineLocation &Loc = I.first; FunctionSamplesMap &FSMap = functionSamplesAt(Loc); for (const auto &Rec : I.second) - MergeResult(Result, FSMap[Rec.first()].merge(Rec.second, Weight)); + MergeResult(Result, FSMap[Rec.first].merge(Rec.second, Weight)); } return Result; } - /// Recursively traverses all children, if the corresponding function is - /// not defined in module \p M, and its total sample is no less than - /// \p Threshold, add its corresponding GUID to \p S. - void findImportedFunctions(DenseSet<GlobalValue::GUID> &S, const Module *M, - uint64_t Threshold) const { + /// Recursively traverses all children, if the total sample count of the + /// corresponding function is no less than \p Threshold, add its corresponding + /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID + /// to \p S. + void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S, const Module *M, + uint64_t Threshold) const { if (TotalSamples <= Threshold) return; - Function *F = M->getFunction(Name); - if (!F || !F->getSubprogram()) - S.insert(Function::getGUID(Name)); - for (auto CS : CallsiteSamples) + S.insert(Function::getGUID(Name)); + // Import hot CallTargets, which may not be available in IR because full + // profile annotation cannot be done until backend compilation in ThinLTO. + for (const auto &BS : BodySamples) + for (const auto &TS : BS.second.getCallTargets()) + if (TS.getValue() > Threshold) { + Function *Callee = M->getFunction(TS.getKey()); + if (!Callee || !Callee->getSubprogram()) + S.insert(Function::getGUID(TS.getKey())); + } + for (const auto &CS : CallsiteSamples) for (const auto &NameFS : CS.second) - NameFS.second.findImportedFunctions(S, M, Threshold); + NameFS.second.findInlinedFunctions(S, M, Threshold); } /// Set the name of the function. 
diff --git a/include/llvm/ProfileData/SampleProfReader.h b/include/llvm/ProfileData/SampleProfReader.h index 9c1f357cbbd16..0e9ab2dc60ee1 100644 --- a/include/llvm/ProfileData/SampleProfReader.h +++ b/include/llvm/ProfileData/SampleProfReader.h @@ -217,10 +217,10 @@ #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/ProfileSummary.h" +#include "llvm/ProfileData/GCOV.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorOr.h" -#include "llvm/Support/GCOV.h" #include "llvm/Support/MemoryBuffer.h" #include <algorithm> #include <cstdint> |