diff options
Diffstat (limited to 'lib/Transforms/IPO/SampleProfile.cpp')
-rw-r--r-- | lib/Transforms/IPO/SampleProfile.cpp | 238 |
1 files changed, 192 insertions, 46 deletions
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index 877d20e72ffc..6184681db8a2 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -72,6 +72,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/MisExpect.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -79,6 +80,7 @@ #include <limits> #include <map> #include <memory> +#include <queue> #include <string> #include <system_error> #include <utility> @@ -128,6 +130,12 @@ static cl::opt<bool> ProfileSampleAccurate( "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. ")); +static cl::opt<bool> ProfileAccurateForSymsInList( + "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore, + cl::init(true), + cl::desc("For symbols in profile symbol list, regard their profiles to " + "be accurate. It may be overriden by profile-sample-accurate. ")); + namespace { using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>; @@ -137,9 +145,11 @@ using EdgeWeightMap = DenseMap<Edge, uint64_t>; using BlockEdgeMap = DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>; +class SampleProfileLoader; + class SampleCoverageTracker { public: - SampleCoverageTracker() = default; + SampleCoverageTracker(SampleProfileLoader &SPL) : SPLoader(SPL){}; bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset, uint32_t Discriminator, uint64_t Samples); @@ -185,6 +195,76 @@ private: /// keyed by FunctionSamples pointers, but these stats are cleared after /// every function, so we just need to keep a single counter. uint64_t TotalUsedSamples = 0; + + SampleProfileLoader &SPLoader; +}; + +class GUIDToFuncNameMapper { +public: + GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader, + DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap) + : CurrentReader(Reader), CurrentModule(M), + CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) { + if (CurrentReader.getFormat() != SPF_Compact_Binary) + return; + + for (const auto &F : CurrentModule) { + StringRef OrigName = F.getName(); + CurrentGUIDToFuncNameMap.insert( + {Function::getGUID(OrigName), OrigName}); + + // Local to global var promotion used by optimization like thinlto + // will rename the var and add suffix like ".llvm.xxx" to the + // original local name. In sample profile, the suffixes of function + // names are all stripped. Since it is possible that the mapper is + // built in post-thin-link phase and var promotion has been done, + // we need to add the substring of function name without the suffix + // into the GUIDToFuncNameMap. + StringRef CanonName = FunctionSamples::getCanonicalFnName(F); + if (CanonName != OrigName) + CurrentGUIDToFuncNameMap.insert( + {Function::getGUID(CanonName), CanonName}); + } + + // Update GUIDToFuncNameMap for each function including inlinees. + SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap); + } + + ~GUIDToFuncNameMapper() { + if (CurrentReader.getFormat() != SPF_Compact_Binary) + return; + + CurrentGUIDToFuncNameMap.clear(); + + // Reset GUIDToFuncNameMap for of each function as they're no + // longer valid at this point. + SetGUIDToFuncNameMapForAll(nullptr); + } + +private: + void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) { + std::queue<FunctionSamples *> FSToUpdate; + for (auto &IFS : CurrentReader.getProfiles()) { + FSToUpdate.push(&IFS.second); + } + + while (!FSToUpdate.empty()) { + FunctionSamples *FS = FSToUpdate.front(); + FSToUpdate.pop(); + FS->GUIDToFuncNameMap = Map; + for (const auto &ICS : FS->getCallsiteSamples()) { + const FunctionSamplesMap &FSMap = ICS.second; + for (auto &IFS : FSMap) { + FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second); + FSToUpdate.push(&FS); + } + } + } + } + + SampleProfileReader &CurrentReader; + Module &CurrentModule; + DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap; }; /// Sample profile pass. @@ -199,8 +279,9 @@ public: std::function<AssumptionCache &(Function &)> GetAssumptionCache, std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo) : GetAC(std::move(GetAssumptionCache)), - GetTTI(std::move(GetTargetTransformInfo)), Filename(Name), - RemappingFilename(RemapName), IsThinLTOPreLink(IsThinLTOPreLink) {} + GetTTI(std::move(GetTargetTransformInfo)), CoverageTracker(*this), + Filename(Name), RemappingFilename(RemapName), + IsThinLTOPreLink(IsThinLTOPreLink) {} bool doInitialization(Module &M); bool runOnModule(Module &M, ModuleAnalysisManager *AM, @@ -209,6 +290,8 @@ public: void dump() { Reader->dump(); } protected: + friend class SampleCoverageTracker; + bool runOnFunction(Function &F, ModuleAnalysisManager *AM); unsigned getFunctionLoc(Function &F); bool emitAnnotations(Function &F); @@ -237,6 +320,8 @@ protected: bool propagateThroughEdges(Function &F, bool UpdateBlockCount); void computeDominanceAndLoopInfo(Function &F); void clearFunctionData(); + bool callsiteIsHot(const FunctionSamples *CallsiteFS, + ProfileSummaryInfo *PSI); /// Map basic blocks to their computed weights. /// @@ -310,6 +395,10 @@ protected: /// Profile Summary Info computed from sample profile. ProfileSummaryInfo *PSI = nullptr; + /// Profle Symbol list tells whether a function name appears in the binary + /// used to generate the current profile. + std::unique_ptr<ProfileSymbolList> PSL; + /// Total number of samples collected in this profile. /// /// This is the sum of all the samples collected in all the functions executed @@ -326,6 +415,21 @@ protected: uint64_t entryCount; }; DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo; + + // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for + // all the function symbols defined or declared in current module. + DenseMap<uint64_t, StringRef> GUIDToFuncNameMap; + + // All the Names used in FunctionSamples including outline function + // names, inline instance names and call target names. + StringSet<> NamesInProfile; + + // For symbol in profile symbol list, whether to regard their profiles + // to be accurate. It is mainly decided by existance of profile symbol + // list and -profile-accurate-for-symsinlist flag, but it can be + // overriden by -profile-sample-accurate or profile-sample-accurate + // attribute. + bool ProfAccForSymsInList; }; class SampleProfileLoaderLegacyPass : public ModulePass { @@ -381,14 +485,23 @@ private: /// To decide whether an inlined callsite is hot, we compare the callsite /// sample count with the hot cutoff computed by ProfileSummaryInfo, it is /// regarded as hot if the count is above the cutoff value. -static bool callsiteIsHot(const FunctionSamples *CallsiteFS, - ProfileSummaryInfo *PSI) { +/// +/// When ProfileAccurateForSymsInList is enabled and profile symbol list +/// is present, functions in the profile symbol list but without profile will +/// be regarded as cold and much less inlining will happen in CGSCC inlining +/// pass, so we tend to lower the hot criteria here to allow more early +/// inlining to happen for warm callsites and it is helpful for performance. +bool SampleProfileLoader::callsiteIsHot(const FunctionSamples *CallsiteFS, + ProfileSummaryInfo *PSI) { if (!CallsiteFS) return false; // The callsite was not inlined in the original binary. assert(PSI && "PSI is expected to be non null"); uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples(); - return PSI->isHotCount(CallsiteTotalSamples); + if (ProfAccForSymsInList) + return !PSI->isColdCount(CallsiteTotalSamples); + else + return PSI->isHotCount(CallsiteTotalSamples); } /// Mark as used the sample record for the given function samples at @@ -425,7 +538,7 @@ SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS, for (const auto &I : FS->getCallsiteSamples()) for (const auto &J : I.second) { const FunctionSamples *CalleeSamples = &J.second; - if (callsiteIsHot(CalleeSamples, PSI)) + if (SPLoader.callsiteIsHot(CalleeSamples, PSI)) Count += countUsedRecords(CalleeSamples, PSI); } @@ -444,7 +557,7 @@ SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS, for (const auto &I : FS->getCallsiteSamples()) for (const auto &J : I.second) { const FunctionSamples *CalleeSamples = &J.second; - if (callsiteIsHot(CalleeSamples, PSI)) + if (SPLoader.callsiteIsHot(CalleeSamples, PSI)) Count += countBodyRecords(CalleeSamples, PSI); } @@ -465,7 +578,7 @@ SampleCoverageTracker::countBodySamples(const FunctionSamples *FS, for (const auto &I : FS->getCallsiteSamples()) for (const auto &J : I.second) { const FunctionSamples *CalleeSamples = &J.second; - if (callsiteIsHot(CalleeSamples, PSI)) + if (SPLoader.callsiteIsHot(CalleeSamples, PSI)) Total += countBodySamples(CalleeSamples, PSI); } @@ -788,6 +901,14 @@ bool SampleProfileLoader::inlineHotFunctions( Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) { DenseSet<Instruction *> PromotedInsns; + // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure + // Profile symbol list is ignored when profile-sample-accurate is on. + assert((!ProfAccForSymsInList || + (!ProfileSampleAccurate && + !F.hasFnAttribute("profile-sample-accurate"))) && + "ProfAccForSymsInList should be false when profile-sample-accurate " + "is enabled"); + DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites; bool Changed = false; while (true) { @@ -1219,17 +1340,12 @@ void SampleProfileLoader::buildEdges(Function &F) { } /// Returns the sorted CallTargetMap \p M by count in descending order. -static SmallVector<InstrProfValueData, 2> SortCallTargets( - const SampleRecord::CallTargetMap &M) { +static SmallVector<InstrProfValueData, 2> GetSortedValueDataFromCallTargets( + const SampleRecord::CallTargetMap & M) { SmallVector<InstrProfValueData, 2> R; - for (auto I = M.begin(); I != M.end(); ++I) - R.push_back({FunctionSamples::getGUID(I->getKey()), I->getValue()}); - llvm::sort(R, [](const InstrProfValueData &L, const InstrProfValueData &R) { - if (L.Count == R.Count) - return L.Value > R.Value; - else - return L.Count > R.Count; - }); + for (const auto &I : SampleRecord::SortCallTargets(M)) { + R.emplace_back(InstrProfValueData{FunctionSamples::getGUID(I.first), I.second}); + } return R; } @@ -1324,7 +1440,7 @@ void SampleProfileLoader::propagateWeights(Function &F) { if (!T || T.get().empty()) continue; SmallVector<InstrProfValueData, 2> SortedCallTargets = - SortCallTargets(T.get()); + GetSortedValueDataFromCallTargets(T.get()); uint64_t Sum; findIndirectCallFunctionSamples(I, Sum); annotateValueSite(*I.getParent()->getParent()->getParent(), I, @@ -1374,6 +1490,8 @@ void SampleProfileLoader::propagateWeights(Function &F) { } } + misexpect::verifyMisExpect(TI, Weights, TI->getContext()); + uint64_t TempWeight; // Only set weights if there is at least one non-zero weight. // In any other case, let the analyzer set weights. @@ -1557,30 +1675,29 @@ INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile", bool SampleProfileLoader::doInitialization(Module &M) { auto &Ctx = M.getContext(); - auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx); + + std::unique_ptr<SampleProfileReaderItaniumRemapper> RemapReader; + auto ReaderOrErr = + SampleProfileReader::create(Filename, Ctx, RemappingFilename); if (std::error_code EC = ReaderOrErr.getError()) { std::string Msg = "Could not open profile: " + EC.message(); Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); return false; } Reader = std::move(ReaderOrErr.get()); - Reader->collectFuncsToUse(M); + Reader->collectFuncsFrom(M); ProfileIsValid = (Reader->read() == sampleprof_error::success); - - if (!RemappingFilename.empty()) { - // Apply profile remappings to the loaded profile data if requested. - // For now, we only support remapping symbols encoded using the Itanium - // C++ ABI's name mangling scheme. - ReaderOrErr = SampleProfileReaderItaniumRemapper::create( - RemappingFilename, Ctx, std::move(Reader)); - if (std::error_code EC = ReaderOrErr.getError()) { - std::string Msg = "Could not open profile remapping file: " + EC.message(); - Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); - return false; - } - Reader = std::move(ReaderOrErr.get()); - ProfileIsValid = (Reader->read() == sampleprof_error::success); + PSL = Reader->getProfileSymbolList(); + + // While profile-sample-accurate is on, ignore symbol list. + ProfAccForSymsInList = + ProfileAccurateForSymsInList && PSL && !ProfileSampleAccurate; + if (ProfAccForSymsInList) { + NamesInProfile.clear(); + if (auto NameTable = Reader->getNameTable()) + NamesInProfile.insert(NameTable->begin(), NameTable->end()); } + return true; } @@ -1594,7 +1711,7 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) { bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, ProfileSummaryInfo *_PSI) { - FunctionSamples::GUIDToFuncNameMapper Mapper(M); + GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); if (!ProfileIsValid) return false; @@ -1651,19 +1768,48 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { } bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) { - + DILocation2SampleMap.clear(); // By default the entry count is initialized to -1, which will be treated // conservatively by getEntryCount as the same as unknown (None). This is // to avoid newly added code to be treated as cold. If we have samples // this will be overwritten in emitAnnotations. - // If ProfileSampleAccurate is true or F has profile-sample-accurate - // attribute, initialize the entry count to 0 so callsites or functions - // unsampled will be treated as cold. - uint64_t initialEntryCount = - (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) - ? 0 - : -1; + uint64_t initialEntryCount = -1; + + ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL; + if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) { + // initialize all the function entry counts to 0. It means all the + // functions without profile will be regarded as cold. + initialEntryCount = 0; + // profile-sample-accurate is a user assertion which has a higher precedence + // than symbol list. When profile-sample-accurate is on, ignore symbol list. + ProfAccForSymsInList = false; + } + + // PSL -- profile symbol list include all the symbols in sampled binary. + // If ProfileAccurateForSymsInList is enabled, PSL is used to treat + // old functions without samples being cold, without having to worry + // about new and hot functions being mistakenly treated as cold. + if (ProfAccForSymsInList) { + // Initialize the entry count to 0 for functions in the list. + if (PSL->contains(F.getName())) + initialEntryCount = 0; + + // Function in the symbol list but without sample will be regarded as + // cold. To minimize the potential negative performance impact it could + // have, we want to be a little conservative here saying if a function + // shows up in the profile, no matter as outline function, inline instance + // or call targets, treat the function as not being cold. This will handle + // the cases such as most callsites of a function are inlined in sampled + // binary but not inlined in current build (because of source code drift, + // imprecise debug information, or the callsites are all cold individually + // but not cold accumulatively...), so the outline function showing up as + // cold in sampled binary will actually not be cold after current build. + StringRef CanonName = FunctionSamples::getCanonicalFnName(F); + if (NamesInProfile.count(CanonName)) + initialEntryCount = -1; + } + F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); std::unique_ptr<OptimizationRemarkEmitter> OwnedORE; if (AM) { @@ -1672,7 +1818,7 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) .getManager(); ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); } else { - OwnedORE = make_unique<OptimizationRemarkEmitter>(&F); + OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F); ORE = OwnedORE.get(); } Samples = Reader->getSamplesFor(F); |