aboutsummaryrefslogtreecommitdiff
path: root/lib/Transforms/IPO/SampleProfile.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/IPO/SampleProfile.cpp')
-rw-r--r--lib/Transforms/IPO/SampleProfile.cpp238
1 files changed, 192 insertions, 46 deletions
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index 877d20e72ffc..6184681db8a2 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -72,6 +72,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/MisExpect.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -79,6 +80,7 @@
#include <limits>
#include <map>
#include <memory>
+#include <queue>
#include <string>
#include <system_error>
#include <utility>
@@ -128,6 +130,12 @@ static cl::opt<bool> ProfileSampleAccurate(
"callsite and function as having 0 samples. Otherwise, treat "
"un-sampled callsites and functions conservatively as unknown. "));
+static cl::opt<bool> ProfileAccurateForSymsInList(
+ "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore,
+ cl::init(true),
+ cl::desc("For symbols in profile symbol list, regard their profiles to "
+ "be accurate. It may be overriden by profile-sample-accurate. "));
+
namespace {
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -137,9 +145,11 @@ using EdgeWeightMap = DenseMap<Edge, uint64_t>;
using BlockEdgeMap =
DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>;
+class SampleProfileLoader;
+
class SampleCoverageTracker {
public:
- SampleCoverageTracker() = default;
+ SampleCoverageTracker(SampleProfileLoader &SPL) : SPLoader(SPL){};
bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
uint32_t Discriminator, uint64_t Samples);
@@ -185,6 +195,76 @@ private:
/// keyed by FunctionSamples pointers, but these stats are cleared after
/// every function, so we just need to keep a single counter.
uint64_t TotalUsedSamples = 0;
+
+ SampleProfileLoader &SPLoader;
+};
+
+class GUIDToFuncNameMapper {
+public:
+ GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader,
+ DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
+ : CurrentReader(Reader), CurrentModule(M),
+ CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
+ if (CurrentReader.getFormat() != SPF_Compact_Binary)
+ return;
+
+ for (const auto &F : CurrentModule) {
+ StringRef OrigName = F.getName();
+ CurrentGUIDToFuncNameMap.insert(
+ {Function::getGUID(OrigName), OrigName});
+
+ // Local to global var promotion used by optimization like thinlto
+ // will rename the var and add suffix like ".llvm.xxx" to the
+ // original local name. In sample profile, the suffixes of function
+ // names are all stripped. Since it is possible that the mapper is
+ // built in post-thin-link phase and var promotion has been done,
+ // we need to add the substring of function name without the suffix
+ // into the GUIDToFuncNameMap.
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+ if (CanonName != OrigName)
+ CurrentGUIDToFuncNameMap.insert(
+ {Function::getGUID(CanonName), CanonName});
+ }
+
+ // Update GUIDToFuncNameMap for each function including inlinees.
+ SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
+ }
+
+ ~GUIDToFuncNameMapper() {
+ if (CurrentReader.getFormat() != SPF_Compact_Binary)
+ return;
+
+ CurrentGUIDToFuncNameMap.clear();
+
+ // Reset GUIDToFuncNameMap for of each function as they're no
+ // longer valid at this point.
+ SetGUIDToFuncNameMapForAll(nullptr);
+ }
+
+private:
+ void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) {
+ std::queue<FunctionSamples *> FSToUpdate;
+ for (auto &IFS : CurrentReader.getProfiles()) {
+ FSToUpdate.push(&IFS.second);
+ }
+
+ while (!FSToUpdate.empty()) {
+ FunctionSamples *FS = FSToUpdate.front();
+ FSToUpdate.pop();
+ FS->GUIDToFuncNameMap = Map;
+ for (const auto &ICS : FS->getCallsiteSamples()) {
+ const FunctionSamplesMap &FSMap = ICS.second;
+ for (auto &IFS : FSMap) {
+ FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second);
+ FSToUpdate.push(&FS);
+ }
+ }
+ }
+ }
+
+ SampleProfileReader &CurrentReader;
+ Module &CurrentModule;
+ DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
};
/// Sample profile pass.
@@ -199,8 +279,9 @@ public:
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
: GetAC(std::move(GetAssumptionCache)),
- GetTTI(std::move(GetTargetTransformInfo)), Filename(Name),
- RemappingFilename(RemapName), IsThinLTOPreLink(IsThinLTOPreLink) {}
+ GetTTI(std::move(GetTargetTransformInfo)), CoverageTracker(*this),
+ Filename(Name), RemappingFilename(RemapName),
+ IsThinLTOPreLink(IsThinLTOPreLink) {}
bool doInitialization(Module &M);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
@@ -209,6 +290,8 @@ public:
void dump() { Reader->dump(); }
protected:
+ friend class SampleCoverageTracker;
+
bool runOnFunction(Function &F, ModuleAnalysisManager *AM);
unsigned getFunctionLoc(Function &F);
bool emitAnnotations(Function &F);
@@ -237,6 +320,8 @@ protected:
bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
void computeDominanceAndLoopInfo(Function &F);
void clearFunctionData();
+ bool callsiteIsHot(const FunctionSamples *CallsiteFS,
+ ProfileSummaryInfo *PSI);
/// Map basic blocks to their computed weights.
///
@@ -310,6 +395,10 @@ protected:
/// Profile Summary Info computed from sample profile.
ProfileSummaryInfo *PSI = nullptr;
+ /// Profle Symbol list tells whether a function name appears in the binary
+ /// used to generate the current profile.
+ std::unique_ptr<ProfileSymbolList> PSL;
+
/// Total number of samples collected in this profile.
///
/// This is the sum of all the samples collected in all the functions executed
@@ -326,6 +415,21 @@ protected:
uint64_t entryCount;
};
DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo;
+
+ // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
+ // all the function symbols defined or declared in current module.
+ DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
+
+ // All the Names used in FunctionSamples including outline function
+ // names, inline instance names and call target names.
+ StringSet<> NamesInProfile;
+
+ // For symbol in profile symbol list, whether to regard their profiles
+ // to be accurate. It is mainly decided by existance of profile symbol
+ // list and -profile-accurate-for-symsinlist flag, but it can be
+ // overriden by -profile-sample-accurate or profile-sample-accurate
+ // attribute.
+ bool ProfAccForSymsInList;
};
class SampleProfileLoaderLegacyPass : public ModulePass {
@@ -381,14 +485,23 @@ private:
/// To decide whether an inlined callsite is hot, we compare the callsite
/// sample count with the hot cutoff computed by ProfileSummaryInfo, it is
/// regarded as hot if the count is above the cutoff value.
-static bool callsiteIsHot(const FunctionSamples *CallsiteFS,
- ProfileSummaryInfo *PSI) {
+///
+/// When ProfileAccurateForSymsInList is enabled and profile symbol list
+/// is present, functions in the profile symbol list but without profile will
+/// be regarded as cold and much less inlining will happen in CGSCC inlining
+/// pass, so we tend to lower the hot criteria here to allow more early
+/// inlining to happen for warm callsites and it is helpful for performance.
+bool SampleProfileLoader::callsiteIsHot(const FunctionSamples *CallsiteFS,
+ ProfileSummaryInfo *PSI) {
if (!CallsiteFS)
return false; // The callsite was not inlined in the original binary.
assert(PSI && "PSI is expected to be non null");
uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
- return PSI->isHotCount(CallsiteTotalSamples);
+ if (ProfAccForSymsInList)
+ return !PSI->isColdCount(CallsiteTotalSamples);
+ else
+ return PSI->isHotCount(CallsiteTotalSamples);
}
/// Mark as used the sample record for the given function samples at
@@ -425,7 +538,7 @@ SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS,
for (const auto &I : FS->getCallsiteSamples())
for (const auto &J : I.second) {
const FunctionSamples *CalleeSamples = &J.second;
- if (callsiteIsHot(CalleeSamples, PSI))
+ if (SPLoader.callsiteIsHot(CalleeSamples, PSI))
Count += countUsedRecords(CalleeSamples, PSI);
}
@@ -444,7 +557,7 @@ SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS,
for (const auto &I : FS->getCallsiteSamples())
for (const auto &J : I.second) {
const FunctionSamples *CalleeSamples = &J.second;
- if (callsiteIsHot(CalleeSamples, PSI))
+ if (SPLoader.callsiteIsHot(CalleeSamples, PSI))
Count += countBodyRecords(CalleeSamples, PSI);
}
@@ -465,7 +578,7 @@ SampleCoverageTracker::countBodySamples(const FunctionSamples *FS,
for (const auto &I : FS->getCallsiteSamples())
for (const auto &J : I.second) {
const FunctionSamples *CalleeSamples = &J.second;
- if (callsiteIsHot(CalleeSamples, PSI))
+ if (SPLoader.callsiteIsHot(CalleeSamples, PSI))
Total += countBodySamples(CalleeSamples, PSI);
}
@@ -788,6 +901,14 @@ bool SampleProfileLoader::inlineHotFunctions(
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
DenseSet<Instruction *> PromotedInsns;
+ // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
+ // Profile symbol list is ignored when profile-sample-accurate is on.
+ assert((!ProfAccForSymsInList ||
+ (!ProfileSampleAccurate &&
+ !F.hasFnAttribute("profile-sample-accurate"))) &&
+ "ProfAccForSymsInList should be false when profile-sample-accurate "
+ "is enabled");
+
DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites;
bool Changed = false;
while (true) {
@@ -1219,17 +1340,12 @@ void SampleProfileLoader::buildEdges(Function &F) {
}
/// Returns the sorted CallTargetMap \p M by count in descending order.
-static SmallVector<InstrProfValueData, 2> SortCallTargets(
- const SampleRecord::CallTargetMap &M) {
+static SmallVector<InstrProfValueData, 2> GetSortedValueDataFromCallTargets(
+ const SampleRecord::CallTargetMap & M) {
SmallVector<InstrProfValueData, 2> R;
- for (auto I = M.begin(); I != M.end(); ++I)
- R.push_back({FunctionSamples::getGUID(I->getKey()), I->getValue()});
- llvm::sort(R, [](const InstrProfValueData &L, const InstrProfValueData &R) {
- if (L.Count == R.Count)
- return L.Value > R.Value;
- else
- return L.Count > R.Count;
- });
+ for (const auto &I : SampleRecord::SortCallTargets(M)) {
+ R.emplace_back(InstrProfValueData{FunctionSamples::getGUID(I.first), I.second});
+ }
return R;
}
@@ -1324,7 +1440,7 @@ void SampleProfileLoader::propagateWeights(Function &F) {
if (!T || T.get().empty())
continue;
SmallVector<InstrProfValueData, 2> SortedCallTargets =
- SortCallTargets(T.get());
+ GetSortedValueDataFromCallTargets(T.get());
uint64_t Sum;
findIndirectCallFunctionSamples(I, Sum);
annotateValueSite(*I.getParent()->getParent()->getParent(), I,
@@ -1374,6 +1490,8 @@ void SampleProfileLoader::propagateWeights(Function &F) {
}
}
+ misexpect::verifyMisExpect(TI, Weights, TI->getContext());
+
uint64_t TempWeight;
// Only set weights if there is at least one non-zero weight.
// In any other case, let the analyzer set weights.
@@ -1557,30 +1675,29 @@ INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
bool SampleProfileLoader::doInitialization(Module &M) {
auto &Ctx = M.getContext();
- auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
+
+ std::unique_ptr<SampleProfileReaderItaniumRemapper> RemapReader;
+ auto ReaderOrErr =
+ SampleProfileReader::create(Filename, Ctx, RemappingFilename);
if (std::error_code EC = ReaderOrErr.getError()) {
std::string Msg = "Could not open profile: " + EC.message();
Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
return false;
}
Reader = std::move(ReaderOrErr.get());
- Reader->collectFuncsToUse(M);
+ Reader->collectFuncsFrom(M);
ProfileIsValid = (Reader->read() == sampleprof_error::success);
-
- if (!RemappingFilename.empty()) {
- // Apply profile remappings to the loaded profile data if requested.
- // For now, we only support remapping symbols encoded using the Itanium
- // C++ ABI's name mangling scheme.
- ReaderOrErr = SampleProfileReaderItaniumRemapper::create(
- RemappingFilename, Ctx, std::move(Reader));
- if (std::error_code EC = ReaderOrErr.getError()) {
- std::string Msg = "Could not open profile remapping file: " + EC.message();
- Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
- return false;
- }
- Reader = std::move(ReaderOrErr.get());
- ProfileIsValid = (Reader->read() == sampleprof_error::success);
+ PSL = Reader->getProfileSymbolList();
+
+ // While profile-sample-accurate is on, ignore symbol list.
+ ProfAccForSymsInList =
+ ProfileAccurateForSymsInList && PSL && !ProfileSampleAccurate;
+ if (ProfAccForSymsInList) {
+ NamesInProfile.clear();
+ if (auto NameTable = Reader->getNameTable())
+ NamesInProfile.insert(NameTable->begin(), NameTable->end());
}
+
return true;
}
@@ -1594,7 +1711,7 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
ProfileSummaryInfo *_PSI) {
- FunctionSamples::GUIDToFuncNameMapper Mapper(M);
+ GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
if (!ProfileIsValid)
return false;
@@ -1651,19 +1768,48 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
}
bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
-
+
DILocation2SampleMap.clear();
// By default the entry count is initialized to -1, which will be treated
// conservatively by getEntryCount as the same as unknown (None). This is
// to avoid newly added code to be treated as cold. If we have samples
// this will be overwritten in emitAnnotations.
- // If ProfileSampleAccurate is true or F has profile-sample-accurate
- // attribute, initialize the entry count to 0 so callsites or functions
- // unsampled will be treated as cold.
- uint64_t initialEntryCount =
- (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"))
- ? 0
- : -1;
+ uint64_t initialEntryCount = -1;
+
+ ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
+ if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
+ // initialize all the function entry counts to 0. It means all the
+ // functions without profile will be regarded as cold.
+ initialEntryCount = 0;
+ // profile-sample-accurate is a user assertion which has a higher precedence
+ // than symbol list. When profile-sample-accurate is on, ignore symbol list.
+ ProfAccForSymsInList = false;
+ }
+
+ // PSL -- profile symbol list include all the symbols in sampled binary.
+ // If ProfileAccurateForSymsInList is enabled, PSL is used to treat
+ // old functions without samples being cold, without having to worry
+ // about new and hot functions being mistakenly treated as cold.
+ if (ProfAccForSymsInList) {
+ // Initialize the entry count to 0 for functions in the list.
+ if (PSL->contains(F.getName()))
+ initialEntryCount = 0;
+
+ // Function in the symbol list but without sample will be regarded as
+ // cold. To minimize the potential negative performance impact it could
+ // have, we want to be a little conservative here saying if a function
+ // shows up in the profile, no matter as outline function, inline instance
+ // or call targets, treat the function as not being cold. This will handle
+ // the cases such as most callsites of a function are inlined in sampled
+ // binary but not inlined in current build (because of source code drift,
+ // imprecise debug information, or the callsites are all cold individually
+ // but not cold accumulatively...), so the outline function showing up as
+ // cold in sampled binary will actually not be cold after current build.
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+ if (NamesInProfile.count(CanonName))
+ initialEntryCount = -1;
+ }
+
F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
if (AM) {
@@ -1672,7 +1818,7 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
.getManager();
ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
} else {
- OwnedORE = make_unique<OptimizationRemarkEmitter>(&F);
+ OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
ORE = OwnedORE.get();
}
Samples = Reader->getSamplesFor(F);