aboutsummaryrefslogtreecommitdiff
path: root/lib/Transforms/IPO/SampleProfile.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-04-16 16:01:22 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-04-16 16:01:22 +0000
commit71d5a2540a98c81f5bcaeb48805e0e2881f530ef (patch)
tree5343938942df402b49ec7300a1c25a2d4ccd5821 /lib/Transforms/IPO/SampleProfile.cpp
parent31bbf64f3a4974a2d6c8b3b27ad2f519caf74057 (diff)
Diffstat (limited to 'lib/Transforms/IPO/SampleProfile.cpp')
-rw-r--r--lib/Transforms/IPO/SampleProfile.cpp271
1 files changed, 188 insertions, 83 deletions
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index 6a43f8dbac48..3371de6e3d14 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -43,6 +44,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -50,6 +52,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <cctype>
@@ -159,8 +162,11 @@ protected:
ErrorOr<uint64_t> getInstWeight(const Instruction &I);
ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
+ std::vector<const FunctionSamples *>
+ findIndirectCallFunctionSamples(const Instruction &I) const;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
- bool inlineHotFunctions(Function &F);
+ bool inlineHotFunctions(Function &F,
+ DenseSet<GlobalValue::GUID> &ImportGUIDs);
void printEdgeWeight(raw_ostream &OS, Edge E);
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
@@ -173,7 +179,7 @@ protected:
void buildEdges(Function &F);
bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
void computeDominanceAndLoopInfo(Function &F);
- unsigned getOffset(unsigned L, unsigned H) const;
+ unsigned getOffset(const DILocation *DIL) const;
void clearFunctionData();
/// \brief Map basic blocks to their computed weights.
@@ -326,11 +332,12 @@ SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS) const {
// If there are inlined callsites in this function, count the samples found
// in the respective bodies. However, do not bother counting callees with 0
// total samples, these are callees that were never invoked at runtime.
- for (const auto &I : FS->getCallsiteSamples()) {
- const FunctionSamples *CalleeSamples = &I.second;
- if (callsiteIsHot(FS, CalleeSamples))
- Count += countUsedRecords(CalleeSamples);
- }
+ for (const auto &I : FS->getCallsiteSamples())
+ for (const auto &J : I.second) {
+ const FunctionSamples *CalleeSamples = &J.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Count += countUsedRecords(CalleeSamples);
+ }
return Count;
}
@@ -343,11 +350,12 @@ SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS) const {
unsigned Count = FS->getBodySamples().size();
// Only count records in hot callsites.
- for (const auto &I : FS->getCallsiteSamples()) {
- const FunctionSamples *CalleeSamples = &I.second;
- if (callsiteIsHot(FS, CalleeSamples))
- Count += countBodyRecords(CalleeSamples);
- }
+ for (const auto &I : FS->getCallsiteSamples())
+ for (const auto &J : I.second) {
+ const FunctionSamples *CalleeSamples = &J.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Count += countBodyRecords(CalleeSamples);
+ }
return Count;
}
@@ -362,11 +370,12 @@ SampleCoverageTracker::countBodySamples(const FunctionSamples *FS) const {
Total += I.second.getSamples();
// Only count samples in hot callsites.
- for (const auto &I : FS->getCallsiteSamples()) {
- const FunctionSamples *CalleeSamples = &I.second;
- if (callsiteIsHot(FS, CalleeSamples))
- Total += countBodySamples(CalleeSamples);
- }
+ for (const auto &I : FS->getCallsiteSamples())
+ for (const auto &J : I.second) {
+ const FunctionSamples *CalleeSamples = &J.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Total += countBodySamples(CalleeSamples);
+ }
return Total;
}
@@ -398,15 +407,11 @@ void SampleProfileLoader::clearFunctionData() {
CoverageTracker.clear();
}
-/// \brief Returns the offset of lineno \p L to head_lineno \p H
-///
-/// \param L Lineno
-/// \param H Header lineno of the function
-///
-/// \returns offset to the header lineno. 16 bits are used to represent offset.
+/// Returns the line offset to the start line of the subprogram.
/// We assume that a single function will not exceed 65535 LOC.
-unsigned SampleProfileLoader::getOffset(unsigned L, unsigned H) const {
- return (L - H) & 0xffff;
+unsigned SampleProfileLoader::getOffset(const DILocation *DIL) const {
+ return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
+ 0xffff;
}
/// \brief Print the weight of edge \p E on stream \p OS.
@@ -451,8 +456,7 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
/// \param Inst Instruction to query.
///
/// \returns the weight of \p Inst.
-ErrorOr<uint64_t>
-SampleProfileLoader::getInstWeight(const Instruction &Inst) {
+ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
const DebugLoc &DLoc = Inst.getDebugLoc();
if (!DLoc)
return std::error_code();
@@ -470,19 +474,14 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) {
// If a call/invoke instruction is inlined in profile, but not inlined here,
// it means that the inlined callsite has no sample, thus the call
// instruction should have 0 count.
- bool IsCall = isa<CallInst>(Inst) || isa<InvokeInst>(Inst);
- if (IsCall && findCalleeFunctionSamples(Inst))
+ if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
+ findCalleeFunctionSamples(Inst))
return 0;
const DILocation *DIL = DLoc;
- unsigned Lineno = DLoc.getLine();
- unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine();
-
- uint32_t LineOffset = getOffset(Lineno, HeaderLineno);
- uint32_t Discriminator = DIL->getDiscriminator();
- ErrorOr<uint64_t> R = IsCall
- ? FS->findCallSamplesAt(LineOffset, Discriminator)
- : FS->findSamplesAt(LineOffset, Discriminator);
+ uint32_t LineOffset = getOffset(DIL);
+ uint32_t Discriminator = DIL->getBaseDiscriminator();
+ ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
if (R) {
bool FirstMark =
CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
@@ -491,13 +490,14 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) {
LLVMContext &Ctx = F->getContext();
emitOptimizationRemark(
Ctx, DEBUG_TYPE, *F, DLoc,
- Twine("Applied ") + Twine(*R) + " samples from profile (offset: " +
- Twine(LineOffset) +
+ Twine("Applied ") + Twine(*R) +
+ " samples from profile (offset: " + Twine(LineOffset) +
((Discriminator) ? Twine(".") + Twine(Discriminator) : "") + ")");
}
- DEBUG(dbgs() << " " << Lineno << "." << DIL->getDiscriminator() << ":"
- << Inst << " (line offset: " << Lineno - HeaderLineno << "."
- << DIL->getDiscriminator() << " - weight: " << R.get()
+ DEBUG(dbgs() << " " << DLoc.getLine() << "."
+ << DIL->getBaseDiscriminator() << ":" << Inst
+ << " (line offset: " << LineOffset << "."
+ << DIL->getBaseDiscriminator() << " - weight: " << R.get()
<< ")\n");
}
return R;
@@ -511,8 +511,7 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) {
/// \param BB The basic block to query.
///
/// \returns the weight for \p BB.
-ErrorOr<uint64_t>
-SampleProfileLoader::getBlockWeight(const BasicBlock *BB) {
+ErrorOr<uint64_t> SampleProfileLoader::getBlockWeight(const BasicBlock *BB) {
uint64_t Max = 0;
bool HasWeight = false;
for (auto &I : BB->getInstList()) {
@@ -565,16 +564,49 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
if (!DIL) {
return nullptr;
}
- DISubprogram *SP = DIL->getScope()->getSubprogram();
- if (!SP)
- return nullptr;
+
+ StringRef CalleeName;
+ if (const CallInst *CI = dyn_cast<CallInst>(&Inst))
+ if (Function *Callee = CI->getCalledFunction())
+ CalleeName = Callee->getName();
const FunctionSamples *FS = findFunctionSamples(Inst);
if (FS == nullptr)
return nullptr;
- return FS->findFunctionSamplesAt(LineLocation(
- getOffset(DIL->getLine(), SP->getLine()), DIL->getDiscriminator()));
+ return FS->findFunctionSamplesAt(
+ LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()), CalleeName);
+}
+
+/// Returns a vector of FunctionSamples that are the indirect call targets
+/// of \p Inst. The vector is sorted by the total number of samples.
+std::vector<const FunctionSamples *>
+SampleProfileLoader::findIndirectCallFunctionSamples(
+ const Instruction &Inst) const {
+ const DILocation *DIL = Inst.getDebugLoc();
+ std::vector<const FunctionSamples *> R;
+
+ if (!DIL) {
+ return R;
+ }
+
+ const FunctionSamples *FS = findFunctionSamples(Inst);
+ if (FS == nullptr)
+ return R;
+
+ if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(
+ LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()))) {
+ if (M->size() == 0)
+ return R;
+ for (const auto &NameFS : *M) {
+ R.push_back(&NameFS.second);
+ }
+ std::sort(R.begin(), R.end(),
+ [](const FunctionSamples *L, const FunctionSamples *R) {
+ return L->getTotalSamples() > R->getTotalSamples();
+ });
+ }
+ return R;
}
/// \brief Get the FunctionSamples for an instruction.
@@ -588,23 +620,23 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
/// \returns the FunctionSamples pointer to the inlined instance.
const FunctionSamples *
SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
- SmallVector<LineLocation, 10> S;
+ SmallVector<std::pair<LineLocation, StringRef>, 10> S;
const DILocation *DIL = Inst.getDebugLoc();
- if (!DIL) {
+ if (!DIL)
return Samples;
- }
+
+ const DILocation *PrevDIL = DIL;
for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
- DISubprogram *SP = DIL->getScope()->getSubprogram();
- if (!SP)
- return nullptr;
- S.push_back(LineLocation(getOffset(DIL->getLine(), SP->getLine()),
- DIL->getDiscriminator()));
+ S.push_back(std::make_pair(
+ LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()),
+ PrevDIL->getScope()->getSubprogram()->getLinkageName()));
+ PrevDIL = DIL;
}
if (S.size() == 0)
return Samples;
const FunctionSamples *FS = Samples;
for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
- FS = FS->findFunctionSamplesAt(S[i]);
+ FS = FS->findFunctionSamplesAt(S[i].first, S[i].second);
}
return FS;
}
@@ -614,14 +646,17 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
/// Iteratively traverse all callsites of the function \p F, and find if
/// the corresponding inlined instance exists and is hot in profile. If
/// it is hot enough, inline the callsites and adds new callsites of the
-/// callee into the caller.
-///
-/// TODO: investigate the possibility of not invoking InlineFunction directly.
+/// callee into the caller. If the call is an indirect call, first promote
+/// it to direct call. Each indirect call is limited with a single target.
///
/// \param F function to perform iterative inlining.
+/// \param ImportGUIDs a set to be updated to include all GUIDs that come
+/// from a different module but inlined in the profiled binary.
///
/// \returns True if there is any inline happened.
-bool SampleProfileLoader::inlineHotFunctions(Function &F) {
+bool SampleProfileLoader::inlineHotFunctions(
+ Function &F, DenseSet<GlobalValue::GUID> &ImportGUIDs) {
+ DenseSet<Instruction *> PromotedInsns;
bool Changed = false;
LLVMContext &Ctx = F.getContext();
std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&](
@@ -647,18 +682,42 @@ bool SampleProfileLoader::inlineHotFunctions(Function &F) {
}
for (auto I : CIS) {
InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr);
- CallSite CS(I);
- Function *CalledFunction = CS.getCalledFunction();
- if (!CalledFunction || !CalledFunction->getSubprogram())
+ Function *CalledFunction = CallSite(I).getCalledFunction();
+ Instruction *DI = I;
+ if (!CalledFunction && !PromotedInsns.count(I) &&
+ CallSite(I).isIndirectCall())
+ for (const auto *FS : findIndirectCallFunctionSamples(*I)) {
+ auto CalleeFunctionName = FS->getName();
+ const char *Reason = "Callee function not available";
+ CalledFunction = F.getParent()->getFunction(CalleeFunctionName);
+ if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) {
+ // The indirect target was promoted and inlined in the profile, as a
+ // result, we do not have profile info for the branch probability.
+ // We set the probability to 80% taken to indicate that the static
+ // call is likely taken.
+ DI = dyn_cast<Instruction>(
+ promoteIndirectCall(I, CalledFunction, 80, 100, false)
+ ->stripPointerCasts());
+ PromotedInsns.insert(I);
+ } else {
+ DEBUG(dbgs() << "\nFailed to promote indirect call to "
+ << CalleeFunctionName << " because " << Reason
+ << "\n");
+ continue;
+ }
+ }
+ if (!CalledFunction || !CalledFunction->getSubprogram()) {
+ findCalleeFunctionSamples(*I)->findImportedFunctions(
+ ImportGUIDs, F.getParent(),
+ Samples->getTotalSamples() * SampleProfileHotThreshold / 100);
continue;
+ }
DebugLoc DLoc = I->getDebugLoc();
- uint64_t NumSamples = findCalleeFunctionSamples(*I)->getTotalSamples();
- if (InlineFunction(CS, IFI)) {
+ if (InlineFunction(CallSite(DI), IFI)) {
LocalChanged = true;
emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
Twine("inlined hot callee '") +
- CalledFunction->getName() + "' with " +
- Twine(NumSamples) + " samples into '" +
+ CalledFunction->getName() + "' into '" +
F.getName() + "'");
}
}
@@ -994,6 +1053,26 @@ void SampleProfileLoader::buildEdges(Function &F) {
}
}
+/// Sorts the CallTargetMap \p M by count in descending order and stores the
+/// sorted result in \p Sorted. Returns the total counts.
+static uint64_t SortCallTargets(SmallVector<InstrProfValueData, 2> &Sorted,
+ const SampleRecord::CallTargetMap &M) {
+ Sorted.clear();
+ uint64_t Sum = 0;
+ for (auto I = M.begin(); I != M.end(); ++I) {
+ Sum += I->getValue();
+ Sorted.push_back({Function::getGUID(I->getKey()), I->getValue()});
+ }
+ std::sort(Sorted.begin(), Sorted.end(),
+ [](const InstrProfValueData &L, const InstrProfValueData &R) {
+ if (L.Count == R.Count)
+ return L.Value > R.Value;
+ else
+ return L.Count > R.Count;
+ });
+ return Sum;
+}
+
/// \brief Propagate weights into edges
///
/// The following rules are applied to every block BB in the CFG:
@@ -1015,10 +1094,6 @@ void SampleProfileLoader::propagateWeights(Function &F) {
bool Changed = true;
unsigned I = 0;
- // Add an entry count to the function using the samples gathered
- // at the function entry.
- F.setEntryCount(Samples->getHeadSamples() + 1);
-
// If BB weight is larger than its corresponding loop's header BB weight,
// use the BB weight to replace the loop header BB weight.
for (auto &BI : F) {
@@ -1071,13 +1146,32 @@ void SampleProfileLoader::propagateWeights(Function &F) {
if (BlockWeights[BB]) {
for (auto &I : BB->getInstList()) {
- if (CallInst *CI = dyn_cast<CallInst>(&I)) {
- if (!dyn_cast<IntrinsicInst>(&I)) {
- SmallVector<uint32_t, 1> Weights;
- Weights.push_back(BlockWeights[BB]);
- CI->setMetadata(LLVMContext::MD_prof,
- MDB.createBranchWeights(Weights));
- }
+ if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
+ continue;
+ CallSite CS(&I);
+ if (!CS.getCalledFunction()) {
+ const DebugLoc &DLoc = I.getDebugLoc();
+ if (!DLoc)
+ continue;
+ const DILocation *DIL = DLoc;
+ uint32_t LineOffset = getOffset(DIL);
+ uint32_t Discriminator = DIL->getBaseDiscriminator();
+
+ const FunctionSamples *FS = findFunctionSamples(I);
+ if (!FS)
+ continue;
+ auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
+ if (!T || T.get().size() == 0)
+ continue;
+ SmallVector<InstrProfValueData, 2> SortedCallTargets;
+ uint64_t Sum = SortCallTargets(SortedCallTargets, T.get());
+ annotateValueSite(*I.getParent()->getParent()->getParent(), I,
+ SortedCallTargets, Sum, IPVK_IndirectCallTarget,
+ SortedCallTargets.size());
+ } else if (!dyn_cast<IntrinsicInst>(&I)) {
+ SmallVector<uint32_t, 1> Weights;
+ Weights.push_back(BlockWeights[BB]);
+ I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
}
}
}
@@ -1115,9 +1209,13 @@ void SampleProfileLoader::propagateWeights(Function &F) {
}
}
+ uint64_t TempWeight;
// Only set weights if there is at least one non-zero weight.
// In any other case, let the analyzer set weights.
- if (MaxWeight > 0) {
+ // Do not set weights if the weights are present. In ThinLTO, the profile
+ // annotation is done twice. If the first annotation already set the
+ // weights, the second pass does not need to set it.
+ if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) {
DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
TI->setMetadata(llvm::LLVMContext::MD_prof,
MDB.createBranchWeights(Weights));
@@ -1228,12 +1326,19 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
<< ": " << getFunctionLoc(F) << "\n");
- Changed |= inlineHotFunctions(F);
+ DenseSet<GlobalValue::GUID> ImportGUIDs;
+ Changed |= inlineHotFunctions(F, ImportGUIDs);
// Compute basic block weights.
Changed |= computeBlockWeights(F);
if (Changed) {
+ // Add an entry count to the function using the samples gathered at the
+ // function entry. Also sets the GUIDs that comes from a different
+ // module but inlined in the profiled binary. This is aiming at making
+ // the IR match the profiled binary before annotation.
+ F.setEntryCount(Samples->getHeadSamples() + 1, &ImportGUIDs);
+
// Compute dominance and loop info needed for propagation.
computeDominanceAndLoopInfo(F);
@@ -1329,7 +1434,7 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
bool SampleProfileLoader::runOnFunction(Function &F) {
F.setEntryCount(0);
Samples = Reader->getSamplesFor(F);
- if (!Samples->empty())
+ if (Samples && !Samples->empty())
return emitAnnotations(F);
return false;
}