diff options
Diffstat (limited to 'lib/Transforms/IPO/SampleProfile.cpp')
-rw-r--r-- | lib/Transforms/IPO/SampleProfile.cpp | 271 |
1 files changed, 188 insertions, 83 deletions
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index 6a43f8dbac48..3371de6e3d14 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -43,6 +44,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/SampleProfReader.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -50,6 +52,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/Cloning.h" #include <cctype> @@ -159,8 +162,11 @@ protected: ErrorOr<uint64_t> getInstWeight(const Instruction &I); ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB); const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const; + std::vector<const FunctionSamples *> + findIndirectCallFunctionSamples(const Instruction &I) const; const FunctionSamples *findFunctionSamples(const Instruction &I) const; - bool inlineHotFunctions(Function &F); + bool inlineHotFunctions(Function &F, + DenseSet<GlobalValue::GUID> &ImportGUIDs); void printEdgeWeight(raw_ostream &OS, Edge E); void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const; void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); @@ -173,7 +179,7 @@ protected: void buildEdges(Function &F); bool propagateThroughEdges(Function &F, bool UpdateBlockCount); void computeDominanceAndLoopInfo(Function &F); - unsigned getOffset(unsigned L, unsigned H) const; + unsigned getOffset(const DILocation *DIL) const; void clearFunctionData(); /// \brief Map basic blocks to their computed weights. @@ -326,11 +332,12 @@ SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS) const { // If there are inlined callsites in this function, count the samples found // in the respective bodies. However, do not bother counting callees with 0 // total samples, these are callees that were never invoked at runtime. - for (const auto &I : FS->getCallsiteSamples()) { - const FunctionSamples *CalleeSamples = &I.second; - if (callsiteIsHot(FS, CalleeSamples)) - Count += countUsedRecords(CalleeSamples); - } + for (const auto &I : FS->getCallsiteSamples()) + for (const auto &J : I.second) { + const FunctionSamples *CalleeSamples = &J.second; + if (callsiteIsHot(FS, CalleeSamples)) + Count += countUsedRecords(CalleeSamples); + } return Count; } @@ -343,11 +350,12 @@ SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS) const { unsigned Count = FS->getBodySamples().size(); // Only count records in hot callsites. - for (const auto &I : FS->getCallsiteSamples()) { - const FunctionSamples *CalleeSamples = &I.second; - if (callsiteIsHot(FS, CalleeSamples)) - Count += countBodyRecords(CalleeSamples); - } + for (const auto &I : FS->getCallsiteSamples()) + for (const auto &J : I.second) { + const FunctionSamples *CalleeSamples = &J.second; + if (callsiteIsHot(FS, CalleeSamples)) + Count += countBodyRecords(CalleeSamples); + } return Count; } @@ -362,11 +370,12 @@ SampleCoverageTracker::countBodySamples(const FunctionSamples *FS) const { Total += I.second.getSamples(); // Only count samples in hot callsites. - for (const auto &I : FS->getCallsiteSamples()) { - const FunctionSamples *CalleeSamples = &I.second; - if (callsiteIsHot(FS, CalleeSamples)) - Total += countBodySamples(CalleeSamples); - } + for (const auto &I : FS->getCallsiteSamples()) + for (const auto &J : I.second) { + const FunctionSamples *CalleeSamples = &J.second; + if (callsiteIsHot(FS, CalleeSamples)) + Total += countBodySamples(CalleeSamples); + } return Total; } @@ -398,15 +407,11 @@ void SampleProfileLoader::clearFunctionData() { CoverageTracker.clear(); } -/// \brief Returns the offset of lineno \p L to head_lineno \p H -/// -/// \param L Lineno -/// \param H Header lineno of the function -/// -/// \returns offset to the header lineno. 16 bits are used to represent offset. +/// Returns the line offset to the start line of the subprogram. /// We assume that a single function will not exceed 65535 LOC. -unsigned SampleProfileLoader::getOffset(unsigned L, unsigned H) const { - return (L - H) & 0xffff; +unsigned SampleProfileLoader::getOffset(const DILocation *DIL) const { + return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & + 0xffff; } /// \brief Print the weight of edge \p E on stream \p OS. @@ -451,8 +456,7 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS, /// \param Inst Instruction to query. /// /// \returns the weight of \p Inst. -ErrorOr<uint64_t> -SampleProfileLoader::getInstWeight(const Instruction &Inst) { +ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) { const DebugLoc &DLoc = Inst.getDebugLoc(); if (!DLoc) return std::error_code(); @@ -470,19 +474,14 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) { // If a call/invoke instruction is inlined in profile, but not inlined here, // it means that the inlined callsite has no sample, thus the call // instruction should have 0 count. - bool IsCall = isa<CallInst>(Inst) || isa<InvokeInst>(Inst); - if (IsCall && findCalleeFunctionSamples(Inst)) + if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) && + findCalleeFunctionSamples(Inst)) return 0; const DILocation *DIL = DLoc; - unsigned Lineno = DLoc.getLine(); - unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine(); - - uint32_t LineOffset = getOffset(Lineno, HeaderLineno); - uint32_t Discriminator = DIL->getDiscriminator(); - ErrorOr<uint64_t> R = IsCall - ? FS->findCallSamplesAt(LineOffset, Discriminator) - : FS->findSamplesAt(LineOffset, Discriminator); + uint32_t LineOffset = getOffset(DIL); + uint32_t Discriminator = DIL->getBaseDiscriminator(); + ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator); if (R) { bool FirstMark = CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get()); @@ -491,13 +490,14 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) { LLVMContext &Ctx = F->getContext(); emitOptimizationRemark( Ctx, DEBUG_TYPE, *F, DLoc, - Twine("Applied ") + Twine(*R) + " samples from profile (offset: " + - Twine(LineOffset) + + Twine("Applied ") + Twine(*R) + + " samples from profile (offset: " + Twine(LineOffset) + ((Discriminator) ? Twine(".") + Twine(Discriminator) : "") + ")"); } - DEBUG(dbgs() << " " << Lineno << "." << DIL->getDiscriminator() << ":" - << Inst << " (line offset: " << Lineno - HeaderLineno << "." - << DIL->getDiscriminator() << " - weight: " << R.get() + DEBUG(dbgs() << " " << DLoc.getLine() << "." + << DIL->getBaseDiscriminator() << ":" << Inst + << " (line offset: " << LineOffset << "." + << DIL->getBaseDiscriminator() << " - weight: " << R.get() << ")\n"); } return R; @@ -511,8 +511,7 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) { /// \param BB The basic block to query. /// /// \returns the weight for \p BB. -ErrorOr<uint64_t> -SampleProfileLoader::getBlockWeight(const BasicBlock *BB) { +ErrorOr<uint64_t> SampleProfileLoader::getBlockWeight(const BasicBlock *BB) { uint64_t Max = 0; bool HasWeight = false; for (auto &I : BB->getInstList()) { @@ -565,16 +564,49 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const { if (!DIL) { return nullptr; } - DISubprogram *SP = DIL->getScope()->getSubprogram(); - if (!SP) - return nullptr; + + StringRef CalleeName; + if (const CallInst *CI = dyn_cast<CallInst>(&Inst)) + if (Function *Callee = CI->getCalledFunction()) + CalleeName = Callee->getName(); const FunctionSamples *FS = findFunctionSamples(Inst); if (FS == nullptr) return nullptr; - return FS->findFunctionSamplesAt(LineLocation( - getOffset(DIL->getLine(), SP->getLine()), DIL->getDiscriminator())); + return FS->findFunctionSamplesAt( + LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()), CalleeName); +} + +/// Returns a vector of FunctionSamples that are the indirect call targets +/// of \p Inst. The vector is sorted by the total number of samples. +std::vector<const FunctionSamples *> +SampleProfileLoader::findIndirectCallFunctionSamples( + const Instruction &Inst) const { + const DILocation *DIL = Inst.getDebugLoc(); + std::vector<const FunctionSamples *> R; + + if (!DIL) { + return R; + } + + const FunctionSamples *FS = findFunctionSamples(Inst); + if (FS == nullptr) + return R; + + if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt( + LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()))) { + if (M->size() == 0) + return R; + for (const auto &NameFS : *M) { + R.push_back(&NameFS.second); + } + std::sort(R.begin(), R.end(), + [](const FunctionSamples *L, const FunctionSamples *R) { + return L->getTotalSamples() > R->getTotalSamples(); + }); + } + return R; } /// \brief Get the FunctionSamples for an instruction. @@ -588,23 +620,23 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const { /// \returns the FunctionSamples pointer to the inlined instance. const FunctionSamples * SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { - SmallVector<LineLocation, 10> S; + SmallVector<std::pair<LineLocation, StringRef>, 10> S; const DILocation *DIL = Inst.getDebugLoc(); - if (!DIL) { + if (!DIL) return Samples; - } + + const DILocation *PrevDIL = DIL; for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { - DISubprogram *SP = DIL->getScope()->getSubprogram(); - if (!SP) - return nullptr; - S.push_back(LineLocation(getOffset(DIL->getLine(), SP->getLine()), - DIL->getDiscriminator())); + S.push_back(std::make_pair( + LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()), + PrevDIL->getScope()->getSubprogram()->getLinkageName())); + PrevDIL = DIL; } if (S.size() == 0) return Samples; const FunctionSamples *FS = Samples; for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) { - FS = FS->findFunctionSamplesAt(S[i]); + FS = FS->findFunctionSamplesAt(S[i].first, S[i].second); } return FS; } @@ -614,14 +646,17 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { /// Iteratively traverse all callsites of the function \p F, and find if /// the corresponding inlined instance exists and is hot in profile. If /// it is hot enough, inline the callsites and adds new callsites of the -/// callee into the caller. -/// -/// TODO: investigate the possibility of not invoking InlineFunction directly. +/// callee into the caller. If the call is an indirect call, first promote +/// it to direct call. Each indirect call is limited with a single target. /// /// \param F function to perform iterative inlining. +/// \param ImportGUIDs a set to be updated to include all GUIDs that come +/// from a different module but inlined in the profiled binary. /// /// \returns True if there is any inline happened. -bool SampleProfileLoader::inlineHotFunctions(Function &F) { +bool SampleProfileLoader::inlineHotFunctions( + Function &F, DenseSet<GlobalValue::GUID> &ImportGUIDs) { + DenseSet<Instruction *> PromotedInsns; bool Changed = false; LLVMContext &Ctx = F.getContext(); std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&]( @@ -647,18 +682,42 @@ bool SampleProfileLoader::inlineHotFunctions(Function &F) { } for (auto I : CIS) { InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr); - CallSite CS(I); - Function *CalledFunction = CS.getCalledFunction(); - if (!CalledFunction || !CalledFunction->getSubprogram()) + Function *CalledFunction = CallSite(I).getCalledFunction(); + Instruction *DI = I; + if (!CalledFunction && !PromotedInsns.count(I) && + CallSite(I).isIndirectCall()) + for (const auto *FS : findIndirectCallFunctionSamples(*I)) { + auto CalleeFunctionName = FS->getName(); + const char *Reason = "Callee function not available"; + CalledFunction = F.getParent()->getFunction(CalleeFunctionName); + if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) { + // The indirect target was promoted and inlined in the profile, as a + // result, we do not have profile info for the branch probability. + // We set the probability to 80% taken to indicate that the static + // call is likely taken. + DI = dyn_cast<Instruction>( + promoteIndirectCall(I, CalledFunction, 80, 100, false) + ->stripPointerCasts()); + PromotedInsns.insert(I); + } else { + DEBUG(dbgs() << "\nFailed to promote indirect call to " + << CalleeFunctionName << " because " << Reason + << "\n"); + continue; + } + } + if (!CalledFunction || !CalledFunction->getSubprogram()) { + findCalleeFunctionSamples(*I)->findImportedFunctions( + ImportGUIDs, F.getParent(), + Samples->getTotalSamples() * SampleProfileHotThreshold / 100); continue; + } DebugLoc DLoc = I->getDebugLoc(); - uint64_t NumSamples = findCalleeFunctionSamples(*I)->getTotalSamples(); - if (InlineFunction(CS, IFI)) { + if (InlineFunction(CallSite(DI), IFI)) { LocalChanged = true; emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc, Twine("inlined hot callee '") + - CalledFunction->getName() + "' with " + - Twine(NumSamples) + " samples into '" + + CalledFunction->getName() + "' into '" + F.getName() + "'"); } } @@ -994,6 +1053,26 @@ void SampleProfileLoader::buildEdges(Function &F) { } } +/// Sorts the CallTargetMap \p M by count in descending order and stores the +/// sorted result in \p Sorted. Returns the total counts. +static uint64_t SortCallTargets(SmallVector<InstrProfValueData, 2> &Sorted, + const SampleRecord::CallTargetMap &M) { + Sorted.clear(); + uint64_t Sum = 0; + for (auto I = M.begin(); I != M.end(); ++I) { + Sum += I->getValue(); + Sorted.push_back({Function::getGUID(I->getKey()), I->getValue()}); + } + std::sort(Sorted.begin(), Sorted.end(), + [](const InstrProfValueData &L, const InstrProfValueData &R) { + if (L.Count == R.Count) + return L.Value > R.Value; + else + return L.Count > R.Count; + }); + return Sum; +} + /// \brief Propagate weights into edges /// /// The following rules are applied to every block BB in the CFG: @@ -1015,10 +1094,6 @@ void SampleProfileLoader::propagateWeights(Function &F) { bool Changed = true; unsigned I = 0; - // Add an entry count to the function using the samples gathered - // at the function entry. - F.setEntryCount(Samples->getHeadSamples() + 1); - // If BB weight is larger than its corresponding loop's header BB weight, // use the BB weight to replace the loop header BB weight. for (auto &BI : F) { @@ -1071,13 +1146,32 @@ void SampleProfileLoader::propagateWeights(Function &F) { if (BlockWeights[BB]) { for (auto &I : BB->getInstList()) { - if (CallInst *CI = dyn_cast<CallInst>(&I)) { - if (!dyn_cast<IntrinsicInst>(&I)) { - SmallVector<uint32_t, 1> Weights; - Weights.push_back(BlockWeights[BB]); - CI->setMetadata(LLVMContext::MD_prof, - MDB.createBranchWeights(Weights)); - } + if (!isa<CallInst>(I) && !isa<InvokeInst>(I)) + continue; + CallSite CS(&I); + if (!CS.getCalledFunction()) { + const DebugLoc &DLoc = I.getDebugLoc(); + if (!DLoc) + continue; + const DILocation *DIL = DLoc; + uint32_t LineOffset = getOffset(DIL); + uint32_t Discriminator = DIL->getBaseDiscriminator(); + + const FunctionSamples *FS = findFunctionSamples(I); + if (!FS) + continue; + auto T = FS->findCallTargetMapAt(LineOffset, Discriminator); + if (!T || T.get().size() == 0) + continue; + SmallVector<InstrProfValueData, 2> SortedCallTargets; + uint64_t Sum = SortCallTargets(SortedCallTargets, T.get()); + annotateValueSite(*I.getParent()->getParent()->getParent(), I, + SortedCallTargets, Sum, IPVK_IndirectCallTarget, + SortedCallTargets.size()); + } else if (!dyn_cast<IntrinsicInst>(&I)) { + SmallVector<uint32_t, 1> Weights; + Weights.push_back(BlockWeights[BB]); + I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); } } } @@ -1115,9 +1209,13 @@ void SampleProfileLoader::propagateWeights(Function &F) { } } + uint64_t TempWeight; // Only set weights if there is at least one non-zero weight. // In any other case, let the analyzer set weights. - if (MaxWeight > 0) { + // Do not set weights if the weights are present. In ThinLTO, the profile + // annotation is done twice. If the first annotation already set the + // weights, the second pass does not need to set it. + if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) { DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n"); TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); @@ -1228,12 +1326,19 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { DEBUG(dbgs() << "Line number for the first instruction in " << F.getName() << ": " << getFunctionLoc(F) << "\n"); - Changed |= inlineHotFunctions(F); + DenseSet<GlobalValue::GUID> ImportGUIDs; + Changed |= inlineHotFunctions(F, ImportGUIDs); // Compute basic block weights. Changed |= computeBlockWeights(F); if (Changed) { + // Add an entry count to the function using the samples gathered at the + // function entry. Also sets the GUIDs that comes from a different + // module but inlined in the profiled binary. This is aiming at making + // the IR match the profiled binary before annotation. + F.setEntryCount(Samples->getHeadSamples() + 1, &ImportGUIDs); + // Compute dominance and loop info needed for propagation. computeDominanceAndLoopInfo(F); @@ -1329,7 +1434,7 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { bool SampleProfileLoader::runOnFunction(Function &F) { F.setEntryCount(0); Samples = Reader->getSamplesFor(F); - if (!Samples->empty()) + if (Samples && !Samples->empty()) return emitAnnotations(F); return false; } |