diff options
Diffstat (limited to 'llvm/lib/Transforms/Instrumentation/MemProfiler.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Instrumentation/MemProfiler.cpp | 315 |
1 files changed, 312 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 2a1601fab45f..789ed005d03d 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -18,10 +18,12 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" @@ -30,18 +32,30 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/InstrProfReader.h" +#include "llvm/Support/BLAKE3.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/HashBuilder.h" +#include "llvm/Support/VirtualFileSystem.h" +#include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include <map> +#include <set> using namespace llvm; +using namespace llvm::memprof; #define DEBUG_TYPE "memprof" +namespace llvm { +extern cl::opt<bool> PGOWarnMissing; +extern cl::opt<bool> NoPGOWarnMismatch; +extern cl::opt<bool> NoPGOWarnMismatchComdatWeak; +} // namespace llvm + constexpr int LLVM_MEM_PROFILER_VERSION = 1; // Size of memory mapped to a single shadow location. 
@@ -130,6 +144,7 @@ STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads"); STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes"); +STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); namespace { @@ -603,3 +618,297 @@ bool MemProfiler::instrumentFunction(Function &F) { return FunctionModified; } + +static void addCallsiteMetadata(Instruction &I, + std::vector<uint64_t> &InlinedCallStack, + LLVMContext &Ctx) { + I.setMetadata(LLVMContext::MD_callsite, + buildCallstackMetadata(InlinedCallStack, Ctx)); +} + +static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, + uint32_t Column) { + llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little> + HashBuilder; + HashBuilder.add(Function, LineOffset, Column); + llvm::BLAKE3Result<8> Hash = HashBuilder.final(); + uint64_t Id; + std::memcpy(&Id, Hash.data(), sizeof(Hash)); + return Id; +} + +static uint64_t computeStackId(const memprof::Frame &Frame) { + return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column); +} + +static void addCallStack(CallStackTrie &AllocTrie, + const AllocationInfo *AllocInfo) { + SmallVector<uint64_t> StackIds; + for (const auto &StackFrame : AllocInfo->CallStack) + StackIds.push_back(computeStackId(StackFrame)); + auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(), + AllocInfo->Info.getAllocCount(), + AllocInfo->Info.getTotalLifetime()); + AllocTrie.addCallStack(AllocType, StackIds); +} + +// Helper to compare the InlinedCallStack computed from an instruction's debug +// info to a list of Frames from profile data (either the allocation data or a +// callsite). For callsites, the StartIndex to use in the Frame array may be +// non-zero. 
+static bool +stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack, + ArrayRef<uint64_t> InlinedCallStack, + unsigned StartIndex = 0) { + auto StackFrame = ProfileCallStack.begin() + StartIndex; + auto InlCallStackIter = InlinedCallStack.begin(); + for (; StackFrame != ProfileCallStack.end() && + InlCallStackIter != InlinedCallStack.end(); + ++StackFrame, ++InlCallStackIter) { + uint64_t StackId = computeStackId(*StackFrame); + if (StackId != *InlCallStackIter) + return false; + } + // Return true if we found and matched all stack ids from the call + // instruction. + return InlCallStackIter == InlinedCallStack.end(); +} + +static void readMemprof(Module &M, Function &F, + IndexedInstrProfReader *MemProfReader, + const TargetLibraryInfo &TLI) { + auto &Ctx = M.getContext(); + + auto FuncName = getPGOFuncName(F); + auto FuncGUID = Function::getGUID(FuncName); + Expected<memprof::MemProfRecord> MemProfResult = + MemProfReader->getMemProfRecord(FuncGUID); + if (Error E = MemProfResult.takeError()) { + handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { + auto Err = IPE.get(); + bool SkipWarning = false; + LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName + << ": "); + if (Err == instrprof_error::unknown_function) { + NumOfMemProfMissing++; + SkipWarning = !PGOWarnMissing; + LLVM_DEBUG(dbgs() << "unknown function"); + } else if (Err == instrprof_error::hash_mismatch) { + SkipWarning = + NoPGOWarnMismatch || + (NoPGOWarnMismatchComdatWeak && + (F.hasComdat() || + F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); + LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); + } + + if (SkipWarning) + return; + + std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() + + Twine(" Hash = ") + std::to_string(FuncGUID)) + .str(); + + Ctx.diagnose( + DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); + }); + return; + } + + // Build maps of the location hash to all profile data with that 
leaf location + // (allocation info and the callsites). + std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo; + // For the callsites we need to record the index of the associated frame in + // the frame array (see comments below where the map entries are added). + std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>> + LocHashToCallSites; + const auto MemProfRec = std::move(MemProfResult.get()); + for (auto &AI : MemProfRec.AllocSites) { + // Associate the allocation info with the leaf frame. The later matching + // code will match any inlined call sequences in the IR with a longer prefix + // of call stack frames. + uint64_t StackId = computeStackId(AI.CallStack[0]); + LocHashToAllocInfo[StackId].insert(&AI); + } + for (auto &CS : MemProfRec.CallSites) { + // Need to record all frames from leaf up to and including this function, + // as any of these may or may not have been inlined at this point. + unsigned Idx = 0; + for (auto &StackFrame : CS) { + uint64_t StackId = computeStackId(StackFrame); + LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++)); + // Once we find this function, we can stop recording. + if (StackFrame.Function == FuncGUID) + break; + } + assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID); + } + + auto GetOffset = [](const DILocation *DIL) { + return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & + 0xffff; + }; + + // Now walk the instructions, looking up the associated profile data using + // debug locations. + for (auto &BB : F) { + for (auto &I : BB) { + if (I.isDebugOrPseudoInst()) + continue; + // We are only interested in calls (allocation or interior call stack + // context calls). 
+ auto *CI = dyn_cast<CallBase>(&I); + if (!CI) + continue; + auto *CalledFunction = CI->getCalledFunction(); + if (CalledFunction && CalledFunction->isIntrinsic()) + continue; + // List of call stack ids computed from the location hashes on debug + // locations (leaf to inlined at root). + std::vector<uint64_t> InlinedCallStack; + // Was the leaf location found in one of the profile maps? + bool LeafFound = false; + // If leaf was found in a map, iterators pointing to its location in both + // of the maps. It might exist in neither, one, or both (the latter case + // can happen because we don't currently have discriminators to + // distinguish the case when a single line/col maps to both an allocation + // and another callsite). + std::map<uint64_t, std::set<const AllocationInfo *>>::iterator + AllocInfoIter; + std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, + unsigned>>>::iterator CallSitesIter; + for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr; + DIL = DIL->getInlinedAt()) { + // Use C++ linkage name if possible. Need to compile with + // -fdebug-info-for-profiling to get linkage name. + StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); + if (Name.empty()) + Name = DIL->getScope()->getSubprogram()->getName(); + auto CalleeGUID = Function::getGUID(Name); + auto StackId = + computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn()); + // LeafFound will only be false on the first iteration, since we either + // set it true or break out of the loop below. + if (!LeafFound) { + AllocInfoIter = LocHashToAllocInfo.find(StackId); + CallSitesIter = LocHashToCallSites.find(StackId); + // Check if the leaf is in one of the maps. If not, no need to look + // further at this call. + if (AllocInfoIter == LocHashToAllocInfo.end() && + CallSitesIter == LocHashToCallSites.end()) + break; + LeafFound = true; + } + InlinedCallStack.push_back(StackId); + } + // If leaf not in either of the maps, skip inst. 
+ if (!LeafFound) + continue; + + // First add !memprof metadata from allocation info, if we found the + // instruction's leaf location in that map, and if the rest of the + // instruction's locations match the prefix Frame locations on an + // allocation context with the same leaf. + if (AllocInfoIter != LocHashToAllocInfo.end()) { + // Only consider allocations via new, to reduce unnecessary metadata, + // since those are the only allocations that will be targeted initially. + if (!isNewLikeFn(CI, &TLI)) + continue; + // We may match this instruction's location list to multiple MIB + // contexts. Add them to a Trie specialized for trimming the contexts to + // the minimal needed to disambiguate contexts with unique behavior. + CallStackTrie AllocTrie; + for (auto *AllocInfo : AllocInfoIter->second) { + // Check the full inlined call stack against this one. + // If we found and thus matched all frames on the call, include + // this MIB. + if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack, + InlinedCallStack)) + addCallStack(AllocTrie, AllocInfo); + } + // We might not have matched any to the full inlined call stack. + // But if we did, create and attach metadata, or a function attribute if + // all contexts have identical profiled behavior. + if (!AllocTrie.empty()) { + // MemprofMDAttached will be false if a function attribute was + // attached. + bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI); + assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof)); + if (MemprofMDAttached) { + // Add callsite metadata for the instruction's location list so that + // it is simpler later on to identify which part of the MIB contexts + // are from this particular instruction (including during inlining, + // when the callsite metadata will be updated appropriately). + // FIXME: can this be changed to strip out the matching stack + // context ids from the MIB contexts and not add any callsite + // metadata here to save space? 
+ addCallsiteMetadata(I, InlinedCallStack, Ctx); + } + } + continue; + } + + // Otherwise, add callsite metadata. If we reach here then we found the + // instruction's leaf location in the callsites map and not the allocation + // map. + assert(CallSitesIter != LocHashToCallSites.end()); + for (auto CallStackIdx : CallSitesIter->second) { + // If we found and thus matched all frames on the call, create and + // attach call stack metadata. + if (stackFrameIncludesInlinedCallStack( + *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) { + addCallsiteMetadata(I, InlinedCallStack, Ctx); + // Only need to find one with a matching call stack and add a single + // callsite metadata. + break; + } + } + } + } +} + +MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile, + IntrusiveRefCntPtr<vfs::FileSystem> FS) + : MemoryProfileFileName(MemoryProfileFile), FS(FS) { + if (!FS) + this->FS = vfs::getRealFileSystem(); +} + +PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { + LLVM_DEBUG(dbgs() << "Read in memory profile:"); + auto &Ctx = M.getContext(); + auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS); + if (Error E = ReaderOrErr.takeError()) { + handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { + Ctx.diagnose( + DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message())); + }); + return PreservedAnalyses::all(); + } + + std::unique_ptr<IndexedInstrProfReader> MemProfReader = + std::move(ReaderOrErr.get()); + if (!MemProfReader) { + Ctx.diagnose(DiagnosticInfoPGOProfile( + MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader"))); + return PreservedAnalyses::all(); + } + + if (!MemProfReader->hasMemoryProfile()) { + Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), + "Not a memory profile")); + return PreservedAnalyses::all(); + } + + auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + + for (auto &F : M) { + if 
(F.isDeclaration()) + continue; + + const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F); + readMemprof(M, F, MemProfReader.get(), TLI); + } + + return PreservedAnalyses::none(); +} |
