Diffstat (limited to 'lib/xray/xray_profiling.cc')
-rw-r--r--  lib/xray/xray_profiling.cc | 387
1 file changed, 276 insertions(+), 111 deletions(-)
diff --git a/lib/xray/xray_profiling.cc b/lib/xray/xray_profiling.cc
index d4b4345d764a..4323170cd1bb 100644
--- a/lib/xray/xray_profiling.cc
+++ b/lib/xray/xray_profiling.cc
@@ -19,7 +19,7 @@
 #include "sanitizer_common/sanitizer_flags.h"
 #include "xray/xray_interface.h"
 #include "xray/xray_log_interface.h"
-
+#include "xray_buffer_queue.h"
 #include "xray_flags.h"
 #include "xray_profile_collector.h"
 #include "xray_profiling_flags.h"
@@ -32,62 +32,167 @@ namespace __xray {
 
 namespace {
 
-atomic_sint32_t ProfilerLogFlushStatus = {
+static atomic_sint32_t ProfilerLogFlushStatus = {
     XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
 
-atomic_sint32_t ProfilerLogStatus = {XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
+static atomic_sint32_t ProfilerLogStatus = {
+    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
 
-SpinMutex ProfilerOptionsMutex;
+static SpinMutex ProfilerOptionsMutex;
 
-struct alignas(64) ProfilingData {
-  FunctionCallTrie::Allocators *Allocators = nullptr;
-  FunctionCallTrie *FCT = nullptr;
+struct ProfilingData {
+  atomic_uintptr_t Allocators;
+  atomic_uintptr_t FCT;
 };
 
 static pthread_key_t ProfilingKey;
 
-thread_local std::aligned_storage<sizeof(ProfilingData)>::type ThreadStorage{};
-static ProfilingData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
-  thread_local auto ThreadOnce = [] {
-    new (&ThreadStorage) ProfilingData{};
-    pthread_setspecific(ProfilingKey, &ThreadStorage);
+// We use a global buffer queue, which gets initialized once at initialisation
+// time, and gets reset when profiling is "done".
+static std::aligned_storage<sizeof(BufferQueue), alignof(BufferQueue)>::type
+    BufferQueueStorage;
+static BufferQueue *BQ = nullptr;
+
+thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers;
+thread_local std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
+                                  alignof(FunctionCallTrie::Allocators)>::type
+    AllocatorsStorage;
+thread_local std::aligned_storage<sizeof(FunctionCallTrie),
+                                  alignof(FunctionCallTrie)>::type
+    FunctionCallTrieStorage;
+thread_local ProfilingData TLD{{0}, {0}};
+thread_local atomic_uint8_t ReentranceGuard{0};
+
+// We use a separate guard for ensuring that for this thread, if we're already
+// cleaning up, that any signal handlers don't attempt to cleanup nor
+// initialise.
+thread_local atomic_uint8_t TLDInitGuard{0};
+
+// We also use a separate latch to signal that the thread is exiting, and
+// non-essential work should be ignored (things like recording events, etc.).
+thread_local atomic_uint8_t ThreadExitingLatch{0};
+
+static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
+  thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT {
+    pthread_setspecific(ProfilingKey, &TLD);
     return false;
   }();
   (void)ThreadOnce;
 
-  auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage);
+  RecursionGuard TLDInit(TLDInitGuard);
+  if (!TLDInit)
+    return nullptr;
 
-  // We need to check whether the global flag to finalizing/finalized has been
-  // switched. If it is, then we ought to not actually initialise the data.
-  auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
-  if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
-      Status == XRayLogInitStatus::XRAY_LOG_FINALIZED)
-    return TLD;
-
-  // If we're live, then we re-initialize TLD if the pointers are not null.
-  if (UNLIKELY(TLD.Allocators == nullptr && TLD.FCT == nullptr)) {
-    TLD.Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(
-        InternalAlloc(sizeof(FunctionCallTrie::Allocators)));
-    new (TLD.Allocators) FunctionCallTrie::Allocators();
-    *TLD.Allocators = FunctionCallTrie::InitAllocators();
-    TLD.FCT = reinterpret_cast<FunctionCallTrie *>(
-        InternalAlloc(sizeof(FunctionCallTrie)));
-    new (TLD.FCT) FunctionCallTrie(*TLD.Allocators);
+  if (atomic_load_relaxed(&ThreadExitingLatch))
+    return nullptr;
+
+  uptr Allocators = 0;
+  if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,
+                                     memory_order_acq_rel)) {
+    bool Success = false;
+    auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
+      if (!Success)
+        atomic_store(&TLD.Allocators, 0, memory_order_release);
+    });
+
+    // Acquire a set of buffers for this thread.
+    if (BQ == nullptr)
+      return nullptr;
+
+    if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok)
+      return nullptr;
+    auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
+      if (!Success)
+        BQ->releaseBuffer(ThreadBuffers.NodeBuffer);
+    });
+
+    if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok)
+      return nullptr;
+    auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
+      if (!Success)
+        BQ->releaseBuffer(ThreadBuffers.RootsBuffer);
+    });
+
+    if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) !=
+        BufferQueue::ErrorCode::Ok)
+      return nullptr;
+    auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
+      if (!Success)
+        BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer);
+    });
+
+    if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) !=
+        BufferQueue::ErrorCode::Ok)
+      return nullptr;
+
+    Success = true;
+    new (&AllocatorsStorage) FunctionCallTrie::Allocators(
+        FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers));
+    Allocators = reinterpret_cast<uptr>(
+        reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));
+    atomic_store(&TLD.Allocators, Allocators, memory_order_release);
+  }
+
+  if (Allocators == 1)
+    return nullptr;
+
+  uptr FCT = 0;
+  if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1,
+                                     memory_order_acq_rel)) {
+    new (&FunctionCallTrieStorage)
+        FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>(
+            atomic_load_relaxed(&TLD.Allocators)));
+    FCT = reinterpret_cast<uptr>(
+        reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));
+    atomic_store(&TLD.FCT, FCT, memory_order_release);
   }
 
-  return TLD;
+  if (FCT == 1)
+    return nullptr;
+
+  return &TLD;
 }
 
 static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
-  auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage);
-  if (TLD.Allocators != nullptr && TLD.FCT != nullptr) {
-    TLD.FCT->~FunctionCallTrie();
-    TLD.Allocators->~Allocators();
-    InternalFree(TLD.FCT);
-    InternalFree(TLD.Allocators);
-    TLD.FCT = nullptr;
-    TLD.Allocators = nullptr;
-  }
+  auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);
+  if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(
+                 &FunctionCallTrieStorage)))
+    reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie();
+
+  auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel);
+  if (Allocators ==
+      reinterpret_cast<uptr>(
+          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
+    reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators();
+}
+
+static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
+  RecursionGuard TLDInit(TLDInitGuard);
+  if (!TLDInit)
+    return;
+
+  uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel);
+  if (P != reinterpret_cast<uptr>(
+               reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))
+    return;
+
+  auto FCT = reinterpret_cast<FunctionCallTrie *>(P);
+  DCHECK_NE(FCT, nullptr);
+
+  uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel);
+  if (A !=
+      reinterpret_cast<uptr>(
+          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
+    return;
+
+  auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A);
+  DCHECK_NE(Allocators, nullptr);
+
+  // Always move the data into the profile collector.
+  profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators),
+                                std::move(ThreadBuffers), GetTid());
+
+  // Re-initialize the ThreadBuffers object to a known "default" state.
+  ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};
 }
 
 } // namespace
@@ -100,9 +205,6 @@ const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
 #endif
 }
 
-atomic_sint32_t ProfileFlushStatus = {
-    XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
-
 XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
   if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
       XRayLogInitStatus::XRAY_LOG_FINALIZED) {
@@ -111,12 +213,23 @@ XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
     return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
   }
 
-  s32 Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
-  if (!atomic_compare_exchange_strong(&ProfilerLogFlushStatus, &Result,
-                                      XRayLogFlushStatus::XRAY_LOG_FLUSHING,
-                                      memory_order_acq_rel)) {
+  RecursionGuard SignalGuard(ReentranceGuard);
+  if (!SignalGuard) {
+    if (Verbosity())
+      Report("Cannot finalize properly inside a signal handler!\n");
+    atomic_store(&ProfilerLogFlushStatus,
+                 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
+                 memory_order_release);
+    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+  }
+
+  s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
+                                 XRayLogFlushStatus::XRAY_LOG_FLUSHING,
+                                 memory_order_acq_rel);
+  if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
     if (Verbosity())
-      Report("Not flushing profiles, implementation still finalizing.\n");
+      Report("Not flushing profiles, implementation still flushing.\n");
+    return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
   }
 
   // At this point, we'll create the file that will contain the profile, but
@@ -129,49 +242,33 @@ XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
     if (Verbosity())
       Report("profiling: No data to flush.\n");
   } else {
-    int Fd = getLogFD();
-    if (Fd == -1) {
+    LogWriter *LW = LogWriter::Open();
+    if (LW == nullptr) {
       if (Verbosity())
        Report("profiling: Failed to flush to file, dropping data.\n");
     } else {
       // Now for each of the buffers, write out the profile data as we would
       // see it in memory, verbatim.
       while (B.Data != nullptr && B.Size != 0) {
-        retryingWriteAll(Fd, reinterpret_cast<const char *>(B.Data),
-                         reinterpret_cast<const char *>(B.Data) + B.Size);
+        LW->WriteAll(reinterpret_cast<const char *>(B.Data),
+                     reinterpret_cast<const char *>(B.Data) + B.Size);
        B = profileCollectorService::nextBuffer(B);
       }
-      // Then we close out the file.
-      internal_close(Fd);
     }
+    LogWriter::Close(LW);
   }
 
   profileCollectorService::reset();
 
-  // Flush the current thread's local data structures as well.
-  cleanupTLD();
-
-  atomic_store(&ProfilerLogStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
+  atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
+               memory_order_release);
+  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                memory_order_release);
 
   return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
 }
 
-namespace {
-
-thread_local atomic_uint8_t ReentranceGuard{0};
-
-static void postCurrentThreadFCT(ProfilingData &TLD) {
-  if (TLD.Allocators == nullptr || TLD.FCT == nullptr)
-    return;
-
-  profileCollectorService::post(*TLD.FCT, GetTid());
-  cleanupTLD();
-}
-
-} // namespace
-
 void profilingHandleArg0(int32_t FuncId,
                          XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
   unsigned char CPU;
@@ -181,21 +278,29 @@ void profilingHandleArg0(int32_t FuncId,
     return;
 
   auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
-  auto &TLD = getThreadLocalData();
+  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED ||
+               Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING))
+    return;
+
   if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED ||
                Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) {
     postCurrentThreadFCT(TLD);
     return;
   }
 
+  auto T = getThreadLocalData();
+  if (T == nullptr)
+    return;
+
+  auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT));
   switch (Entry) {
   case XRayEntryType::ENTRY:
   case XRayEntryType::LOG_ARGS_ENTRY:
-    TLD.FCT->enterFunction(FuncId, TSC);
+    FCT->enterFunction(FuncId, TSC, CPU);
     break;
   case XRayEntryType::EXIT:
   case XRayEntryType::TAIL:
-    TLD.FCT->exitFunction(FuncId, TSC);
+    FCT->exitFunction(FuncId, TSC, CPU);
     break;
   default:
     // FIXME: Handle bugs.
@@ -218,12 +323,22 @@ XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
     return static_cast<XRayLogInitStatus>(CurrentStatus);
   }
 
+  // Mark then finalize the current generation of buffers. This allows us to let
+  // the threads currently holding onto new buffers still use them, but let the
+  // last reference do the memory cleanup.
+  DCHECK_NE(BQ, nullptr);
+  BQ->finalize();
+
   // Wait a grace period to allow threads to see that we're finalizing.
   SleepForMillis(profilingFlags()->grace_period_ms);
 
-  // We also want to make sure that the current thread's data is cleaned up,
-  // if we have any.
-  auto &TLD = getThreadLocalData();
+  // If we for some reason are entering this function from an instrumented
+  // handler, we bail out.
+  RecursionGuard G(ReentranceGuard);
+  if (!G)
+    return static_cast<XRayLogInitStatus>(CurrentStatus);
+
+  // Post the current thread's data if we have any.
   postCurrentThreadFCT(TLD);
 
   // Then we force serialize the log data.
@@ -235,19 +350,16 @@ XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
 }
 
 XRayLogInitStatus
-profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options,
+profilingLoggingInit(size_t, size_t, void *Options,
                      size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
-  if (BufferSize != 0 || BufferMax != 0) {
-    if (Verbosity())
-      Report("__xray_log_init() being used, and is unsupported. Use "
-             "__xray_log_init_mode(...) instead. Bailing out.");
+  RecursionGuard G(ReentranceGuard);
+  if (!G)
     return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
-  }
 
   s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
   if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
                                       XRayLogInitStatus::XRAY_LOG_INITIALIZING,
-                                      memory_order_release)) {
+                                      memory_order_acq_rel)) {
     if (Verbosity())
       Report("Cannot initialize already initialised profiling "
             "implementation.\n");
@@ -276,35 +388,88 @@ profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options,
 
   // We need to reset the profile data collection implementation now.
   profileCollectorService::reset();
 
-  // We need to set up the exit handlers.
-  static pthread_once_t Once = PTHREAD_ONCE_INIT;
-  pthread_once(&Once, +[] {
-    pthread_key_create(&ProfilingKey, +[](void *P) {
-      // This is the thread-exit handler.
-      auto &TLD = *reinterpret_cast<ProfilingData *>(P);
-      if (TLD.Allocators == nullptr && TLD.FCT == nullptr)
-        return;
-
-      postCurrentThreadFCT(TLD);
-    });
+  // Then also reset the buffer queue implementation.
+  if (BQ == nullptr) {
+    bool Success = false;
+    new (&BufferQueueStorage)
+        BufferQueue(profilingFlags()->per_thread_allocator_max,
+                    profilingFlags()->buffers_max, Success);
+    if (!Success) {
+      if (Verbosity())
+        Report("Failed to initialize preallocated memory buffers!");
+      atomic_store(&ProfilerLogStatus,
+                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
+                   memory_order_release);
+      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+    }
 
-    // We also need to set up an exit handler, so that we can get the profile
-    // information at exit time. We use the C API to do this, to not rely on C++
-    // ABI functions for registering exit handlers.
-    Atexit(+[] {
-      // Finalize and flush.
-      if (profilingFinalize() != XRAY_LOG_FINALIZED) {
-        cleanupTLD();
-        return;
-      }
-      if (profilingFlush() != XRAY_LOG_FLUSHED) {
-        cleanupTLD();
-        return;
-      }
+    // If we've succeded, set the global pointer to the initialised storage.
+    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
+  } else {
+    BQ->finalize();
+    auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max,
+                               profilingFlags()->buffers_max);
+
+    if (InitStatus != BufferQueue::ErrorCode::Ok) {
       if (Verbosity())
-        Report("XRay Profile flushed at exit.");
-    });
-  });
+        Report("Failed to initialize preallocated memory buffers; error: %s",
+               BufferQueue::getErrorString(InitStatus));
+      atomic_store(&ProfilerLogStatus,
+                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
+                   memory_order_release);
+      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+    }
+
+    DCHECK(!BQ->finalizing());
+  }
+
+  // We need to set up the exit handlers.
+  static pthread_once_t Once = PTHREAD_ONCE_INIT;
+  pthread_once(
+      &Once, +[] {
+        pthread_key_create(
+            &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {
+              if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
+                return;
+
+              if (P == nullptr)
+                return;
+
+              auto T = reinterpret_cast<ProfilingData *>(P);
+              if (atomic_load_relaxed(&T->Allocators) == 0)
+                return;
+
+              {
+                // If we're somehow executing this while inside a
+                // non-reentrant-friendly context, we skip attempting to post
+                // the current thread's data.
+                RecursionGuard G(ReentranceGuard);
+                if (!G)
+                  return;
+
+                postCurrentThreadFCT(*T);
+              }
+            });
+
+        // We also need to set up an exit handler, so that we can get the
+        // profile information at exit time. We use the C API to do this, to not
+        // rely on C++ ABI functions for registering exit handlers.
+        Atexit(+[]() XRAY_NEVER_INSTRUMENT {
+          if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
+            return;
+
+          auto Cleanup =
+              at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });
+
+          // Finalize and flush.
+          if (profilingFinalize() != XRAY_LOG_FINALIZED ||
+              profilingFlush() != XRAY_LOG_FLUSHED)
+            return;
+
+          if (Verbosity())
+            Report("XRay Profile flushed at exit.");
+        });
+      });
 
   __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
   __xray_set_handler(profilingHandleArg0);
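The core of the patch is the lazy, signal-safe initialisation in getThreadLocalData(): each thread-local slot is claimed with a compare-and-swap from 0 to the sentinel value 1 (meaning "initialisation in progress"), the object is placement-constructed into preallocated thread_local storage, and the real address is then published with a release store. A caller that observes the sentinel, typically a signal handler that interrupted the initialising frame on the same thread, backs off and returns nullptr instead of spinning. Below is a minimal portable C++ sketch of that pattern; Payload and getPayload are hypothetical stand-ins for FunctionCallTrie::Allocators and the real accessor.

#include <atomic>
#include <cstdint>
#include <new>
#include <type_traits>

// Hypothetical payload standing in for FunctionCallTrie::Allocators.
struct Payload {
  int Value = 42;
};

thread_local std::aligned_storage<sizeof(Payload), alignof(Payload)>::type
    PayloadStorage;
thread_local std::atomic<std::uintptr_t> PayloadPtr{0};

// Returns this thread's Payload, constructing it on first use. The value 1
// is a sentinel meaning "initialisation in progress": a re-entrant caller
// (e.g. a signal handler interrupting the constructor) observes it and backs
// off with nullptr rather than spinning or re-initialising.
Payload *getPayload() {
  std::uintptr_t P = 0;
  if (PayloadPtr.compare_exchange_strong(P, 1, std::memory_order_acq_rel)) {
    // We won the race: construct in place, then publish the real address
    // with a release store so readers see a fully constructed object.
    auto *Obj = new (&PayloadStorage) Payload();
    PayloadPtr.store(reinterpret_cast<std::uintptr_t>(Obj),
                     std::memory_order_release);
    return Obj;
  }
  if (P == 1) // Initialisation is mid-flight on this thread; back off.
    return nullptr;
  return reinterpret_cast<Payload *>(P); // Already initialised.
}

The same shape appears twice in the patch, once for TLD.Allocators and once for TLD.FCT, and teardown mirrors it: cleanupTLD() uses atomic_exchange back to 0 so exactly one party runs the destructor.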
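The RecursionGuard used throughout (profilingFlush(), profilingFinalize(), getThreadLocalData(), and the thread-exit callback) is a single-thread reentrancy latch over a thread_local atomic flag: if a signal handler fires while a guarded region is already active on the same thread, the nested invocation observes the flag and bails out. A sketch of such a guard, assuming it behaves like the one in compiler-rt's xray_recursion_guard.h:

#include <atomic>

// Sets a thread-local flag for the lifetime of the guarded scope; a nested
// guard over the same flag converts to false so the caller can bail out.
class RecursionGuard {
  std::atomic<bool> &Flag;
  const bool Acquired;

public:
  explicit RecursionGuard(std::atomic<bool> &F)
      : Flag(F), Acquired(!F.exchange(true, std::memory_order_acq_rel)) {}
  RecursionGuard(const RecursionGuard &) = delete;
  explicit operator bool() const { return Acquired; }
  ~RecursionGuard() {
    if (Acquired)
      Flag.store(false, std::memory_order_release);
  }
};

thread_local std::atomic<bool> InFlushGuard{false};

void flushOnce() {
  RecursionGuard G(InFlushGuard);
  if (!G)
    return; // A signal handler re-entered this frame; do nothing.
  // ... non-reentrant flush work ...
}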
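Finally, the buffer acquisition in getThreadLocalData() is made transactional with at_scope_exit plus a Success flag: each acquired buffer registers an undo action that only fires if a later step fails, and Success = true at the end commits the whole batch by disarming every undo at once. A C++17 sketch of the idiom, with a hypothetical Resource type standing in for BufferQueue buffers:

#include <utility>

// Hypothetical resource standing in for a BufferQueue buffer.
struct Resource {
  bool Held = false;
  bool acquire() { Held = true; return true; }
  void release() { Held = false; }
};

// Minimal scope guard in the spirit of the at_scope_exit helper the patch
// uses: runs a callable when the enclosing scope unwinds. Relies on C++17
// guaranteed copy elision for the factory's return.
template <typename Fn> struct ScopeExit {
  Fn F;
  explicit ScopeExit(Fn F_) : F(std::move(F_)) {}
  ScopeExit(const ScopeExit &) = delete;
  ScopeExit &operator=(const ScopeExit &) = delete;
  ~ScopeExit() { F(); }
};
template <typename Fn> ScopeExit<Fn> at_scope_exit(Fn F) {
  return ScopeExit<Fn>(std::move(F));
}

// Acquire A then B; if B fails, the scope-exit undo releases A on the way
// out. Setting Success at the end suppresses every undo action at once.
bool acquireBoth(Resource &A, Resource &B) {
  bool Success = false;
  if (!A.acquire())
    return false;
  auto UndoA = at_scope_exit([&] {
    if (!Success)
      A.release();
  });
  if (!B.acquire())
    return false; // UndoA runs here and releases A.
  Success = true; // Commit: all undos become no-ops.
  return true;
}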