Diffstat (limited to 'lib/xray/xray_profiling.cc')
-rw-r--r--  lib/xray/xray_profiling.cc | 387
1 file changed, 276 insertions(+), 111 deletions(-)
diff --git a/lib/xray/xray_profiling.cc b/lib/xray/xray_profiling.cc
index d4b4345d764a..4323170cd1bb 100644
--- a/lib/xray/xray_profiling.cc
+++ b/lib/xray/xray_profiling.cc
@@ -19,7 +19,7 @@
#include "sanitizer_common/sanitizer_flags.h"
#include "xray/xray_interface.h"
#include "xray/xray_log_interface.h"
-
+#include "xray_buffer_queue.h"
#include "xray_flags.h"
#include "xray_profile_collector.h"
#include "xray_profiling_flags.h"
@@ -32,62 +32,167 @@ namespace __xray {
namespace {
-atomic_sint32_t ProfilerLogFlushStatus = {
+static atomic_sint32_t ProfilerLogFlushStatus = {
XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
-atomic_sint32_t ProfilerLogStatus = {XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
+static atomic_sint32_t ProfilerLogStatus = {
+ XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
-SpinMutex ProfilerOptionsMutex;
+static SpinMutex ProfilerOptionsMutex;
-struct alignas(64) ProfilingData {
- FunctionCallTrie::Allocators *Allocators = nullptr;
- FunctionCallTrie *FCT = nullptr;
+struct ProfilingData {
+ atomic_uintptr_t Allocators;
+ atomic_uintptr_t FCT;
};
static pthread_key_t ProfilingKey;
-thread_local std::aligned_storage<sizeof(ProfilingData)>::type ThreadStorage{};
-static ProfilingData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
- thread_local auto ThreadOnce = [] {
- new (&ThreadStorage) ProfilingData{};
- pthread_setspecific(ProfilingKey, &ThreadStorage);
+// We use a global buffer queue, which gets initialized once at log
+// initialization time and reset when profiling is "done".
+static std::aligned_storage<sizeof(BufferQueue), alignof(BufferQueue)>::type
+ BufferQueueStorage;
+static BufferQueue *BQ = nullptr;
+
+thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers;
+thread_local std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
+ alignof(FunctionCallTrie::Allocators)>::type
+ AllocatorsStorage;
+thread_local std::aligned_storage<sizeof(FunctionCallTrie),
+ alignof(FunctionCallTrie)>::type
+ FunctionCallTrieStorage;
+thread_local ProfilingData TLD{{0}, {0}};
+thread_local atomic_uint8_t ReentranceGuard{0};
+
+// We use a separate guard to ensure that, if this thread is already cleaning
+// up, any signal handlers running on it don't attempt to clean up or
+// initialize the thread-local data.
+thread_local atomic_uint8_t TLDInitGuard{0};
+
+// We also use a separate latch to signal that the thread is exiting, and
+// non-essential work should be ignored (things like recording events, etc.).
+thread_local atomic_uint8_t ThreadExitingLatch{0};
+
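The latches above are consumed through XRay's RecursionGuard (defined in xray_recursion_guard.h). Below is a minimal, self-contained sketch of the semantics the code in this patch relies on, written against <atomic> rather than the sanitizer atomics: trip the latch on construction, convert to false if it was already set, and clear it on destruction only if this frame set it.

#include <atomic>
#include <cstdint>

class RecursionGuardSketch {
  std::atomic<std::uint8_t> &Latch;
  bool Valid;

public:
  explicit RecursionGuardSketch(std::atomic<std::uint8_t> &L)
      : Latch(L), Valid(L.exchange(1, std::memory_order_acq_rel) == 0) {}
  RecursionGuardSketch(const RecursionGuardSketch &) = delete;
  explicit operator bool() const { return Valid; }
  ~RecursionGuardSketch() {
    if (Valid) // only the frame that tripped the latch clears it
      Latch.store(0, std::memory_order_release);
  }
};

thread_local std::atomic<std::uint8_t> Guard{0};

void nonReentrantPath() {
  RecursionGuardSketch G(Guard);
  if (!G)
    return; // already inside this path on this thread (e.g. from a signal)
  // ... work that must not re-enter itself ...
}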
+static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
+ thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT {
+ pthread_setspecific(ProfilingKey, &TLD);
return false;
}();
(void)ThreadOnce;
- auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage);
+ RecursionGuard TLDInit(TLDInitGuard);
+ if (!TLDInit)
+ return nullptr;
- // We need to check whether the global flag to finalizing/finalized has been
- // switched. If it is, then we ought to not actually initialise the data.
- auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
- if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
- Status == XRayLogInitStatus::XRAY_LOG_FINALIZED)
- return TLD;
-
- // If we're live, then we re-initialize TLD if the pointers are not null.
- if (UNLIKELY(TLD.Allocators == nullptr && TLD.FCT == nullptr)) {
- TLD.Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(
- InternalAlloc(sizeof(FunctionCallTrie::Allocators)));
- new (TLD.Allocators) FunctionCallTrie::Allocators();
- *TLD.Allocators = FunctionCallTrie::InitAllocators();
- TLD.FCT = reinterpret_cast<FunctionCallTrie *>(
- InternalAlloc(sizeof(FunctionCallTrie)));
- new (TLD.FCT) FunctionCallTrie(*TLD.Allocators);
+ if (atomic_load_relaxed(&ThreadExitingLatch))
+ return nullptr;
+
+ uptr Allocators = 0;
+ if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,
+ memory_order_acq_rel)) {
+ bool Success = false;
+ auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
+ if (!Success)
+ atomic_store(&TLD.Allocators, 0, memory_order_release);
+ });
+
+ // Acquire a set of buffers for this thread.
+ if (BQ == nullptr)
+ return nullptr;
+
+ if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok)
+ return nullptr;
+ auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
+ if (!Success)
+ BQ->releaseBuffer(ThreadBuffers.NodeBuffer);
+ });
+
+ if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok)
+ return nullptr;
+ auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
+ if (!Success)
+ BQ->releaseBuffer(ThreadBuffers.RootsBuffer);
+ });
+
+ if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) !=
+ BufferQueue::ErrorCode::Ok)
+ return nullptr;
+ auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
+ if (!Success)
+ BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer);
+ });
+
+ if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) !=
+ BufferQueue::ErrorCode::Ok)
+ return nullptr;
+
+ Success = true;
+ new (&AllocatorsStorage) FunctionCallTrie::Allocators(
+ FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers));
+ Allocators = reinterpret_cast<uptr>(
+ reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));
+ atomic_store(&TLD.Allocators, Allocators, memory_order_release);
+ }
+
+ if (Allocators == 1)
+ return nullptr;
+
+ uptr FCT = 0;
+ if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) {
+ new (&FunctionCallTrieStorage)
+ FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>(
+ atomic_load_relaxed(&TLD.Allocators)));
+ FCT = reinterpret_cast<uptr>(
+ reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));
+ atomic_store(&TLD.FCT, FCT, memory_order_release);
}
- return TLD;
+ if (FCT == 1)
+ return nullptr;
+
+ return &TLD;
}
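getThreadLocalData() above implements an initialize-once protocol per slot: 0 means uninitialized, the sentinel value 1 means initialization is in progress (so racers back off), and any other value is the published address of the constructed object. A distilled sketch of that protocol under those assumptions, with a hypothetical Widget standing in for the Allocators/FunctionCallTrie storage; note the real code also undoes the claim on failure via at_scope_exit, which this sketch omits.

#include <atomic>
#include <cstdint>
#include <new>

struct Widget {}; // stands in for the Allocators / FunctionCallTrie objects

alignas(Widget) static unsigned char Storage[sizeof(Widget)];
static std::atomic<std::uintptr_t> Slot{0};

Widget *getOrInit() {
  std::uintptr_t Current = 0;
  if (Slot.compare_exchange_strong(Current, 1, std::memory_order_acq_rel)) {
    // We won the race: construct in place, then publish the real address.
    // (The real code also undoes the claim if buffer acquisition fails.)
    auto *W = new (Storage) Widget();
    Current = reinterpret_cast<std::uintptr_t>(W);
    Slot.store(Current, std::memory_order_release);
  }
  if (Current == 1)
    return nullptr; // another thread is mid-initialization; treat as absent
  return reinterpret_cast<Widget *>(Current);
}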
static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
- auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage);
- if (TLD.Allocators != nullptr && TLD.FCT != nullptr) {
- TLD.FCT->~FunctionCallTrie();
- TLD.Allocators->~Allocators();
- InternalFree(TLD.FCT);
- InternalFree(TLD.Allocators);
- TLD.FCT = nullptr;
- TLD.Allocators = nullptr;
- }
+ auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);
+ if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(
+ &FunctionCallTrieStorage)))
+ reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie();
+
+ auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel);
+ if (Allocators ==
+ reinterpret_cast<uptr>(
+ reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
+ reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators();
+}
+
+static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
+ RecursionGuard TLDInit(TLDInitGuard);
+ if (!TLDInit)
+ return;
+
+ uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel);
+ if (P != reinterpret_cast<uptr>(
+ reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))
+ return;
+
+ auto FCT = reinterpret_cast<FunctionCallTrie *>(P);
+ DCHECK_NE(FCT, nullptr);
+
+ uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel);
+ if (A !=
+ reinterpret_cast<uptr>(
+ reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
+ return;
+
+ auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A);
+ DCHECK_NE(Allocators, nullptr);
+
+ // Always move the data into the profile collector.
+ profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators),
+ std::move(ThreadBuffers), GetTid());
+
+ // Re-initialize the ThreadBuffers object to a known "default" state.
+ ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};
}
} // namespace
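Both cleanupTLD() and postCurrentThreadFCT() tear the slots down with the mirror-image idiom: atomically exchange the slot to 0 and act only if the value received is the published storage address, so exactly one claimant destroys (or posts) the object. A standalone sketch of that claim-by-exchange step, again with a hypothetical Widget in place of the trie:

#include <atomic>
#include <cstdint>
#include <new>

struct Widget { ~Widget() { /* release resources */ } };

alignas(Widget) static unsigned char Storage[sizeof(Widget)];
static std::atomic<std::uintptr_t> Slot{0}; // 0 = empty, 1 = sentinel

void destroyOnce() {
  std::uintptr_t P = Slot.exchange(0, std::memory_order_acq_rel);
  if (P != reinterpret_cast<std::uintptr_t>(&Storage))
    return; // empty, mid-initialization, or already claimed by another path
  reinterpret_cast<Widget *>(P)->~Widget(); // exactly one caller reaches this
}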
@@ -100,9 +205,6 @@ const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
#endif
}
-atomic_sint32_t ProfileFlushStatus = {
- XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
-
XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
XRayLogInitStatus::XRAY_LOG_FINALIZED) {
@@ -111,12 +213,23 @@ XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
}
- s32 Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
- if (!atomic_compare_exchange_strong(&ProfilerLogFlushStatus, &Result,
- XRayLogFlushStatus::XRAY_LOG_FLUSHING,
- memory_order_acq_rel)) {
+ RecursionGuard SignalGuard(ReentranceGuard);
+ if (!SignalGuard) {
+ if (Verbosity())
+ Report("Cannot finalize properly inside a signal handler!\n");
+ atomic_store(&ProfilerLogFlushStatus,
+ XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
+ memory_order_release);
+ return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+ }
+
+ s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
+ XRayLogFlushStatus::XRAY_LOG_FLUSHING,
+ memory_order_acq_rel);
+ if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
if (Verbosity())
- Report("Not flushing profiles, implementation still finalizing.\n");
+ Report("Not flushing profiles, implementation still flushing.\n");
+ return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
}
// At this point, we'll create the file that will contain the profile, but
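The flush gate above replaces the old compare-exchange with an unconditional atomic_exchange: any previous value other than FLUSHING (including FLUSHED after a re-initialization) lets this caller proceed as the single flusher. A small sketch of that handoff, with illustrative enum values mirroring XRayLogFlushStatus:

#include <atomic>

// Illustrative values mirroring XRayLogFlushStatus.
enum FlushStatus : int { NOT_FLUSHING = 0, FLUSHING = 1, FLUSHED = 2 };

std::atomic<int> Status{NOT_FLUSHING};

FlushStatus flushOnce() {
  int Previous = Status.exchange(FLUSHING, std::memory_order_acq_rel);
  if (Previous == FLUSHING)
    return FLUSHING; // another thread owns the flush; report and back off
  // ... serialize buffers and write them out ...
  Status.store(FLUSHED, std::memory_order_release);
  return FLUSHED;
}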
@@ -129,49 +242,33 @@ XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
if (Verbosity())
Report("profiling: No data to flush.\n");
} else {
- int Fd = getLogFD();
- if (Fd == -1) {
+ LogWriter *LW = LogWriter::Open();
+ if (LW == nullptr) {
if (Verbosity())
Report("profiling: Failed to flush to file, dropping data.\n");
} else {
// Now for each of the buffers, write out the profile data as we would
// see it in memory, verbatim.
while (B.Data != nullptr && B.Size != 0) {
- retryingWriteAll(Fd, reinterpret_cast<const char *>(B.Data),
- reinterpret_cast<const char *>(B.Data) + B.Size);
+ LW->WriteAll(reinterpret_cast<const char *>(B.Data),
+ reinterpret_cast<const char *>(B.Data) + B.Size);
B = profileCollectorService::nextBuffer(B);
}
- // Then we close out the file.
- internal_close(Fd);
}
+ LogWriter::Close(LW);
}
}
profileCollectorService::reset();
- // Flush the current thread's local data structures as well.
- cleanupTLD();
-
- atomic_store(&ProfilerLogStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
+ atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
+ memory_order_release);
+ atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
memory_order_release);
return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
}
-namespace {
-
-thread_local atomic_uint8_t ReentranceGuard{0};
-
-static void postCurrentThreadFCT(ProfilingData &TLD) {
- if (TLD.Allocators == nullptr || TLD.FCT == nullptr)
- return;
-
- profileCollectorService::post(*TLD.FCT, GetTid());
- cleanupTLD();
-}
-
-} // namespace
-
void profilingHandleArg0(int32_t FuncId,
XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
unsigned char CPU;
@@ -181,21 +278,29 @@ void profilingHandleArg0(int32_t FuncId,
return;
auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
- auto &TLD = getThreadLocalData();
+ if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED ||
+ Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING))
+ return;
+
if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED ||
Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) {
postCurrentThreadFCT(TLD);
return;
}
+ auto T = getThreadLocalData();
+ if (T == nullptr)
+ return;
+
+ auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT));
switch (Entry) {
case XRayEntryType::ENTRY:
case XRayEntryType::LOG_ARGS_ENTRY:
- TLD.FCT->enterFunction(FuncId, TSC);
+ FCT->enterFunction(FuncId, TSC, CPU);
break;
case XRayEntryType::EXIT:
case XRayEntryType::TAIL:
- TLD.FCT->exitFunction(FuncId, TSC);
+ FCT->exitFunction(FuncId, TSC, CPU);
break;
default:
// FIXME: Handle bugs.
@@ -218,12 +323,22 @@ XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
return static_cast<XRayLogInitStatus>(CurrentStatus);
}
+ // Mark the current generation of buffers as finalizing. This lets threads
+ // currently holding buffers keep using them, while the last reference to a
+ // buffer in the generation performs the memory cleanup.
+ DCHECK_NE(BQ, nullptr);
+ BQ->finalize();
+
// Wait a grace period to allow threads to see that we're finalizing.
SleepForMillis(profilingFlags()->grace_period_ms);
- // We also want to make sure that the current thread's data is cleaned up,
- // if we have any.
- auto &TLD = getThreadLocalData();
+ // If, for some reason, we're entering this function from an instrumented
+ // handler, we bail out.
+ RecursionGuard G(ReentranceGuard);
+ if (!G)
+ return static_cast<XRayLogInitStatus>(CurrentStatus);
+
+ // Post the current thread's data if we have any.
postCurrentThreadFCT(TLD);
// Then we force serialize the log data.
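The finalize-then-grace-period sequence assumes BufferQueue generation semantics: after finalize(), getBuffer() refuses new hand-outs while buffers already held remain usable until released. A sketch of how a caller observes this, written against the interface in xray_buffer_queue.h (the precise error code returned while finalizing is an assumption here):

#include "xray_buffer_queue.h"

void observeFinalizedQueue(__xray::BufferQueue &Q) {
  Q.finalize(); // mark the current generation; no new buffers are handed out

  __xray::BufferQueue::Buffer B;
  if (Q.getBuffer(B) != __xray::BufferQueue::ErrorCode::Ok) {
    // Expected after finalize(): acquisition is refused, while buffers
    // already held by other threads stay valid until releaseBuffer(...);
    // the last release of the generation reclaims the backing memory.
  }
}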
@@ -235,19 +350,16 @@ XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
}
XRayLogInitStatus
-profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options,
+profilingLoggingInit(size_t, size_t, void *Options,
size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
- if (BufferSize != 0 || BufferMax != 0) {
- if (Verbosity())
- Report("__xray_log_init() being used, and is unsupported. Use "
- "__xray_log_init_mode(...) instead. Bailing out.");
+ RecursionGuard G(ReentranceGuard);
+ if (!G)
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
- }
s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
XRayLogInitStatus::XRAY_LOG_INITIALIZING,
- memory_order_release)) {
+ memory_order_acq_rel)) {
if (Verbosity())
Report("Cannot initialize already initialised profiling "
"implementation.\n");
@@ -276,35 +388,88 @@ profilingLoggingInit(size_t BufferSize, size_t BufferMax, void *Options,
// We need to reset the profile data collection implementation now.
profileCollectorService::reset();
- // We need to set up the exit handlers.
- static pthread_once_t Once = PTHREAD_ONCE_INIT;
- pthread_once(&Once, +[] {
- pthread_key_create(&ProfilingKey, +[](void *P) {
- // This is the thread-exit handler.
- auto &TLD = *reinterpret_cast<ProfilingData *>(P);
- if (TLD.Allocators == nullptr && TLD.FCT == nullptr)
- return;
-
- postCurrentThreadFCT(TLD);
- });
+ // Then also reset the buffer queue implementation.
+ if (BQ == nullptr) {
+ bool Success = false;
+ new (&BufferQueueStorage)
+ BufferQueue(profilingFlags()->per_thread_allocator_max,
+ profilingFlags()->buffers_max, Success);
+ if (!Success) {
+ if (Verbosity())
+ Report("Failed to initialize preallocated memory buffers!");
+ atomic_store(&ProfilerLogStatus,
+ XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
+ memory_order_release);
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ }
- // We also need to set up an exit handler, so that we can get the profile
- // information at exit time. We use the C API to do this, to not rely on C++
- // ABI functions for registering exit handlers.
- Atexit(+[] {
- // Finalize and flush.
- if (profilingFinalize() != XRAY_LOG_FINALIZED) {
- cleanupTLD();
- return;
- }
- if (profilingFlush() != XRAY_LOG_FLUSHED) {
- cleanupTLD();
- return;
- }
+ // If we've succeeded, set the global pointer to the initialized storage.
+ BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
+ } else {
+ BQ->finalize();
+ auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max,
+ profilingFlags()->buffers_max);
+
+ if (InitStatus != BufferQueue::ErrorCode::Ok) {
if (Verbosity())
- Report("XRay Profile flushed at exit.");
- });
- });
+ Report("Failed to initialize preallocated memory buffers; error: %s",
+ BufferQueue::getErrorString(InitStatus));
+ atomic_store(&ProfilerLogStatus,
+ XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
+ memory_order_release);
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ }
+
+ DCHECK(!BQ->finalizing());
+ }
+
+ // We need to set up the exit handlers.
+ static pthread_once_t Once = PTHREAD_ONCE_INIT;
+ pthread_once(
+ &Once, +[] {
+ pthread_key_create(
+ &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {
+ if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
+ return;
+
+ if (P == nullptr)
+ return;
+
+ auto T = reinterpret_cast<ProfilingData *>(P);
+ if (atomic_load_relaxed(&T->Allocators) == 0)
+ return;
+
+ {
+ // If we're somehow executing this while inside a
+ // non-reentrant-friendly context, we skip attempting to post
+ // the current thread's data.
+ RecursionGuard G(ReentranceGuard);
+ if (!G)
+ return;
+
+ postCurrentThreadFCT(*T);
+ }
+ });
+
+ // We also need to set up an exit handler, so that we can get the
+ // profile information at exit time. We use the C API to do this, to not
+ // rely on C++ ABI functions for registering exit handlers.
+ Atexit(+[]() XRAY_NEVER_INSTRUMENT {
+ if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
+ return;
+
+ auto Cleanup =
+ at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });
+
+ // Finalize and flush.
+ if (profilingFinalize() != XRAY_LOG_FINALIZED ||
+ profilingFlush() != XRAY_LOG_FLUSHED)
+ return;
+
+ if (Verbosity())
+ Report("XRay Profile flushed at exit.");
+ });
+ });
__xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
__xray_set_handler(profilingHandleArg0);
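The pthread_once block above wires up two exit paths: a TLS destructor that posts a thread's data when that thread exits, and an Atexit handler that finalizes and flushes whatever remains at process exit, with ThreadExitingLatch keeping the two from double-processing the same thread. A self-contained sketch of that arrangement using plain pthreads and std::atexit (compile with -lpthread):

#include <atomic>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <pthread.h>

static pthread_key_t Key;
thread_local std::atomic<std::uint8_t> Exiting{0};

// TLS destructor: posts this thread's data unless the thread already went
// through the other teardown path.
static void onThreadExit(void *P) {
  if (Exiting.exchange(1, std::memory_order_acq_rel))
    return;
  std::printf("thread exit: posting %p\n", P);
}

static void *worker(void *) {
  static int Datum = 42;
  pthread_setspecific(Key, &Datum); // arm the destructor for this thread
  return nullptr;                   // onThreadExit(&Datum) runs on exit
}

int main() {
  pthread_key_create(&Key, onThreadExit);
  std::atexit(+[] {
    if (Exiting.exchange(1, std::memory_order_acq_rel))
      return; // this thread already ran its TLS destructor
    std::printf("process exit: finalize + flush\n");
  });
  pthread_t T;
  pthread_create(&T, nullptr, worker, nullptr);
  pthread_join(T, nullptr);
  return 0; // the atexit handler runs on the main thread
}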