summaryrefslogtreecommitdiff
path: root/lib/xray/xray_fdr_logging_impl.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/xray/xray_fdr_logging_impl.h')
-rw-r--r--lib/xray/xray_fdr_logging_impl.h237
1 files changed, 146 insertions, 91 deletions
diff --git a/lib/xray/xray_fdr_logging_impl.h b/lib/xray/xray_fdr_logging_impl.h
index ce360cb03ea7d..4a1d80fd0ebae 100644
--- a/lib/xray/xray_fdr_logging_impl.h
+++ b/lib/xray/xray_fdr_logging_impl.h
@@ -37,6 +37,9 @@
namespace __xray {
+__sanitizer::atomic_sint32_t LoggingStatus = {
+ XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
+
/// We expose some of the state transitions when FDR logging mode is operating
/// such that we can simulate a series of log events that may occur without
/// and test with determinism without worrying about the real CPU time.
@@ -123,12 +126,21 @@ thread_local uint8_t NumTailCalls = 0;
constexpr auto MetadataRecSize = sizeof(MetadataRecord);
constexpr auto FunctionRecSize = sizeof(FunctionRecord);
+// We use a thread_local variable to keep track of which CPUs we've already
+// run, and the TSC times for these CPUs. This allows us to stop repeating the
+// CPU field in the function records.
+//
+// We assume that we'll support only 65536 CPUs for x86_64.
+thread_local uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
+thread_local uint64_t LastTSC = 0;
+thread_local uint64_t LastFunctionEntryTSC = 0;
+
class ThreadExitBufferCleanup {
- std::weak_ptr<BufferQueue> Buffers;
+ std::shared_ptr<BufferQueue> &Buffers;
BufferQueue::Buffer &Buffer;
public:
- explicit ThreadExitBufferCleanup(std::weak_ptr<BufferQueue> BQ,
+ explicit ThreadExitBufferCleanup(std::shared_ptr<BufferQueue> &BQ,
BufferQueue::Buffer &Buffer)
XRAY_NEVER_INSTRUMENT : Buffers(BQ),
Buffer(Buffer) {}
@@ -142,17 +154,24 @@ public:
// the queue.
assert((RecordPtr + MetadataRecSize) - static_cast<char *>(Buffer.Buffer) >=
static_cast<ptrdiff_t>(MetadataRecSize));
- if (auto BQ = Buffers.lock()) {
+ if (Buffers) {
writeEOBMetadata();
- auto EC = BQ->releaseBuffer(Buffer);
+ auto EC = Buffers->releaseBuffer(Buffer);
if (EC != BufferQueue::ErrorCode::Ok)
Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
BufferQueue::getErrorString(EC));
+ Buffers = nullptr;
return;
}
}
};
+// Make sure a thread that's ever called handleArg0 has a thread-local
+// live reference to the buffer queue for this particular instance of
+// FDRLogging, and that we're going to clean it up when the thread exits.
+thread_local std::shared_ptr<BufferQueue> LocalBQ = nullptr;
+thread_local ThreadExitBufferCleanup Cleanup(LocalBQ, Buffer);
+
class RecursionGuard {
bool &Running;
const bool Valid;
@@ -176,7 +195,7 @@ public:
}
};
-static inline bool loggingInitialized(
+inline bool loggingInitialized(
const __sanitizer::atomic_sint32_t &LoggingStatus) XRAY_NEVER_INSTRUMENT {
return __sanitizer::atomic_load(&LoggingStatus,
__sanitizer::memory_order_acquire) ==
@@ -185,8 +204,8 @@ static inline bool loggingInitialized(
} // namespace
-static inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
- char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
+ char *&MemPtr) XRAY_NEVER_INSTRUMENT {
static constexpr int InitRecordsCount = 2;
std::aligned_storage<sizeof(MetadataRecord)>::type Records[InitRecordsCount];
{
@@ -222,9 +241,8 @@ static inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
NumTailCalls = 0;
}
-static inline void setupNewBuffer(int (*wall_clock_reader)(clockid_t,
- struct timespec *))
- XRAY_NEVER_INSTRUMENT {
+inline void setupNewBuffer(int (*wall_clock_reader)(
+ clockid_t, struct timespec *)) XRAY_NEVER_INSTRUMENT {
RecordPtr = static_cast<char *>(Buffer.Buffer);
pid_t Tid = syscall(SYS_gettid);
timespec TS{0, 0};
@@ -235,8 +253,8 @@ static inline void setupNewBuffer(int (*wall_clock_reader)(clockid_t,
NumTailCalls = 0;
}
-static inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
- char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
+ char *&MemPtr) XRAY_NEVER_INSTRUMENT {
MetadataRecord NewCPUId;
NewCPUId.Type = uint8_t(RecordType::Metadata);
NewCPUId.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewCPUId);
@@ -253,12 +271,12 @@ static inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
NumTailCalls = 0;
}
-static inline void writeNewCPUIdMetadata(uint16_t CPU,
- uint64_t TSC) XRAY_NEVER_INSTRUMENT {
+inline void writeNewCPUIdMetadata(uint16_t CPU,
+ uint64_t TSC) XRAY_NEVER_INSTRUMENT {
writeNewCPUIdMetadata(CPU, TSC, RecordPtr);
}
-static inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT {
MetadataRecord EOBMeta;
EOBMeta.Type = uint8_t(RecordType::Metadata);
EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer);
@@ -269,12 +287,12 @@ static inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT {
NumTailCalls = 0;
}
-static inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
+inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
writeEOBMetadata(RecordPtr);
}
-static inline void writeTSCWrapMetadata(uint64_t TSC,
- char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+inline void writeTSCWrapMetadata(uint64_t TSC,
+ char *&MemPtr) XRAY_NEVER_INSTRUMENT {
MetadataRecord TSCWrap;
TSCWrap.Type = uint8_t(RecordType::Metadata);
TSCWrap.RecordKind = uint8_t(MetadataRecord::RecordKinds::TSCWrap);
@@ -289,13 +307,13 @@ static inline void writeTSCWrapMetadata(uint64_t TSC,
NumTailCalls = 0;
}
-static inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
+inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
writeTSCWrapMetadata(TSC, RecordPtr);
}
-static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
- XRayEntryType EntryType,
- char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
+ XRayEntryType EntryType,
+ char *&MemPtr) XRAY_NEVER_INSTRUMENT {
std::aligned_storage<sizeof(FunctionRecord), alignof(FunctionRecord)>::type
AlignedFuncRecordBuffer;
auto &FuncRecord =
@@ -339,6 +357,17 @@ static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
FuncRecord.RecordKind =
uint8_t(FunctionRecord::RecordKinds::FunctionTailExit);
break;
+ case XRayEntryType::CUSTOM_EVENT: {
+ // This is a bug in patching, so we'll report it once and move on.
+ static bool Once = [&] {
+ Report("Internal error: patched an XRay custom event call as a function; "
+ "func id = %d\n",
+ FuncId);
+ return true;
+ }();
+ (void)Once;
+ return;
+ }
}
std::memcpy(MemPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord));
@@ -346,8 +375,9 @@ static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
}
static uint64_t thresholdTicks() {
- static uint64_t TicksPerSec = probeRequiredCPUFeatures() ? getTSCFrequency() :
- __xray::NanosecondsPerSecond;
+ static uint64_t TicksPerSec = probeRequiredCPUFeatures()
+ ? getTSCFrequency()
+ : __xray::NanosecondsPerSecond;
static const uint64_t ThresholdTicks =
TicksPerSec * flags()->xray_fdr_log_func_duration_threshold_us / 1000000;
return ThresholdTicks;
@@ -397,9 +427,8 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
RewindingRecordPtr -= FunctionRecSize;
RewindingTSC -= ExpectedTailExit.TSCDelta;
AlignedFuncStorage FunctionEntryBuffer;
- const auto &ExpectedFunctionEntry =
- *reinterpret_cast<FunctionRecord *>(std::memcpy(
- &FunctionEntryBuffer, RewindingRecordPtr, FunctionRecSize));
+ const auto &ExpectedFunctionEntry = *reinterpret_cast<FunctionRecord *>(
+ std::memcpy(&FunctionEntryBuffer, RewindingRecordPtr, FunctionRecSize));
assert(ExpectedFunctionEntry.RecordKind ==
uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
"Expected to find function entry when rewinding tail call.");
@@ -422,7 +451,7 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
}
}
-static inline bool releaseThreadLocalBuffer(BufferQueue *BQ) {
+inline bool releaseThreadLocalBuffer(BufferQueue *BQ) {
auto EC = BQ->releaseBuffer(Buffer);
if (EC != BufferQueue::ErrorCode::Ok) {
Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
@@ -432,11 +461,29 @@ static inline bool releaseThreadLocalBuffer(BufferQueue *BQ) {
return true;
}
-static inline void processFunctionHook(
- int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU,
- int (*wall_clock_reader)(clockid_t, struct timespec *),
- __sanitizer::atomic_sint32_t &LoggingStatus,
- const std::shared_ptr<BufferQueue> &BQ) XRAY_NEVER_INSTRUMENT {
+inline bool prepareBuffer(int (*wall_clock_reader)(clockid_t,
+ struct timespec *),
+ size_t MaxSize) XRAY_NEVER_INSTRUMENT {
+ char *BufferStart = static_cast<char *>(Buffer.Buffer);
+ if ((RecordPtr + MaxSize) > (BufferStart + Buffer.Size - MetadataRecSize)) {
+ writeEOBMetadata();
+ if (!releaseThreadLocalBuffer(LocalBQ.get()))
+ return false;
+ auto EC = LocalBQ->getBuffer(Buffer);
+ if (EC != BufferQueue::ErrorCode::Ok) {
+ Report("Failed to acquire a buffer; error=%s\n",
+ BufferQueue::getErrorString(EC));
+ return false;
+ }
+ setupNewBuffer(wall_clock_reader);
+ }
+ return true;
+}
+
+inline bool isLogInitializedAndReady(
+ std::shared_ptr<BufferQueue> &LocalBQ, uint64_t TSC, unsigned char CPU,
+ int (*wall_clock_reader)(clockid_t,
+ struct timespec *)) XRAY_NEVER_INSTRUMENT {
// Bail out right away if logging is not initialized yet.
// We should take the opportunity to release the buffer though.
auto Status = __sanitizer::atomic_load(&LoggingStatus,
@@ -446,44 +493,19 @@ static inline void processFunctionHook(
(Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
Status == XRayLogInitStatus::XRAY_LOG_FINALIZED)) {
writeEOBMetadata();
- if (!releaseThreadLocalBuffer(BQ.get()))
- return;
+ if (!releaseThreadLocalBuffer(LocalBQ.get()))
+ return false;
RecordPtr = nullptr;
+ LocalBQ = nullptr;
+ return false;
}
- return;
- }
-
- // We use a thread_local variable to keep track of which CPUs we've already
- // run, and the TSC times for these CPUs. This allows us to stop repeating the
- // CPU field in the function records.
- //
- // We assume that we'll support only 65536 CPUs for x86_64.
- thread_local uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
- thread_local uint64_t LastTSC = 0;
- thread_local uint64_t LastFunctionEntryTSC = 0;
-
- // Make sure a thread that's ever called handleArg0 has a thread-local
- // live reference to the buffer queue for this particular instance of
- // FDRLogging, and that we're going to clean it up when the thread exits.
- thread_local auto LocalBQ = BQ;
- thread_local ThreadExitBufferCleanup Cleanup(LocalBQ, Buffer);
-
- // Prevent signal handler recursion, so in case we're already in a log writing
- // mode and the signal handler comes in (and is also instrumented) then we
- // don't want to be clobbering potentially partial writes already happening in
- // the thread. We use a simple thread_local latch to only allow one on-going
- // handleArg0 to happen at any given time.
- thread_local bool Running = false;
- RecursionGuard Guard{Running};
- if (!Guard) {
- assert(Running == true && "RecursionGuard is buggy!");
- return;
+ return false;
}
if (!loggingInitialized(LoggingStatus) || LocalBQ->finalizing()) {
writeEOBMetadata();
- if (!releaseThreadLocalBuffer(BQ.get()))
- return;
+ if (!releaseThreadLocalBuffer(LocalBQ.get()))
+ return false;
RecordPtr = nullptr;
}
@@ -496,19 +518,57 @@ static inline void processFunctionHook(
LS != XRayLogInitStatus::XRAY_LOG_FINALIZED)
Report("Failed to acquire a buffer; error=%s\n",
BufferQueue::getErrorString(EC));
- return;
+ return false;
}
setupNewBuffer(wall_clock_reader);
}
if (CurrentCPU == std::numeric_limits<uint16_t>::max()) {
- // This means this is the first CPU this thread has ever run on. We set the
- // current CPU and record this as the first TSC we've seen.
+ // This means this is the first CPU this thread has ever run on. We set
+ // the current CPU and record this as the first TSC we've seen.
CurrentCPU = CPU;
writeNewCPUIdMetadata(CPU, TSC);
}
+ return true;
+} // namespace __xray_fdr_internal
+
+inline void endBufferIfFull() XRAY_NEVER_INSTRUMENT {
+ auto BufferStart = static_cast<char *>(Buffer.Buffer);
+ if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) {
+ writeEOBMetadata();
+ if (!releaseThreadLocalBuffer(LocalBQ.get()))
+ return;
+ RecordPtr = nullptr;
+ }
+}
+
+inline void processFunctionHook(
+ int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU,
+ int (*wall_clock_reader)(clockid_t, struct timespec *),
+ __sanitizer::atomic_sint32_t &LoggingStatus,
+ const std::shared_ptr<BufferQueue> &BQ) XRAY_NEVER_INSTRUMENT {
+ // Prevent signal handler recursion, so in case we're already in a log writing
+ // mode and the signal handler comes in (and is also instrumented) then we
+ // don't want to be clobbering potentially partial writes already happening in
+ // the thread. We use a simple thread_local latch to only allow one on-going
+ // handleArg0 to happen at any given time.
+ thread_local bool Running = false;
+ RecursionGuard Guard{Running};
+ if (!Guard) {
+ assert(Running == true && "RecursionGuard is buggy!");
+ return;
+ }
+
+ // In case the reference has been cleaned up before, we make sure we
+ // initialize it to the provided BufferQueue.
+ if (LocalBQ == nullptr)
+ LocalBQ = BQ;
+
+ if (!isLogInitializedAndReady(LocalBQ, TSC, CPU, wall_clock_reader))
+ return;
+
// Before we go setting up writing new function entries, we need to be really
// careful about the pointer math we're doing. This means we need to ensure
// that the record we are about to write is going to fit into the buffer,
@@ -545,25 +605,15 @@ static inline void processFunctionHook(
// bytes in the end of the buffer, we need to write out the EOB, get a new
// Buffer, set it up properly before doing any further writing.
//
- char *BufferStart = static_cast<char *>(Buffer.Buffer);
- if ((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart <
- static_cast<ptrdiff_t>(MetadataRecSize)) {
- writeEOBMetadata();
- if (!releaseThreadLocalBuffer(LocalBQ.get()))
- return;
- auto EC = LocalBQ->getBuffer(Buffer);
- if (EC != BufferQueue::ErrorCode::Ok) {
- Report("Failed to acquire a buffer; error=%s\n",
- BufferQueue::getErrorString(EC));
- return;
- }
- setupNewBuffer(wall_clock_reader);
+ if (!prepareBuffer(wall_clock_reader, FunctionRecSize + MetadataRecSize)) {
+ LocalBQ = nullptr;
+ return;
}
// By this point, we are now ready to write at most 24 bytes (one metadata
// record and one function record).
- BufferStart = static_cast<char *>(Buffer.Buffer);
- assert((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart >=
+ assert((RecordPtr + (MetadataRecSize + FunctionRecSize)) -
+ static_cast<char *>(Buffer.Buffer) >=
static_cast<ptrdiff_t>(MetadataRecSize) &&
"Misconfigured BufferQueue provided; Buffer size not large enough.");
@@ -586,7 +636,6 @@ static inline void processFunctionHook(
// FunctionRecord. In this case we write down just a FunctionRecord with
// the correct TSC delta.
//
-
uint32_t RecordTSCDelta = 0;
if (CPU != CurrentCPU) {
// We've moved to a new CPU.
@@ -619,21 +668,27 @@ static inline void processFunctionHook(
break;
rewindRecentCall(TSC, LastTSC, LastFunctionEntryTSC, FuncId);
return; // without writing log.
+ case XRayEntryType::CUSTOM_EVENT: {
+ // This is a bug in patching, so we'll report it once and move on.
+ static bool Once = [&] {
+ Report("Internal error: patched an XRay custom event call as a function; "
+ "func id = %d",
+ FuncId);
+ return true;
+ }();
+ (void)Once;
+ return;
+ }
}
writeFunctionRecord(FuncId, RecordTSCDelta, Entry, RecordPtr);
// If we've exhausted the buffer by this time, we then release the buffer to
// make sure that other threads may start using this buffer.
- if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) {
- writeEOBMetadata();
- if (!releaseThreadLocalBuffer(LocalBQ.get()))
- return;
- RecordPtr = nullptr;
- }
+ endBufferIfFull();
}
} // namespace __xray_fdr_internal
-
} // namespace __xray
+
#endif // XRAY_XRAY_FDR_LOGGING_IMPL_H