summaryrefslogtreecommitdiff
path: root/lib/xray
diff options
context:
space:
mode:
Diffstat (limited to 'lib/xray')
-rw-r--r--lib/xray/CMakeLists.txt71
-rw-r--r--lib/xray/tests/CMakeLists.txt46
-rw-r--r--lib/xray/tests/unit/buffer_queue_test.cc4
-rw-r--r--lib/xray/tests/unit/fdr_logging_test.cc107
-rw-r--r--lib/xray/weak_symbols.txt4
-rw-r--r--lib/xray/xray_buffer_queue.cc79
-rw-r--r--lib/xray/xray_buffer_queue.h97
-rw-r--r--lib/xray/xray_fdr_log_records.h3
-rw-r--r--lib/xray/xray_fdr_logging.cc206
-rw-r--r--lib/xray/xray_fdr_logging.h1
-rw-r--r--lib/xray/xray_fdr_logging_impl.h605
-rw-r--r--lib/xray/xray_flags.h1
-rw-r--r--lib/xray/xray_flags.inc28
-rw-r--r--lib/xray/xray_init.cc35
-rw-r--r--lib/xray/xray_inmemory_log.cc453
-rw-r--r--lib/xray/xray_inmemory_log.h44
-rw-r--r--lib/xray/xray_interface.cc314
-rw-r--r--lib/xray/xray_interface_internal.h6
-rw-r--r--lib/xray/xray_log_interface.cc63
-rw-r--r--lib/xray/xray_trampoline_x86_64.S147
-rw-r--r--lib/xray/xray_utils.cc3
-rw-r--r--lib/xray/xray_x86_64.cc40
22 files changed, 1562 insertions, 795 deletions
diff --git a/lib/xray/CMakeLists.txt b/lib/xray/CMakeLists.txt
index 72caa9fac732a..5547600b943ae 100644
--- a/lib/xray/CMakeLists.txt
+++ b/lib/xray/CMakeLists.txt
@@ -13,47 +13,39 @@ set(XRAY_SOURCES
set(x86_64_SOURCES
xray_x86_64.cc
- xray_trampoline_x86_64.S
- ${XRAY_SOURCES})
+ xray_trampoline_x86_64.S)
set(arm_SOURCES
xray_arm.cc
- xray_trampoline_arm.S
- ${XRAY_SOURCES})
+ xray_trampoline_arm.S)
set(armhf_SOURCES
- ${arm_SOURCES})
+ ${arm_SOURCES})
set(aarch64_SOURCES
xray_AArch64.cc
- xray_trampoline_AArch64.S
- ${XRAY_SOURCES})
+ xray_trampoline_AArch64.S)
set(mips_SOURCES
xray_mips.cc
- xray_trampoline_mips.S
- ${XRAY_SOURCES})
+ xray_trampoline_mips.S)
set(mipsel_SOURCES
xray_mips.cc
- xray_trampoline_mips.S
- ${XRAY_SOURCES})
+ xray_trampoline_mips.S)
set(mips64_SOURCES
xray_mips64.cc
- xray_trampoline_mips64.S
- ${XRAY_SOURCES})
+ xray_trampoline_mips64.S)
set(mips64el_SOURCES
xray_mips64.cc
- xray_trampoline_mips64.S
- ${XRAY_SOURCES})
+ xray_trampoline_mips64.S)
set(powerpc64le_SOURCES
- xray_powerpc64.cc
- xray_trampoline_powerpc64.cc
- xray_trampoline_powerpc64_asm.S
- ${XRAY_SOURCES})
+ xray_powerpc64.cc
+ xray_trampoline_powerpc64.cc
+ xray_trampoline_powerpc64_asm.S)
include_directories(..)
include_directories(../../include)
@@ -62,20 +54,50 @@ set(XRAY_CFLAGS ${SANITIZER_COMMON_CFLAGS})
set(XRAY_COMMON_DEFINITIONS XRAY_HAS_EXCEPTIONS=1)
append_list_if(
COMPILER_RT_HAS_XRAY_COMPILER_FLAG XRAY_SUPPORTED=1 XRAY_COMMON_DEFINITIONS)
-
-add_compiler_rt_object_libraries(RTXray
- ARCHS ${XRAY_SUPPORTED_ARCH}
- SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_CFLAGS}
- DEFS ${XRAY_COMMON_DEFINITIONS})
+append_list_if(
+ COMPILER_RT_BUILD_XRAY_NO_PREINIT XRAY_NO_PREINIT XRAY_COMMON_DEFINITIONS)
add_compiler_rt_component(xray)
set(XRAY_COMMON_RUNTIME_OBJECT_LIBS
+ RTXray
RTSanitizerCommon
RTSanitizerCommonLibc)
+if (APPLE)
+ set(XRAY_LINK_LIBS ${SANITIZER_COMMON_LINK_LIBS})
+ add_asm_sources(XRAY_ASM_SOURCES xray_trampoline_x86_64.S)
+
+ add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
+ add_weak_symbols("xray" WEAK_SYMBOL_LINK_FLAGS)
+
+ add_compiler_rt_object_libraries(RTXray
+ OS ${XRAY_SUPPORTED_OS}
+ ARCHS ${XRAY_SUPPORTED_ARCH}
+ SOURCES ${x86_64_SOURCES}
+ CFLAGS ${XRAY_CFLAGS}
+ DEFS ${XRAY_COMMON_DEFINITIONS})
+
+ # We only support running on osx for now.
+ add_compiler_rt_runtime(clang_rt.xray
+ STATIC
+ OS ${XRAY_SUPPORTED_OS}
+ ARCHS ${XRAY_SUPPORTED_ARCH}
+ OBJECT_LIBS RTXray
+ RTSanitizerCommon
+ RTSanitizerCommonLibc
+ CFLAGS ${XRAY_CFLAGS}
+ DEFS ${XRAY_COMMON_DEFINITIONS}
+ LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS} ${WEAK_SYMBOL_LINK_FLAGS}
+ LINK_LIBS ${XRAY_LINK_LIBS}
+ PARENT_TARGET xray)
+else()
foreach(arch ${XRAY_SUPPORTED_ARCH})
if(CAN_TARGET_${arch})
+ add_compiler_rt_object_libraries(RTXray
+ ARCHS ${arch}
+ SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_CFLAGS}
+ DEFS ${XRAY_COMMON_DEFINITIONS})
add_compiler_rt_runtime(clang_rt.xray
STATIC
ARCHS ${arch}
@@ -86,6 +108,7 @@ foreach(arch ${XRAY_SUPPORTED_ARCH})
PARENT_TARGET xray)
endif()
endforeach()
+endif()
if(COMPILER_RT_INCLUDE_TESTS)
add_subdirectory(tests)
diff --git a/lib/xray/tests/CMakeLists.txt b/lib/xray/tests/CMakeLists.txt
index a1eb4a030ccc5..e54e63f27890c 100644
--- a/lib/xray/tests/CMakeLists.txt
+++ b/lib/xray/tests/CMakeLists.txt
@@ -11,47 +11,23 @@ set(XRAY_UNITTEST_CFLAGS
-I${COMPILER_RT_SOURCE_DIR}/lib/xray
-I${COMPILER_RT_SOURCE_DIR}/lib)
-macro(xray_compile obj_list source arch)
- get_filename_component(basename ${source} NAME)
- set(output_obj "${basename}.${arch}.o")
- get_target_flags_for_arch(${arch} TARGET_CFLAGS)
- if(NOT COMPILER_RT_STANDALONE_BUILD)
- list(APPEND COMPILE_DEPS gtest_main xray)
- endif()
- clang_compile(${output_obj} ${source}
- CFLAGS ${XRAY_UNITTEST_CFLAGS} ${TARGET_CFLAGS}
- DEPS ${COMPILE_DEPS})
- list(APPEND ${obj_list} ${output_obj})
-endmacro()
-
+set(XRAY_TEST_ARCH ${XRAY_SUPPORTED_ARCH})
macro(add_xray_unittest testname)
- set(XRAY_TEST_ARCH ${XRAY_SUPPORTED_ARCH})
- if (APPLE)
- darwin_filter_host_archs(XRAY_SUPPORTED_ARCH)
- endif()
- if(UNIX)
+ cmake_parse_arguments(TEST "" "" "SOURCES;HEADERS" ${ARGN})
+ if(UNIX AND NOT APPLE)
foreach(arch ${XRAY_TEST_ARCH})
- cmake_parse_arguments(TEST "" "" "SOURCES;HEADERS" ${ARGN})
set(TEST_OBJECTS)
- foreach(SOURCE ${TEST_SOURCES} ${COMPILER_RT_GTEST_SOURCE})
- xray_compile(TEST_OBJECTS ${SOURCE} ${arch} ${TEST_HEADERS})
- endforeach()
- get_target_flags_for_arch(${arch} TARGET_LINK_FLAGS)
- set(TEST_DEPS ${TEST_OBJECTS})
- if(NOT COMPILER_RT_STANDALONE_BUILD)
- list(APPEND TEST_DEPS gtest_main xray)
- endif()
- if(NOT APPLE)
- add_compiler_rt_test(XRayUnitTests ${testname}-${arch}
- OBJECTS ${TEST_OBJECTS}
- DEPS ${TEST_DEPS}
- LINK_FLAGS ${TARGET_LINK_FLAGS}
+ generate_compiler_rt_tests(TEST_OBJECTS
+ XRayUnitTests "${testname}-${arch}-Test" "${arch}"
+ SOURCES ${TEST_SOURCES} ${COMPILER_RT_GTEST_SOURCE}
+ DEPS gtest xray llvm-xray
+ CFLAGS ${XRAY_UNITTEST_CFLAGS}
+ LINK_FLAGS -fxray-instrument
+ ${TARGET_LINK_FLAGS}
-lstdc++ -lm ${CMAKE_THREAD_LIBS_INIT}
-lpthread
- -L${COMPILER_RT_LIBRARY_OUTPUT_DIR} -lclang_rt.xray-${arch}
-ldl -lrt)
- endif()
- # FIXME: Figure out how to run even just the unit tests on APPLE.
+ set_target_properties(XRayUnitTests PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endforeach()
endif()
endmacro()
diff --git a/lib/xray/tests/unit/buffer_queue_test.cc b/lib/xray/tests/unit/buffer_queue_test.cc
index ac89a8dbc50e7..1ec7469ce1874 100644
--- a/lib/xray/tests/unit/buffer_queue_test.cc
+++ b/lib/xray/tests/unit/buffer_queue_test.cc
@@ -68,9 +68,9 @@ TEST(BufferQueueTest, ErrorsWhenFinalising) {
ASSERT_NE(nullptr, Buf.Buffer);
ASSERT_EQ(Buffers.finalize(), BufferQueue::ErrorCode::Ok);
BufferQueue::Buffer OtherBuf;
- ASSERT_EQ(BufferQueue::ErrorCode::AlreadyFinalized,
+ ASSERT_EQ(BufferQueue::ErrorCode::QueueFinalizing,
Buffers.getBuffer(OtherBuf));
- ASSERT_EQ(BufferQueue::ErrorCode::AlreadyFinalized,
+ ASSERT_EQ(BufferQueue::ErrorCode::QueueFinalizing,
Buffers.finalize());
ASSERT_EQ(Buffers.releaseBuffer(Buf), BufferQueue::ErrorCode::Ok);
}
diff --git a/lib/xray/tests/unit/fdr_logging_test.cc b/lib/xray/tests/unit/fdr_logging_test.cc
index 0d5e99a743344..1009d56a43b33 100644
--- a/lib/xray/tests/unit/fdr_logging_test.cc
+++ b/lib/xray/tests/unit/fdr_logging_test.cc
@@ -17,8 +17,10 @@
#include <iostream>
#include <sys/mman.h>
#include <sys/stat.h>
+#include <sys/syscall.h>
#include <sys/types.h>
#include <system_error>
+#include <thread>
#include <unistd.h>
#include "xray/xray_records.h"
@@ -34,14 +36,23 @@ struct ScopedFileCloserAndDeleter {
: Fd(Fd), Filename(Filename) {}
~ScopedFileCloserAndDeleter() {
+ if (Map)
+ munmap(Map, Size);
if (Fd) {
close(Fd);
unlink(Filename);
}
}
+ void registerMap(void *M, size_t S) {
+ Map = M;
+ Size = S;
+ }
+
int Fd;
const char *Filename;
+ void *Map = nullptr;
+ size_t Size = 0;
};
TEST(FDRLoggingTest, Simple) {
@@ -51,13 +62,12 @@ TEST(FDRLoggingTest, Simple) {
Options.Fd = mkstemp(TmpFilename);
ASSERT_NE(Options.Fd, -1);
ASSERT_EQ(fdrLoggingInit(kBufferSize, kBufferMax, &Options,
- sizeof(FDRLoggingOptions)),
+ sizeof(FDRLoggingOptions)),
XRayLogInitStatus::XRAY_LOG_INITIALIZED);
fdrLoggingHandleArg0(1, XRayEntryType::ENTRY);
fdrLoggingHandleArg0(1, XRayEntryType::EXIT);
ASSERT_EQ(fdrLoggingFinalize(), XRayLogInitStatus::XRAY_LOG_FINALIZED);
ASSERT_EQ(fdrLoggingFlush(), XRayLogFlushStatus::XRAY_LOG_FLUSHED);
- ASSERT_EQ(fdrLoggingReset(), XRayLogInitStatus::XRAY_LOG_UNINITIALIZED);
// To do this properly, we have to close the file descriptor then re-open the
// file for reading this time.
@@ -68,20 +78,25 @@ TEST(FDRLoggingTest, Simple) {
auto Size = lseek(Fd, 0, SEEK_END);
ASSERT_NE(Size, 0);
// Map the file contents.
- const char *Contents = static_cast<const char *>(
- mmap(NULL, Size, PROT_READ, MAP_PRIVATE, Fd, 0));
+ void *Map = mmap(NULL, Size, PROT_READ, MAP_PRIVATE, Fd, 0);
+ const char *Contents = static_cast<const char *>(Map);
+ Guard.registerMap(Map, Size);
ASSERT_NE(Contents, nullptr);
XRayFileHeader H;
memcpy(&H, Contents, sizeof(XRayFileHeader));
- ASSERT_EQ(H.Version, 1);
+ ASSERT_EQ(H.Version, 2);
ASSERT_EQ(H.Type, FileTypes::FDR_LOG);
- // We require one buffer at least to have the "start of buffer" metadata
- // record.
- MetadataRecord MDR;
- memcpy(&MDR, Contents + sizeof(XRayFileHeader), sizeof(MetadataRecord));
- ASSERT_EQ(MDR.RecordKind, uint8_t(MetadataRecord::RecordKinds::NewBuffer));
+ // We require one buffer at least to have the "extents" metadata record,
+ // followed by the NewBuffer record.
+ MetadataRecord MDR0, MDR1;
+ memcpy(&MDR0, Contents + sizeof(XRayFileHeader), sizeof(MetadataRecord));
+ memcpy(&MDR1, Contents + sizeof(XRayFileHeader) + sizeof(MetadataRecord),
+ sizeof(MetadataRecord));
+ ASSERT_EQ(MDR0.RecordKind,
+ uint8_t(MetadataRecord::RecordKinds::BufferExtents));
+ ASSERT_EQ(MDR1.RecordKind, uint8_t(MetadataRecord::RecordKinds::NewBuffer));
}
TEST(FDRLoggingTest, Multiple) {
@@ -90,7 +105,7 @@ TEST(FDRLoggingTest, Multiple) {
Options.Fd = mkstemp(TmpFilename);
ASSERT_NE(Options.Fd, -1);
ASSERT_EQ(fdrLoggingInit(kBufferSize, kBufferMax, &Options,
- sizeof(FDRLoggingOptions)),
+ sizeof(FDRLoggingOptions)),
XRayLogInitStatus::XRAY_LOG_INITIALIZED);
for (uint64_t I = 0; I < 100; ++I) {
fdrLoggingHandleArg0(1, XRayEntryType::ENTRY);
@@ -98,7 +113,6 @@ TEST(FDRLoggingTest, Multiple) {
}
ASSERT_EQ(fdrLoggingFinalize(), XRayLogInitStatus::XRAY_LOG_FINALIZED);
ASSERT_EQ(fdrLoggingFlush(), XRayLogFlushStatus::XRAY_LOG_FLUSHED);
- ASSERT_EQ(fdrLoggingReset(), XRayLogInitStatus::XRAY_LOG_UNINITIALIZED);
// To do this properly, we have to close the file descriptor then re-open the
// file for reading this time.
@@ -109,18 +123,77 @@ TEST(FDRLoggingTest, Multiple) {
auto Size = lseek(Fd, 0, SEEK_END);
ASSERT_NE(Size, 0);
// Map the file contents.
- const char *Contents = static_cast<const char *>(
- mmap(NULL, Size, PROT_READ, MAP_PRIVATE, Fd, 0));
+ void *Map = mmap(NULL, Size, PROT_READ, MAP_PRIVATE, Fd, 0);
+ const char *Contents = static_cast<const char *>(Map);
+ Guard.registerMap(Map, Size);
+ ASSERT_NE(Contents, nullptr);
+
+ XRayFileHeader H;
+ memcpy(&H, Contents, sizeof(XRayFileHeader));
+ ASSERT_EQ(H.Version, 2);
+ ASSERT_EQ(H.Type, FileTypes::FDR_LOG);
+
+ MetadataRecord MDR0, MDR1;
+ memcpy(&MDR0, Contents + sizeof(XRayFileHeader), sizeof(MetadataRecord));
+ memcpy(&MDR1, Contents + sizeof(XRayFileHeader) + sizeof(MetadataRecord),
+ sizeof(MetadataRecord));
+ ASSERT_EQ(MDR0.RecordKind,
+ uint8_t(MetadataRecord::RecordKinds::BufferExtents));
+ ASSERT_EQ(MDR1.RecordKind, uint8_t(MetadataRecord::RecordKinds::NewBuffer));
+}
+
+TEST(FDRLoggingTest, MultiThreadedCycling) {
+ FDRLoggingOptions Options;
+ char TmpFilename[] = "fdr-logging-test.XXXXXX";
+ Options.Fd = mkstemp(TmpFilename);
+ ASSERT_NE(Options.Fd, -1);
+ ASSERT_EQ(fdrLoggingInit(kBufferSize, 1, &Options, sizeof(FDRLoggingOptions)),
+ XRayLogInitStatus::XRAY_LOG_INITIALIZED);
+
+ // Now we want to create one thread, do some logging, then create another one,
+ // in succession and making sure that we're able to get thread records from
+ // the latest thread (effectively being able to recycle buffers).
+ std::array<pid_t, 2> Threads;
+ for (uint64_t I = 0; I < 2; ++I) {
+ std::thread t{[I, &Threads] {
+ fdrLoggingHandleArg0(I + 1, XRayEntryType::ENTRY);
+ fdrLoggingHandleArg0(I + 1, XRayEntryType::EXIT);
+ Threads[I] = syscall(SYS_gettid);
+ }};
+ t.join();
+ }
+ ASSERT_EQ(fdrLoggingFinalize(), XRayLogInitStatus::XRAY_LOG_FINALIZED);
+ ASSERT_EQ(fdrLoggingFlush(), XRayLogFlushStatus::XRAY_LOG_FLUSHED);
+
+ // To do this properly, we have to close the file descriptor then re-open the
+ // file for reading this time.
+ ASSERT_EQ(close(Options.Fd), 0);
+ int Fd = open(TmpFilename, O_RDONLY);
+ ASSERT_NE(-1, Fd);
+ ScopedFileCloserAndDeleter Guard(Fd, TmpFilename);
+ auto Size = lseek(Fd, 0, SEEK_END);
+ ASSERT_NE(Size, 0);
+ // Map the file contents.
+ void *Map = mmap(NULL, Size, PROT_READ, MAP_PRIVATE, Fd, 0);
+ const char *Contents = static_cast<const char *>(Map);
+ Guard.registerMap(Map, Size);
ASSERT_NE(Contents, nullptr);
XRayFileHeader H;
memcpy(&H, Contents, sizeof(XRayFileHeader));
- ASSERT_EQ(H.Version, 1);
+ ASSERT_EQ(H.Version, 2);
ASSERT_EQ(H.Type, FileTypes::FDR_LOG);
- MetadataRecord MDR0;
+ MetadataRecord MDR0, MDR1;
memcpy(&MDR0, Contents + sizeof(XRayFileHeader), sizeof(MetadataRecord));
- ASSERT_EQ(MDR0.RecordKind, uint8_t(MetadataRecord::RecordKinds::NewBuffer));
+ memcpy(&MDR1, Contents + sizeof(XRayFileHeader) + sizeof(MetadataRecord),
+ sizeof(MetadataRecord));
+ ASSERT_EQ(MDR0.RecordKind,
+ uint8_t(MetadataRecord::RecordKinds::BufferExtents));
+ ASSERT_EQ(MDR1.RecordKind, uint8_t(MetadataRecord::RecordKinds::NewBuffer));
+ pid_t Latest = 0;
+ memcpy(&Latest, MDR1.Data, sizeof(pid_t));
+ ASSERT_EQ(Latest, Threads[1]);
}
} // namespace
diff --git a/lib/xray/weak_symbols.txt b/lib/xray/weak_symbols.txt
new file mode 100644
index 0000000000000..963fff2d697ee
--- /dev/null
+++ b/lib/xray/weak_symbols.txt
@@ -0,0 +1,4 @@
+___start_xray_fn_idx
+___start_xray_instr_map
+___stop_xray_fn_idx
+___stop_xray_instr_map
diff --git a/lib/xray/xray_buffer_queue.cc b/lib/xray/xray_buffer_queue.cc
index 7ba755ac30692..a0018f6b0cba4 100644
--- a/lib/xray/xray_buffer_queue.cc
+++ b/lib/xray/xray_buffer_queue.cc
@@ -13,28 +13,34 @@
//
//===----------------------------------------------------------------------===//
#include "xray_buffer_queue.h"
+#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_libc.h"
-#include <cstdlib>
-#include <tuple>
-
using namespace __xray;
using namespace __sanitizer;
-BufferQueue::BufferQueue(std::size_t B, std::size_t N, bool &Success)
- : BufferSize(B), Buffers(N), Mutex(), OwnedBuffers(), Finalizing{0} {
- for (auto &T : Buffers) {
- void *Tmp = malloc(BufferSize);
+BufferQueue::BufferQueue(size_t B, size_t N, bool &Success)
+ : BufferSize(B), Buffers(new BufferRep[N]()), BufferCount(N), Finalizing{0},
+ OwnedBuffers(new void *[N]()), Next(Buffers), First(Buffers),
+ LiveBuffers(0) {
+ for (size_t i = 0; i < N; ++i) {
+ auto &T = Buffers[i];
+ void *Tmp = InternalAlloc(BufferSize, nullptr, 64);
if (Tmp == nullptr) {
Success = false;
return;
}
-
- auto &Buf = std::get<0>(T);
+ void *Extents = InternalAlloc(sizeof(BufferExtents), nullptr, 64);
+ if (Extents == nullptr) {
+ Success = false;
+ return;
+ }
+ auto &Buf = T.Buff;
Buf.Buffer = Tmp;
Buf.Size = B;
- OwnedBuffers.emplace(Tmp);
+ Buf.Extents = reinterpret_cast<BufferExtents *>(Extents);
+ OwnedBuffers[i] = Tmp;
}
Success = true;
}
@@ -42,27 +48,50 @@ BufferQueue::BufferQueue(std::size_t B, std::size_t N, bool &Success)
BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) {
if (__sanitizer::atomic_load(&Finalizing, __sanitizer::memory_order_acquire))
return ErrorCode::QueueFinalizing;
- __sanitizer::BlockingMutexLock Guard(&Mutex);
- if (Buffers.empty())
+ __sanitizer::SpinMutexLock Guard(&Mutex);
+ if (LiveBuffers == BufferCount)
return ErrorCode::NotEnoughMemory;
- auto &T = Buffers.front();
- auto &B = std::get<0>(T);
+
+ auto &T = *Next;
+ auto &B = T.Buff;
Buf = B;
- B.Buffer = nullptr;
- B.Size = 0;
- Buffers.pop_front();
+ T.Used = true;
+ ++LiveBuffers;
+
+ if (++Next == (Buffers + BufferCount))
+ Next = Buffers;
+
return ErrorCode::Ok;
}
BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) {
- if (OwnedBuffers.count(Buf.Buffer) == 0)
+ // Blitz through the buffers array to find the buffer.
+ bool Found = false;
+ for (auto I = OwnedBuffers, E = OwnedBuffers + BufferCount; I != E; ++I) {
+ if (*I == Buf.Buffer) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found)
return ErrorCode::UnrecognizedBuffer;
- __sanitizer::BlockingMutexLock Guard(&Mutex);
+
+ __sanitizer::SpinMutexLock Guard(&Mutex);
+
+ // This points to a semantic bug, we really ought to not be releasing more
+ // buffers than we actually get.
+ if (LiveBuffers == 0)
+ return ErrorCode::NotEnoughMemory;
// Now that the buffer has been released, we mark it as "used".
- Buffers.emplace(Buffers.end(), Buf, true /* used */);
+ First->Buff = Buf;
+ First->Used = true;
Buf.Buffer = nullptr;
Buf.Size = 0;
+ --LiveBuffers;
+ if (++First == (Buffers + BufferCount))
+ First = Buffers;
+
return ErrorCode::Ok;
}
@@ -74,8 +103,12 @@ BufferQueue::ErrorCode BufferQueue::finalize() {
}
BufferQueue::~BufferQueue() {
- for (auto &T : Buffers) {
- auto &Buf = std::get<0>(T);
- free(Buf.Buffer);
+ for (auto I = Buffers, E = Buffers + BufferCount; I != E; ++I) {
+ auto &T = *I;
+ auto &Buf = T.Buff;
+ InternalFree(Buf.Buffer);
+ InternalFree(Buf.Extents);
}
+ delete[] Buffers;
+ delete[] OwnedBuffers;
}
diff --git a/lib/xray/xray_buffer_queue.h b/lib/xray/xray_buffer_queue.h
index e051695a297b5..1ceb582746165 100644
--- a/lib/xray/xray_buffer_queue.h
+++ b/lib/xray/xray_buffer_queue.h
@@ -15,11 +15,9 @@
#ifndef XRAY_BUFFER_QUEUE_H
#define XRAY_BUFFER_QUEUE_H
+#include <cstddef>
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_mutex.h"
-#include <deque>
-#include <unordered_set>
-#include <utility>
namespace __xray {
@@ -29,23 +27,50 @@ namespace __xray {
/// the "flight data recorder" (FDR) mode to support ongoing XRay function call
/// trace collection.
class BufferQueue {
-public:
+ public:
+ struct alignas(64) BufferExtents {
+ __sanitizer::atomic_uint64_t Size;
+ };
+
struct Buffer {
void *Buffer = nullptr;
size_t Size = 0;
+ BufferExtents* Extents;
+ };
+
+ private:
+ struct BufferRep {
+ // The managed buffer.
+ Buffer Buff;
+
+ // This is true if the buffer has been returned to the available queue, and
+ // is considered "used" by another thread.
+ bool Used = false;
};
-private:
+ // Size of each individual Buffer.
size_t BufferSize;
- // We use a bool to indicate whether the Buffer has been used in this
- // freelist implementation.
- std::deque<std::tuple<Buffer, bool>> Buffers;
- __sanitizer::BlockingMutex Mutex;
- std::unordered_set<void *> OwnedBuffers;
+ BufferRep *Buffers;
+ size_t BufferCount;
+
+ __sanitizer::SpinMutex Mutex;
__sanitizer::atomic_uint8_t Finalizing;
-public:
+ // Pointers to buffers managed/owned by the BufferQueue.
+ void **OwnedBuffers;
+
+ // Pointer to the next buffer to be handed out.
+ BufferRep *Next;
+
+ // Pointer to the entry in the array where the next released buffer will be
+ // placed.
+ BufferRep *First;
+
+ // Count of buffers that have been handed out through 'getBuffer'.
+ size_t LiveBuffers;
+
+ public:
enum class ErrorCode : unsigned {
Ok,
NotEnoughMemory,
@@ -56,16 +81,16 @@ public:
static const char *getErrorString(ErrorCode E) {
switch (E) {
- case ErrorCode::Ok:
- return "(none)";
- case ErrorCode::NotEnoughMemory:
- return "no available buffers in the queue";
- case ErrorCode::QueueFinalizing:
- return "queue already finalizing";
- case ErrorCode::UnrecognizedBuffer:
- return "buffer being returned not owned by buffer queue";
- case ErrorCode::AlreadyFinalized:
- return "queue already finalized";
+ case ErrorCode::Ok:
+ return "(none)";
+ case ErrorCode::NotEnoughMemory:
+ return "no available buffers in the queue";
+ case ErrorCode::QueueFinalizing:
+ return "queue already finalizing";
+ case ErrorCode::UnrecognizedBuffer:
+ return "buffer being returned not owned by buffer queue";
+ case ErrorCode::AlreadyFinalized:
+ return "queue already finalized";
}
return "unknown error";
}
@@ -82,15 +107,18 @@ public:
/// - BufferQueue is not finalising.
///
/// Returns:
- /// - std::errc::not_enough_memory on exceeding MaxSize.
- /// - no error when we find a Buffer.
- /// - std::errc::state_not_recoverable on finalising BufferQueue.
+ /// - ErrorCode::NotEnoughMemory on exceeding MaxSize.
+ /// - ErrorCode::Ok when we find a Buffer.
+ /// - ErrorCode::QueueFinalizing or ErrorCode::AlreadyFinalized on
+ /// a finalizing/finalized BufferQueue.
ErrorCode getBuffer(Buffer &Buf);
/// Updates |Buf| to point to nullptr, with size 0.
///
/// Returns:
- /// - ...
+ /// - ErrorCode::Ok when we successfully release the buffer.
+ /// - ErrorCode::UnrecognizedBuffer for when this BufferQueue does not own
+ /// the buffer being released.
ErrorCode releaseBuffer(Buffer &Buf);
bool finalizing() const {
@@ -107,17 +135,18 @@ public:
/// - All releaseBuffer operations will not fail.
///
/// After a call to finalize succeeds, all subsequent calls to finalize will
- /// fail with std::errc::state_not_recoverable.
+ /// fail with ErrorCode::QueueFinalizing.
ErrorCode finalize();
/// Applies the provided function F to each Buffer in the queue, only if the
/// Buffer is marked 'used' (i.e. has been the result of getBuffer(...) and a
- /// releaseBuffer(...) operation.
- template <class F> void apply(F Fn) {
- __sanitizer::BlockingMutexLock G(&Mutex);
- for (const auto &T : Buffers) {
- if (std::get<1>(T))
- Fn(std::get<0>(T));
+ /// releaseBuffer(...) operation).
+ template <class F>
+ void apply(F Fn) {
+ __sanitizer::SpinMutexLock G(&Mutex);
+ for (auto I = Buffers, E = Buffers + BufferCount; I != E; ++I) {
+ const auto &T = *I;
+ if (T.Used) Fn(T.Buff);
}
}
@@ -125,6 +154,6 @@ public:
~BufferQueue();
};
-} // namespace __xray
+} // namespace __xray
-#endif // XRAY_BUFFER_QUEUE_H
+#endif // XRAY_BUFFER_QUEUE_H
diff --git a/lib/xray/xray_fdr_log_records.h b/lib/xray/xray_fdr_log_records.h
index 3d6d38892c76f..324208db82ca0 100644
--- a/lib/xray/xray_fdr_log_records.h
+++ b/lib/xray/xray_fdr_log_records.h
@@ -30,7 +30,10 @@ struct alignas(16) MetadataRecord {
TSCWrap,
WalltimeMarker,
CustomEventMarker,
+ CallArgument,
+ BufferExtents,
};
+
// Use 7 bits to identify this record type.
/* RecordKinds */ uint8_t RecordKind : 7;
char Data[15];
diff --git a/lib/xray/xray_fdr_logging.cc b/lib/xray/xray_fdr_logging.cc
index a7e1382c3865b..1bfa10c21f5cd 100644
--- a/lib/xray/xray_fdr_logging.cc
+++ b/lib/xray/xray_fdr_logging.cc
@@ -15,15 +15,11 @@
//
//===----------------------------------------------------------------------===//
#include "xray_fdr_logging.h"
-#include <algorithm>
-#include <bitset>
-#include <cerrno>
-#include <cstring>
+#include <errno.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
-#include <unordered_map>
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
@@ -39,7 +35,7 @@
namespace __xray {
// Global BufferQueue.
-std::shared_ptr<BufferQueue> BQ;
+BufferQueue *BQ = nullptr;
__sanitizer::atomic_sint32_t LogFlushStatus = {
XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
@@ -52,19 +48,31 @@ __sanitizer::SpinMutex FDROptionsMutex;
XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
if (__sanitizer::atomic_load(&LoggingStatus,
__sanitizer::memory_order_acquire) !=
- XRayLogInitStatus::XRAY_LOG_FINALIZED)
+ XRayLogInitStatus::XRAY_LOG_FINALIZED) {
+ if (__sanitizer::Verbosity())
+ Report("Not flushing log, implementation is not finalized.\n");
return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+ }
s32 Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
if (!__sanitizer::atomic_compare_exchange_strong(
&LogFlushStatus, &Result, XRayLogFlushStatus::XRAY_LOG_FLUSHING,
- __sanitizer::memory_order_release))
+ __sanitizer::memory_order_release)) {
+
+ if (__sanitizer::Verbosity())
+ Report("Not flushing log, implementation is still finalizing.\n");
return static_cast<XRayLogFlushStatus>(Result);
+ }
- // Make a copy of the BufferQueue pointer to prevent other threads that may be
- // resetting it from blowing away the queue prematurely while we're dealing
- // with it.
- auto LocalBQ = BQ;
+ if (BQ == nullptr) {
+ if (__sanitizer::Verbosity())
+ Report("Cannot flush when global buffer queue is null.\n");
+ return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+ }
+
+ // We wait a number of milliseconds to allow threads to see that we've
+ // finalised before attempting to flush the log.
+ __sanitizer::SleepForMillis(flags()->xray_fdr_log_grace_period_ms);
// We write out the file in the following format:
//
@@ -95,24 +103,44 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond;
XRayFileHeader Header;
- Header.Version = 1;
+
+ // Version 2 of the log writes the extents of the buffer, instead of relying
+ // on an end-of-buffer record.
+ Header.Version = 2;
Header.Type = FileTypes::FDR_LOG;
Header.CycleFrequency = CycleFrequency;
+
// FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
// before setting the values in the header.
Header.ConstantTSC = 1;
Header.NonstopTSC = 1;
- Header.FdrData = FdrAdditionalHeaderData{LocalBQ->ConfiguredBufferSize()};
+ Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
retryingWriteAll(Fd, reinterpret_cast<char *>(&Header),
reinterpret_cast<char *>(&Header) + sizeof(Header));
- LocalBQ->apply([&](const BufferQueue::Buffer &B) {
- uint64_t BufferSize = B.Size;
- if (BufferSize > 0) {
+ BQ->apply([&](const BufferQueue::Buffer &B) {
+ // Starting at version 2 of the FDR logging implementation, we only write
+ // the records identified by the extents of the buffer. We use the Extents
+ // from the Buffer and write that out as the first record in the buffer.
+ // We still use a Metadata record, but fill in the extents instead for the
+ // data.
+ MetadataRecord ExtentsRecord;
+ auto BufferExtents = __sanitizer::atomic_load(
+ &B.Extents->Size, __sanitizer::memory_order_acquire);
+ assert(BufferExtents <= B.Size);
+ ExtentsRecord.Type = uint8_t(RecordType::Metadata);
+ ExtentsRecord.RecordKind =
+ uint8_t(MetadataRecord::RecordKinds::BufferExtents);
+ std::memcpy(ExtentsRecord.Data, &BufferExtents, sizeof(BufferExtents));
+ if (BufferExtents > 0) {
+ retryingWriteAll(Fd, reinterpret_cast<char *>(&ExtentsRecord),
+ reinterpret_cast<char *>(&ExtentsRecord) +
+ sizeof(MetadataRecord));
retryingWriteAll(Fd, reinterpret_cast<char *>(B.Buffer),
- reinterpret_cast<char *>(B.Buffer) + B.Size);
+ reinterpret_cast<char *>(B.Buffer) + BufferExtents);
}
});
+
__sanitizer::atomic_store(&LogFlushStatus,
XRayLogFlushStatus::XRAY_LOG_FLUSHED,
__sanitizer::memory_order_release);
@@ -124,8 +152,11 @@ XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT {
if (!__sanitizer::atomic_compare_exchange_strong(
&LoggingStatus, &CurrentStatus,
XRayLogInitStatus::XRAY_LOG_FINALIZING,
- __sanitizer::memory_order_release))
+ __sanitizer::memory_order_release)) {
+ if (__sanitizer::Verbosity())
+ Report("Cannot finalize log, implementation not initialized.\n");
return static_cast<XRayLogInitStatus>(CurrentStatus);
+ }
// Do special things to make the log finalize itself, and not allow any more
// operations to be performed until re-initialized.
@@ -146,7 +177,8 @@ XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT {
return static_cast<XRayLogInitStatus>(CurrentStatus);
// Release the in-memory buffer queue.
- BQ.reset();
+ delete BQ;
+ BQ = nullptr;
// Spin until the flushing status is flushed.
s32 CurrentFlushingStatus = XRayLogFlushStatus::XRAY_LOG_FLUSHED;
@@ -163,19 +195,22 @@ XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT {
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
}
-static std::tuple<uint64_t, unsigned char>
-getTimestamp() XRAY_NEVER_INSTRUMENT {
+struct TSCAndCPU {
+ uint64_t TSC = 0;
+ unsigned char CPU = 0;
+};
+
+static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
// We want to get the TSC as early as possible, so that we can check whether
// we've seen this CPU before. We also do it before we load anything else, to
// allow for forward progress with the scheduling.
- unsigned char CPU;
- uint64_t TSC;
+ TSCAndCPU Result;
// Test once for required CPU features
static bool TSCSupported = probeRequiredCPUFeatures();
if (TSCSupported) {
- TSC = __xray::readTSC(CPU);
+ Result.TSC = __xray::readTSC(Result.CPU);
} else {
// FIXME: This code needs refactoring as it appears in multiple locations
timespec TS;
@@ -184,32 +219,35 @@ getTimestamp() XRAY_NEVER_INSTRUMENT {
Report("clock_gettime(2) return %d, errno=%d", result, int(errno));
TS = {0, 0};
}
- CPU = 0;
- TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
+ Result.CPU = 0;
+ Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
}
- return std::make_tuple(TSC, CPU);
+ return Result;
}
void fdrLoggingHandleArg0(int32_t FuncId,
XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
- auto TSC_CPU = getTimestamp();
- __xray_fdr_internal::processFunctionHook(FuncId, Entry, std::get<0>(TSC_CPU),
- std::get<1>(TSC_CPU), clock_gettime,
- LoggingStatus, BQ);
+ auto TC = getTimestamp();
+ __xray_fdr_internal::processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, 0,
+ clock_gettime, BQ);
+}
+
+void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
+ uint64_t Arg) XRAY_NEVER_INSTRUMENT {
+ auto TC = getTimestamp();
+ __xray_fdr_internal::processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, Arg,
+ clock_gettime, BQ);
}
void fdrLoggingHandleCustomEvent(void *Event,
std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
using namespace __xray_fdr_internal;
- auto TSC_CPU = getTimestamp();
- auto &TSC = std::get<0>(TSC_CPU);
- auto &CPU = std::get<1>(TSC_CPU);
- thread_local bool Running = false;
+ auto TC = getTimestamp();
+ auto &TSC = TC.TSC;
+ auto &CPU = TC.CPU;
RecursionGuard Guard{Running};
- if (!Guard) {
- assert(Running && "RecursionGuard is buggy!");
+ if (!Guard)
return;
- }
if (EventSize > std::numeric_limits<int32_t>::max()) {
using Empty = struct {};
static Empty Once = [&] {
@@ -220,15 +258,16 @@ void fdrLoggingHandleCustomEvent(void *Event,
(void)Once;
}
int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
- if (!isLogInitializedAndReady(LocalBQ, TSC, CPU, clock_gettime))
+ auto &TLD = getThreadLocalData();
+ if (!isLogInitializedAndReady(TLD.BQ, TSC, CPU, clock_gettime))
return;
// Here we need to prepare the log to handle:
// - The metadata record we're going to write. (16 bytes)
// - The additional data we're going to write. Currently, that's the size of
// the event we're going to dump into the log as free-form bytes.
- if (!prepareBuffer(clock_gettime, MetadataRecSize + EventSize)) {
- LocalBQ = nullptr;
+ if (!prepareBuffer(TSC, CPU, clock_gettime, MetadataRecSize + EventSize)) {
+ TLD.BQ = nullptr;
return;
}
@@ -240,27 +279,35 @@ void fdrLoggingHandleCustomEvent(void *Event,
CustomEvent.Type = uint8_t(RecordType::Metadata);
CustomEvent.RecordKind =
uint8_t(MetadataRecord::RecordKinds::CustomEventMarker);
- constexpr auto TSCSize = sizeof(std::get<0>(TSC_CPU));
+ constexpr auto TSCSize = sizeof(TC.TSC);
std::memcpy(&CustomEvent.Data, &ReducedEventSize, sizeof(int32_t));
std::memcpy(&CustomEvent.Data[sizeof(int32_t)], &TSC, TSCSize);
- std::memcpy(RecordPtr, &CustomEvent, sizeof(CustomEvent));
- RecordPtr += sizeof(CustomEvent);
- std::memcpy(RecordPtr, Event, ReducedEventSize);
+ std::memcpy(TLD.RecordPtr, &CustomEvent, sizeof(CustomEvent));
+ TLD.RecordPtr += sizeof(CustomEvent);
+ std::memcpy(TLD.RecordPtr, Event, ReducedEventSize);
+ incrementExtents(MetadataRecSize + EventSize);
endBufferIfFull();
}
XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
void *Options,
size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
- if (OptionsSize != sizeof(FDRLoggingOptions))
+ if (OptionsSize != sizeof(FDRLoggingOptions)) {
+ if (__sanitizer::Verbosity())
+ Report("Cannot initialize FDR logging; wrong size for options: %d\n",
+ OptionsSize);
return static_cast<XRayLogInitStatus>(__sanitizer::atomic_load(
&LoggingStatus, __sanitizer::memory_order_acquire));
+ }
s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
if (!__sanitizer::atomic_compare_exchange_strong(
&LoggingStatus, &CurrentStatus,
XRayLogInitStatus::XRAY_LOG_INITIALIZING,
- __sanitizer::memory_order_release))
+ __sanitizer::memory_order_release)) {
+ if (__sanitizer::Verbosity())
+ Report("Cannot initialize already initialized implementation.\n");
return static_cast<XRayLogInitStatus>(CurrentStatus);
+ }
{
__sanitizer::SpinMutexLock Guard(&FDROptionsMutex);
@@ -268,12 +315,40 @@ XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
}
bool Success = false;
- BQ = std::make_shared<BufferQueue>(BufferSize, BufferMax, Success);
+
+ if (BQ != nullptr) {
+ delete BQ;
+ BQ = nullptr;
+ }
+
+ if (BQ == nullptr)
+ BQ = new BufferQueue(BufferSize, BufferMax, Success);
+
if (!Success) {
Report("BufferQueue init failed.\n");
+ if (BQ != nullptr) {
+ delete BQ;
+ BQ = nullptr;
+ }
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
}
+ static bool UNUSED Once = [] {
+ pthread_key_create(&__xray_fdr_internal::Key, +[](void *) {
+ auto &TLD = __xray_fdr_internal::getThreadLocalData();
+ if (TLD.BQ == nullptr)
+ return;
+ auto EC = TLD.BQ->releaseBuffer(TLD.Buffer);
+ if (EC != BufferQueue::ErrorCode::Ok)
+ Report("At thread exit, failed to release buffer at %p; error=%s\n",
+ TLD.Buffer.Buffer, BufferQueue::getErrorString(EC));
+ });
+ return false;
+ }();
+
+ // Arg1 handler should go in first to avoid concurrent code accidentally
+ // falling back to arg0 when it should have ran arg1.
+ __xray_set_handler_arg1(fdrLoggingHandleArg1);
// Install the actual handleArg0 handler after initialising the buffers.
__xray_set_handler(fdrLoggingHandleArg0);
__xray_set_customevent_handler(fdrLoggingHandleCustomEvent);
@@ -281,20 +356,31 @@ XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax,
__sanitizer::atomic_store(&LoggingStatus,
XRayLogInitStatus::XRAY_LOG_INITIALIZED,
__sanitizer::memory_order_release);
- Report("XRay FDR init successful.\n");
+
+ if (__sanitizer::Verbosity())
+ Report("XRay FDR init successful.\n");
return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}
-} // namespace __xray
-
-static auto UNUSED Unused = [] {
+bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
using namespace __xray;
- if (flags()->xray_fdr_log) {
- XRayLogImpl Impl{
- fdrLoggingInit, fdrLoggingFinalize, fdrLoggingHandleArg0,
- fdrLoggingFlush,
- };
+ XRayLogImpl Impl{
+ fdrLoggingInit,
+ fdrLoggingFinalize,
+ fdrLoggingHandleArg0,
+ fdrLoggingFlush,
+ };
+ auto RegistrationResult = __xray_log_register_mode("xray-fdr", Impl);
+ if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
+ __sanitizer::Verbosity())
+ Report("Cannot register XRay FDR mode to 'xray-fdr'; error = %d\n",
+ RegistrationResult);
+ if (flags()->xray_fdr_log ||
+ !__sanitizer::internal_strcmp(flags()->xray_mode, "xray-fdr"))
__xray_set_log_impl(Impl);
- }
return true;
-}();
+}
+
+} // namespace __xray
+
+static auto UNUSED Unused = __xray::fdrLogDynamicInitializer();
diff --git a/lib/xray/xray_fdr_logging.h b/lib/xray/xray_fdr_logging.h
index 426b54dc78843..1639d550a44c6 100644
--- a/lib/xray/xray_fdr_logging.h
+++ b/lib/xray/xray_fdr_logging.h
@@ -30,6 +30,7 @@ XRayLogInitStatus fdrLoggingInit(size_t BufferSize, size_t BufferMax,
void *Options, size_t OptionsSize);
XRayLogInitStatus fdrLoggingFinalize();
void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry);
+void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, uint64_t Arg1);
XRayLogFlushStatus fdrLoggingFlush();
XRayLogInitStatus fdrLoggingReset();
diff --git a/lib/xray/xray_fdr_logging_impl.h b/lib/xray/xray_fdr_logging_impl.h
index 4a1d80fd0ebae..59eab55b2573c 100644
--- a/lib/xray/xray_fdr_logging_impl.h
+++ b/lib/xray/xray_fdr_logging_impl.h
@@ -18,13 +18,13 @@
#define XRAY_XRAY_FDR_LOGGING_IMPL_H
#include <cassert>
-#include <cstdint>
+#include <cstddef>
#include <cstring>
#include <limits>
-#include <memory>
-#include <string>
+#include <pthread.h>
#include <sys/syscall.h>
#include <time.h>
+#include <type_traits>
#include <unistd.h>
#include "sanitizer_common/sanitizer_common.h"
@@ -52,57 +52,104 @@ __sanitizer::atomic_sint32_t LoggingStatus = {
/// cooperation with xray_fdr_logging class, so be careful and think twice.
namespace __xray_fdr_internal {
-/// Writes the new buffer record and wallclock time that begin a buffer for a
-/// thread to MemPtr and increments MemPtr. Bypasses the thread local state
-/// machine and writes directly to memory without checks.
-static void writeNewBufferPreamble(pid_t Tid, timespec TS, char *&MemPtr);
+/// Writes the new buffer record and wallclock time that begin a buffer for the
+/// current thread.
+static void writeNewBufferPreamble(pid_t Tid, timespec TS);
-/// Write a metadata record to switch to a new CPU to MemPtr and increments
-/// MemPtr. Bypasses the thread local state machine and writes directly to
-/// memory without checks.
-static void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC, char *&MemPtr);
-
-/// Writes an EOB metadata record to MemPtr and increments MemPtr. Bypasses the
-/// thread local state machine and writes directly to memory without checks.
-static void writeEOBMetadata(char *&MemPtr);
-
-/// Writes a TSC Wrap metadata record to MemPtr and increments MemPtr. Bypasses
-/// the thread local state machine and directly writes to memory without checks.
-static void writeTSCWrapMetadata(uint64_t TSC, char *&MemPtr);
-
-/// Writes a Function Record to MemPtr and increments MemPtr. Bypasses the
-/// thread local state machine and writes the function record directly to
-/// memory.
+/// Writes a Function Record to the buffer associated with the current thread.
static void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
- XRayEntryType EntryType, char *&MemPtr);
+ XRayEntryType EntryType);
/// Sets up a new buffer in thread_local storage and writes a preamble. The
/// wall_clock_reader function is used to populate the WallTimeRecord entry.
static void setupNewBuffer(int (*wall_clock_reader)(clockid_t,
struct timespec *));
-/// Called to record CPU time for a new CPU within the current thread.
-static void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC);
-
-/// Called to close the buffer when the thread exhausts the buffer or when the
-/// thread exits (via a thread local variable destructor).
-static void writeEOBMetadata();
-
/// TSC Wrap records are written when a TSC delta encoding scheme overflows.
static void writeTSCWrapMetadata(uint64_t TSC);
-/// Here's where the meat of the processing happens. The writer captures
-/// function entry, exit and tail exit points with a time and will create
-/// TSCWrap, NewCPUId and Function records as necessary. The writer might
-/// walk backward through its buffer and erase trivial functions to avoid
-/// polluting the log and may use the buffer queue to obtain or release a
-/// buffer.
-static void processFunctionHook(int32_t FuncId, XRayEntryType Entry,
- uint64_t TSC, unsigned char CPU,
- int (*wall_clock_reader)(clockid_t,
- struct timespec *),
- __sanitizer::atomic_sint32_t &LoggingStatus,
- const std::shared_ptr<BufferQueue> &BQ);
+// Group together thread-local-data in a struct, then hide it behind a function
+// call so that it can be initialized on first use instead of as a global. We
+// force the alignment to 64-bytes for x86 cache line alignment, as this
+// structure is used in the hot path of implementation.
+struct alignas(64) ThreadLocalData {
+ BufferQueue::Buffer Buffer;
+ char *RecordPtr = nullptr;
+ // The number of FunctionEntry records immediately preceding RecordPtr.
+ uint8_t NumConsecutiveFnEnters = 0;
+
+ // The number of adjacent, consecutive pairs of FunctionEntry, Tail Exit
+ // records preceding RecordPtr.
+ uint8_t NumTailCalls = 0;
+
+ // We use a thread_local variable to keep track of which CPUs we've already
+ // run, and the TSC times for these CPUs. This allows us to stop repeating the
+ // CPU field in the function records.
+ //
+ // We assume that we'll support only 65536 CPUs for x86_64.
+ uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
+ uint64_t LastTSC = 0;
+ uint64_t LastFunctionEntryTSC = 0;
+
+ // Make sure a thread that's ever called handleArg0 has a thread-local
+ // live reference to the buffer queue for this particular instance of
+ // FDRLogging, and that we're going to clean it up when the thread exits.
+ BufferQueue *BQ = nullptr;
+};
+
+static_assert(std::is_trivially_destructible<ThreadLocalData>::value,
+ "ThreadLocalData must be trivially destructible");
+
+static constexpr auto MetadataRecSize = sizeof(MetadataRecord);
+static constexpr auto FunctionRecSize = sizeof(FunctionRecord);
+
+// Use a global pthread key to identify thread-local data for logging.
+static pthread_key_t Key;
+
+// This function will initialize the thread-local data structure used by the FDR
+// logging implementation and return a reference to it. The implementation
+// details require a bit of care to maintain.
+//
+// First, some requirements on the implementation in general:
+//
+// - XRay handlers should not call any memory allocation routines that may
+// delegate to an instrumented implementation. This means functions like
+// malloc() and free() should not be called while instrumenting.
+//
+// - We would like to use some thread-local data initialized on first-use of
+// the XRay instrumentation. These allow us to implement unsynchronized
+// routines that access resources associated with the thread.
+//
+// The implementation here uses a few mechanisms that allow us to provide both
+// the requirements listed above. We do this by:
+//
+// 1. Using a thread-local aligned storage buffer for representing the
+// ThreadLocalData struct. This data will be uninitialized memory by
+// design.
+//
+// 2. Not requiring a thread exit handler/implementation, keeping the
+// thread-local as purely a collection of references/data that do not
+// require cleanup.
+//
+// We're doing this to avoid using a `thread_local` object that has a
+// non-trivial destructor, because the C++ runtime might call std::malloc(...)
+// to register calls to destructors. Deadlocks may arise when, for example, an
+// externally provided malloc implementation is XRay instrumented, and
+// initializing the thread-locals involves calling into malloc. A malloc
+// implementation that does global synchronization might be holding a lock for a
+// critical section, calling a function that might be XRay instrumented (and
+// thus in turn calling into malloc by virtue of registration of the
+// thread_local's destructor).
+static ThreadLocalData &getThreadLocalData() {
+ static_assert(alignof(ThreadLocalData) >= 64,
+ "ThreadLocalData must be cache line aligned.");
+ thread_local ThreadLocalData TLD;
+ thread_local bool UNUSED ThreadOnce = [] {
+ pthread_setspecific(Key, &TLD);
+ return false;
+ }();
+ return TLD;
+}
//-----------------------------------------------------------------------------|
// The rest of the file is implementation. |
@@ -113,71 +160,12 @@ static void processFunctionHook(int32_t FuncId, XRayEntryType Entry,
namespace {
-thread_local BufferQueue::Buffer Buffer;
-thread_local char *RecordPtr = nullptr;
-
-// The number of FunctionEntry records immediately preceding RecordPtr.
-thread_local uint8_t NumConsecutiveFnEnters = 0;
-
-// The number of adjacent, consecutive pairs of FunctionEntry, Tail Exit
-// records preceding RecordPtr.
-thread_local uint8_t NumTailCalls = 0;
-
-constexpr auto MetadataRecSize = sizeof(MetadataRecord);
-constexpr auto FunctionRecSize = sizeof(FunctionRecord);
-
-// We use a thread_local variable to keep track of which CPUs we've already
-// run, and the TSC times for these CPUs. This allows us to stop repeating the
-// CPU field in the function records.
-//
-// We assume that we'll support only 65536 CPUs for x86_64.
-thread_local uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
-thread_local uint64_t LastTSC = 0;
-thread_local uint64_t LastFunctionEntryTSC = 0;
-
-class ThreadExitBufferCleanup {
- std::shared_ptr<BufferQueue> &Buffers;
- BufferQueue::Buffer &Buffer;
-
-public:
- explicit ThreadExitBufferCleanup(std::shared_ptr<BufferQueue> &BQ,
- BufferQueue::Buffer &Buffer)
- XRAY_NEVER_INSTRUMENT : Buffers(BQ),
- Buffer(Buffer) {}
-
- ~ThreadExitBufferCleanup() noexcept XRAY_NEVER_INSTRUMENT {
- if (RecordPtr == nullptr)
- return;
-
- // We make sure that upon exit, a thread will write out the EOB
- // MetadataRecord in the thread-local log, and also release the buffer to
- // the queue.
- assert((RecordPtr + MetadataRecSize) - static_cast<char *>(Buffer.Buffer) >=
- static_cast<ptrdiff_t>(MetadataRecSize));
- if (Buffers) {
- writeEOBMetadata();
- auto EC = Buffers->releaseBuffer(Buffer);
- if (EC != BufferQueue::ErrorCode::Ok)
- Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
- BufferQueue::getErrorString(EC));
- Buffers = nullptr;
- return;
- }
- }
-};
-
-// Make sure a thread that's ever called handleArg0 has a thread-local
-// live reference to the buffer queue for this particular instance of
-// FDRLogging, and that we're going to clean it up when the thread exits.
-thread_local std::shared_ptr<BufferQueue> LocalBQ = nullptr;
-thread_local ThreadExitBufferCleanup Cleanup(LocalBQ, Buffer);
-
class RecursionGuard {
- bool &Running;
+ volatile bool &Running;
const bool Valid;
public:
- explicit RecursionGuard(bool &R) : Running(R), Valid(!R) {
+ explicit RecursionGuard(volatile bool &R) : Running(R), Valid(!R) {
if (Valid)
Running = true;
}
@@ -195,34 +183,29 @@ public:
}
};
-inline bool loggingInitialized(
- const __sanitizer::atomic_sint32_t &LoggingStatus) XRAY_NEVER_INSTRUMENT {
- return __sanitizer::atomic_load(&LoggingStatus,
- __sanitizer::memory_order_acquire) ==
- XRayLogInitStatus::XRAY_LOG_INITIALIZED;
-}
-
} // namespace
-inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
- char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+static void writeNewBufferPreamble(pid_t Tid,
+ timespec TS) XRAY_NEVER_INSTRUMENT {
static constexpr int InitRecordsCount = 2;
- std::aligned_storage<sizeof(MetadataRecord)>::type Records[InitRecordsCount];
+ auto &TLD = getThreadLocalData();
+ MetadataRecord Metadata[InitRecordsCount];
{
// Write out a MetadataRecord to signify that this is the start of a new
// buffer, associated with a particular thread, with a new CPU. For the
// data, we have 15 bytes to squeeze as much information as we can. At this
// point we only write down the following bytes:
// - Thread ID (pid_t, 4 bytes)
- auto &NewBuffer = *reinterpret_cast<MetadataRecord *>(&Records[0]);
+ auto &NewBuffer = Metadata[0];
NewBuffer.Type = uint8_t(RecordType::Metadata);
NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer);
std::memcpy(&NewBuffer.Data, &Tid, sizeof(pid_t));
}
+
// Also write the WalltimeMarker record.
{
static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes");
- auto &WalltimeMarker = *reinterpret_cast<MetadataRecord *>(&Records[1]);
+ auto &WalltimeMarker = Metadata[1];
WalltimeMarker.Type = uint8_t(RecordType::Metadata);
WalltimeMarker.RecordKind =
uint8_t(MetadataRecord::RecordKinds::WalltimeMarker);
@@ -235,26 +218,48 @@ inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
std::memcpy(WalltimeMarker.Data, &Seconds, sizeof(Seconds));
std::memcpy(WalltimeMarker.Data + sizeof(Seconds), &Micros, sizeof(Micros));
}
- std::memcpy(MemPtr, Records, sizeof(MetadataRecord) * InitRecordsCount);
- MemPtr += sizeof(MetadataRecord) * InitRecordsCount;
- NumConsecutiveFnEnters = 0;
- NumTailCalls = 0;
+
+ TLD.NumConsecutiveFnEnters = 0;
+ TLD.NumTailCalls = 0;
+ if (TLD.BQ == nullptr || TLD.BQ->finalizing())
+ return;
+ std::memcpy(TLD.RecordPtr, Metadata, sizeof(Metadata));
+ TLD.RecordPtr += sizeof(Metadata);
+ // Since we write out the extents as the first metadata record of the
+ // buffer, we need to write out the extents including the extents record.
+ __sanitizer::atomic_store(&TLD.Buffer.Extents->Size, sizeof(Metadata),
+ __sanitizer::memory_order_release);
}
inline void setupNewBuffer(int (*wall_clock_reader)(
clockid_t, struct timespec *)) XRAY_NEVER_INSTRUMENT {
- RecordPtr = static_cast<char *>(Buffer.Buffer);
+ auto &TLD = getThreadLocalData();
+ auto &B = TLD.Buffer;
+ TLD.RecordPtr = static_cast<char *>(B.Buffer);
pid_t Tid = syscall(SYS_gettid);
timespec TS{0, 0};
// This is typically clock_gettime, but callers have injection ability.
wall_clock_reader(CLOCK_MONOTONIC, &TS);
- writeNewBufferPreamble(Tid, TS, RecordPtr);
- NumConsecutiveFnEnters = 0;
- NumTailCalls = 0;
+ writeNewBufferPreamble(Tid, TS);
+ TLD.NumConsecutiveFnEnters = 0;
+ TLD.NumTailCalls = 0;
+}
+
+static void incrementExtents(size_t Add) {
+ auto &TLD = getThreadLocalData();
+ __sanitizer::atomic_fetch_add(&TLD.Buffer.Extents->Size, Add,
+ __sanitizer::memory_order_acq_rel);
+}
+
+static void decrementExtents(size_t Subtract) {
+ auto &TLD = getThreadLocalData();
+ __sanitizer::atomic_fetch_sub(&TLD.Buffer.Extents->Size, Subtract,
+ __sanitizer::memory_order_acq_rel);
}
-inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
- char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+inline void writeNewCPUIdMetadata(uint16_t CPU,
+ uint64_t TSC) XRAY_NEVER_INSTRUMENT {
+ auto &TLD = getThreadLocalData();
MetadataRecord NewCPUId;
NewCPUId.Type = uint8_t(RecordType::Metadata);
NewCPUId.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewCPUId);
@@ -265,34 +270,15 @@ inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
// Total = 10 bytes.
std::memcpy(&NewCPUId.Data, &CPU, sizeof(CPU));
std::memcpy(&NewCPUId.Data[sizeof(CPU)], &TSC, sizeof(TSC));
- std::memcpy(MemPtr, &NewCPUId, sizeof(MetadataRecord));
- MemPtr += sizeof(MetadataRecord);
- NumConsecutiveFnEnters = 0;
- NumTailCalls = 0;
-}
-
-inline void writeNewCPUIdMetadata(uint16_t CPU,
- uint64_t TSC) XRAY_NEVER_INSTRUMENT {
- writeNewCPUIdMetadata(CPU, TSC, RecordPtr);
-}
-
-inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT {
- MetadataRecord EOBMeta;
- EOBMeta.Type = uint8_t(RecordType::Metadata);
- EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer);
- // For now we don't write any bytes into the Data field.
- std::memcpy(MemPtr, &EOBMeta, sizeof(MetadataRecord));
- MemPtr += sizeof(MetadataRecord);
- NumConsecutiveFnEnters = 0;
- NumTailCalls = 0;
-}
-
-inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
- writeEOBMetadata(RecordPtr);
+ std::memcpy(TLD.RecordPtr, &NewCPUId, sizeof(MetadataRecord));
+ TLD.RecordPtr += sizeof(MetadataRecord);
+ TLD.NumConsecutiveFnEnters = 0;
+ TLD.NumTailCalls = 0;
+ incrementExtents(sizeof(MetadataRecord));
}
-inline void writeTSCWrapMetadata(uint64_t TSC,
- char *&MemPtr) XRAY_NEVER_INSTRUMENT {
+inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
+ auto &TLD = getThreadLocalData();
MetadataRecord TSCWrap;
TSCWrap.Type = uint8_t(RecordType::Metadata);
TSCWrap.RecordKind = uint8_t(MetadataRecord::RecordKinds::TSCWrap);
@@ -301,58 +287,67 @@ inline void writeTSCWrapMetadata(uint64_t TSC,
// - Full TSC (uint64_t, 8 bytes)
// Total = 8 bytes.
std::memcpy(&TSCWrap.Data, &TSC, sizeof(TSC));
- std::memcpy(MemPtr, &TSCWrap, sizeof(MetadataRecord));
- MemPtr += sizeof(MetadataRecord);
- NumConsecutiveFnEnters = 0;
- NumTailCalls = 0;
+ std::memcpy(TLD.RecordPtr, &TSCWrap, sizeof(MetadataRecord));
+ TLD.RecordPtr += sizeof(MetadataRecord);
+ TLD.NumConsecutiveFnEnters = 0;
+ TLD.NumTailCalls = 0;
+ incrementExtents(sizeof(MetadataRecord));
}
-inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
- writeTSCWrapMetadata(TSC, RecordPtr);
+// Call Argument metadata records store the arguments to a function in the
+// order of their appearance; holes are not supported by the buffer format.
+static inline void writeCallArgumentMetadata(uint64_t A) XRAY_NEVER_INSTRUMENT {
+ auto &TLD = getThreadLocalData();
+ MetadataRecord CallArg;
+ CallArg.Type = uint8_t(RecordType::Metadata);
+ CallArg.RecordKind = uint8_t(MetadataRecord::RecordKinds::CallArgument);
+
+ std::memcpy(CallArg.Data, &A, sizeof(A));
+ std::memcpy(TLD.RecordPtr, &CallArg, sizeof(MetadataRecord));
+ TLD.RecordPtr += sizeof(MetadataRecord);
+ incrementExtents(sizeof(MetadataRecord));
}
-inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
- XRayEntryType EntryType,
- char *&MemPtr) XRAY_NEVER_INSTRUMENT {
- std::aligned_storage<sizeof(FunctionRecord), alignof(FunctionRecord)>::type
- AlignedFuncRecordBuffer;
- auto &FuncRecord =
- *reinterpret_cast<FunctionRecord *>(&AlignedFuncRecordBuffer);
+static inline void
+writeFunctionRecord(int FuncId, uint32_t TSCDelta,
+ XRayEntryType EntryType) XRAY_NEVER_INSTRUMENT {
+ FunctionRecord FuncRecord;
FuncRecord.Type = uint8_t(RecordType::Function);
// Only take 28 bits of the function id.
FuncRecord.FuncId = FuncId & ~(0x0F << 28);
FuncRecord.TSCDelta = TSCDelta;
+ auto &TLD = getThreadLocalData();
switch (EntryType) {
case XRayEntryType::ENTRY:
- ++NumConsecutiveFnEnters;
+ ++TLD.NumConsecutiveFnEnters;
FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
break;
case XRayEntryType::LOG_ARGS_ENTRY:
// We should not rewind functions with logged args.
- NumConsecutiveFnEnters = 0;
- NumTailCalls = 0;
+ TLD.NumConsecutiveFnEnters = 0;
+ TLD.NumTailCalls = 0;
FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
break;
case XRayEntryType::EXIT:
// If we've decided to log the function exit, we will never erase the log
// before it.
- NumConsecutiveFnEnters = 0;
- NumTailCalls = 0;
+ TLD.NumConsecutiveFnEnters = 0;
+ TLD.NumTailCalls = 0;
FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionExit);
break;
case XRayEntryType::TAIL:
// If we just entered the function we're tail exiting from or erased every
// invocation since then, this function entry tail pair is a candidate to
// be erased when the child function exits.
- if (NumConsecutiveFnEnters > 0) {
- ++NumTailCalls;
- NumConsecutiveFnEnters = 0;
+ if (TLD.NumConsecutiveFnEnters > 0) {
+ ++TLD.NumTailCalls;
+ TLD.NumConsecutiveFnEnters = 0;
} else {
// We will never be able to erase this tail call since we have logged
// something in between the function entry and tail exit.
- NumTailCalls = 0;
- NumConsecutiveFnEnters = 0;
+ TLD.NumTailCalls = 0;
+ TLD.NumConsecutiveFnEnters = 0;
}
FuncRecord.RecordKind =
uint8_t(FunctionRecord::RecordKinds::FunctionTailExit);
@@ -370,8 +365,9 @@ inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
}
}
- std::memcpy(MemPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord));
- MemPtr += sizeof(FunctionRecord);
+ std::memcpy(TLD.RecordPtr, &FuncRecord, sizeof(FunctionRecord));
+ TLD.RecordPtr += sizeof(FunctionRecord);
+ incrementExtents(sizeof(FunctionRecord));
}
static uint64_t thresholdTicks() {
@@ -387,23 +383,21 @@ static uint64_t thresholdTicks() {
// "Function Entry" record and any "Tail Call Exit" records after that.
static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
uint64_t &LastFunctionEntryTSC, int32_t FuncId) {
- using AlignedFuncStorage =
- std::aligned_storage<sizeof(FunctionRecord),
- alignof(FunctionRecord)>::type;
- RecordPtr -= FunctionRecSize;
- AlignedFuncStorage AlignedFuncRecordBuffer;
- const auto &FuncRecord = *reinterpret_cast<FunctionRecord *>(
- std::memcpy(&AlignedFuncRecordBuffer, RecordPtr, FunctionRecSize));
+ auto &TLD = getThreadLocalData();
+ TLD.RecordPtr -= FunctionRecSize;
+ decrementExtents(FunctionRecSize);
+ FunctionRecord FuncRecord;
+ std::memcpy(&FuncRecord, TLD.RecordPtr, FunctionRecSize);
assert(FuncRecord.RecordKind ==
uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
"Expected to find function entry recording when rewinding.");
assert(FuncRecord.FuncId == (FuncId & ~(0x0F << 28)) &&
"Expected matching function id when rewinding Exit");
- --NumConsecutiveFnEnters;
+ --TLD.NumConsecutiveFnEnters;
LastTSC -= FuncRecord.TSCDelta;
// We unwound one call. Update the state and return without writing a log.
- if (NumConsecutiveFnEnters != 0) {
+ if (TLD.NumConsecutiveFnEnters != 0) {
LastFunctionEntryTSC -= FuncRecord.TSCDelta;
return;
}
@@ -413,22 +407,19 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
// exited from via this exit.
LastFunctionEntryTSC = 0;
auto RewindingTSC = LastTSC;
- auto RewindingRecordPtr = RecordPtr - FunctionRecSize;
- while (NumTailCalls > 0) {
- AlignedFuncStorage TailExitRecordBuffer;
+ auto RewindingRecordPtr = TLD.RecordPtr - FunctionRecSize;
+ while (TLD.NumTailCalls > 0) {
// Rewind the TSC back over the TAIL EXIT record.
- const auto &ExpectedTailExit =
- *reinterpret_cast<FunctionRecord *>(std::memcpy(
- &TailExitRecordBuffer, RewindingRecordPtr, FunctionRecSize));
+ FunctionRecord ExpectedTailExit;
+ std::memcpy(&ExpectedTailExit, RewindingRecordPtr, FunctionRecSize);
assert(ExpectedTailExit.RecordKind ==
uint8_t(FunctionRecord::RecordKinds::FunctionTailExit) &&
"Expected to find tail exit when rewinding.");
RewindingRecordPtr -= FunctionRecSize;
RewindingTSC -= ExpectedTailExit.TSCDelta;
- AlignedFuncStorage FunctionEntryBuffer;
- const auto &ExpectedFunctionEntry = *reinterpret_cast<FunctionRecord *>(
- std::memcpy(&FunctionEntryBuffer, RewindingRecordPtr, FunctionRecSize));
+ FunctionRecord ExpectedFunctionEntry;
+ std::memcpy(&ExpectedFunctionEntry, RewindingRecordPtr, FunctionRecSize);
assert(ExpectedFunctionEntry.RecordKind ==
uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
"Expected to find function entry when rewinding tail call.");
@@ -437,80 +428,87 @@ static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC,
// This tail call exceeded the threshold duration. It will not be erased.
if ((TSC - RewindingTSC) >= thresholdTicks()) {
- NumTailCalls = 0;
+ TLD.NumTailCalls = 0;
return;
}
// We can erase a tail exit pair that we're exiting through since
// its duration is under threshold.
- --NumTailCalls;
+ --TLD.NumTailCalls;
RewindingRecordPtr -= FunctionRecSize;
RewindingTSC -= ExpectedFunctionEntry.TSCDelta;
- RecordPtr -= 2 * FunctionRecSize;
+ TLD.RecordPtr -= 2 * FunctionRecSize;
LastTSC = RewindingTSC;
+ decrementExtents(2 * FunctionRecSize);
}
}
-inline bool releaseThreadLocalBuffer(BufferQueue *BQ) {
- auto EC = BQ->releaseBuffer(Buffer);
+inline bool releaseThreadLocalBuffer(BufferQueue &BQArg) {
+ auto &TLD = getThreadLocalData();
+ auto EC = BQArg.releaseBuffer(TLD.Buffer);
if (EC != BufferQueue::ErrorCode::Ok) {
- Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer,
+ Report("Failed to release buffer at %p; error=%s\n", TLD.Buffer.Buffer,
BufferQueue::getErrorString(EC));
return false;
}
return true;
}
-inline bool prepareBuffer(int (*wall_clock_reader)(clockid_t,
+inline bool prepareBuffer(uint64_t TSC, unsigned char CPU,
+ int (*wall_clock_reader)(clockid_t,
struct timespec *),
size_t MaxSize) XRAY_NEVER_INSTRUMENT {
- char *BufferStart = static_cast<char *>(Buffer.Buffer);
- if ((RecordPtr + MaxSize) > (BufferStart + Buffer.Size - MetadataRecSize)) {
- writeEOBMetadata();
- if (!releaseThreadLocalBuffer(LocalBQ.get()))
+ auto &TLD = getThreadLocalData();
+ char *BufferStart = static_cast<char *>(TLD.Buffer.Buffer);
+ if ((TLD.RecordPtr + MaxSize) > (BufferStart + TLD.Buffer.Size)) {
+ if (!releaseThreadLocalBuffer(*TLD.BQ))
return false;
- auto EC = LocalBQ->getBuffer(Buffer);
+ auto EC = TLD.BQ->getBuffer(TLD.Buffer);
if (EC != BufferQueue::ErrorCode::Ok) {
Report("Failed to acquire a buffer; error=%s\n",
BufferQueue::getErrorString(EC));
return false;
}
setupNewBuffer(wall_clock_reader);
+
+ // Always write the CPU metadata as the first record in the buffer.
+ writeNewCPUIdMetadata(CPU, TSC);
}
return true;
}
-inline bool isLogInitializedAndReady(
- std::shared_ptr<BufferQueue> &LocalBQ, uint64_t TSC, unsigned char CPU,
- int (*wall_clock_reader)(clockid_t,
- struct timespec *)) XRAY_NEVER_INSTRUMENT {
+inline bool
+isLogInitializedAndReady(BufferQueue *LBQ, uint64_t TSC, unsigned char CPU,
+ int (*wall_clock_reader)(clockid_t, struct timespec *))
+ XRAY_NEVER_INSTRUMENT {
// Bail out right away if logging is not initialized yet.
// We should take the opportunity to release the buffer though.
auto Status = __sanitizer::atomic_load(&LoggingStatus,
__sanitizer::memory_order_acquire);
+ auto &TLD = getThreadLocalData();
if (Status != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
- if (RecordPtr != nullptr &&
+ if (TLD.RecordPtr != nullptr &&
(Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
Status == XRayLogInitStatus::XRAY_LOG_FINALIZED)) {
- writeEOBMetadata();
- if (!releaseThreadLocalBuffer(LocalBQ.get()))
+ if (!releaseThreadLocalBuffer(*LBQ))
return false;
- RecordPtr = nullptr;
- LocalBQ = nullptr;
+ TLD.RecordPtr = nullptr;
return false;
}
return false;
}
- if (!loggingInitialized(LoggingStatus) || LocalBQ->finalizing()) {
- writeEOBMetadata();
- if (!releaseThreadLocalBuffer(LocalBQ.get()))
+ if (__sanitizer::atomic_load(&LoggingStatus,
+ __sanitizer::memory_order_acquire) !=
+ XRayLogInitStatus::XRAY_LOG_INITIALIZED ||
+ LBQ->finalizing()) {
+ if (!releaseThreadLocalBuffer(*LBQ))
return false;
- RecordPtr = nullptr;
+ TLD.RecordPtr = nullptr;
}
- if (Buffer.Buffer == nullptr) {
- auto EC = LocalBQ->getBuffer(Buffer);
+ if (TLD.Buffer.Buffer == nullptr) {
+ auto EC = LBQ->getBuffer(TLD.Buffer);
if (EC != BufferQueue::ErrorCode::Ok) {
auto LS = __sanitizer::atomic_load(&LoggingStatus,
__sanitizer::memory_order_acquire);
@@ -522,51 +520,100 @@ inline bool isLogInitializedAndReady(
}
setupNewBuffer(wall_clock_reader);
+
+ // Always write the CPU metadata as the first record in the buffer.
+ writeNewCPUIdMetadata(CPU, TSC);
}
- if (CurrentCPU == std::numeric_limits<uint16_t>::max()) {
+ if (TLD.CurrentCPU == std::numeric_limits<uint16_t>::max()) {
// This means this is the first CPU this thread has ever run on. We set
// the current CPU and record this as the first TSC we've seen.
- CurrentCPU = CPU;
+ TLD.CurrentCPU = CPU;
writeNewCPUIdMetadata(CPU, TSC);
}
return true;
} // namespace __xray_fdr_internal
+// Compute the TSC difference between the time of measurement and the previous
+// event. There are a few interesting situations we need to account for:
+//
+// - The thread has migrated to a different CPU. If this is the case, then
+// we write down the following records:
+//
+// 1. A 'NewCPUId' Metadata record.
+// 2. A FunctionRecord with a 0 for the TSCDelta field.
+//
+// - The TSC delta is greater than the 32 bits we can store in a
+// FunctionRecord. In this case we write down the following records:
+//
+// 1. A 'TSCWrap' Metadata record.
+// 2. A FunctionRecord with a 0 for the TSCDelta field.
+//
+// - The TSC delta is representable within the 32 bits we can store in a
+// FunctionRecord. In this case we write down just a FunctionRecord with
+// the correct TSC delta.
+inline uint32_t writeCurrentCPUTSC(ThreadLocalData &TLD, uint64_t TSC,
+ uint8_t CPU) {
+ if (CPU != TLD.CurrentCPU) {
+ // We've moved to a new CPU.
+ writeNewCPUIdMetadata(CPU, TSC);
+ return 0;
+ }
+ // If the delta is greater than the range for a uint32_t, then we write out
+ // the TSC wrap metadata entry with the full TSC, and the TSC for the
+ // function record be 0.
+ uint64_t Delta = TSC - TLD.LastTSC;
+ if (Delta <= std::numeric_limits<uint32_t>::max())
+ return Delta;
+
+ writeTSCWrapMetadata(TSC);
+ return 0;
+}
+
inline void endBufferIfFull() XRAY_NEVER_INSTRUMENT {
- auto BufferStart = static_cast<char *>(Buffer.Buffer);
- if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) {
- writeEOBMetadata();
- if (!releaseThreadLocalBuffer(LocalBQ.get()))
+ auto &TLD = getThreadLocalData();
+ auto BufferStart = static_cast<char *>(TLD.Buffer.Buffer);
+ if ((TLD.RecordPtr + MetadataRecSize) - BufferStart <=
+ ptrdiff_t{MetadataRecSize}) {
+ if (!releaseThreadLocalBuffer(*TLD.BQ))
return;
- RecordPtr = nullptr;
+ TLD.RecordPtr = nullptr;
}
}
-inline void processFunctionHook(
- int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU,
- int (*wall_clock_reader)(clockid_t, struct timespec *),
- __sanitizer::atomic_sint32_t &LoggingStatus,
- const std::shared_ptr<BufferQueue> &BQ) XRAY_NEVER_INSTRUMENT {
+thread_local volatile bool Running = false;
+
+/// Here's where the meat of the processing happens. The writer captures
+/// function entry, exit and tail exit points with a time and will create
+/// TSCWrap, NewCPUId and Function records as necessary. The writer might
+/// walk backward through its buffer and erase trivial functions to avoid
+/// polluting the log and may use the buffer queue to obtain or release a
+/// buffer.
+inline void processFunctionHook(int32_t FuncId, XRayEntryType Entry,
+ uint64_t TSC, unsigned char CPU, uint64_t Arg1,
+ int (*wall_clock_reader)(clockid_t,
+ struct timespec *),
+ BufferQueue *BQ) XRAY_NEVER_INSTRUMENT {
// Prevent signal handler recursion, so in case we're already in a log writing
// mode and the signal handler comes in (and is also instrumented) then we
// don't want to be clobbering potentially partial writes already happening in
// the thread. We use a simple thread_local latch to only allow one on-going
// handleArg0 to happen at any given time.
- thread_local bool Running = false;
RecursionGuard Guard{Running};
if (!Guard) {
assert(Running == true && "RecursionGuard is buggy!");
return;
}
+ auto &TLD = getThreadLocalData();
+
// In case the reference has been cleaned up before, we make sure we
// initialize it to the provided BufferQueue.
- if (LocalBQ == nullptr)
- LocalBQ = BQ;
+ if (TLD.BQ == nullptr)
+ TLD.BQ = BQ;
- if (!isLogInitializedAndReady(LocalBQ, TSC, CPU, wall_clock_reader))
+ if (!isLogInitializedAndReady(TLD.BQ, TSC, CPU, wall_clock_reader))
return;
// Before we go setting up writing new function entries, we need to be really
@@ -579,10 +626,10 @@ inline void processFunctionHook(
// - The least number of bytes we will ever write is 8
// (sizeof(FunctionRecord)) only if the delta between the previous entry
// and this entry is within 32 bits.
- // - The most number of bytes we will ever write is 8 + 16 = 24. This is
- // computed by:
+ // - The most number of bytes we will ever write is 8 + 16 + 16 = 40.
+ // This is computed by:
//
- // sizeof(FunctionRecord) + sizeof(MetadataRecord)
+ // MaxSize = sizeof(FunctionRecord) + 2 * sizeof(MetadataRecord)
//
// These arise in the following cases:
//
@@ -596,77 +643,39 @@ inline void processFunctionHook(
// FunctionRecord.
// 3. When we learn about a new CPU ID, we need to write down a "new cpu
// id" MetadataRecord before writing out the actual FunctionRecord.
+ // 4. The second MetadataRecord is the optional function call argument.
//
- // - An End-of-Buffer (EOB) MetadataRecord is 16 bytes.
- //
- // So the math we need to do is to determine whether writing 24 bytes past the
- // current pointer leaves us with enough bytes to write the EOB
- // MetadataRecord. If we don't have enough space after writing as much as 24
- // bytes in the end of the buffer, we need to write out the EOB, get a new
- // Buffer, set it up properly before doing any further writing.
- //
- if (!prepareBuffer(wall_clock_reader, FunctionRecSize + MetadataRecSize)) {
- LocalBQ = nullptr;
+ // So the math we need to do is to determine whether writing 40 bytes past the
+ // current pointer exceeds the buffer's maximum size. If we don't have enough
+ // space to write 40 bytes in the buffer, we need get a new Buffer, set it up
+ // properly before doing any further writing.
+ size_t MaxSize = FunctionRecSize + 2 * MetadataRecSize;
+ if (!prepareBuffer(TSC, CPU, wall_clock_reader, MaxSize)) {
+ TLD.BQ = nullptr;
return;
}
- // By this point, we are now ready to write at most 24 bytes (one metadata
- // record and one function record).
- assert((RecordPtr + (MetadataRecSize + FunctionRecSize)) -
- static_cast<char *>(Buffer.Buffer) >=
+ // By this point, we are now ready to write up to 40 bytes (explained above).
+ assert((TLD.RecordPtr + MaxSize) - static_cast<char *>(TLD.Buffer.Buffer) >=
static_cast<ptrdiff_t>(MetadataRecSize) &&
"Misconfigured BufferQueue provided; Buffer size not large enough.");
- // Here we compute the TSC Delta. There are a few interesting situations we
- // need to account for:
- //
- // - The thread has migrated to a different CPU. If this is the case, then
- // we write down the following records:
- //
- // 1. A 'NewCPUId' Metadata record.
- // 2. A FunctionRecord with a 0 for the TSCDelta field.
- //
- // - The TSC delta is greater than the 32 bits we can store in a
- // FunctionRecord. In this case we write down the following records:
- //
- // 1. A 'TSCWrap' Metadata record.
- // 2. A FunctionRecord with a 0 for the TSCDelta field.
- //
- // - The TSC delta is representable within the 32 bits we can store in a
- // FunctionRecord. In this case we write down just a FunctionRecord with
- // the correct TSC delta.
- //
- uint32_t RecordTSCDelta = 0;
- if (CPU != CurrentCPU) {
- // We've moved to a new CPU.
- writeNewCPUIdMetadata(CPU, TSC);
- } else {
- // If the delta is greater than the range for a uint32_t, then we write out
- // the TSC wrap metadata entry with the full TSC, and the TSC for the
- // function record be 0.
- auto Delta = TSC - LastTSC;
- if (Delta > (1ULL << 32) - 1)
- writeTSCWrapMetadata(TSC);
- else
- RecordTSCDelta = Delta;
- }
-
- LastTSC = TSC;
- CurrentCPU = CPU;
+ auto RecordTSCDelta = writeCurrentCPUTSC(TLD, TSC, CPU);
+ TLD.LastTSC = TSC;
+ TLD.CurrentCPU = CPU;
switch (Entry) {
case XRayEntryType::ENTRY:
case XRayEntryType::LOG_ARGS_ENTRY:
// Update the thread local state for the next invocation.
- LastFunctionEntryTSC = TSC;
+ TLD.LastFunctionEntryTSC = TSC;
break;
case XRayEntryType::TAIL:
- break;
case XRayEntryType::EXIT:
// Break out and write the exit record if we can't erase any functions.
- if (NumConsecutiveFnEnters == 0 ||
- (TSC - LastFunctionEntryTSC) >= thresholdTicks())
+ if (TLD.NumConsecutiveFnEnters == 0 ||
+ (TSC - TLD.LastFunctionEntryTSC) >= thresholdTicks())
break;
- rewindRecentCall(TSC, LastTSC, LastFunctionEntryTSC, FuncId);
+ rewindRecentCall(TSC, TLD.LastTSC, TLD.LastFunctionEntryTSC, FuncId);
return; // without writing log.
case XRayEntryType::CUSTOM_EVENT: {
// This is a bug in patching, so we'll report it once and move on.
@@ -681,7 +690,9 @@ inline void processFunctionHook(
}
}
- writeFunctionRecord(FuncId, RecordTSCDelta, Entry, RecordPtr);
+ writeFunctionRecord(FuncId, RecordTSCDelta, Entry);
+ if (Entry == XRayEntryType::LOG_ARGS_ENTRY)
+ writeCallArgumentMetadata(Arg1);
// If we've exhausted the buffer by this time, we then release the buffer to
// make sure that other threads may start using this buffer.
diff --git a/lib/xray/xray_flags.h b/lib/xray/xray_flags.h
index f4e30283b8de6..3ed5b8844cb46 100644
--- a/lib/xray/xray_flags.h
+++ b/lib/xray/xray_flags.h
@@ -16,6 +16,7 @@
#define XRAY_FLAGS_H
#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
namespace __xray {
diff --git a/lib/xray/xray_flags.inc b/lib/xray/xray_flags.inc
index 7ddce78eb413e..29f1fce7d7f4d 100644
--- a/lib/xray/xray_flags.inc
+++ b/lib/xray/xray_flags.inc
@@ -16,12 +16,34 @@
XRAY_FLAG(bool, patch_premain, false,
"Whether to patch instrumentation points before main.")
-XRAY_FLAG(bool, xray_naive_log, true,
- "Whether to install the naive log implementation.")
XRAY_FLAG(const char *, xray_logfile_base, "xray-log.",
"Filename base for the xray logfile.")
+XRAY_FLAG(const char *, xray_mode, "", "Mode to install by default.")
+XRAY_FLAG(uptr, xray_page_size_override, 0,
+ "Override the default page size for the system, in bytes. The size "
+ "should be a power-of-two.")
+
+// Basic (Naive) Mode logging options.
+XRAY_FLAG(bool, xray_naive_log, false,
+ "DEPRECATED: Use xray_mode=xray-basic instead.")
+XRAY_FLAG(int, xray_naive_log_func_duration_threshold_us, 5,
+ "Naive logging will try to skip functions that execute for fewer "
+ "microseconds than this threshold.")
+XRAY_FLAG(int, xray_naive_log_max_stack_depth, 64,
+ "Naive logging will keep track of at most this deep a call stack, "
+    "any more and the recordings will be dropped.")
+XRAY_FLAG(int, xray_naive_log_thread_buffer_size, 1024,
+ "The number of entries to keep on a per-thread buffer.")
+
+// FDR (Flight Data Recorder) Mode logging options.
XRAY_FLAG(bool, xray_fdr_log, false,
- "Whether to install the flight data recorder logging implementation.")
+ "DEPRECATED: Use xray_mode=xray-fdr instead.")
XRAY_FLAG(int, xray_fdr_log_func_duration_threshold_us, 5,
"FDR logging will try to skip functions that execute for fewer "
"microseconds than this threshold.")
+XRAY_FLAG(int, xray_fdr_log_grace_period_us, 0,
+ "DEPRECATED: use xray_fdr_log_grace_period_ms instead.")
+XRAY_FLAG(int, xray_fdr_log_grace_period_ms, 100,
+    "FDR logging will wait this much time in milliseconds before "
+ "actually flushing the log; this gives a chance for threads to "
+ "notice that the log has been finalized and clean up.")
diff --git a/lib/xray/xray_init.cc b/lib/xray/xray_init.cc
index aa660baa99206..11892cb8b7a31 100644
--- a/lib/xray/xray_init.cc
+++ b/lib/xray/xray_init.cc
@@ -44,12 +44,31 @@ __sanitizer::atomic_uint8_t XRayInitialized{0};
__sanitizer::SpinMutex XRayInstrMapMutex;
XRaySledMap XRayInstrMap;
+// Global flag to determine whether the flags have been initialized.
+__sanitizer::atomic_uint8_t XRayFlagsInitialized{0};
+
+// A mutex to allow only one thread to initialize the XRay data structures.
+__sanitizer::SpinMutex XRayInitMutex;
+
// __xray_init() will do the actual loading of the current process' memory map
// and then proceed to look for the .xray_instr_map section/segment.
void __xray_init() XRAY_NEVER_INSTRUMENT {
- initializeFlags();
+ __sanitizer::SpinMutexLock Guard(&XRayInitMutex);
+ // Short-circuit if we've already initialized XRay before.
+ if (__sanitizer::atomic_load(&XRayInitialized,
+ __sanitizer::memory_order_acquire))
+ return;
+
+ if (!__sanitizer::atomic_load(&XRayFlagsInitialized,
+ __sanitizer::memory_order_acquire)) {
+ initializeFlags();
+ __sanitizer::atomic_store(&XRayFlagsInitialized, true,
+ __sanitizer::memory_order_release);
+ }
+
if (__start_xray_instr_map == nullptr) {
- Report("XRay instrumentation map missing. Not initializing XRay.\n");
+ if (Verbosity())
+ Report("XRay instrumentation map missing. Not initializing XRay.\n");
return;
}
@@ -63,9 +82,21 @@ void __xray_init() XRAY_NEVER_INSTRUMENT {
__sanitizer::atomic_store(&XRayInitialized, true,
__sanitizer::memory_order_release);
+#ifndef XRAY_NO_PREINIT
if (flags()->patch_premain)
__xray_patch();
+#endif
}
+#if !defined(XRAY_NO_PREINIT) && SANITIZER_CAN_USE_PREINIT_ARRAY
+// Only add the preinit array initialization if the sanitizers can.
__attribute__((section(".preinit_array"),
used)) void (*__local_xray_preinit)(void) = __xray_init;
+#else
+// If we cannot use the .preinit_array section, we should instead use dynamic
+// initialisation.
+static bool UNUSED __local_xray_dyninit = [] {
+ __xray_init();
+ return true;
+}();
+#endif
diff --git a/lib/xray/xray_inmemory_log.cc b/lib/xray/xray_inmemory_log.cc
index 83aecfaf7700a..a27ffbcbd12ee 100644
--- a/lib/xray/xray_inmemory_log.cc
+++ b/lib/xray/xray_inmemory_log.cc
@@ -16,80 +16,85 @@
//===----------------------------------------------------------------------===//
#include <cassert>
+#include <cstring>
+#include <errno.h>
#include <fcntl.h>
-#include <mutex>
+#include <pthread.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
-#include <thread>
+#include <time.h>
#include <unistd.h>
+#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_libc.h"
#include "xray/xray_records.h"
#include "xray_defs.h"
#include "xray_flags.h"
+#include "xray_inmemory_log.h"
#include "xray_interface_internal.h"
#include "xray_tsc.h"
#include "xray_utils.h"
-// __xray_InMemoryRawLog will use a thread-local aligned buffer capped to a
-// certain size (32kb by default) and use it as if it were a circular buffer for
-// events. We store simple fixed-sized entries in the log for external analysis.
+namespace __xray {
-extern "C" {
-void __xray_InMemoryRawLog(int32_t FuncId,
- XRayEntryType Type) XRAY_NEVER_INSTRUMENT;
-}
+__sanitizer::SpinMutex LogMutex;
-namespace __xray {
+// We use elements of this type to record the entry TSC of every function ID we
+// see as we're tracing a particular thread's execution.
+struct alignas(16) StackEntry {
+ int32_t FuncId;
+ uint16_t Type;
+ uint8_t CPU;
+ uint8_t Padding;
+ uint64_t TSC;
+};
-std::mutex LogMutex;
-
-class ThreadExitFlusher {
- int Fd;
- XRayRecord *Start;
- size_t &Offset;
-
-public:
- explicit ThreadExitFlusher(int Fd, XRayRecord *Start,
- size_t &Offset) XRAY_NEVER_INSTRUMENT
- : Fd(Fd),
- Start(Start),
- Offset(Offset) {}
-
- ~ThreadExitFlusher() XRAY_NEVER_INSTRUMENT {
- std::lock_guard<std::mutex> L(LogMutex);
- if (Fd > 0 && Start != nullptr) {
- retryingWriteAll(Fd, reinterpret_cast<char *>(Start),
- reinterpret_cast<char *>(Start + Offset));
- // Because this thread's exit could be the last one trying to write to the
- // file and that we're not able to close out the file properly, we sync
- // instead and hope that the pending writes are flushed as the thread
- // exits.
- fsync(Fd);
- }
- }
+static_assert(sizeof(StackEntry) == 16, "Wrong size for StackEntry");
+
+struct alignas(64) ThreadLocalData {
+ void *InMemoryBuffer = nullptr;
+ size_t BufferSize = 0;
+ size_t BufferOffset = 0;
+ void *ShadowStack = nullptr;
+ size_t StackSize = 0;
+ size_t StackEntries = 0;
+ int Fd = -1;
+ pid_t TID = 0;
};
-} // namespace __xray
+static pthread_key_t PThreadKey;
+
+static __sanitizer::atomic_uint8_t BasicInitialized{0};
+
+BasicLoggingOptions GlobalOptions;
+
+thread_local volatile bool RecursionGuard = false;
-using namespace __xray;
+static uint64_t thresholdTicks() XRAY_NEVER_INSTRUMENT {
+ static uint64_t TicksPerSec = probeRequiredCPUFeatures()
+ ? getTSCFrequency()
+ : __xray::NanosecondsPerSecond;
+ static const uint64_t ThresholdTicks =
+ TicksPerSec * GlobalOptions.DurationFilterMicros / 1000000;
+ return ThresholdTicks;
+}
-static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT {
+static int openLogFile() XRAY_NEVER_INSTRUMENT {
int F = getLogFD();
if (F == -1)
return -1;
// Test for required CPU features and cache the cycle frequency
static bool TSCSupported = probeRequiredCPUFeatures();
- static uint64_t CycleFrequency = TSCSupported ? getTSCFrequency()
- : __xray::NanosecondsPerSecond;
+ static uint64_t CycleFrequency =
+ TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond;
// Since we're here, we get to write the header. We set it up so that the
// header will only be written once, at the start, and let the threads
// logging do writes which just append.
XRayFileHeader Header;
- Header.Version = 1;
+ Header.Version = 2; // Version 2 includes tail exit records.
Header.Type = FileTypes::NAIVE_LOG;
Header.CycleFrequency = CycleFrequency;
@@ -102,47 +107,210 @@ static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT {
return F;
}
+int getGlobalFd() XRAY_NEVER_INSTRUMENT {
+ static int Fd = openLogFile();
+ return Fd;
+}
+
+ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
+ thread_local ThreadLocalData TLD;
+ thread_local bool UNUSED TOnce = [] {
+ if (GlobalOptions.ThreadBufferSize == 0) {
+ if (__sanitizer::Verbosity())
+ Report("Not initializing TLD since ThreadBufferSize == 0.\n");
+ return false;
+ }
+ TLD.TID = __sanitizer::GetTid();
+ pthread_setspecific(PThreadKey, &TLD);
+ TLD.Fd = getGlobalFd();
+ TLD.InMemoryBuffer = reinterpret_cast<XRayRecord *>(
+ InternalAlloc(sizeof(XRayRecord) * GlobalOptions.ThreadBufferSize,
+ nullptr, alignof(XRayRecord)));
+ TLD.BufferSize = GlobalOptions.ThreadBufferSize;
+ TLD.BufferOffset = 0;
+ if (GlobalOptions.MaxStackDepth == 0) {
+ if (__sanitizer::Verbosity())
+ Report("Not initializing the ShadowStack since MaxStackDepth == 0.\n");
+ TLD.StackSize = 0;
+ TLD.StackEntries = 0;
+ TLD.ShadowStack = nullptr;
+ return false;
+ }
+ TLD.ShadowStack = reinterpret_cast<StackEntry *>(
+ InternalAlloc(sizeof(StackEntry) * GlobalOptions.MaxStackDepth, nullptr,
+ alignof(StackEntry)));
+ TLD.StackSize = GlobalOptions.MaxStackDepth;
+ TLD.StackEntries = 0;
+ if (__sanitizer::Verbosity() >= 2) {
+ static auto UNUSED Once = [] {
+ auto ticks = thresholdTicks();
+ Report("Ticks threshold: %d\n", ticks);
+ return false;
+ }();
+ }
+ return false;
+ }();
+ return TLD;
+}
+
template <class RDTSC>
-void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
- RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
- using Buffer =
- std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
- static constexpr size_t BuffLen = 1024;
- thread_local static Buffer InMemoryBuffer[BuffLen] = {};
- thread_local static size_t Offset = 0;
- static int Fd = __xray_OpenLogFile();
+void InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
+ RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
+ auto &TLD = getThreadLocalData();
+ auto &InMemoryBuffer = TLD.InMemoryBuffer;
+ int Fd = getGlobalFd();
if (Fd == -1)
return;
- thread_local __xray::ThreadExitFlusher Flusher(
- Fd, reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer), Offset);
- thread_local pid_t TId = syscall(SYS_gettid);
- // First we get the useful data, and stuff it into the already aligned buffer
- // through a pointer offset.
- auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset];
+ // Use a simple recursion guard, to handle cases where we're already logging
+ // and for one reason or another, this function gets called again in the same
+ // thread.
+ if (RecursionGuard)
+ return;
+ RecursionGuard = true;
+ auto ExitGuard = __sanitizer::at_scope_exit([] { RecursionGuard = false; });
+
+ uint8_t CPU = 0;
+ uint64_t TSC = ReadTSC(CPU);
+
+ switch (Type) {
+ case XRayEntryType::ENTRY:
+ case XRayEntryType::LOG_ARGS_ENTRY: {
+ // Short circuit if we've reached the maximum depth of the stack.
+ if (TLD.StackEntries++ >= TLD.StackSize)
+ return;
+
+ // When we encounter an entry event, we keep track of the TSC and the CPU,
+ // and put it in the stack.
+ StackEntry E;
+ E.FuncId = FuncId;
+ E.CPU = CPU;
+ E.Type = Type;
+ E.TSC = TSC;
+ auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) +
+ (sizeof(StackEntry) * (TLD.StackEntries - 1));
+ __sanitizer::internal_memcpy(StackEntryPtr, &E, sizeof(StackEntry));
+ break;
+ }
+ case XRayEntryType::EXIT:
+ case XRayEntryType::TAIL: {
+ if (TLD.StackEntries == 0)
+ break;
+
+ if (--TLD.StackEntries >= TLD.StackSize)
+ return;
+
+ // When we encounter an exit event, we check whether all the following are
+ // true:
+ //
+ // - The Function ID is the same as the most recent entry in the stack.
+ // - The CPU is the same as the most recent entry in the stack.
+ // - The Delta of the TSCs is less than the threshold amount of time we're
+ // looking to record.
+ //
+ // If all of these conditions are true, we pop the stack and don't write a
+ // record and move the record offset back.
+ StackEntry StackTop;
+ auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) +
+ (sizeof(StackEntry) * TLD.StackEntries);
+ __sanitizer::internal_memcpy(&StackTop, StackEntryPtr, sizeof(StackEntry));
+ if (StackTop.FuncId == FuncId && StackTop.CPU == CPU &&
+ StackTop.TSC < TSC) {
+ auto Delta = TSC - StackTop.TSC;
+ if (Delta < thresholdTicks()) {
+ assert(TLD.BufferOffset > 0);
+ TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2;
+ return;
+ }
+ }
+ break;
+ }
+ default:
+ // Should be unreachable.
+ assert(false && "Unsupported XRayEntryType encountered.");
+ break;
+ }
+
+ // First determine whether the delta between the function's enter record and
+ // the exit record is higher than the threshold.
+ __xray::XRayRecord R;
R.RecordType = RecordTypes::NORMAL;
- R.TSC = ReadTSC(R.CPU);
- R.TId = TId;
+ R.CPU = CPU;
+ R.TSC = TSC;
+ R.TId = TLD.TID;
R.Type = Type;
R.FuncId = FuncId;
- ++Offset;
- if (Offset == BuffLen) {
- std::lock_guard<std::mutex> L(LogMutex);
+ auto EntryPtr = static_cast<char *>(InMemoryBuffer) +
+ (sizeof(__xray::XRayRecord) * TLD.BufferOffset);
+ __sanitizer::internal_memcpy(EntryPtr, &R, sizeof(R));
+ if (++TLD.BufferOffset == TLD.BufferSize) {
+ __sanitizer::SpinMutexLock L(&LogMutex);
+ auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer);
+ retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer),
+ reinterpret_cast<char *>(RecordBuffer + TLD.BufferOffset));
+ TLD.BufferOffset = 0;
+ TLD.StackEntries = 0;
+ }
+}
+
+template <class RDTSC>
+void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1,
+ RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
+ auto &TLD = getThreadLocalData();
+ auto &InMemoryBuffer = TLD.InMemoryBuffer;
+ auto &Offset = TLD.BufferOffset;
+ const auto &BuffLen = TLD.BufferSize;
+ int Fd = getGlobalFd();
+ if (Fd == -1)
+ return;
+
+ // First we check whether there's enough space to write the data consecutively
+ // in the thread-local buffer. If not, we first flush the buffer before
+ // attempting to write the two records that must be consecutive.
+ if (Offset + 2 > BuffLen) {
+ __sanitizer::SpinMutexLock L(&LogMutex);
+ auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer);
+ retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer),
+ reinterpret_cast<char *>(RecordBuffer + Offset));
+ Offset = 0;
+ TLD.StackEntries = 0;
+ }
+
+ // Then we write the "we have an argument" record.
+ InMemoryRawLog(FuncId, Type, ReadTSC);
+
+ if (RecursionGuard)
+ return;
+ RecursionGuard = true;
+ auto ExitGuard = __sanitizer::at_scope_exit([] { RecursionGuard = false; });
+
+ // And from here on write the arg payload.
+ __xray::XRayArgPayload R;
+ R.RecordType = RecordTypes::ARG_PAYLOAD;
+ R.FuncId = FuncId;
+ R.TId = TLD.TID;
+ R.Arg = Arg1;
+ auto EntryPtr =
+ &reinterpret_cast<__xray::XRayArgPayload *>(&InMemoryBuffer)[Offset];
+ std::memcpy(EntryPtr, &R, sizeof(R));
+ if (++Offset == BuffLen) {
+ __sanitizer::SpinMutexLock L(&LogMutex);
auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer);
retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer),
reinterpret_cast<char *>(RecordBuffer + Offset));
Offset = 0;
+ TLD.StackEntries = 0;
}
}
-void __xray_InMemoryRawLogRealTSC(int32_t FuncId,
- XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
- __xray_InMemoryRawLog(FuncId, Type, __xray::readTSC);
+void basicLoggingHandleArg0RealTSC(int32_t FuncId,
+ XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
+ InMemoryRawLog(FuncId, Type, __xray::readTSC);
}
-void __xray_InMemoryEmulateTSC(int32_t FuncId,
- XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
- __xray_InMemoryRawLog(FuncId, Type, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Type)
+ XRAY_NEVER_INSTRUMENT {
+ InMemoryRawLog(FuncId, Type, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
timespec TS;
int result = clock_gettime(CLOCK_REALTIME, &TS);
if (result != 0) {
@@ -154,13 +322,150 @@ void __xray_InMemoryEmulateTSC(int32_t FuncId,
});
}
-static auto UNUSED Unused = [] {
- auto UseRealTSC = probeRequiredCPUFeatures();
- if (!UseRealTSC)
+void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Type,
+ uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
+ InMemoryRawLogWithArg(FuncId, Type, Arg1, __xray::readTSC);
+}
+
+void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Type,
+ uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
+ InMemoryRawLogWithArg(
+ FuncId, Type, Arg1, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+ timespec TS;
+ int result = clock_gettime(CLOCK_REALTIME, &TS);
+ if (result != 0) {
+          Report("clock_gettime(2) returned %d, errno=%d.", result, int(errno));
+ TS = {0, 0};
+ }
+ CPU = 0;
+ return TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
+ });
+}
+
+static void TLDDestructor(void *P) XRAY_NEVER_INSTRUMENT {
+ ThreadLocalData &TLD = *reinterpret_cast<ThreadLocalData *>(P);
+ auto ExitGuard = __sanitizer::at_scope_exit([&TLD] {
+ // Clean up dynamic resources.
+ if (TLD.InMemoryBuffer)
+ InternalFree(TLD.InMemoryBuffer);
+ if (TLD.ShadowStack)
+ InternalFree(TLD.ShadowStack);
+ if (__sanitizer::Verbosity())
+ Report("Cleaned up log for TID: %d\n", TLD.TID);
+ });
+
+ if (TLD.Fd == -1 || TLD.BufferOffset == 0) {
+ if (__sanitizer::Verbosity())
+ Report("Skipping buffer for TID: %d; Fd = %d; Offset = %llu\n", TLD.TID,
+ TLD.Fd, TLD.BufferOffset);
+ return;
+ }
+
+ {
+ __sanitizer::SpinMutexLock L(&LogMutex);
+ retryingWriteAll(TLD.Fd, reinterpret_cast<char *>(TLD.InMemoryBuffer),
+ reinterpret_cast<char *>(TLD.InMemoryBuffer) +
+ (sizeof(__xray::XRayRecord) * TLD.BufferOffset));
+ }
+
+ // Because this thread's exit could be the last one trying to write to
+ // the file and that we're not able to close out the file properly, we
+ // sync instead and hope that the pending writes are flushed as the
+ // thread exits.
+ fsync(TLD.Fd);
+}
+
+XRayLogInitStatus basicLoggingInit(size_t BufferSize, size_t BufferMax,
+ void *Options,
+ size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
+ static bool UNUSED Once = [] {
+ pthread_key_create(&PThreadKey, TLDDestructor);
+ return false;
+ }();
+
+ uint8_t Expected = 0;
+ if (!__sanitizer::atomic_compare_exchange_strong(
+ &BasicInitialized, &Expected, 1, __sanitizer::memory_order_acq_rel)) {
+ if (__sanitizer::Verbosity())
+ Report("Basic logging already initialized.\n");
+ return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+ }
+
+ if (OptionsSize != sizeof(BasicLoggingOptions)) {
+ Report("Invalid options size, potential ABI mismatch; expected %d got %d",
+ sizeof(BasicLoggingOptions), OptionsSize);
+ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+ }
+
+ static auto UseRealTSC = probeRequiredCPUFeatures();
+ if (!UseRealTSC && __sanitizer::Verbosity())
Report("WARNING: Required CPU features missing for XRay instrumentation, "
"using emulation instead.\n");
- if (flags()->xray_naive_log)
- __xray_set_handler(UseRealTSC ? __xray_InMemoryRawLogRealTSC
- : __xray_InMemoryEmulateTSC);
+
+ GlobalOptions = *reinterpret_cast<BasicLoggingOptions *>(Options);
+ __xray_set_handler_arg1(UseRealTSC ? basicLoggingHandleArg1RealTSC
+ : basicLoggingHandleArg1EmulateTSC);
+ __xray_set_handler(UseRealTSC ? basicLoggingHandleArg0RealTSC
+ : basicLoggingHandleArg0EmulateTSC);
+ __xray_remove_customevent_handler();
+
+ return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+}
+
+XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT {
+ uint8_t Expected = 0;
+ if (!__sanitizer::atomic_compare_exchange_strong(
+ &BasicInitialized, &Expected, 0, __sanitizer::memory_order_acq_rel) &&
+ __sanitizer::Verbosity())
+ Report("Basic logging already finalized.\n");
+
+ // Nothing really to do aside from marking state of the global to be
+ // uninitialized.
+
+ return XRayLogInitStatus::XRAY_LOG_FINALIZED;
+}
+
+XRayLogFlushStatus basicLoggingFlush() XRAY_NEVER_INSTRUMENT {
+ // This really does nothing, since flushing the logs happen at the end of a
+ // thread's lifetime, or when the buffers are full.
+ return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
+}
+
+// This is a handler that, effectively, does nothing.
+void basicLoggingHandleArg0Empty(int32_t, XRayEntryType) XRAY_NEVER_INSTRUMENT {
+}
+
+bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
+ XRayLogImpl Impl{
+ basicLoggingInit,
+ basicLoggingFinalize,
+ basicLoggingHandleArg0Empty,
+ basicLoggingFlush,
+ };
+ auto RegistrationResult = __xray_log_register_mode("xray-basic", Impl);
+ if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
+ __sanitizer::Verbosity())
+ Report("Cannot register XRay Basic Mode to 'xray-basic'; error = %d\n",
+ RegistrationResult);
+ if (flags()->xray_naive_log ||
+ !__sanitizer::internal_strcmp(flags()->xray_mode, "xray-basic")) {
+ __xray_set_log_impl(Impl);
+ BasicLoggingOptions Options;
+ Options.DurationFilterMicros =
+ flags()->xray_naive_log_func_duration_threshold_us;
+ Options.MaxStackDepth = flags()->xray_naive_log_max_stack_depth;
+ Options.ThreadBufferSize = flags()->xray_naive_log_thread_buffer_size;
+ __xray_log_init(flags()->xray_naive_log_thread_buffer_size, 0, &Options,
+ sizeof(BasicLoggingOptions));
+ static auto UNUSED Once = [] {
+ static auto UNUSED &TLD = getThreadLocalData();
+ __sanitizer::Atexit(+[] { TLDDestructor(&TLD); });
+ return false;
+ }();
+ }
return true;
-}();
+}
+
+} // namespace __xray
+
+static auto UNUSED Unused = __xray::basicLogDynamicInitializer();
diff --git a/lib/xray/xray_inmemory_log.h b/lib/xray/xray_inmemory_log.h
new file mode 100644
index 0000000000000..e4fcb8ca5ffdc
--- /dev/null
+++ b/lib/xray/xray_inmemory_log.h
@@ -0,0 +1,44 @@
+//===-- xray_inmemory_log.h
+//------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_INMEMORY_LOG_H
+#define XRAY_XRAY_INMEMORY_LOG_H
+
+#include "xray/xray_log_interface.h"
+
+/// Basic (Naive) Mode
+/// ==================
+///
+/// This implementation hooks in through the XRay logging implementation
+/// framework. The Basic Mode implementation will keep appending to a file as
+/// soon as the thread-local buffers are full. It keeps minimal in-memory state
+/// and does the minimum filtering required to keep log files smaller.
+
+namespace __xray {
+
+XRayLogInitStatus basicLoggingInit(size_t BufferSize, size_t BufferMax,
+ void *Options, size_t OptionsSize);
+XRayLogInitStatus basicLoggingFinalize();
+
+void basicLoggingHandleArg0RealTSC(int32_t FuncId, XRayEntryType Entry);
+void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Entry);
+void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Entry,
+ uint64_t Arg1);
+void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Entry,
+ uint64_t Arg1);
+XRayLogFlushStatus basicLoggingFlush();
+XRayLogInitStatus basicLoggingReset();
+
+} // namespace __xray
+
+#endif // XRAY_XRAY_INMEMORY_LOG_H
diff --git a/lib/xray/xray_interface.cc b/lib/xray/xray_interface.cc
index 694d34c0102b7..766313e85c584 100644
--- a/lib/xray/xray_interface.cc
+++ b/lib/xray/xray_interface.cc
@@ -23,12 +23,15 @@
#include "sanitizer_common/sanitizer_common.h"
#include "xray_defs.h"
+#include "xray_flags.h"
+
+extern __sanitizer::SpinMutex XRayInstrMapMutex;
+extern __sanitizer::atomic_uint8_t XRayInitialized;
+extern __xray::XRaySledMap XRayInstrMap;
namespace __xray {
#if defined(__x86_64__)
-// FIXME: The actual length is 11 bytes. Why was length 12 passed to mprotect()
-// ?
static const int16_t cSledLength = 12;
#elif defined(__aarch64__)
static const int16_t cSledLength = 32;
@@ -53,6 +56,10 @@ __sanitizer::atomic_uintptr_t XRayArgLogger{0};
// This is the function to call when we encounter a custom event log call.
__sanitizer::atomic_uintptr_t XRayPatchedCustomEvent{0};
+// This is the global status to determine whether we are currently
+// patching/unpatching.
+__sanitizer::atomic_uint8_t XRayPatching{0};
+
// MProtectHelper is an RAII wrapper for calls to mprotect(...) that will undo
// any successful mprotect(...) changes. This is used to make a page writeable
// and executable, and upon destruction if it was successful in doing so returns
@@ -88,85 +95,10 @@ public:
}
};
-} // namespace __xray
-
-extern __sanitizer::SpinMutex XRayInstrMapMutex;
-extern __sanitizer::atomic_uint8_t XRayInitialized;
-extern __xray::XRaySledMap XRayInstrMap;
-
-int __xray_set_handler(void (*entry)(int32_t,
- XRayEntryType)) XRAY_NEVER_INSTRUMENT {
- if (__sanitizer::atomic_load(&XRayInitialized,
- __sanitizer::memory_order_acquire)) {
-
- __sanitizer::atomic_store(&__xray::XRayPatchedFunction,
- reinterpret_cast<uintptr_t>(entry),
- __sanitizer::memory_order_release);
- return 1;
- }
- return 0;
-}
-
-int __xray_set_customevent_handler(void (*entry)(void *, size_t))
- XRAY_NEVER_INSTRUMENT {
- if (__sanitizer::atomic_load(&XRayInitialized,
- __sanitizer::memory_order_acquire)) {
- __sanitizer::atomic_store(&__xray::XRayPatchedCustomEvent,
- reinterpret_cast<uintptr_t>(entry),
- __sanitizer::memory_order_release);
- return 1;
- }
- return 0;
-}
-
-
-int __xray_remove_handler() XRAY_NEVER_INSTRUMENT {
- return __xray_set_handler(nullptr);
-}
-
-int __xray_remove_customevent_handler() XRAY_NEVER_INSTRUMENT {
- return __xray_set_customevent_handler(nullptr);
-}
-
-__sanitizer::atomic_uint8_t XRayPatching{0};
-
-using namespace __xray;
-
-// FIXME: Figure out whether we can move this class to sanitizer_common instead
-// as a generic "scope guard".
-template <class Function> class CleanupInvoker {
- Function Fn;
-
-public:
- explicit CleanupInvoker(Function Fn) XRAY_NEVER_INSTRUMENT : Fn(Fn) {}
- CleanupInvoker(const CleanupInvoker &) XRAY_NEVER_INSTRUMENT = default;
- CleanupInvoker(CleanupInvoker &&) XRAY_NEVER_INSTRUMENT = default;
- CleanupInvoker &
- operator=(const CleanupInvoker &) XRAY_NEVER_INSTRUMENT = delete;
- CleanupInvoker &operator=(CleanupInvoker &&) XRAY_NEVER_INSTRUMENT = delete;
- ~CleanupInvoker() XRAY_NEVER_INSTRUMENT { Fn(); }
-};
-
-template <class Function>
-CleanupInvoker<Function> scopeCleanup(Function Fn) XRAY_NEVER_INSTRUMENT {
- return CleanupInvoker<Function>{Fn};
-}
-
-inline bool patchSled(const XRaySledEntry &Sled, bool Enable,
- int32_t FuncId) XRAY_NEVER_INSTRUMENT {
- // While we're here, we should patch the nop sled. To do that we mprotect
- // the page containing the function to be writeable.
- const uint64_t PageSize = GetPageSizeCached();
- void *PageAlignedAddr =
- reinterpret_cast<void *>(Sled.Address & ~(PageSize - 1));
- std::size_t MProtectLen = (Sled.Address + cSledLength) -
- reinterpret_cast<uint64_t>(PageAlignedAddr);
- MProtectHelper Protector(PageAlignedAddr, MProtectLen);
- if (Protector.MakeWriteable() == -1) {
- printf("Failed mprotect: %d\n", errno);
- return XRayPatchingStatus::FAILED;
- }
+namespace {
+bool patchSled(const XRaySledEntry &Sled, bool Enable,
+ int32_t FuncId) XRAY_NEVER_INSTRUMENT {
bool Success = false;
switch (Sled.Kind) {
case XRayEntryType::ENTRY:
@@ -191,6 +123,55 @@ inline bool patchSled(const XRaySledEntry &Sled, bool Enable,
return Success;
}
+XRayPatchingStatus patchFunction(int32_t FuncId,
+ bool Enable) XRAY_NEVER_INSTRUMENT {
+ if (!__sanitizer::atomic_load(&XRayInitialized,
+ __sanitizer::memory_order_acquire))
+ return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
+
+ uint8_t NotPatching = false;
+ if (!__sanitizer::atomic_compare_exchange_strong(
+ &XRayPatching, &NotPatching, true, __sanitizer::memory_order_acq_rel))
+ return XRayPatchingStatus::ONGOING; // Already patching.
+
+ // Next, we look for the function index.
+ XRaySledMap InstrMap;
+ {
+ __sanitizer::SpinMutexLock Guard(&XRayInstrMapMutex);
+ InstrMap = XRayInstrMap;
+ }
+
+ // If we don't have an index, we can't patch individual functions.
+ if (InstrMap.Functions == 0)
+ return XRayPatchingStatus::NOT_INITIALIZED;
+
+ // FuncId must be a positive number, less than the number of functions
+ // instrumented.
+ if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions) {
+ Report("Invalid function id provided: %d\n", FuncId);
+ return XRayPatchingStatus::FAILED;
+ }
+
+  // Now we patch the sleds for this specific function.
+ auto SledRange = InstrMap.SledsIndex[FuncId - 1];
+ auto *f = SledRange.Begin;
+ auto *e = SledRange.End;
+
+ bool SucceedOnce = false;
+ while (f != e)
+ SucceedOnce |= patchSled(*f++, Enable, FuncId);
+
+ __sanitizer::atomic_store(&XRayPatching, false,
+ __sanitizer::memory_order_release);
+
+ if (!SucceedOnce) {
+ Report("Failed patching any sled for function '%d'.", FuncId);
+ return XRayPatchingStatus::FAILED;
+ }
+
+ return XRayPatchingStatus::SUCCESS;
+}
+
// controlPatching implements the common internals of the patching/unpatching
// implementation. |Enable| defines whether we're enabling or disabling the
// runtime XRay instrumentation.
@@ -205,14 +186,13 @@ XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
return XRayPatchingStatus::ONGOING; // Already patching.
uint8_t PatchingSuccess = false;
- auto XRayPatchingStatusResetter = scopeCleanup([&PatchingSuccess] {
- if (!PatchingSuccess)
- __sanitizer::atomic_store(&XRayPatching, false,
- __sanitizer::memory_order_release);
- });
-
- // Step 1: Compute the function id, as a unique identifier per function in the
- // instrumentation map.
+ auto XRayPatchingStatusResetter =
+ __sanitizer::at_scope_exit([&PatchingSuccess] {
+ if (!PatchingSuccess)
+ __sanitizer::atomic_store(&XRayPatching, false,
+ __sanitizer::memory_order_release);
+ });
+
XRaySledMap InstrMap;
{
__sanitizer::SpinMutexLock Guard(&XRayInstrMapMutex);
@@ -221,16 +201,47 @@ XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
if (InstrMap.Entries == 0)
return XRayPatchingStatus::NOT_INITIALIZED;
- const uint64_t PageSize = GetPageSizeCached();
+ uint32_t FuncId = 1;
+ uint64_t CurFun = 0;
+
+ // First we want to find the bounds for which we have instrumentation points,
+ // and try to get as few calls to mprotect(...) as possible. We're assuming
+ // that all the sleds for the instrumentation map are contiguous as a single
+ // set of pages. When we do support dynamic shared object instrumentation,
+ // we'll need to do this for each set of page load offsets per DSO loaded. For
+ // now we're assuming we can mprotect the whole section of text between the
+ // minimum sled address and the maximum sled address (+ the largest sled
+ // size).
+ auto MinSled = InstrMap.Sleds[0];
+ auto MaxSled = InstrMap.Sleds[InstrMap.Entries - 1];
+ for (std::size_t I = 0; I < InstrMap.Entries; I++) {
+ const auto &Sled = InstrMap.Sleds[I];
+ if (Sled.Address < MinSled.Address)
+ MinSled = Sled;
+ if (Sled.Address > MaxSled.Address)
+ MaxSled = Sled;
+ }
+
+ const size_t PageSize = flags()->xray_page_size_override > 0
+ ? flags()->xray_page_size_override
+ : GetPageSizeCached();
if ((PageSize == 0) || ((PageSize & (PageSize - 1)) != 0)) {
Report("System page size is not a power of two: %lld\n", PageSize);
return XRayPatchingStatus::FAILED;
}
- uint32_t FuncId = 1;
- uint64_t CurFun = 0;
- for (std::size_t I = 0; I < InstrMap.Entries; I++) {
- auto Sled = InstrMap.Sleds[I];
+ void *PageAlignedAddr =
+ reinterpret_cast<void *>(MinSled.Address & ~(PageSize - 1));
+ size_t MProtectLen =
+ (MaxSled.Address - reinterpret_cast<uptr>(PageAlignedAddr)) + cSledLength;
+ MProtectHelper Protector(PageAlignedAddr, MProtectLen);
+ if (Protector.MakeWriteable() == -1) {
+ Report("Failed mprotect: %d\n", errno);
+ return XRayPatchingStatus::FAILED;
+ }
+
+ for (std::size_t I = 0; I < InstrMap.Entries; ++I) {
+ auto &Sled = InstrMap.Sleds[I];
auto F = Sled.Function;
if (CurFun == 0)
CurFun = F;
@@ -246,36 +257,14 @@ XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
return XRayPatchingStatus::SUCCESS;
}
-XRayPatchingStatus __xray_patch() XRAY_NEVER_INSTRUMENT {
- return controlPatching(true);
-}
-
-XRayPatchingStatus __xray_unpatch() XRAY_NEVER_INSTRUMENT {
- return controlPatching(false);
-}
-
-XRayPatchingStatus patchFunction(int32_t FuncId,
- bool Enable) XRAY_NEVER_INSTRUMENT {
- if (!__sanitizer::atomic_load(&XRayInitialized,
- __sanitizer::memory_order_acquire))
- return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
-
- uint8_t NotPatching = false;
- if (!__sanitizer::atomic_compare_exchange_strong(
- &XRayPatching, &NotPatching, true, __sanitizer::memory_order_acq_rel))
- return XRayPatchingStatus::ONGOING; // Already patching.
-
- // Next, we look for the function index.
+XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId,
+ bool Enable) XRAY_NEVER_INSTRUMENT {
XRaySledMap InstrMap;
{
__sanitizer::SpinMutexLock Guard(&XRayInstrMapMutex);
InstrMap = XRayInstrMap;
}
- // If we don't have an index, we can't patch individual functions.
- if (InstrMap.Functions == 0)
- return XRayPatchingStatus::NOT_INITIALIZED;
-
// FuncId must be a positive number, less than the number of functions
// instrumented.
if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions) {
@@ -283,33 +272,98 @@ XRayPatchingStatus patchFunction(int32_t FuncId,
return XRayPatchingStatus::FAILED;
}
- // Now we patch ths sleds for this specific function.
+ const size_t PageSize = flags()->xray_page_size_override > 0
+ ? flags()->xray_page_size_override
+ : GetPageSizeCached();
+ if ((PageSize == 0) || ((PageSize & (PageSize - 1)) != 0)) {
+ Report("Provided page size is not a power of two: %lld\n", PageSize);
+ return XRayPatchingStatus::FAILED;
+ }
+
+  // Here we compute the minimum sled and maximum sled associated with a
+  // particular function ID.
auto SledRange = InstrMap.SledsIndex[FuncId - 1];
auto *f = SledRange.Begin;
auto *e = SledRange.End;
+ auto MinSled = *f;
+ auto MaxSled = *(SledRange.End - 1);
+ while (f != e) {
+ if (f->Address < MinSled.Address)
+ MinSled = *f;
+ if (f->Address > MaxSled.Address)
+ MaxSled = *f;
+ ++f;
+ }
- bool SucceedOnce = false;
- while (f != e)
- SucceedOnce |= patchSled(*f++, Enable, FuncId);
+ void *PageAlignedAddr =
+ reinterpret_cast<void *>(MinSled.Address & ~(PageSize - 1));
+ size_t MProtectLen =
+ (MaxSled.Address - reinterpret_cast<uptr>(PageAlignedAddr)) + cSledLength;
+ MProtectHelper Protector(PageAlignedAddr, MProtectLen);
+ if (Protector.MakeWriteable() == -1) {
+ Report("Failed mprotect: %d\n", errno);
+ return XRayPatchingStatus::FAILED;
+ }
+ return patchFunction(FuncId, Enable);
+}
- __sanitizer::atomic_store(&XRayPatching, false,
- __sanitizer::memory_order_release);
+} // namespace
- if (!SucceedOnce) {
- Report("Failed patching any sled for function '%d'.", FuncId);
- return XRayPatchingStatus::FAILED;
+} // namespace __xray
+
+using namespace __xray;
+
+// The following functions are declared `extern "C" {...}` in the header, hence
+// they're defined in the global namespace.
+
+int __xray_set_handler(void (*entry)(int32_t,
+ XRayEntryType)) XRAY_NEVER_INSTRUMENT {
+ if (__sanitizer::atomic_load(&XRayInitialized,
+ __sanitizer::memory_order_acquire)) {
+
+ __sanitizer::atomic_store(&__xray::XRayPatchedFunction,
+ reinterpret_cast<uintptr_t>(entry),
+ __sanitizer::memory_order_release);
+ return 1;
}
+ return 0;
+}
- return XRayPatchingStatus::SUCCESS;
+int __xray_set_customevent_handler(void (*entry)(void *, size_t))
+ XRAY_NEVER_INSTRUMENT {
+ if (__sanitizer::atomic_load(&XRayInitialized,
+ __sanitizer::memory_order_acquire)) {
+ __sanitizer::atomic_store(&__xray::XRayPatchedCustomEvent,
+ reinterpret_cast<uintptr_t>(entry),
+ __sanitizer::memory_order_release);
+ return 1;
+ }
+ return 0;
+}
+
+int __xray_remove_handler() XRAY_NEVER_INSTRUMENT {
+ return __xray_set_handler(nullptr);
+}
+
+int __xray_remove_customevent_handler() XRAY_NEVER_INSTRUMENT {
+ return __xray_set_customevent_handler(nullptr);
+}
+
+XRayPatchingStatus __xray_patch() XRAY_NEVER_INSTRUMENT {
+ return controlPatching(true);
+}
+
+XRayPatchingStatus __xray_unpatch() XRAY_NEVER_INSTRUMENT {
+ return controlPatching(false);
}
XRayPatchingStatus __xray_patch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
- return patchFunction(FuncId, true);
+ return mprotectAndPatchFunction(FuncId, true);
}
XRayPatchingStatus
__xray_unpatch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
- return patchFunction(FuncId, false);
+ return mprotectAndPatchFunction(FuncId, false);
}
int __xray_set_handler_arg1(void (*entry)(int32_t, XRayEntryType, uint64_t)) {
@@ -331,7 +385,7 @@ uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT {
__sanitizer::SpinMutexLock Guard(&XRayInstrMapMutex);
if (FuncId <= 0 || static_cast<size_t>(FuncId) > XRayInstrMap.Functions)
return 0;
- return XRayInstrMap.SledsIndex[FuncId - 1].Begin->Address
+ return XRayInstrMap.SledsIndex[FuncId - 1].Begin->Function
// On PPC, function entries are always aligned to 16 bytes. The beginning of a
// sled might be a local entry, which is always +8 based on the global entry.
// Always return the global entry.
diff --git a/lib/xray/xray_interface_internal.h b/lib/xray/xray_interface_internal.h
index 4a2784612fcbb..5811e2b7300a6 100644
--- a/lib/xray/xray_interface_internal.h
+++ b/lib/xray/xray_interface_internal.h
@@ -28,13 +28,15 @@ struct XRaySledEntry {
uint64_t Function;
unsigned char Kind;
unsigned char AlwaysInstrument;
- unsigned char Padding[14]; // Need 32 bytes
+ unsigned char Version;
+ unsigned char Padding[13]; // Need 32 bytes
#elif SANITIZER_WORDSIZE == 32
uint32_t Address;
uint32_t Function;
unsigned char Kind;
unsigned char AlwaysInstrument;
- unsigned char Padding[6]; // Need 16 bytes
+ unsigned char Version;
+ unsigned char Padding[5]; // Need 16 bytes
#else
#error "Unsupported word size."
#endif
diff --git a/lib/xray/xray_log_interface.cc b/lib/xray/xray_log_interface.cc
index ee14ae4b1b624..783f004d292aa 100644
--- a/lib/xray/xray_log_interface.cc
+++ b/lib/xray/xray_log_interface.cc
@@ -12,35 +12,84 @@
//===----------------------------------------------------------------------===//
#include "xray/xray_log_interface.h"
+#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_mutex.h"
#include "xray/xray_interface.h"
#include "xray_defs.h"
-#include <memory>
-
__sanitizer::SpinMutex XRayImplMutex;
-std::unique_ptr<XRayLogImpl> GlobalXRayImpl;
+XRayLogImpl CurrentXRayImpl{nullptr, nullptr, nullptr, nullptr};
+XRayLogImpl *GlobalXRayImpl = nullptr;
+
+// We use a linked list of Mode to XRayLogImpl mappings. This is a linked list
+// when it should be a map because we're avoiding having to depend on C++
+// standard library data structures at this level of the implementation.
+struct ModeImpl {
+ ModeImpl *Next;
+ const char *Mode;
+ XRayLogImpl Impl;
+};
+
+ModeImpl SentinelModeImpl{
+ nullptr, nullptr, {nullptr, nullptr, nullptr, nullptr}};
+ModeImpl *ModeImpls = &SentinelModeImpl;
+
+XRayLogRegisterStatus
+__xray_log_register_mode(const char *Mode,
+ XRayLogImpl Impl) XRAY_NEVER_INSTRUMENT {
+ if (Impl.flush_log == nullptr || Impl.handle_arg0 == nullptr ||
+ Impl.log_finalize == nullptr || Impl.log_init == nullptr)
+ return XRayLogRegisterStatus::XRAY_INCOMPLETE_IMPL;
+
+ __sanitizer::SpinMutexLock Guard(&XRayImplMutex);
+ // First, look for whether the mode already has a registered implementation.
+ for (ModeImpl *it = ModeImpls; it != &SentinelModeImpl; it = it->Next) {
+ if (!__sanitizer::internal_strcmp(Mode, it->Mode))
+ return XRayLogRegisterStatus::XRAY_DUPLICATE_MODE;
+ }
+ auto *NewModeImpl =
+ static_cast<ModeImpl *>(__sanitizer::InternalAlloc(sizeof(ModeImpl)));
+ NewModeImpl->Next = ModeImpls;
+ NewModeImpl->Mode = __sanitizer::internal_strdup(Mode);
+ NewModeImpl->Impl = Impl;
+ ModeImpls = NewModeImpl;
+ return XRayLogRegisterStatus::XRAY_REGISTRATION_OK;
+}
+
+XRayLogRegisterStatus
+__xray_log_select_mode(const char *Mode) XRAY_NEVER_INSTRUMENT {
+ __sanitizer::SpinMutexLock Guard(&XRayImplMutex);
+ for (ModeImpl *it = ModeImpls; it != &SentinelModeImpl; it = it->Next) {
+ if (!__sanitizer::internal_strcmp(Mode, it->Mode)) {
+ CurrentXRayImpl = it->Impl;
+ GlobalXRayImpl = &CurrentXRayImpl;
+ __xray_set_handler(it->Impl.handle_arg0);
+ return XRayLogRegisterStatus::XRAY_REGISTRATION_OK;
+ }
+ }
+ return XRayLogRegisterStatus::XRAY_MODE_NOT_FOUND;
+}
void __xray_set_log_impl(XRayLogImpl Impl) XRAY_NEVER_INSTRUMENT {
if (Impl.log_init == nullptr || Impl.log_finalize == nullptr ||
Impl.handle_arg0 == nullptr || Impl.flush_log == nullptr) {
__sanitizer::SpinMutexLock Guard(&XRayImplMutex);
- GlobalXRayImpl.reset();
+ GlobalXRayImpl = nullptr;
__xray_remove_handler();
__xray_remove_handler_arg1();
return;
}
__sanitizer::SpinMutexLock Guard(&XRayImplMutex);
- GlobalXRayImpl.reset(new XRayLogImpl);
- *GlobalXRayImpl = Impl;
+ CurrentXRayImpl = Impl;
+ GlobalXRayImpl = &CurrentXRayImpl;
__xray_set_handler(Impl.handle_arg0);
}
void __xray_remove_log_impl() XRAY_NEVER_INSTRUMENT {
__sanitizer::SpinMutexLock Guard(&XRayImplMutex);
- GlobalXRayImpl.reset();
+ GlobalXRayImpl = nullptr;
__xray_remove_handler();
__xray_remove_handler_arg1();
}
diff --git a/lib/xray/xray_trampoline_x86_64.S b/lib/xray/xray_trampoline_x86_64.S
index b59eedc4bb1b8..350afd9265fde 100644
--- a/lib/xray/xray_trampoline_x86_64.S
+++ b/lib/xray/xray_trampoline_x86_64.S
@@ -14,10 +14,13 @@
//===----------------------------------------------------------------------===//
#include "../builtins/assembly.h"
+#include "../sanitizer_common/sanitizer_asm.h"
+
+
.macro SAVE_REGISTERS
subq $192, %rsp
- .cfi_def_cfa_offset 200
+ CFI_DEF_CFA_OFFSET(200)
// At this point, the stack pointer should be aligned to an 8-byte boundary,
// because any call instructions that come after this will add another 8
// bytes and therefore align it to 16-bytes.
@@ -57,63 +60,82 @@
movq 8(%rsp), %r8
movq 0(%rsp), %r9
addq $192, %rsp
- .cfi_def_cfa_offset 8
+ CFI_DEF_CFA_OFFSET(8)
+.endm
+
+.macro ALIGNED_CALL_RAX
+ // Call the logging handler, after aligning the stack to a 16-byte boundary.
+ // The approach we're taking here uses additional stack space to stash the
+ // stack pointer twice before aligning the pointer to 16-bytes. If the stack
+ // was 8-byte aligned, it will become 16-byte aligned -- when restoring the
+ // pointer, we can always look -8 bytes from the current position to get
+ // either of the values we've stashed in the first place.
+ pushq %rsp
+ pushq (%rsp)
+ andq $-0x10, %rsp
+ callq *%rax
+ movq 8(%rsp), %rsp
.endm
.text
+#if !defined(__APPLE__)
+ .section .text
+#else
+ .section __TEXT,__text
+#endif
.file "xray_trampoline_x86.S"
//===----------------------------------------------------------------------===//
- .globl __xray_FunctionEntry
+ .globl ASM_SYMBOL(__xray_FunctionEntry)
.align 16, 0x90
- .type __xray_FunctionEntry,@function
-
-__xray_FunctionEntry:
- .cfi_startproc
+ ASM_TYPE_FUNCTION(__xray_FunctionEntry)
+ASM_SYMBOL(__xray_FunctionEntry):
+ CFI_STARTPROC
SAVE_REGISTERS
// This load has to be atomic, it's concurrent with __xray_patch().
// On x86/amd64, a simple (type-aligned) MOV instruction is enough.
- movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
+ movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
testq %rax, %rax
je .Ltmp0
// The patched function prolog puts its xray_instr_map index into %r10d.
movl %r10d, %edi
xor %esi,%esi
- callq *%rax
+ ALIGNED_CALL_RAX
+
.Ltmp0:
RESTORE_REGISTERS
retq
-.Ltmp1:
- .size __xray_FunctionEntry, .Ltmp1-__xray_FunctionEntry
- .cfi_endproc
+ ASM_SIZE(__xray_FunctionEntry)
+ CFI_ENDPROC
//===----------------------------------------------------------------------===//
- .globl __xray_FunctionExit
+ .globl ASM_SYMBOL(__xray_FunctionExit)
.align 16, 0x90
- .type __xray_FunctionExit,@function
-__xray_FunctionExit:
- .cfi_startproc
+ ASM_TYPE_FUNCTION(__xray_FunctionExit)
+ASM_SYMBOL(__xray_FunctionExit):
+ CFI_STARTPROC
// Save the important registers first. Since we're assuming that this
// function is only jumped into, we only preserve the registers for
// returning.
subq $56, %rsp
- .cfi_def_cfa_offset 64
+ CFI_DEF_CFA_OFFSET(64)
movq %rbp, 48(%rsp)
movupd %xmm0, 32(%rsp)
movupd %xmm1, 16(%rsp)
movq %rax, 8(%rsp)
movq %rdx, 0(%rsp)
- movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
+ movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
testq %rax,%rax
je .Ltmp2
movl %r10d, %edi
movl $1, %esi
- callq *%rax
+ ALIGNED_CALL_RAX
+
.Ltmp2:
// Restore the important registers.
movq 48(%rsp), %rbp
@@ -122,111 +144,94 @@ __xray_FunctionExit:
movq 8(%rsp), %rax
movq 0(%rsp), %rdx
addq $56, %rsp
- .cfi_def_cfa_offset 8
+ CFI_DEF_CFA_OFFSET(8)
retq
-.Ltmp3:
- .size __xray_FunctionExit, .Ltmp3-__xray_FunctionExit
- .cfi_endproc
+ ASM_SIZE(__xray_FunctionExit)
+ CFI_ENDPROC
//===----------------------------------------------------------------------===//
- .global __xray_FunctionTailExit
+ .globl ASM_SYMBOL(__xray_FunctionTailExit)
.align 16, 0x90
- .type __xray_FunctionTailExit,@function
-__xray_FunctionTailExit:
- .cfi_startproc
- // Save the important registers as in the entry trampoline, but indicate that
- // this is an exit. In the future, we will introduce a new entry type that
- // differentiates between a normal exit and a tail exit, but we'd have to do
- // this and increment the version number for the header.
+ ASM_TYPE_FUNCTION(__xray_FunctionTailExit)
+ASM_SYMBOL(__xray_FunctionTailExit):
+ CFI_STARTPROC
SAVE_REGISTERS
- movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
+ movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
testq %rax,%rax
je .Ltmp4
movl %r10d, %edi
- movl $1, %esi
- callq *%rax
+ movl $2, %esi
+
+ ALIGNED_CALL_RAX
.Ltmp4:
RESTORE_REGISTERS
retq
-.Ltmp5:
- .size __xray_FunctionTailExit, .Ltmp5-__xray_FunctionTailExit
- .cfi_endproc
+ ASM_SIZE(__xray_FunctionTailExit)
+ CFI_ENDPROC
//===----------------------------------------------------------------------===//
- .globl __xray_ArgLoggerEntry
+ .globl ASM_SYMBOL(__xray_ArgLoggerEntry)
.align 16, 0x90
- .type __xray_ArgLoggerEntry,@function
-__xray_ArgLoggerEntry:
- .cfi_startproc
+ ASM_TYPE_FUNCTION(__xray_ArgLoggerEntry)
+ASM_SYMBOL(__xray_ArgLoggerEntry):
+ CFI_STARTPROC
SAVE_REGISTERS
// Again, these function pointer loads must be atomic; MOV is fine.
- movq _ZN6__xray13XRayArgLoggerE(%rip), %rax
+ movq ASM_SYMBOL(_ZN6__xray13XRayArgLoggerE)(%rip), %rax
testq %rax, %rax
jne .Larg1entryLog
// If [arg1 logging handler] not set, defer to no-arg logging.
- movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
+ movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
testq %rax, %rax
je .Larg1entryFail
.Larg1entryLog:
- // First argument will become the third
+ // First argument will become the third
movq %rdi, %rdx
- // XRayEntryType::ENTRY into the second
- xorq %rsi, %rsi
+ // XRayEntryType::LOG_ARGS_ENTRY into the second
+ mov $0x3, %esi
// 32-bit function ID becomes the first
movl %r10d, %edi
- callq *%rax
+ ALIGNED_CALL_RAX
.Larg1entryFail:
RESTORE_REGISTERS
retq
-
-.Larg1entryEnd:
- .size __xray_ArgLoggerEntry, .Larg1entryEnd-__xray_ArgLoggerEntry
- .cfi_endproc
+ ASM_SIZE(__xray_ArgLoggerEntry)
+ CFI_ENDPROC
//===----------------------------------------------------------------------===//
- .global __xray_CustomEvent
+ .global ASM_SYMBOL(__xray_CustomEvent)
.align 16, 0x90
- .type __xray_CustomEvent,@function
-__xray_CustomEvent:
- .cfi_startproc
- subq $16, %rsp
- .cfi_def_cfa_offset 24
- movq %rbp, 8(%rsp)
- movq %rax, 0(%rsp)
+ ASM_TYPE_FUNCTION(__xray_CustomEvent)
+ASM_SYMBOL(__xray_CustomEvent):
+ CFI_STARTPROC
+ SAVE_REGISTERS
// We take two arguments to this trampoline, which should be in rdi and rsi
// already. We also make sure that we stash %rax because we use that register
// to call the logging handler.
- movq _ZN6__xray22XRayPatchedCustomEventE(%rip), %rax
+ movq ASM_SYMBOL(_ZN6__xray22XRayPatchedCustomEventE)(%rip), %rax
testq %rax,%rax
je .LcustomEventCleanup
- // At this point we know that rcx and rdx already has the data, so we just
- // call the logging handler.
- callq *%rax
+ ALIGNED_CALL_RAX
.LcustomEventCleanup:
- movq 0(%rsp), %rax
- movq 8(%rsp), %rbp
- addq $16, %rsp
- .cfi_def_cfa_offset 8
+ RESTORE_REGISTERS
retq
-
-.Ltmp8:
- .size __xray_CustomEvent, .Ltmp8-__xray_CustomEvent
- .cfi_endproc
+ ASM_SIZE(__xray_CustomEvent)
+ CFI_ENDPROC
NO_EXEC_STACK_DIRECTIVE
diff --git a/lib/xray/xray_utils.cc b/lib/xray/xray_utils.cc
index b9a38d1b98eb4..cf800d3aeaf88 100644
--- a/lib/xray/xray_utils.cc
+++ b/lib/xray/xray_utils.cc
@@ -117,7 +117,8 @@ int getLogFD() XRAY_NEVER_INSTRUMENT {
TmpFilename);
return -1;
}
- Report("XRay: Log file in '%s'\n", TmpFilename);
+ if (__sanitizer::Verbosity())
+ Report("XRay: Log file in '%s'\n", TmpFilename);
return Fd;
}
diff --git a/lib/xray/xray_x86_64.cc b/lib/xray/xray_x86_64.cc
index e34806fa1cea2..e17f00ac3a62a 100644
--- a/lib/xray/xray_x86_64.cc
+++ b/lib/xray/xray_x86_64.cc
@@ -76,6 +76,7 @@ static constexpr uint8_t CallOpCode = 0xe8;
static constexpr uint16_t MovR10Seq = 0xba41;
static constexpr uint16_t Jmp9Seq = 0x09eb;
static constexpr uint16_t Jmp20Seq = 0x14eb;
+static constexpr uint16_t Jmp15Seq = 0x0feb;
static constexpr uint8_t JmpOpCode = 0xe9;
static constexpr uint8_t RetOpCode = 0xc3;
static constexpr uint16_t NopwSeq = 0x9066;
@@ -207,8 +208,10 @@ bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
// Here we do the dance of replacing the following sled:
//
+ // In Version 0:
+ //
// xray_sled_n:
- // jmp +19 // 2 bytes
+ // jmp +20 // 2 bytes
// ...
//
// With the following:
@@ -216,24 +219,35 @@ bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
// nopw // 2 bytes*
// ...
//
- // We need to do this in the following order:
//
- // 1. Overwrite the 5-byte nop with the call (relative), where (relative) is
- // the relative offset to the __xray_CustomEvent trampoline.
- // 2. Do a two-byte atomic write over the 'jmp +24' to turn it into a 'nopw'.
- // This allows us to "enable" this code once the changes have committed.
+ // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
+ //
+ // ---
//
- // The "unpatch" should just turn the 'nopw' back to a 'jmp +24'.
+ // In Version 1:
+ //
+ // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
+ // to a jmp, use 15 bytes instead.
//
if (Enable) {
std::atomic_store_explicit(
reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
std::memory_order_release);
} else {
- std::atomic_store_explicit(
- reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
- std::memory_order_release);
- }
+ switch (Sled.Version) {
+ case 1:
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp15Seq,
+ std::memory_order_release);
+ break;
+ case 0:
+ default:
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
+ std::memory_order_release);
+ break;
+ }
+ }
return false;
}
@@ -244,9 +258,9 @@ bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
// We check whether rdtscp support is enabled. According to the x86_64 manual,
// level should be set at 0x80000001, and we should have a look at bit 27 in
- // EDX. That's 0x8000000 (or 1u << 26).
+ // EDX. That's 0x8000000 (or 1u << 27).
__get_cpuid(0x80000001, &EAX, &EBX, &ECX, &EDX);
- if (!(EDX & (1u << 26))) {
+ if (!(EDX & (1u << 27))) {
Report("Missing rdtscp support.\n");
return false;
}