summaryrefslogtreecommitdiff
path: root/lib/xray
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-01-02 19:18:27 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-01-02 19:18:27 +0000
commit316d58822dada9440bd06ecfc758dcc2364d617c (patch)
treefe72ec2e6ce9a360dda74d9d57f7acdb0e3c39d6 /lib/xray
parent0230fcf22fe7d19f03d981c9c2c59a3db0b72ea5 (diff)
Notes
Diffstat (limited to 'lib/xray')
-rw-r--r--lib/xray/CMakeLists.txt79
-rw-r--r--lib/xray/tests/CMakeLists.txt59
-rw-r--r--lib/xray/tests/unit/CMakeLists.txt2
-rw-r--r--lib/xray/tests/unit/buffer_queue_test.cc81
-rw-r--r--lib/xray/tests/unit/xray_unit_test_main.cc18
-rw-r--r--lib/xray/xray_AArch64.cc119
-rw-r--r--lib/xray/xray_arm.cc156
-rw-r--r--lib/xray/xray_buffer_queue.cc65
-rw-r--r--lib/xray/xray_buffer_queue.h86
-rw-r--r--lib/xray/xray_defs.h22
-rw-r--r--lib/xray/xray_emulate_tsc.h40
-rw-r--r--lib/xray/xray_flags.cc62
-rw-r--r--lib/xray/xray_flags.h37
-rw-r--r--lib/xray/xray_flags.inc22
-rw-r--r--lib/xray/xray_init.cc70
-rw-r--r--lib/xray/xray_inmemory_log.cc185
-rw-r--r--lib/xray/xray_interface.cc207
-rw-r--r--lib/xray/xray_interface_internal.h69
-rw-r--r--lib/xray/xray_trampoline_AArch64.S89
-rw-r--r--lib/xray/xray_trampoline_arm.S65
-rw-r--r--lib/xray/xray_trampoline_x86_64.S147
-rw-r--r--lib/xray/xray_x86_64.cc202
-rw-r--r--lib/xray/xray_x86_64.h32
23 files changed, 1914 insertions, 0 deletions
diff --git a/lib/xray/CMakeLists.txt b/lib/xray/CMakeLists.txt
new file mode 100644
index 0000000000000..9c7cf6ce361c8
--- /dev/null
+++ b/lib/xray/CMakeLists.txt
@@ -0,0 +1,79 @@
+# Build for the XRay runtime support library.
+
+# Core XRay runtime library implementation files.
+set(XRAY_SOURCES
+ xray_init.cc
+ xray_interface.cc
+ xray_flags.cc
+ xray_inmemory_log.cc)
+
+# XRay flight data recorder (FDR) implementation files.
+set(XRAY_FDR_SOURCES
+ xray_buffer_queue.cc)
+
+set(x86_64_SOURCES
+ xray_x86_64.cc
+ xray_trampoline_x86_64.S
+ ${XRAY_SOURCES})
+
+set(arm_SOURCES
+ xray_arm.cc
+ xray_trampoline_arm.S
+ ${XRAY_SOURCES})
+
+set(armhf_SOURCES ${arm_SOURCES})
+
+set(aarch64_SOURCES
+ xray_AArch64.cc
+ xray_trampoline_AArch64.S
+ ${XRAY_SOURCES})
+
+include_directories(..)
+include_directories(../../include)
+
+set(XRAY_CFLAGS ${SANITIZER_COMMON_CFLAGS})
+set(XRAY_COMMON_DEFINITIONS XRAY_HAS_EXCEPTIONS=1)
+append_list_if(
+ COMPILER_RT_HAS_XRAY_COMPILER_FLAG XRAY_SUPPORTED=1 XRAY_COMMON_DEFINITIONS)
+
+add_compiler_rt_object_libraries(RTXray
+ ARCHS ${XRAY_SUPPORTED_ARCH}
+ SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_CFLAGS}
+ DEFS ${XRAY_COMMON_DEFINITIONS})
+
+add_compiler_rt_object_libraries(RTXrayFDR
+ ARCHS ${XRAY_SUPPORTED_ARCH}
+ SOURCES ${XRAY_FDR_SOURCES} CFLAGS ${XRAY_CFLAGS}
+ DEFS ${XRAY_COMMON_DEFINITIONS})
+
+add_compiler_rt_component(xray)
+add_compiler_rt_component(xray-fdr)
+
+set(XRAY_COMMON_RUNTIME_OBJECT_LIBS
+ RTSanitizerCommon
+ RTSanitizerCommonLibc)
+
+foreach(arch ${XRAY_SUPPORTED_ARCH})
+ if(CAN_TARGET_${arch})
+ add_compiler_rt_runtime(clang_rt.xray
+ STATIC
+ ARCHS ${arch}
+ SOURCES ${${arch}_SOURCES}
+ CFLAGS ${XRAY_CFLAGS}
+ DEFS ${XRAY_COMMON_DEFINITIONS}
+ OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS}
+ PARENT_TARGET xray)
+ add_compiler_rt_runtime(clang_rt.xray-fdr
+ STATIC
+ ARCHS ${arch}
+ SOURCES ${XRAY_FDR_SOURCES}
+ CFLAGS ${XRAY_CFLAGS}
+ DEFS ${XRAY_COMMON_DEFINITIONS}
+ OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS}
+ PARENT_TARGET xray-fdr)
+ endif()
+endforeach()
+
+if(COMPILER_RT_INCLUDE_TESTS)
+ add_subdirectory(tests)
+endif()
diff --git a/lib/xray/tests/CMakeLists.txt b/lib/xray/tests/CMakeLists.txt
new file mode 100644
index 0000000000000..6cb17934c8952
--- /dev/null
+++ b/lib/xray/tests/CMakeLists.txt
@@ -0,0 +1,59 @@
+include_directories(..)
+
+add_custom_target(XRayUnitTests)
+set_target_properties(XRayUnitTests PROPERTIES FOLDER "XRay unittests")
+
+set(XRAY_UNITTEST_CFLAGS
+ ${XRAY_CFLAGS}
+ ${COMPILER_RT_UNITTEST_CFLAGS}
+ ${COMPILER_RT_GTEST_CFLAGS}
+ -I${COMPILER_RT_SOURCE_DIR}/include
+ -I${COMPILER_RT_SOURCE_DIR}/lib/xray)
+
+macro(xray_compile obj_list source arch)
+ get_filename_component(basename ${source} NAME)
+ set(output_obj "${basename}.${arch}.o")
+ get_target_flags_for_arch(${arch} TARGET_CFLAGS)
+ if(NOT COMPILER_RT_STANDALONE_BUILD)
+ list(APPEND COMPILE_DEPS gtest_main xray-fdr)
+ endif()
+ clang_compile(${output_obj} ${source}
+ CFLAGS ${XRAY_UNITTEST_CFLAGS} ${TARGET_CFLAGS}
+ DEPS ${COMPILE_DEPS})
+ list(APPEND ${obj_list} ${output_obj})
+endmacro()
+
+macro(add_xray_unittest testname)
+ set(XRAY_TEST_ARCH ${XRAY_SUPPORTED_ARCH})
+ if (APPLE)
+ darwin_filter_host_archs(XRAY_SUPPORTED_ARCH)
+ endif()
+ if(UNIX)
+ foreach(arch ${XRAY_TEST_ARCH})
+ cmake_parse_arguments(TEST "" "" "SOURCES;HEADERS" ${ARGN})
+ set(TEST_OBJECTS)
+ foreach(SOURCE ${TEST_SOURCES} ${COMPILER_RT_GTEST_SOURCE})
+ xray_compile(TEST_OBJECTS ${SOURCE} ${arch} ${TEST_HEADERS})
+ endforeach()
+ get_target_flags_for_arch(${arch} TARGET_LINK_FLAGS)
+ set(TEST_DEPS ${TEST_OBJECTS})
+ if(NOT COMPILER_RT_STANDALONE_BUILD)
+ list(APPEND TEST_DEPS gtest_main xray-fdr)
+ endif()
+ if(NOT APPLE)
+ add_compiler_rt_test(XRayUnitTests ${testname}
+ OBJECTS ${TEST_OBJECTS}
+ DEPS ${TEST_DEPS}
+ LINK_FLAGS ${TARGET_LINK_FLAGS}
+ -lstdc++ -lm ${CMAKE_THREAD_LIBS_INIT}
+ -lpthread
+ -L${COMPILER_RT_LIBRARY_OUTPUT_DIR} -lclang_rt.xray-fdr-${arch})
+ endif()
+ # FIXME: Figure out how to run even just the unit tests on APPLE.
+ endforeach()
+ endif()
+endmacro()
+
+if(COMPILER_RT_CAN_EXECUTE_TESTS)
+ add_subdirectory(unit)
+endif()
diff --git a/lib/xray/tests/unit/CMakeLists.txt b/lib/xray/tests/unit/CMakeLists.txt
new file mode 100644
index 0000000000000..3e5412d41e6e3
--- /dev/null
+++ b/lib/xray/tests/unit/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_xray_unittest(XRayBufferQueueTest SOURCES
+ buffer_queue_test.cc xray_unit_test_main.cc)
diff --git a/lib/xray/tests/unit/buffer_queue_test.cc b/lib/xray/tests/unit/buffer_queue_test.cc
new file mode 100644
index 0000000000000..d46f19402c2a1
--- /dev/null
+++ b/lib/xray/tests/unit/buffer_queue_test.cc
@@ -0,0 +1,81 @@
+//===-- buffer_queue_test.cc ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#include "xray_buffer_queue.h"
+#include "gtest/gtest.h"
+
+#include <future>
+#include <system_error>
+#include <unistd.h>
+
+namespace __xray {
+
+static constexpr size_t kSize = 4096;
+
+TEST(BufferQueueTest, API) { BufferQueue Buffers(kSize, 1); }
+
+TEST(BufferQueueTest, GetAndRelease) {
+ BufferQueue Buffers(kSize, 1);
+ BufferQueue::Buffer Buf;
+ ASSERT_EQ(Buffers.getBuffer(Buf), std::error_code());
+ ASSERT_NE(nullptr, Buf.Buffer);
+ ASSERT_EQ(Buffers.releaseBuffer(Buf), std::error_code());
+ ASSERT_EQ(nullptr, Buf.Buffer);
+}
+
+TEST(BufferQueueTest, GetUntilFailed) {
+ BufferQueue Buffers(kSize, 1);
+ BufferQueue::Buffer Buf0;
+ EXPECT_EQ(Buffers.getBuffer(Buf0), std::error_code());
+ BufferQueue::Buffer Buf1;
+ EXPECT_EQ(std::errc::not_enough_memory, Buffers.getBuffer(Buf1));
+ EXPECT_EQ(Buffers.releaseBuffer(Buf0), std::error_code());
+}
+
+TEST(BufferQueueTest, ReleaseUnknown) {
+ BufferQueue Buffers(kSize, 1);
+ BufferQueue::Buffer Buf;
+ Buf.Buffer = reinterpret_cast<void *>(0xdeadbeef);
+ Buf.Size = kSize;
+ EXPECT_EQ(std::errc::argument_out_of_domain, Buffers.releaseBuffer(Buf));
+}
+
+TEST(BufferQueueTest, ErrorsWhenFinalising) {
+ BufferQueue Buffers(kSize, 2);
+ BufferQueue::Buffer Buf;
+ ASSERT_EQ(Buffers.getBuffer(Buf), std::error_code());
+ ASSERT_NE(nullptr, Buf.Buffer);
+ ASSERT_EQ(Buffers.finalize(), std::error_code());
+ BufferQueue::Buffer OtherBuf;
+ ASSERT_EQ(std::errc::state_not_recoverable, Buffers.getBuffer(OtherBuf));
+ ASSERT_EQ(std::errc::state_not_recoverable, Buffers.finalize());
+ ASSERT_EQ(Buffers.releaseBuffer(Buf), std::error_code());
+}
+
+TEST(BufferQueueTest, MultiThreaded) {
+ BufferQueue Buffers(kSize, 100);
+ auto F = [&] {
+ BufferQueue::Buffer B;
+ while (!Buffers.getBuffer(B)) {
+ Buffers.releaseBuffer(B);
+ }
+ };
+ auto T0 = std::async(std::launch::async, F);
+ auto T1 = std::async(std::launch::async, F);
+ auto T2 = std::async(std::launch::async, [&] {
+ while (!Buffers.finalize())
+ ;
+ });
+ F();
+}
+
+} // namespace __xray
diff --git a/lib/xray/tests/unit/xray_unit_test_main.cc b/lib/xray/tests/unit/xray_unit_test_main.cc
new file mode 100644
index 0000000000000..27d17527dd5bc
--- /dev/null
+++ b/lib/xray/tests/unit/xray_unit_test_main.cc
@@ -0,0 +1,18 @@
+//===-- xray_unit_test_main.cc --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#include "gtest/gtest.h"
+
+int main(int argc, char **argv) {
+ testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}
diff --git a/lib/xray/xray_AArch64.cc b/lib/xray/xray_AArch64.cc
new file mode 100644
index 0000000000000..8f24610dab275
--- /dev/null
+++ b/lib/xray/xray_AArch64.cc
@@ -0,0 +1,119 @@
+//===-- xray_AArch64.cc -----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of AArch64-specific routines (64-bit).
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_emulate_tsc.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+#include <cassert>
+
+namespace __xray {
+
+uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
+ // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
+ // not have a constant frequency like TSC on x86[_64]; it may go faster or
+ // slower depending on CPU's turbo or power saving modes. Furthermore, to
+ // read from CP15 on ARM a kernel modification or a driver is needed.
+ // We can not require this from users of compiler-rt.
+ // So on ARM we use clock_gettime(2) which gives the result in nanoseconds.
+ // To get the measurements per second, we scale this by the number of
+ // nanoseconds per second, pretending that the TSC frequency is 1GHz and
+ // one TSC tick is 1 nanosecond.
+ return NanosecondsPerSecond;
+}
+
+// The machine codes for some instructions used in runtime patching.
+enum class PatchOpcodes : uint32_t {
+ PO_StpX0X30SP_m16e = 0xA9BF7BE0, // STP X0, X30, [SP, #-16]!
+ PO_LdrW0_12 = 0x18000060, // LDR W0, #12
+ PO_LdrX16_12 = 0x58000070, // LDR X16, #12
+ PO_BlrX16 = 0xD63F0200, // BLR X16
+ PO_LdpX0X30SP_16 = 0xA8C17BE0, // LDP X0, X30, [SP], #16
+ PO_B32 = 0x14000008 // B #32
+};
+
+inline static bool patchSled(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
+ // When |Enable| == true,
+ // We replace the following compile-time stub (sled):
+ //
+ // xray_sled_n:
+ // B #32
+ // 7 NOPs (24 bytes)
+ //
+ // With the following runtime patch:
+ //
+ // xray_sled_n:
+ // STP X0, X30, [SP, #-16]! ; PUSH {r0, lr}
+ // LDR W0, #12 ; W0 := function ID
+ // LDR X16,#12 ; X16 := address of the trampoline
+ // BLR X16
+ // ;DATA: 32 bits of function ID
+ // ;DATA: lower 32 bits of the address of the trampoline
+ // ;DATA: higher 32 bits of the address of the trampoline
+ // LDP X0, X30, [SP], #16 ; POP {r0, lr}
+ //
+ // Replacement of the first 4-byte instruction should be the last and atomic
+ // operation, so that the user code which reaches the sled concurrently
+ // either jumps over the whole sled, or executes the whole sled when the
+ // latter is ready.
+ //
+ // When |Enable|==false, we set back the first instruction in the sled to be
+ // B #32
+
+ uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address);
+ if (Enable) {
+ uint32_t *CurAddress = FirstAddress + 1;
+ *CurAddress = uint32_t(PatchOpcodes::PO_LdrW0_12);
+ CurAddress++;
+ *CurAddress = uint32_t(PatchOpcodes::PO_LdrX16_12);
+ CurAddress++;
+ *CurAddress = uint32_t(PatchOpcodes::PO_BlrX16);
+ CurAddress++;
+ *CurAddress = FuncId;
+ CurAddress++;
+ *reinterpret_cast<void (**)()>(CurAddress) = TracingHook;
+ CurAddress += 2;
+ *CurAddress = uint32_t(PatchOpcodes::PO_LdpX0X30SP_16);
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
+ uint32_t(PatchOpcodes::PO_StpX0X30SP_m16e), std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
+ uint32_t(PatchOpcodes::PO_B32), std::memory_order_release);
+ }
+ return true;
+}
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry);
+}
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: In the future we'd need to distinguish between non-tail exits and
+ // tail exits for better information preservation.
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+} // namespace __xray
diff --git a/lib/xray/xray_arm.cc b/lib/xray/xray_arm.cc
new file mode 100644
index 0000000000000..d89322e833e58
--- /dev/null
+++ b/lib/xray/xray_arm.cc
@@ -0,0 +1,156 @@
+//===-- xray_arm.cc ---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of ARM-specific routines (32-bit).
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_emulate_tsc.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+#include <cassert>
+
+namespace __xray {
+
+uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
+ // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
+ // not have a constant frequency like TSC on x86[_64]; it may go faster or
+ // slower depending on CPU's turbo or power saving modes. Furthermore, to
+ // read from CP15 on ARM a kernel modification or a driver is needed.
+ // We can not require this from users of compiler-rt.
+ // So on ARM we use clock_gettime(2) which gives the result in nanoseconds.
+ // To get the measurements per second, we scale this by the number of
+ // nanoseconds per second, pretending that the TSC frequency is 1GHz and
+ // one TSC tick is 1 nanosecond.
+ return NanosecondsPerSecond;
+}
+
+// The machine codes for some instructions used in runtime patching.
+enum class PatchOpcodes : uint32_t {
+ PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr}
+ PO_BlxIp = 0xE12FFF3C, // BLX ip
+ PO_PopR0Lr = 0xE8BD4001, // POP {r0, lr}
+ PO_B20 = 0xEA000005 // B #20
+};
+
+// 0xUUUUWXYZ -> 0x000W0XYZ
+inline static uint32_t getMovwMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT {
+ return (Value & 0xfff) | ((Value & 0xf000) << 4);
+}
+
+// 0xWXYZUUUU -> 0x000W0XYZ
+inline static uint32_t getMovtMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT {
+ return getMovwMask(Value >> 16);
+}
+
+// Writes the following instructions:
+// MOVW R<regNo>, #<lower 16 bits of the |Value|>
+// MOVT R<regNo>, #<higher 16 bits of the |Value|>
+inline static uint32_t *
+write32bitLoadReg(uint8_t regNo, uint32_t *Address,
+ const uint32_t Value) XRAY_NEVER_INSTRUMENT {
+ // This is a fatal error: we cannot just report it and continue execution.
+ assert(regNo <= 15 && "Register number must be 0 to 15.");
+ // MOVW R, #0xWXYZ in machine code is 0xE30WRXYZ
+ *Address = (0xE3000000 | (uint32_t(regNo) << 12) | getMovwMask(Value));
+ Address++;
+ // MOVT R, #0xWXYZ in machine code is 0xE34WRXYZ
+ *Address = (0xE3400000 | (uint32_t(regNo) << 12) | getMovtMask(Value));
+ return Address + 1;
+}
+
+// Writes the following instructions:
+// MOVW r0, #<lower 16 bits of the |Value|>
+// MOVT r0, #<higher 16 bits of the |Value|>
+inline static uint32_t *
+Write32bitLoadR0(uint32_t *Address,
+ const uint32_t Value) XRAY_NEVER_INSTRUMENT {
+ return write32bitLoadReg(0, Address, Value);
+}
+
+// Writes the following instructions:
+// MOVW ip, #<lower 16 bits of the |Value|>
+// MOVT ip, #<higher 16 bits of the |Value|>
+inline static uint32_t *
+Write32bitLoadIP(uint32_t *Address,
+ const uint32_t Value) XRAY_NEVER_INSTRUMENT {
+ return write32bitLoadReg(12, Address, Value);
+}
+
+inline static bool patchSled(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
+ // When |Enable| == true,
+ // We replace the following compile-time stub (sled):
+ //
+ // xray_sled_n:
+ // B #20
+ // 6 NOPs (24 bytes)
+ //
+ // With the following runtime patch:
+ //
+ // xray_sled_n:
+ // PUSH {r0, lr}
+ // MOVW r0, #<lower 16 bits of function ID>
+ // MOVT r0, #<higher 16 bits of function ID>
+ // MOVW ip, #<lower 16 bits of address of TracingHook>
+ // MOVT ip, #<higher 16 bits of address of TracingHook>
+ // BLX ip
+ // POP {r0, lr}
+ //
+ // Replacement of the first 4-byte instruction should be the last and atomic
+ // operation, so that the user code which reaches the sled concurrently
+ // either jumps over the whole sled, or executes the whole sled when the
+ // latter is ready.
+ //
+ // When |Enable|==false, we set back the first instruction in the sled to be
+ // B #20
+
+ uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address);
+ if (Enable) {
+ uint32_t *CurAddress = FirstAddress + 1;
+ CurAddress =
+ Write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId));
+ CurAddress =
+ Write32bitLoadIP(CurAddress, reinterpret_cast<uint32_t>(TracingHook));
+ *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp);
+ CurAddress++;
+ *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr);
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
+ uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
+ uint32_t(PatchOpcodes::PO_B20), std::memory_order_release);
+ }
+ return true;
+}
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry);
+}
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: In the future we'd need to distinguish between non-tail exits and
+ // tail exits for better information preservation.
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+} // namespace __xray
diff --git a/lib/xray/xray_buffer_queue.cc b/lib/xray/xray_buffer_queue.cc
new file mode 100644
index 0000000000000..7e5462fb8e113
--- /dev/null
+++ b/lib/xray/xray_buffer_queue.cc
@@ -0,0 +1,65 @@
+//===-- xray_buffer_queue.cc -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instruementation system.
+//
+// Defines the interface for a buffer queue implementation.
+//
+//===----------------------------------------------------------------------===//
+#include "xray_buffer_queue.h"
+#include <cassert>
+#include <cstdlib>
+
+using namespace __xray;
+
+BufferQueue::BufferQueue(std::size_t B, std::size_t N)
+ : BufferSize(B), Buffers(N), Mutex(), OwnedBuffers(), Finalizing(false) {
+ for (auto &Buf : Buffers) {
+ void *Tmp = malloc(BufferSize);
+ Buf.Buffer = Tmp;
+ Buf.Size = B;
+ if (Tmp != 0)
+ OwnedBuffers.insert(Tmp);
+ }
+}
+
+std::error_code BufferQueue::getBuffer(Buffer &Buf) {
+ if (Finalizing.load(std::memory_order_acquire))
+ return std::make_error_code(std::errc::state_not_recoverable);
+ std::lock_guard<std::mutex> Guard(Mutex);
+ if (Buffers.empty())
+ return std::make_error_code(std::errc::not_enough_memory);
+ Buf = Buffers.front();
+ Buffers.pop_front();
+ return {};
+}
+
+std::error_code BufferQueue::releaseBuffer(Buffer &Buf) {
+ if (OwnedBuffers.count(Buf.Buffer) == 0)
+ return std::make_error_code(std::errc::argument_out_of_domain);
+ std::lock_guard<std::mutex> Guard(Mutex);
+ Buffers.push_back(Buf);
+ Buf.Buffer = nullptr;
+ Buf.Size = BufferSize;
+ return {};
+}
+
+std::error_code BufferQueue::finalize() {
+ if (Finalizing.exchange(true, std::memory_order_acq_rel))
+ return std::make_error_code(std::errc::state_not_recoverable);
+ return {};
+}
+
+BufferQueue::~BufferQueue() {
+ for (auto &Buf : Buffers) {
+ free(Buf.Buffer);
+ Buf.Buffer = nullptr;
+ Buf.Size = 0;
+ }
+}
diff --git a/lib/xray/xray_buffer_queue.h b/lib/xray/xray_buffer_queue.h
new file mode 100644
index 0000000000000..bf0b7af9df4d0
--- /dev/null
+++ b/lib/xray/xray_buffer_queue.h
@@ -0,0 +1,86 @@
+//===-- xray_buffer_queue.h ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Defines the interface for a buffer queue implementation.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_BUFFER_QUEUE_H
+#define XRAY_BUFFER_QUEUE_H
+
+#include <atomic>
+#include <cstdint>
+#include <deque>
+#include <mutex>
+#include <system_error>
+#include <unordered_set>
+
+namespace __xray {
+
+/// BufferQueue implements a circular queue of fixed sized buffers (much like a
+/// freelist) but is concerned mostly with making it really quick to initialise,
+/// finalise, and get/return buffers to the queue. This is one key component of
+/// the "flight data recorder" (FDR) mode to support ongoing XRay function call
+/// trace collection.
+class BufferQueue {
+public:
+ struct Buffer {
+ void *Buffer = nullptr;
+ std::size_t Size = 0;
+ };
+
+private:
+ std::size_t BufferSize;
+ std::deque<Buffer> Buffers;
+ std::mutex Mutex;
+ std::unordered_set<void *> OwnedBuffers;
+ std::atomic<bool> Finalizing;
+
+public:
+ /// Initialise a queue of size |N| with buffers of size |B|.
+ BufferQueue(std::size_t B, std::size_t N);
+
+ /// Updates |Buf| to contain the pointer to an appropriate buffer. Returns an
+ /// error in case there are no available buffers to return when we will run
+ /// over the upper bound for the total buffers.
+ ///
+ /// Requirements:
+ /// - BufferQueue is not finalising.
+ ///
+ /// Returns:
+ /// - std::errc::not_enough_memory on exceeding MaxSize.
+ /// - no error when we find a Buffer.
+ /// - std::errc::state_not_recoverable on finalising BufferQueue.
+ std::error_code getBuffer(Buffer &Buf);
+
+ /// Updates |Buf| to point to nullptr, with size 0.
+ ///
+ /// Returns:
+ /// - ...
+ std::error_code releaseBuffer(Buffer &Buf);
+
+ bool finalizing() const { return Finalizing.load(std::memory_order_acquire); }
+
+ // Sets the state of the BufferQueue to finalizing, which ensures that:
+ //
+ // - All subsequent attempts to retrieve a Buffer will fail.
+ // - All releaseBuffer operations will not fail.
+ //
+ // After a call to finalize succeeds, all subsequent calls to finalize will
+ // fail with std::errc::state_not_recoverable.
+ std::error_code finalize();
+
+ // Cleans up allocated buffers.
+ ~BufferQueue();
+};
+
+} // namespace __xray
+
+#endif // XRAY_BUFFER_QUEUE_H
diff --git a/lib/xray/xray_defs.h b/lib/xray/xray_defs.h
new file mode 100644
index 0000000000000..e5c37c0665db0
--- /dev/null
+++ b/lib/xray/xray_defs.h
@@ -0,0 +1,22 @@
+//===-- xray_defs.h ---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Common definitions useful for XRay sources.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_DEFS_H
+#define XRAY_XRAY_DEFS_H
+
+#if XRAY_SUPPORTED
+#define XRAY_NEVER_INSTRUMENT __attribute__((xray_never_instrument))
+#else
+#define XRAY_NEVER_INSTRUMENT
+#endif
+
+#endif // XRAY_XRAY_DEFS_H
diff --git a/lib/xray/xray_emulate_tsc.h b/lib/xray/xray_emulate_tsc.h
new file mode 100644
index 0000000000000..a3e8b1c92eb40
--- /dev/null
+++ b/lib/xray/xray_emulate_tsc.h
@@ -0,0 +1,40 @@
+//===-- xray_emulate_tsc.h --------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_EMULATE_TSC_H
+#define XRAY_EMULATE_TSC_H
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "xray_defs.h"
+#include <cerrno>
+#include <cstdint>
+#include <time.h>
+
+namespace __xray {
+
+static constexpr uint64_t NanosecondsPerSecond = 1000ULL * 1000 * 1000;
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+ timespec TS;
+ int result = clock_gettime(CLOCK_REALTIME, &TS);
+ if (result != 0) {
+ Report("clock_gettime(2) returned %d, errno=%d.", result, int(errno));
+ TS.tv_sec = 0;
+ TS.tv_nsec = 0;
+ }
+ CPU = 0;
+ return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
+}
+}
+
+#endif // XRAY_EMULATE_TSC_H
diff --git a/lib/xray/xray_flags.cc b/lib/xray/xray_flags.cc
new file mode 100644
index 0000000000000..338c2378b8cd7
--- /dev/null
+++ b/lib/xray/xray_flags.cc
@@ -0,0 +1,62 @@
+//===-- xray_flags.cc -------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay flag parsing logic.
+//===----------------------------------------------------------------------===//
+
+#include "xray_flags.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "xray_defs.h"
+
+using namespace __sanitizer;
+
+namespace __xray {
+
+Flags xray_flags_dont_use_directly; // use via flags().
+
+void Flags::SetDefaults() XRAY_NEVER_INSTRUMENT {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
+#include "xray_flags.inc"
+#undef XRAY_FLAG
+}
+
+static void RegisterXRayFlags(FlagParser *P, Flags *F) XRAY_NEVER_INSTRUMENT {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) \
+ RegisterFlag(P, #Name, Description, &F->Name);
+#include "xray_flags.inc"
+#undef XRAY_FLAG
+}
+
+void InitializeFlags() XRAY_NEVER_INSTRUMENT {
+ SetCommonFlagsDefaults();
+ auto *F = flags();
+ F->SetDefaults();
+
+ FlagParser XRayParser;
+ RegisterXRayFlags(&XRayParser, F);
+ RegisterCommonFlags(&XRayParser);
+
+ // Override from command line.
+ XRayParser.ParseString(GetEnv("XRAY_OPTIONS"));
+
+ InitializeCommonFlags();
+
+ if (Verbosity())
+ ReportUnrecognizedFlags();
+
+ if (common_flags()->help) {
+ XRayParser.PrintFlagDescriptions();
+ }
+}
+
+} // namespace __xray
diff --git a/lib/xray/xray_flags.h b/lib/xray/xray_flags.h
new file mode 100644
index 0000000000000..2ecf5fb9ba1d4
--- /dev/null
+++ b/lib/xray/xray_flags.h
@@ -0,0 +1,37 @@
+//===-- xray_flags.h -------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instruementation system.
+//
+// XRay runtime flags.
+//===----------------------------------------------------------------------===//
+
+#ifndef XRAY_FLAGS_H
+#define XRAY_FLAGS_H
+
+#include "sanitizer_common/sanitizer_flag_parser.h"
+
+namespace __xray {
+
+struct Flags {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) Type Name;
+#include "xray_flags.inc"
+#undef XRAY_FLAG
+
+ void SetDefaults();
+};
+
+extern Flags xray_flags_dont_use_directly;
+inline Flags *flags() { return &xray_flags_dont_use_directly; }
+
+void InitializeFlags();
+
+} // namespace __xray
+
+#endif // XRAY_FLAGS_H
diff --git a/lib/xray/xray_flags.inc b/lib/xray/xray_flags.inc
new file mode 100644
index 0000000000000..0f6ced8ead0c2
--- /dev/null
+++ b/lib/xray/xray_flags.inc
@@ -0,0 +1,22 @@
+//===-- xray_flags.inc ------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// XRay runtime flags.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_FLAG
+#error "Define XRAY_FLAG prior to including this file!"
+#endif
+
+XRAY_FLAG(bool, patch_premain, true,
+ "Whether to patch instrumentation points before main.")
+XRAY_FLAG(bool, xray_naive_log, true,
+ "Whether to install the naive log implementation.")
+XRAY_FLAG(const char *, xray_logfile_base, "xray-log.",
+ "Filename base for the xray logfile.")
diff --git a/lib/xray/xray_init.cc b/lib/xray/xray_init.cc
new file mode 100644
index 0000000000000..eb86182910cf7
--- /dev/null
+++ b/lib/xray/xray_init.cc
@@ -0,0 +1,70 @@
+//===-- xray_init.cc --------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay initialisation logic.
+//===----------------------------------------------------------------------===//
+
+#include <atomic>
+#include <fcntl.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_flags.h"
+#include "xray_interface_internal.h"
+
+extern "C" {
+void __xray_init();
+extern const XRaySledEntry __start_xray_instr_map[] __attribute__((weak));
+extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak));
+}
+
+using namespace __sanitizer;
+using namespace __xray;
+
+// When set to 'true' this means the XRay runtime has been initialised. We use
+// the weak symbols defined above (__start_xray_inst_map and
+// __stop_xray_instr_map) to initialise the instrumentation map that XRay uses
+// for runtime patching/unpatching of instrumentation points.
+//
+// FIXME: Support DSO instrumentation maps too. The current solution only works
+// for statically linked executables.
+std::atomic<bool> XRayInitialized{false};
+
+// This should always be updated before XRayInitialized is updated.
+std::atomic<__xray::XRaySledMap> XRayInstrMap{};
+
+// __xray_init() will do the actual loading of the current process' memory map
+// and then proceed to look for the .xray_instr_map section/segment.
+void __xray_init() XRAY_NEVER_INSTRUMENT {
+ InitializeFlags();
+ if (__start_xray_instr_map == nullptr) {
+ Report("XRay instrumentation map missing. Not initializing XRay.\n");
+ return;
+ }
+
+ // Now initialize the XRayInstrMap global struct with the address of the
+ // entries, reinterpreted as an array of XRaySledEntry objects. We use the
+ // virtual pointer we have from the section to provide us the correct
+ // information.
+ __xray::XRaySledMap SledMap{};
+ SledMap.Sleds = __start_xray_instr_map;
+ SledMap.Entries = __stop_xray_instr_map - __start_xray_instr_map;
+ XRayInstrMap.store(SledMap, std::memory_order_release);
+ XRayInitialized.store(true, std::memory_order_release);
+
+ if (flags()->patch_premain)
+ __xray_patch();
+}
+
+__attribute__((section(".preinit_array"),
+ used)) void (*__local_xray_preinit)(void) = __xray_init;
diff --git a/lib/xray/xray_inmemory_log.cc b/lib/xray/xray_inmemory_log.cc
new file mode 100644
index 0000000000000..7ec56f4867074
--- /dev/null
+++ b/lib/xray/xray_inmemory_log.cc
@@ -0,0 +1,185 @@
+//===-- xray_inmemory_log.cc ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of a simple in-memory log of XRay events. This defines a
+// logging function that's compatible with the XRay handler interface, and
+// routines for exporting data to files.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+#include <fcntl.h>
+#include <mutex>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <thread>
+#include <unistd.h>
+
+#if defined(__x86_64__)
+#include "xray_x86_64.h"
+#elif defined(__arm__) || defined(__aarch64__)
+#include "xray_emulate_tsc.h"
+#else
+#error "Unsupported CPU Architecture"
+#endif /* Architecture-specific inline intrinsics */
+
+#include "sanitizer_common/sanitizer_libc.h"
+#include "xray/xray_records.h"
+#include "xray_defs.h"
+#include "xray_flags.h"
+#include "xray_interface_internal.h"
+
+// __xray_InMemoryRawLog will use a thread-local aligned buffer capped to a
+// certain size (32kb by default) and use it as if it were a circular buffer for
+// events. We store simple fixed-sized entries in the log for external analysis.
+
+extern "C" {
+void __xray_InMemoryRawLog(int32_t FuncId,
+ XRayEntryType Type) XRAY_NEVER_INSTRUMENT;
+}
+
+namespace __xray {
+
+std::mutex LogMutex;
+
+static void retryingWriteAll(int Fd, char *Begin,
+ char *End) XRAY_NEVER_INSTRUMENT {
+ if (Begin == End)
+ return;
+ auto TotalBytes = std::distance(Begin, End);
+ while (auto Written = write(Fd, Begin, TotalBytes)) {
+ if (Written < 0) {
+ if (errno == EINTR)
+ continue; // Try again.
+ Report("Failed to write; errno = %d\n", errno);
+ return;
+ }
+ TotalBytes -= Written;
+ if (TotalBytes == 0)
+ break;
+ Begin += Written;
+ }
+}
+
+class ThreadExitFlusher {
+ int Fd;
+ XRayRecord *Start;
+ size_t &Offset;
+
+public:
+ explicit ThreadExitFlusher(int Fd, XRayRecord *Start,
+ size_t &Offset) XRAY_NEVER_INSTRUMENT
+ : Fd(Fd),
+ Start(Start),
+ Offset(Offset) {}
+
+ ~ThreadExitFlusher() XRAY_NEVER_INSTRUMENT {
+ std::lock_guard<std::mutex> L(LogMutex);
+ if (Fd > 0 && Start != nullptr) {
+ retryingWriteAll(Fd, reinterpret_cast<char *>(Start),
+ reinterpret_cast<char *>(Start + Offset));
+ // Because this thread's exit could be the last one trying to write to the
+ // file and that we're not able to close out the file properly, we sync
+ // instead and hope that the pending writes are flushed as the thread
+ // exits.
+ fsync(Fd);
+ }
+ }
+};
+
+} // namespace __xray
+
+using namespace __xray;
+
+void PrintToStdErr(const char *Buffer) XRAY_NEVER_INSTRUMENT {
+ fprintf(stderr, "%s", Buffer);
+}
+
+static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT {
+ // FIXME: Figure out how to make this less stderr-dependent.
+ SetPrintfAndReportCallback(PrintToStdErr);
+ // Open a temporary file once for the log.
+ static char TmpFilename[256] = {};
+ static char TmpWildcardPattern[] = "XXXXXX";
+ auto E = internal_strncat(TmpFilename, flags()->xray_logfile_base,
+ sizeof(TmpFilename) - 10);
+ if (static_cast<size_t>((E + 6) - TmpFilename) > (sizeof(TmpFilename) - 1)) {
+ Report("XRay log file base too long: %s\n", flags()->xray_logfile_base);
+ return -1;
+ }
+ internal_strncat(TmpFilename, TmpWildcardPattern,
+ sizeof(TmpWildcardPattern) - 1);
+ int Fd = mkstemp(TmpFilename);
+ if (Fd == -1) {
+ Report("XRay: Failed opening temporary file '%s'; not logging events.\n",
+ TmpFilename);
+ return -1;
+ }
+ if (Verbosity())
+ fprintf(stderr, "XRay: Log file in '%s'\n", TmpFilename);
+
+ // Since we're here, we get to write the header. We set it up so that the
+ // header will only be written once, at the start, and let the threads
+ // logging do writes which just append.
+ XRayFileHeader Header;
+ Header.Version = 1;
+ Header.Type = FileTypes::NAIVE_LOG;
+ Header.CycleFrequency = __xray::cycleFrequency();
+
+ // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
+ // before setting the values in the header.
+ Header.ConstantTSC = 1;
+ Header.NonstopTSC = 1;
+ retryingWriteAll(Fd, reinterpret_cast<char *>(&Header),
+ reinterpret_cast<char *>(&Header) + sizeof(Header));
+ return Fd;
+}
+
+void __xray_InMemoryRawLog(int32_t FuncId,
+ XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
+ using Buffer =
+ std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
+ static constexpr size_t BuffLen = 1024;
+ thread_local static Buffer InMemoryBuffer[BuffLen] = {};
+ thread_local static size_t Offset = 0;
+ static int Fd = __xray_OpenLogFile();
+ if (Fd == -1)
+ return;
+ thread_local __xray::ThreadExitFlusher Flusher(
+ Fd, reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer), Offset);
+ thread_local pid_t TId = syscall(SYS_gettid);
+
+ // First we get the useful data, and stuff it into the already aligned buffer
+ // through a pointer offset.
+ auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset];
+ R.RecordType = RecordTypes::NORMAL;
+ R.TSC = __xray::readTSC(R.CPU);
+ R.TId = TId;
+ R.Type = Type;
+ R.FuncId = FuncId;
+ ++Offset;
+ if (Offset == BuffLen) {
+ std::lock_guard<std::mutex> L(LogMutex);
+ auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer);
+ retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer),
+ reinterpret_cast<char *>(RecordBuffer + Offset));
+ Offset = 0;
+ }
+}
+
+static auto Unused = [] {
+ if (flags()->xray_naive_log)
+ __xray_set_handler(__xray_InMemoryRawLog);
+ return true;
+}();
diff --git a/lib/xray/xray_interface.cc b/lib/xray/xray_interface.cc
new file mode 100644
index 0000000000000..20a2b66c4401c
--- /dev/null
+++ b/lib/xray/xray_interface.cc
@@ -0,0 +1,207 @@
+//===-- xray_interface.cpp --------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of the API functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "xray_interface_internal.h"
+
+#include <atomic>
+#include <cstdint>
+#include <cstdio>
+#include <errno.h>
+#include <limits>
+#include <sys/mman.h>
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+
+namespace __xray {
+
+#if defined(__x86_64__)
+// FIXME: The actual length is 11 bytes. Why was length 12 passed to mprotect()
+// ?
+static const int16_t cSledLength = 12;
+#elif defined(__aarch64__)
+static const int16_t cSledLength = 32;
+#elif defined(__arm__)
+static const int16_t cSledLength = 28;
+#else
+#error "Unsupported CPU Architecture"
+#endif /* CPU architecture */
+
+// This is the function to call when we encounter the entry or exit sleds.
+std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction{nullptr};
+
+// MProtectHelper is an RAII wrapper for calls to mprotect(...) that will undo
+// any successful mprotect(...) changes. This is used to make a page writeable
+// and executable, and upon destruction if it was successful in doing so returns
+// the page into a read-only and executable page.
+//
+// This is only used specifically for runtime-patching of the XRay
+// instrumentation points. This assumes that the executable pages are originally
+// read-and-execute only.
+class MProtectHelper {
+ void *PageAlignedAddr;
+ std::size_t MProtectLen;
+ bool MustCleanup;
+
+public:
+ explicit MProtectHelper(void *PageAlignedAddr,
+ std::size_t MProtectLen) XRAY_NEVER_INSTRUMENT
+ : PageAlignedAddr(PageAlignedAddr),
+ MProtectLen(MProtectLen),
+ MustCleanup(false) {}
+
+ int MakeWriteable() XRAY_NEVER_INSTRUMENT {
+ auto R = mprotect(PageAlignedAddr, MProtectLen,
+ PROT_READ | PROT_WRITE | PROT_EXEC);
+ if (R != -1)
+ MustCleanup = true;
+ return R;
+ }
+
+ ~MProtectHelper() XRAY_NEVER_INSTRUMENT {
+ if (MustCleanup) {
+ mprotect(PageAlignedAddr, MProtectLen, PROT_READ | PROT_EXEC);
+ }
+ }
+};
+
+} // namespace __xray
+
+extern std::atomic<bool> XRayInitialized;
+extern std::atomic<__xray::XRaySledMap> XRayInstrMap;
+
+int __xray_set_handler(void (*entry)(int32_t,
+ XRayEntryType)) XRAY_NEVER_INSTRUMENT {
+ if (XRayInitialized.load(std::memory_order_acquire)) {
+ __xray::XRayPatchedFunction.store(entry, std::memory_order_release);
+ return 1;
+ }
+ return 0;
+}
+
+int __xray_remove_handler() XRAY_NEVER_INSTRUMENT {
+ return __xray_set_handler(nullptr);
+}
+
+std::atomic<bool> XRayPatching{false};
+
+using namespace __xray;
+
+// FIXME: Figure out whether we can move this class to sanitizer_common instead
+// as a generic "scope guard".
+template <class Function> class CleanupInvoker {
+ Function Fn;
+
+public:
+ explicit CleanupInvoker(Function Fn) XRAY_NEVER_INSTRUMENT : Fn(Fn) {}
+ CleanupInvoker(const CleanupInvoker &) XRAY_NEVER_INSTRUMENT = default;
+ CleanupInvoker(CleanupInvoker &&) XRAY_NEVER_INSTRUMENT = default;
+ CleanupInvoker &
+ operator=(const CleanupInvoker &) XRAY_NEVER_INSTRUMENT = delete;
+ CleanupInvoker &operator=(CleanupInvoker &&) XRAY_NEVER_INSTRUMENT = delete;
+ ~CleanupInvoker() XRAY_NEVER_INSTRUMENT { Fn(); }
+};
+
+template <class Function>
+CleanupInvoker<Function> ScopeCleanup(Function Fn) XRAY_NEVER_INSTRUMENT {
+ return CleanupInvoker<Function>{Fn};
+}
+
+// ControlPatching implements the common internals of the patching/unpatching
+// implementation. |Enable| defines whether we're enabling or disabling the
+// runtime XRay instrumentation.
+XRayPatchingStatus ControlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
+ if (!XRayInitialized.load(std::memory_order_acquire))
+ return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
+
+ static bool NotPatching = false;
+ if (!XRayPatching.compare_exchange_strong(NotPatching, true,
+ std::memory_order_acq_rel,
+ std::memory_order_acquire)) {
+ return XRayPatchingStatus::ONGOING; // Already patching.
+ }
+
+ bool PatchingSuccess = false;
+ auto XRayPatchingStatusResetter = ScopeCleanup([&PatchingSuccess] {
+ if (!PatchingSuccess) {
+ XRayPatching.store(false, std::memory_order_release);
+ }
+ });
+
+ // Step 1: Compute the function id, as a unique identifier per function in the
+ // instrumentation map.
+ XRaySledMap InstrMap = XRayInstrMap.load(std::memory_order_acquire);
+ if (InstrMap.Entries == 0)
+ return XRayPatchingStatus::NOT_INITIALIZED;
+
+ const uint64_t PageSize = GetPageSizeCached();
+ if ((PageSize == 0) || ((PageSize & (PageSize - 1)) != 0)) {
+ Report("System page size is not a power of two: %lld\n", PageSize);
+ return XRayPatchingStatus::FAILED;
+ }
+
+ uint32_t FuncId = 1;
+ uint64_t CurFun = 0;
+ for (std::size_t I = 0; I < InstrMap.Entries; I++) {
+ auto Sled = InstrMap.Sleds[I];
+ auto F = Sled.Function;
+ if (CurFun == 0)
+ CurFun = F;
+ if (F != CurFun) {
+ ++FuncId;
+ CurFun = F;
+ }
+
+ // While we're here, we should patch the nop sled. To do that we mprotect
+ // the page containing the function to be writeable.
+ void *PageAlignedAddr =
+ reinterpret_cast<void *>(Sled.Address & ~(PageSize - 1));
+ std::size_t MProtectLen = (Sled.Address + cSledLength) -
+ reinterpret_cast<uint64_t>(PageAlignedAddr);
+ MProtectHelper Protector(PageAlignedAddr, MProtectLen);
+ if (Protector.MakeWriteable() == -1) {
+ printf("Failed mprotect: %d\n", errno);
+ return XRayPatchingStatus::FAILED;
+ }
+
+ bool Success = false;
+ switch (Sled.Kind) {
+ case XRayEntryType::ENTRY:
+ Success = patchFunctionEntry(Enable, FuncId, Sled);
+ break;
+ case XRayEntryType::EXIT:
+ Success = patchFunctionExit(Enable, FuncId, Sled);
+ break;
+ case XRayEntryType::TAIL:
+ Success = patchFunctionTailExit(Enable, FuncId, Sled);
+ break;
+ default:
+ Report("Unsupported sled kind: %d\n", int(Sled.Kind));
+ continue;
+ }
+ (void)Success;
+ }
+ XRayPatching.store(false, std::memory_order_release);
+ PatchingSuccess = true;
+ return XRayPatchingStatus::SUCCESS;
+}
+
+XRayPatchingStatus __xray_patch() XRAY_NEVER_INSTRUMENT {
+ return ControlPatching(true);
+}
+
+XRayPatchingStatus __xray_unpatch() XRAY_NEVER_INSTRUMENT {
+ return ControlPatching(false);
+}
diff --git a/lib/xray/xray_interface_internal.h b/lib/xray/xray_interface_internal.h
new file mode 100644
index 0000000000000..a8434a699f865
--- /dev/null
+++ b/lib/xray/xray_interface_internal.h
@@ -0,0 +1,69 @@
+//===-- xray_interface_internal.h -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of the API functions. See also include/xray/xray_interface.h.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_INTERFACE_INTERNAL_H
+#define XRAY_INTERFACE_INTERNAL_H
+
+#include "sanitizer_common/sanitizer_platform.h"
+#include "xray/xray_interface.h"
+#include <cstddef>
+#include <cstdint>
+
+extern "C" {
+
+struct XRaySledEntry {
+#if SANITIZER_WORDSIZE == 64
+ uint64_t Address;
+ uint64_t Function;
+ unsigned char Kind;
+ unsigned char AlwaysInstrument;
+ unsigned char Padding[14]; // Need 32 bytes
+#elif SANITIZER_WORDSIZE == 32
+ uint32_t Address;
+ uint32_t Function;
+ unsigned char Kind;
+ unsigned char AlwaysInstrument;
+ unsigned char Padding[6]; // Need 16 bytes
+#else
+#error "Unsupported word size."
+#endif
+};
+}
+
+namespace __xray {
+
+struct XRaySledMap {
+ const XRaySledEntry *Sleds;
+ size_t Entries;
+};
+
+uint64_t cycleFrequency();
+
+bool patchFunctionEntry(bool Enable, uint32_t FuncId,
+ const XRaySledEntry &Sled);
+bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
+bool patchFunctionTailExit(bool Enable, uint32_t FuncId,
+ const XRaySledEntry &Sled);
+
+} // namespace __xray
+
+extern "C" {
+// The following functions have to be defined in assembler, on a per-platform
+// basis. See xray_trampoline_*.S files for implementations.
+extern void __xray_FunctionEntry();
+extern void __xray_FunctionExit();
+extern void __xray_FunctionTailExit();
+}
+
+#endif
diff --git a/lib/xray/xray_trampoline_AArch64.S b/lib/xray/xray_trampoline_AArch64.S
new file mode 100644
index 0000000000000..f1a471c041f9a
--- /dev/null
+++ b/lib/xray/xray_trampoline_AArch64.S
@@ -0,0 +1,89 @@
+ .text
+ /* The variable containing the handler function pointer */
+ .global _ZN6__xray19XRayPatchedFunctionE
+ /* Word-aligned function entry point */
+ .p2align 2
+ /* Let C/C++ see the symbol */
+ .global __xray_FunctionEntry
+ .type __xray_FunctionEntry, %function
+ /* In C++ it is void extern "C" __xray_FunctionEntry(uint32_t FuncId) with
+ FuncId passed in W0 register. */
+__xray_FunctionEntry:
+ /* Move the return address beyond the end of sled data. The 12 bytes of
+ data are inserted in the code of the runtime patch, between the call
+ instruction and the instruction returned into. The data contains 32
+ bits of instrumented function ID and 64 bits of the address of
+ the current trampoline. */
+ ADD X30, X30, #12
+ /* Push the registers which may be modified by the handler function */
+ STP X1, X2, [SP, #-16]!
+ STP X3, X4, [SP, #-16]!
+ STP X5, X6, [SP, #-16]!
+ STP X7, X30, [SP, #-16]!
+ STP Q0, Q1, [SP, #-32]!
+ STP Q2, Q3, [SP, #-32]!
+ STP Q4, Q5, [SP, #-32]!
+ STP Q6, Q7, [SP, #-32]!
+ /* Load the address of _ZN6__xray19XRayPatchedFunctionE into X1 */
+ LDR X1, =_ZN6__xray19XRayPatchedFunctionE
+ /* Load the handler function pointer into X2 */
+ LDR X2, [X1]
+ /* Handler address is nullptr if handler is not set */
+ CMP X2, #0
+ BEQ FunctionEntry_restore
+ /* Function ID is already in W0 (the first parameter).
+ X1=0 means that we are tracing an entry event */
+ MOV X1, #0
+ /* Call the handler with 2 parameters in W0 and X1 */
+ BLR X2
+FunctionEntry_restore:
+ /* Pop the saved registers */
+ LDP Q6, Q7, [SP], #32
+ LDP Q4, Q5, [SP], #32
+ LDP Q2, Q3, [SP], #32
+ LDP Q0, Q1, [SP], #32
+ LDP X7, X30, [SP], #16
+ LDP X5, X6, [SP], #16
+ LDP X3, X4, [SP], #16
+ LDP X1, X2, [SP], #16
+ RET
+
+ /* Word-aligned function entry point */
+ .p2align 2
+ /* Let C/C++ see the symbol */
+ .global __xray_FunctionExit
+ .type __xray_FunctionExit, %function
+ /* In C++ it is void extern "C" __xray_FunctionExit(uint32_t FuncId) with
+ FuncId passed in W0 register. */
+__xray_FunctionExit:
+ /* Move the return address beyond the end of sled data. The 12 bytes of
+ data are inserted in the code of the runtime patch, between the call
+ instruction and the instruction returned into. The data contains 32
+ bits of instrumented function ID and 64 bits of the address of
+ the current trampoline. */
+ ADD X30, X30, #12
+ /* Push the registers which may be modified by the handler function */
+ STP X1, X2, [SP, #-16]!
+ STP X3, X4, [SP, #-16]!
+ STP X5, X6, [SP, #-16]!
+ STP X7, X30, [SP, #-16]!
+ STR Q0, [SP, #-16]!
+ /* Load the address of _ZN6__xray19XRayPatchedFunctionE into X1 */
+ LDR X1, =_ZN6__xray19XRayPatchedFunctionE
+ /* Load the handler function pointer into X2 */
+ LDR X2, [X1]
+ /* Handler address is nullptr if handler is not set */
+ CMP X2, #0
+ BEQ FunctionExit_restore
+ /* Function ID is already in W0 (the first parameter).
+ X1=1 means that we are tracing an exit event */
+ MOV X1, #1
+ /* Call the handler with 2 parameters in W0 and X1 */
+ BLR X2
+FunctionExit_restore:
+ LDR Q0, [SP], #16
+ LDP X7, X30, [SP], #16
+ LDP X5, X6, [SP], #16
+ LDP X3, X4, [SP], #16
+ LDP X1, X2, [SP], #16
+ RET
diff --git a/lib/xray/xray_trampoline_arm.S b/lib/xray/xray_trampoline_arm.S
new file mode 100644
index 0000000000000..5d87c971364dd
--- /dev/null
+++ b/lib/xray/xray_trampoline_arm.S
@@ -0,0 +1,65 @@
+ .syntax unified
+ .arch armv6t2
+ .fpu vfpv2
+ .code 32
+ .global _ZN6__xray19XRayPatchedFunctionE
+ @ Word-aligned function entry point
+ .p2align 2
+ @ Let C/C++ see the symbol
+ .global __xray_FunctionEntry
+ @ It preserves all registers except r0, r12(ip), r14(lr) and r15(pc)
+ @ Assume that "q" part of the floating-point registers is not used
+ @ for passing parameters to C/C++ functions.
+ .type __xray_FunctionEntry, %function
+ @ In C++ it is void extern "C" __xray_FunctionEntry(uint32_t FuncId) with
+ @ FuncId passed in r0 register.
+__xray_FunctionEntry:
+ PUSH {r1-r3,lr}
+ @ Save floating-point parameters of the instrumented function
+ VPUSH {d0-d7}
+ MOVW r1,#:lower16:_ZN6__xray19XRayPatchedFunctionE
+ MOVT r1,#:upper16:_ZN6__xray19XRayPatchedFunctionE
+ LDR r2, [r1]
+ @ Handler address is nullptr if handler is not set
+ CMP r2, #0
+ BEQ FunctionEntry_restore
+ @ Function ID is already in r0 (the first parameter).
+ @ r1=0 means that we are tracing an entry event
+ MOV r1, #0
+ @ Call the handler with 2 parameters in r0 and r1
+ BLX r2
+FunctionEntry_restore:
+ @ Restore floating-point parameters of the instrumented function
+ VPOP {d0-d7}
+ POP {r1-r3,pc}
+
+ @ Word-aligned function entry point
+ .p2align 2
+ @ Let C/C++ see the symbol
+ .global __xray_FunctionExit
+ @ Assume that d1-d7 are not used for the return value.
+ @ Assume that "q" part of the floating-point registers is not used for the
+ @ return value in C/C++.
+ .type __xray_FunctionExit, %function
+ @ In C++ it is extern "C" void __xray_FunctionExit(uint32_t FuncId) with
+ @ FuncId passed in r0 register.
+__xray_FunctionExit:
+ PUSH {r1-r3,lr}
+ @ Save the floating-point return value of the instrumented function
+ VPUSH {d0}
+ @ Load the handler address
+ MOVW r1,#:lower16:_ZN6__xray19XRayPatchedFunctionE
+ MOVT r1,#:upper16:_ZN6__xray19XRayPatchedFunctionE
+ LDR r2, [r1]
+ @ Handler address is nullptr if handler is not set
+ CMP r2, #0
+ BEQ FunctionExit_restore
+ @ Function ID is already in r0 (the first parameter).
+ @ 1 means that we are tracing an exit event
+ MOV r1, #1
+ @ Call the handler with 2 parameters in r0 and r1
+ BLX r2
+FunctionExit_restore:
+ @ Restore the floating-point return value of the instrumented function
+ VPOP {d0}
+ POP {r1-r3,pc}
diff --git a/lib/xray/xray_trampoline_x86_64.S b/lib/xray/xray_trampoline_x86_64.S
new file mode 100644
index 0000000000000..d90c30cd98e96
--- /dev/null
+++ b/lib/xray/xray_trampoline_x86_64.S
@@ -0,0 +1,147 @@
+//===-- xray_trampoline_x86.s -----------------------------------*- ASM -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the X86-specific assembler for the trampolines.
+//
+//===----------------------------------------------------------------------===//
+
+.macro SAVE_REGISTERS
+ subq $200, %rsp
+ movupd %xmm0, 184(%rsp)
+ movupd %xmm1, 168(%rsp)
+ movupd %xmm2, 152(%rsp)
+ movupd %xmm3, 136(%rsp)
+ movupd %xmm4, 120(%rsp)
+ movupd %xmm5, 104(%rsp)
+ movupd %xmm6, 88(%rsp)
+ movupd %xmm7, 72(%rsp)
+ movq %rdi, 64(%rsp)
+ movq %rax, 56(%rsp)
+ movq %rdx, 48(%rsp)
+ movq %rsi, 40(%rsp)
+ movq %rcx, 32(%rsp)
+ movq %r8, 24(%rsp)
+ movq %r9, 16(%rsp)
+.endm
+
+.macro RESTORE_REGISTERS
+ movupd 184(%rsp), %xmm0
+ movupd 168(%rsp), %xmm1
+ movupd 152(%rsp), %xmm2
+ movupd 136(%rsp), %xmm3
+ movupd 120(%rsp), %xmm4
+ movupd 104(%rsp), %xmm5
+ movupd 88(%rsp) , %xmm6
+ movupd 72(%rsp) , %xmm7
+ movq 64(%rsp), %rdi
+ movq 56(%rsp), %rax
+ movq 48(%rsp), %rdx
+ movq 40(%rsp), %rsi
+ movq 32(%rsp), %rcx
+ movq 24(%rsp), %r8
+ movq 16(%rsp), %r9
+ addq $200, %rsp
+.endm
+
+ .text
+ .file "xray_trampoline_x86.S"
+ .globl __xray_FunctionEntry
+ .align 16, 0x90
+ .type __xray_FunctionEntry,@function
+
+__xray_FunctionEntry:
+ .cfi_startproc
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ SAVE_REGISTERS
+
+ // This load has to be atomic, it's concurrent with __xray_patch().
+ // On x86/amd64, a simple (type-aligned) MOV instruction is enough.
+ movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
+ testq %rax, %rax
+ je .Ltmp0
+
+ // The patched function prolog puts its xray_instr_map index into %r10d.
+ movl %r10d, %edi
+ xor %esi,%esi
+ callq *%rax
+.Ltmp0:
+ RESTORE_REGISTERS
+ popq %rbp
+ retq
+.Ltmp1:
+ .size __xray_FunctionEntry, .Ltmp1-__xray_FunctionEntry
+ .cfi_endproc
+
+ .globl __xray_FunctionExit
+ .align 16, 0x90
+ .type __xray_FunctionExit,@function
+__xray_FunctionExit:
+ .cfi_startproc
+ // Save the important registers first. Since we're assuming that this
+ // function is only jumped into, we only preserve the registers for
+ // returning.
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ subq $56, %rsp
+ .cfi_def_cfa_offset 32
+ movupd %xmm0, 40(%rsp)
+ movupd %xmm1, 24(%rsp)
+ movq %rax, 16(%rsp)
+ movq %rdx, 8(%rsp)
+ movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
+ testq %rax,%rax
+ je .Ltmp2
+
+ movl %r10d, %edi
+ movl $1, %esi
+ callq *%rax
+.Ltmp2:
+ // Restore the important registers.
+ movupd 40(%rsp), %xmm0
+ movupd 24(%rsp), %xmm1
+ movq 16(%rsp), %rax
+ movq 8(%rsp), %rdx
+ addq $56, %rsp
+ popq %rbp
+ retq
+.Ltmp3:
+ .size __xray_FunctionExit, .Ltmp3-__xray_FunctionExit
+ .cfi_endproc
+
+ .global __xray_FunctionTailExit
+ .align 16, 0x90
+ .type __xray_FunctionTailExit,@function
+__xray_FunctionTailExit:
+ .cfi_startproc
+ // Save the important registers as in the entry trampoline, but indicate that
+ // this is an exit. In the future, we will introduce a new entry type that
+ // differentiates between a normal exit and a tail exit, but we'd have to do
+ // this and increment the version number for the header.
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ SAVE_REGISTERS
+
+ movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
+ testq %rax,%rax
+ je .Ltmp4
+
+ movl %r10d, %edi
+ movl $1, %esi
+ callq *%rax
+
+.Ltmp4:
+ RESTORE_REGISTERS
+ popq %rbp
+ retq
+.Ltmp5:
+ .size __xray_FunctionTailExit, .Ltmp5-__xray_FunctionTailExit
+ .cfi_endproc
diff --git a/lib/xray/xray_x86_64.cc b/lib/xray/xray_x86_64.cc
new file mode 100644
index 0000000000000..3ee91896c6e0a
--- /dev/null
+++ b/lib/xray/xray_x86_64.cc
@@ -0,0 +1,202 @@
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+#include <cstdint>
+#include <errno.h>
+#include <fcntl.h>
+#include <iterator>
+#include <limits>
+#include <tuple>
+#include <unistd.h>
+
+namespace __xray {
+
+static std::pair<ssize_t, bool>
+retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
+ auto BytesToRead = std::distance(Begin, End);
+ ssize_t BytesRead;
+ ssize_t TotalBytesRead = 0;
+ while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
+ if (BytesRead == -1) {
+ if (errno == EINTR)
+ continue;
+ Report("Read error; errno = %d\n", errno);
+ return std::make_pair(TotalBytesRead, false);
+ }
+
+ TotalBytesRead += BytesRead;
+ BytesToRead -= BytesRead;
+ Begin += BytesRead;
+ }
+ return std::make_pair(TotalBytesRead, true);
+}
+
+static bool readValueFromFile(const char *Filename,
+ long long *Value) XRAY_NEVER_INSTRUMENT {
+ int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
+ if (Fd == -1)
+ return false;
+ static constexpr size_t BufSize = 256;
+ char Line[BufSize] = {};
+ ssize_t BytesRead;
+ bool Success;
+ std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
+ if (!Success)
+ return false;
+ close(Fd);
+ char *End = nullptr;
+ long long Tmp = internal_simple_strtoll(Line, &End, 10);
+ bool Result = false;
+ if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
+ *Value = Tmp;
+ Result = true;
+ }
+ return Result;
+}
+
+uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
+ long long CPUFrequency = -1;
+ if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
+ &CPUFrequency)) {
+ CPUFrequency *= 1000;
+ } else if (readValueFromFile(
+ "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
+ &CPUFrequency)) {
+ CPUFrequency *= 1000;
+ } else {
+ Report("Unable to determine CPU frequency for TSC accounting.\n");
+ }
+ return CPUFrequency == -1 ? 0 : static_cast<uint64_t>(CPUFrequency);
+}
+
+static constexpr uint8_t CallOpCode = 0xe8;
+static constexpr uint16_t MovR10Seq = 0xba41;
+static constexpr uint16_t Jmp9Seq = 0x09eb;
+static constexpr uint8_t JmpOpCode = 0xe9;
+static constexpr uint8_t RetOpCode = 0xc3;
+
+static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
+static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // Here we do the dance of replacing the following sled:
+ //
+ // xray_sled_n:
+ // jmp +9
+ // <9 byte nop>
+ //
+ // With the following:
+ //
+ // mov r10d, <function id>
+ // call <relative 32bit offset to entry trampoline>
+ //
+ // We need to do this in the following order:
+ //
+ // 1. Put the function id first, 2 bytes from the start of the sled (just
+ // after the 2-byte jmp instruction).
+ // 2. Put the call opcode 6 bytes from the start of the sled.
+ // 3. Put the relative offset 7 bytes from the start of the sled.
+ // 4. Do an atomic write over the jmp instruction for the "mov r10d"
+ // opcode and first operand.
+ //
+ // Prerequisite is to compute the relative offset to the
+ // __xray_FunctionEntry function's address.
+ int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionEntry) -
+ (static_cast<int64_t>(Sled.Address) + 11);
+ if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+ Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
+ __xray_FunctionEntry, reinterpret_cast<void *>(Sled.Address));
+ return false;
+ }
+ if (Enable) {
+ *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+ *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
+ *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+ std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
+ std::memory_order_release);
+ // FIXME: Write out the nops still?
+ }
+ return true;
+}
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // Here we do the dance of replacing the following sled:
+ //
+ // xray_sled_n:
+ // ret
+ // <10 byte nop>
+ //
+ // With the following:
+ //
+ // mov r10d, <function id>
+ // jmp <relative 32bit offset to exit trampoline>
+ //
+ // 1. Put the function id first, 2 bytes from the start of the sled (just
+ // after the 1-byte ret instruction).
+ // 2. Put the jmp opcode 6 bytes from the start of the sled.
+ // 3. Put the relative offset 7 bytes from the start of the sled.
+ // 4. Do an atomic write over the jmp instruction for the "mov r10d"
+ // opcode and first operand.
+ //
+ // Prerequisite is to compute the relative offset fo the
+ // __xray_FunctionExit function's address.
+ int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
+ (static_cast<int64_t>(Sled.Address) + 11);
+ if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+ Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
+ __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address));
+ return false;
+ }
+ if (Enable) {
+ *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+ *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
+ *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+ std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
+ std::memory_order_release);
+ // FIXME: Write out the nops still?
+ }
+ return true;
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // Here we do the dance of replacing the tail call sled with a similar
+ // sequence as the entry sled, but calls the tail exit sled instead.
+ int64_t TrampolineOffset =
+ reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
+ (static_cast<int64_t>(Sled.Address) + 11);
+ if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+ Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
+ __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address));
+ return false;
+ }
+ if (Enable) {
+ *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+ *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
+ *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+ std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
+ std::memory_order_release);
+ // FIXME: Write out the nops still?
+ }
+ return true;
+}
+
+} // namespace __xray
diff --git a/lib/xray/xray_x86_64.h b/lib/xray/xray_x86_64.h
new file mode 100644
index 0000000000000..52d2dea8f0d93
--- /dev/null
+++ b/lib/xray/xray_x86_64.h
@@ -0,0 +1,32 @@
+//===-- xray_x86_64.h -------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_X86_64_H
+#define XRAY_X86_64_H
+
+#include <cstdint>
+#include <x86intrin.h>
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "xray_defs.h"
+
+namespace __xray {
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+ unsigned LongCPU;
+ uint64_t TSC = __rdtscp(&LongCPU);
+ CPU = LongCPU;
+ return TSC;
+}
+}
+
+#endif // XRAY_X86_64_H