diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-01-02 19:18:27 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-01-02 19:18:27 +0000 |
commit | 316d58822dada9440bd06ecfc758dcc2364d617c (patch) | |
tree | fe72ec2e6ce9a360dda74d9d57f7acdb0e3c39d6 /lib/xray | |
parent | 0230fcf22fe7d19f03d981c9c2c59a3db0b72ea5 (diff) |
Notes
Diffstat (limited to 'lib/xray')
-rw-r--r-- | lib/xray/CMakeLists.txt | 79 | ||||
-rw-r--r-- | lib/xray/tests/CMakeLists.txt | 59 | ||||
-rw-r--r-- | lib/xray/tests/unit/CMakeLists.txt | 2 | ||||
-rw-r--r-- | lib/xray/tests/unit/buffer_queue_test.cc | 81 | ||||
-rw-r--r-- | lib/xray/tests/unit/xray_unit_test_main.cc | 18 | ||||
-rw-r--r-- | lib/xray/xray_AArch64.cc | 119 | ||||
-rw-r--r-- | lib/xray/xray_arm.cc | 156 | ||||
-rw-r--r-- | lib/xray/xray_buffer_queue.cc | 65 | ||||
-rw-r--r-- | lib/xray/xray_buffer_queue.h | 86 | ||||
-rw-r--r-- | lib/xray/xray_defs.h | 22 | ||||
-rw-r--r-- | lib/xray/xray_emulate_tsc.h | 40 | ||||
-rw-r--r-- | lib/xray/xray_flags.cc | 62 | ||||
-rw-r--r-- | lib/xray/xray_flags.h | 37 | ||||
-rw-r--r-- | lib/xray/xray_flags.inc | 22 | ||||
-rw-r--r-- | lib/xray/xray_init.cc | 70 | ||||
-rw-r--r-- | lib/xray/xray_inmemory_log.cc | 185 | ||||
-rw-r--r-- | lib/xray/xray_interface.cc | 207 | ||||
-rw-r--r-- | lib/xray/xray_interface_internal.h | 69 | ||||
-rw-r--r-- | lib/xray/xray_trampoline_AArch64.S | 89 | ||||
-rw-r--r-- | lib/xray/xray_trampoline_arm.S | 65 | ||||
-rw-r--r-- | lib/xray/xray_trampoline_x86_64.S | 147 | ||||
-rw-r--r-- | lib/xray/xray_x86_64.cc | 202 | ||||
-rw-r--r-- | lib/xray/xray_x86_64.h | 32 |
23 files changed, 1914 insertions, 0 deletions
diff --git a/lib/xray/CMakeLists.txt b/lib/xray/CMakeLists.txt new file mode 100644 index 0000000000000..9c7cf6ce361c8 --- /dev/null +++ b/lib/xray/CMakeLists.txt @@ -0,0 +1,79 @@ +# Build for the XRay runtime support library. + +# Core XRay runtime library implementation files. +set(XRAY_SOURCES + xray_init.cc + xray_interface.cc + xray_flags.cc + xray_inmemory_log.cc) + +# XRay flight data recorder (FDR) implementation files. +set(XRAY_FDR_SOURCES + xray_buffer_queue.cc) + +set(x86_64_SOURCES + xray_x86_64.cc + xray_trampoline_x86_64.S + ${XRAY_SOURCES}) + +set(arm_SOURCES + xray_arm.cc + xray_trampoline_arm.S + ${XRAY_SOURCES}) + +set(armhf_SOURCES ${arm_SOURCES}) + +set(aarch64_SOURCES + xray_AArch64.cc + xray_trampoline_AArch64.S + ${XRAY_SOURCES}) + +include_directories(..) +include_directories(../../include) + +set(XRAY_CFLAGS ${SANITIZER_COMMON_CFLAGS}) +set(XRAY_COMMON_DEFINITIONS XRAY_HAS_EXCEPTIONS=1) +append_list_if( + COMPILER_RT_HAS_XRAY_COMPILER_FLAG XRAY_SUPPORTED=1 XRAY_COMMON_DEFINITIONS) + +add_compiler_rt_object_libraries(RTXray + ARCHS ${XRAY_SUPPORTED_ARCH} + SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_CFLAGS} + DEFS ${XRAY_COMMON_DEFINITIONS}) + +add_compiler_rt_object_libraries(RTXrayFDR + ARCHS ${XRAY_SUPPORTED_ARCH} + SOURCES ${XRAY_FDR_SOURCES} CFLAGS ${XRAY_CFLAGS} + DEFS ${XRAY_COMMON_DEFINITIONS}) + +add_compiler_rt_component(xray) +add_compiler_rt_component(xray-fdr) + +set(XRAY_COMMON_RUNTIME_OBJECT_LIBS + RTSanitizerCommon + RTSanitizerCommonLibc) + +foreach(arch ${XRAY_SUPPORTED_ARCH}) + if(CAN_TARGET_${arch}) + add_compiler_rt_runtime(clang_rt.xray + STATIC + ARCHS ${arch} + SOURCES ${${arch}_SOURCES} + CFLAGS ${XRAY_CFLAGS} + DEFS ${XRAY_COMMON_DEFINITIONS} + OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS} + PARENT_TARGET xray) + add_compiler_rt_runtime(clang_rt.xray-fdr + STATIC + ARCHS ${arch} + SOURCES ${XRAY_FDR_SOURCES} + CFLAGS ${XRAY_CFLAGS} + DEFS ${XRAY_COMMON_DEFINITIONS} + OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS} + PARENT_TARGET xray-fdr) + endif() +endforeach() + +if(COMPILER_RT_INCLUDE_TESTS) + add_subdirectory(tests) +endif() diff --git a/lib/xray/tests/CMakeLists.txt b/lib/xray/tests/CMakeLists.txt new file mode 100644 index 0000000000000..6cb17934c8952 --- /dev/null +++ b/lib/xray/tests/CMakeLists.txt @@ -0,0 +1,59 @@ +include_directories(..) + +add_custom_target(XRayUnitTests) +set_target_properties(XRayUnitTests PROPERTIES FOLDER "XRay unittests") + +set(XRAY_UNITTEST_CFLAGS + ${XRAY_CFLAGS} + ${COMPILER_RT_UNITTEST_CFLAGS} + ${COMPILER_RT_GTEST_CFLAGS} + -I${COMPILER_RT_SOURCE_DIR}/include + -I${COMPILER_RT_SOURCE_DIR}/lib/xray) + +macro(xray_compile obj_list source arch) + get_filename_component(basename ${source} NAME) + set(output_obj "${basename}.${arch}.o") + get_target_flags_for_arch(${arch} TARGET_CFLAGS) + if(NOT COMPILER_RT_STANDALONE_BUILD) + list(APPEND COMPILE_DEPS gtest_main xray-fdr) + endif() + clang_compile(${output_obj} ${source} + CFLAGS ${XRAY_UNITTEST_CFLAGS} ${TARGET_CFLAGS} + DEPS ${COMPILE_DEPS}) + list(APPEND ${obj_list} ${output_obj}) +endmacro() + +macro(add_xray_unittest testname) + set(XRAY_TEST_ARCH ${XRAY_SUPPORTED_ARCH}) + if (APPLE) + darwin_filter_host_archs(XRAY_SUPPORTED_ARCH) + endif() + if(UNIX) + foreach(arch ${XRAY_TEST_ARCH}) + cmake_parse_arguments(TEST "" "" "SOURCES;HEADERS" ${ARGN}) + set(TEST_OBJECTS) + foreach(SOURCE ${TEST_SOURCES} ${COMPILER_RT_GTEST_SOURCE}) + xray_compile(TEST_OBJECTS ${SOURCE} ${arch} ${TEST_HEADERS}) + endforeach() + get_target_flags_for_arch(${arch} TARGET_LINK_FLAGS) + set(TEST_DEPS ${TEST_OBJECTS}) + if(NOT COMPILER_RT_STANDALONE_BUILD) + list(APPEND TEST_DEPS gtest_main xray-fdr) + endif() + if(NOT APPLE) + add_compiler_rt_test(XRayUnitTests ${testname} + OBJECTS ${TEST_OBJECTS} + DEPS ${TEST_DEPS} + LINK_FLAGS ${TARGET_LINK_FLAGS} + -lstdc++ -lm ${CMAKE_THREAD_LIBS_INIT} + -lpthread + -L${COMPILER_RT_LIBRARY_OUTPUT_DIR} -lclang_rt.xray-fdr-${arch}) + endif() + # FIXME: Figure out how to run even just the unit tests on APPLE. + endforeach() + endif() +endmacro() + +if(COMPILER_RT_CAN_EXECUTE_TESTS) + add_subdirectory(unit) +endif() diff --git a/lib/xray/tests/unit/CMakeLists.txt b/lib/xray/tests/unit/CMakeLists.txt new file mode 100644 index 0000000000000..3e5412d41e6e3 --- /dev/null +++ b/lib/xray/tests/unit/CMakeLists.txt @@ -0,0 +1,2 @@ +add_xray_unittest(XRayBufferQueueTest SOURCES + buffer_queue_test.cc xray_unit_test_main.cc) diff --git a/lib/xray/tests/unit/buffer_queue_test.cc b/lib/xray/tests/unit/buffer_queue_test.cc new file mode 100644 index 0000000000000..d46f19402c2a1 --- /dev/null +++ b/lib/xray/tests/unit/buffer_queue_test.cc @@ -0,0 +1,81 @@ +//===-- buffer_queue_test.cc ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. +// +//===----------------------------------------------------------------------===// +#include "xray_buffer_queue.h" +#include "gtest/gtest.h" + +#include <future> +#include <system_error> +#include <unistd.h> + +namespace __xray { + +static constexpr size_t kSize = 4096; + +TEST(BufferQueueTest, API) { BufferQueue Buffers(kSize, 1); } + +TEST(BufferQueueTest, GetAndRelease) { + BufferQueue Buffers(kSize, 1); + BufferQueue::Buffer Buf; + ASSERT_EQ(Buffers.getBuffer(Buf), std::error_code()); + ASSERT_NE(nullptr, Buf.Buffer); + ASSERT_EQ(Buffers.releaseBuffer(Buf), std::error_code()); + ASSERT_EQ(nullptr, Buf.Buffer); +} + +TEST(BufferQueueTest, GetUntilFailed) { + BufferQueue Buffers(kSize, 1); + BufferQueue::Buffer Buf0; + EXPECT_EQ(Buffers.getBuffer(Buf0), std::error_code()); + BufferQueue::Buffer Buf1; + EXPECT_EQ(std::errc::not_enough_memory, Buffers.getBuffer(Buf1)); + EXPECT_EQ(Buffers.releaseBuffer(Buf0), std::error_code()); +} + +TEST(BufferQueueTest, ReleaseUnknown) { + BufferQueue Buffers(kSize, 1); + BufferQueue::Buffer Buf; + Buf.Buffer = reinterpret_cast<void *>(0xdeadbeef); + Buf.Size = kSize; + EXPECT_EQ(std::errc::argument_out_of_domain, Buffers.releaseBuffer(Buf)); +} + +TEST(BufferQueueTest, ErrorsWhenFinalising) { + BufferQueue Buffers(kSize, 2); + BufferQueue::Buffer Buf; + ASSERT_EQ(Buffers.getBuffer(Buf), std::error_code()); + ASSERT_NE(nullptr, Buf.Buffer); + ASSERT_EQ(Buffers.finalize(), std::error_code()); + BufferQueue::Buffer OtherBuf; + ASSERT_EQ(std::errc::state_not_recoverable, Buffers.getBuffer(OtherBuf)); + ASSERT_EQ(std::errc::state_not_recoverable, Buffers.finalize()); + ASSERT_EQ(Buffers.releaseBuffer(Buf), std::error_code()); +} + +TEST(BufferQueueTest, MultiThreaded) { + BufferQueue Buffers(kSize, 100); + auto F = [&] { + BufferQueue::Buffer B; + while (!Buffers.getBuffer(B)) { + Buffers.releaseBuffer(B); + } + }; + auto T0 = std::async(std::launch::async, F); + auto T1 = std::async(std::launch::async, F); + auto T2 = std::async(std::launch::async, [&] { + while (!Buffers.finalize()) + ; + }); + F(); +} + +} // namespace __xray diff --git a/lib/xray/tests/unit/xray_unit_test_main.cc b/lib/xray/tests/unit/xray_unit_test_main.cc new file mode 100644 index 0000000000000..27d17527dd5bc --- /dev/null +++ b/lib/xray/tests/unit/xray_unit_test_main.cc @@ -0,0 +1,18 @@ +//===-- xray_unit_test_main.cc --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. +// +//===----------------------------------------------------------------------===// +#include "gtest/gtest.h" + +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/lib/xray/xray_AArch64.cc b/lib/xray/xray_AArch64.cc new file mode 100644 index 0000000000000..8f24610dab275 --- /dev/null +++ b/lib/xray/xray_AArch64.cc @@ -0,0 +1,119 @@ +//===-- xray_AArch64.cc -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of AArch64-specific routines (64-bit). +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_emulate_tsc.h" +#include "xray_interface_internal.h" +#include <atomic> +#include <cassert> + +namespace __xray { + +uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT { + // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does + // not have a constant frequency like TSC on x86[_64]; it may go faster or + // slower depending on CPU's turbo or power saving modes. Furthermore, to + // read from CP15 on ARM a kernel modification or a driver is needed. + // We can not require this from users of compiler-rt. + // So on ARM we use clock_gettime(2) which gives the result in nanoseconds. + // To get the measurements per second, we scale this by the number of + // nanoseconds per second, pretending that the TSC frequency is 1GHz and + // one TSC tick is 1 nanosecond. + return NanosecondsPerSecond; +} + +// The machine codes for some instructions used in runtime patching. +enum class PatchOpcodes : uint32_t { + PO_StpX0X30SP_m16e = 0xA9BF7BE0, // STP X0, X30, [SP, #-16]! + PO_LdrW0_12 = 0x18000060, // LDR W0, #12 + PO_LdrX16_12 = 0x58000070, // LDR X16, #12 + PO_BlrX16 = 0xD63F0200, // BLR X16 + PO_LdpX0X30SP_16 = 0xA8C17BE0, // LDP X0, X30, [SP], #16 + PO_B32 = 0x14000008 // B #32 +}; + +inline static bool patchSled(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { + // When |Enable| == true, + // We replace the following compile-time stub (sled): + // + // xray_sled_n: + // B #32 + // 7 NOPs (24 bytes) + // + // With the following runtime patch: + // + // xray_sled_n: + // STP X0, X30, [SP, #-16]! ; PUSH {r0, lr} + // LDR W0, #12 ; W0 := function ID + // LDR X16,#12 ; X16 := address of the trampoline + // BLR X16 + // ;DATA: 32 bits of function ID + // ;DATA: lower 32 bits of the address of the trampoline + // ;DATA: higher 32 bits of the address of the trampoline + // LDP X0, X30, [SP], #16 ; POP {r0, lr} + // + // Replacement of the first 4-byte instruction should be the last and atomic + // operation, so that the user code which reaches the sled concurrently + // either jumps over the whole sled, or executes the whole sled when the + // latter is ready. + // + // When |Enable|==false, we set back the first instruction in the sled to be + // B #32 + + uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address); + if (Enable) { + uint32_t *CurAddress = FirstAddress + 1; + *CurAddress = uint32_t(PatchOpcodes::PO_LdrW0_12); + CurAddress++; + *CurAddress = uint32_t(PatchOpcodes::PO_LdrX16_12); + CurAddress++; + *CurAddress = uint32_t(PatchOpcodes::PO_BlrX16); + CurAddress++; + *CurAddress = FuncId; + CurAddress++; + *reinterpret_cast<void (**)()>(CurAddress) = TracingHook; + CurAddress += 2; + *CurAddress = uint32_t(PatchOpcodes::PO_LdpX0X30SP_16); + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), + uint32_t(PatchOpcodes::PO_StpX0X30SP_m16e), std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), + uint32_t(PatchOpcodes::PO_B32), std::memory_order_release); + } + return true; +} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry); +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: In the future we'd need to distinguish between non-tail exits and + // tail exits for better information preservation. + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +} // namespace __xray diff --git a/lib/xray/xray_arm.cc b/lib/xray/xray_arm.cc new file mode 100644 index 0000000000000..d89322e833e58 --- /dev/null +++ b/lib/xray/xray_arm.cc @@ -0,0 +1,156 @@ +//===-- xray_arm.cc ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of ARM-specific routines (32-bit). +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_emulate_tsc.h" +#include "xray_interface_internal.h" +#include <atomic> +#include <cassert> + +namespace __xray { + +uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT { + // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does + // not have a constant frequency like TSC on x86[_64]; it may go faster or + // slower depending on CPU's turbo or power saving modes. Furthermore, to + // read from CP15 on ARM a kernel modification or a driver is needed. + // We can not require this from users of compiler-rt. + // So on ARM we use clock_gettime(2) which gives the result in nanoseconds. + // To get the measurements per second, we scale this by the number of + // nanoseconds per second, pretending that the TSC frequency is 1GHz and + // one TSC tick is 1 nanosecond. + return NanosecondsPerSecond; +} + +// The machine codes for some instructions used in runtime patching. +enum class PatchOpcodes : uint32_t { + PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr} + PO_BlxIp = 0xE12FFF3C, // BLX ip + PO_PopR0Lr = 0xE8BD4001, // POP {r0, lr} + PO_B20 = 0xEA000005 // B #20 +}; + +// 0xUUUUWXYZ -> 0x000W0XYZ +inline static uint32_t getMovwMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT { + return (Value & 0xfff) | ((Value & 0xf000) << 4); +} + +// 0xWXYZUUUU -> 0x000W0XYZ +inline static uint32_t getMovtMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT { + return getMovwMask(Value >> 16); +} + +// Writes the following instructions: +// MOVW R<regNo>, #<lower 16 bits of the |Value|> +// MOVT R<regNo>, #<higher 16 bits of the |Value|> +inline static uint32_t * +write32bitLoadReg(uint8_t regNo, uint32_t *Address, + const uint32_t Value) XRAY_NEVER_INSTRUMENT { + // This is a fatal error: we cannot just report it and continue execution. + assert(regNo <= 15 && "Register number must be 0 to 15."); + // MOVW R, #0xWXYZ in machine code is 0xE30WRXYZ + *Address = (0xE3000000 | (uint32_t(regNo) << 12) | getMovwMask(Value)); + Address++; + // MOVT R, #0xWXYZ in machine code is 0xE34WRXYZ + *Address = (0xE3400000 | (uint32_t(regNo) << 12) | getMovtMask(Value)); + return Address + 1; +} + +// Writes the following instructions: +// MOVW r0, #<lower 16 bits of the |Value|> +// MOVT r0, #<higher 16 bits of the |Value|> +inline static uint32_t * +Write32bitLoadR0(uint32_t *Address, + const uint32_t Value) XRAY_NEVER_INSTRUMENT { + return write32bitLoadReg(0, Address, Value); +} + +// Writes the following instructions: +// MOVW ip, #<lower 16 bits of the |Value|> +// MOVT ip, #<higher 16 bits of the |Value|> +inline static uint32_t * +Write32bitLoadIP(uint32_t *Address, + const uint32_t Value) XRAY_NEVER_INSTRUMENT { + return write32bitLoadReg(12, Address, Value); +} + +inline static bool patchSled(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { + // When |Enable| == true, + // We replace the following compile-time stub (sled): + // + // xray_sled_n: + // B #20 + // 6 NOPs (24 bytes) + // + // With the following runtime patch: + // + // xray_sled_n: + // PUSH {r0, lr} + // MOVW r0, #<lower 16 bits of function ID> + // MOVT r0, #<higher 16 bits of function ID> + // MOVW ip, #<lower 16 bits of address of TracingHook> + // MOVT ip, #<higher 16 bits of address of TracingHook> + // BLX ip + // POP {r0, lr} + // + // Replacement of the first 4-byte instruction should be the last and atomic + // operation, so that the user code which reaches the sled concurrently + // either jumps over the whole sled, or executes the whole sled when the + // latter is ready. + // + // When |Enable|==false, we set back the first instruction in the sled to be + // B #20 + + uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address); + if (Enable) { + uint32_t *CurAddress = FirstAddress + 1; + CurAddress = + Write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId)); + CurAddress = + Write32bitLoadIP(CurAddress, reinterpret_cast<uint32_t>(TracingHook)); + *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp); + CurAddress++; + *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr); + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), + uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), + uint32_t(PatchOpcodes::PO_B20), std::memory_order_release); + } + return true; +} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry); +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: In the future we'd need to distinguish between non-tail exits and + // tail exits for better information preservation. + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +} // namespace __xray diff --git a/lib/xray/xray_buffer_queue.cc b/lib/xray/xray_buffer_queue.cc new file mode 100644 index 0000000000000..7e5462fb8e113 --- /dev/null +++ b/lib/xray/xray_buffer_queue.cc @@ -0,0 +1,65 @@ +//===-- xray_buffer_queue.cc -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instruementation system. +// +// Defines the interface for a buffer queue implementation. +// +//===----------------------------------------------------------------------===// +#include "xray_buffer_queue.h" +#include <cassert> +#include <cstdlib> + +using namespace __xray; + +BufferQueue::BufferQueue(std::size_t B, std::size_t N) + : BufferSize(B), Buffers(N), Mutex(), OwnedBuffers(), Finalizing(false) { + for (auto &Buf : Buffers) { + void *Tmp = malloc(BufferSize); + Buf.Buffer = Tmp; + Buf.Size = B; + if (Tmp != 0) + OwnedBuffers.insert(Tmp); + } +} + +std::error_code BufferQueue::getBuffer(Buffer &Buf) { + if (Finalizing.load(std::memory_order_acquire)) + return std::make_error_code(std::errc::state_not_recoverable); + std::lock_guard<std::mutex> Guard(Mutex); + if (Buffers.empty()) + return std::make_error_code(std::errc::not_enough_memory); + Buf = Buffers.front(); + Buffers.pop_front(); + return {}; +} + +std::error_code BufferQueue::releaseBuffer(Buffer &Buf) { + if (OwnedBuffers.count(Buf.Buffer) == 0) + return std::make_error_code(std::errc::argument_out_of_domain); + std::lock_guard<std::mutex> Guard(Mutex); + Buffers.push_back(Buf); + Buf.Buffer = nullptr; + Buf.Size = BufferSize; + return {}; +} + +std::error_code BufferQueue::finalize() { + if (Finalizing.exchange(true, std::memory_order_acq_rel)) + return std::make_error_code(std::errc::state_not_recoverable); + return {}; +} + +BufferQueue::~BufferQueue() { + for (auto &Buf : Buffers) { + free(Buf.Buffer); + Buf.Buffer = nullptr; + Buf.Size = 0; + } +} diff --git a/lib/xray/xray_buffer_queue.h b/lib/xray/xray_buffer_queue.h new file mode 100644 index 0000000000000..bf0b7af9df4d0 --- /dev/null +++ b/lib/xray/xray_buffer_queue.h @@ -0,0 +1,86 @@ +//===-- xray_buffer_queue.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Defines the interface for a buffer queue implementation. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_BUFFER_QUEUE_H +#define XRAY_BUFFER_QUEUE_H + +#include <atomic> +#include <cstdint> +#include <deque> +#include <mutex> +#include <system_error> +#include <unordered_set> + +namespace __xray { + +/// BufferQueue implements a circular queue of fixed sized buffers (much like a +/// freelist) but is concerned mostly with making it really quick to initialise, +/// finalise, and get/return buffers to the queue. This is one key component of +/// the "flight data recorder" (FDR) mode to support ongoing XRay function call +/// trace collection. +class BufferQueue { +public: + struct Buffer { + void *Buffer = nullptr; + std::size_t Size = 0; + }; + +private: + std::size_t BufferSize; + std::deque<Buffer> Buffers; + std::mutex Mutex; + std::unordered_set<void *> OwnedBuffers; + std::atomic<bool> Finalizing; + +public: + /// Initialise a queue of size |N| with buffers of size |B|. + BufferQueue(std::size_t B, std::size_t N); + + /// Updates |Buf| to contain the pointer to an appropriate buffer. Returns an + /// error in case there are no available buffers to return when we will run + /// over the upper bound for the total buffers. + /// + /// Requirements: + /// - BufferQueue is not finalising. + /// + /// Returns: + /// - std::errc::not_enough_memory on exceeding MaxSize. + /// - no error when we find a Buffer. + /// - std::errc::state_not_recoverable on finalising BufferQueue. + std::error_code getBuffer(Buffer &Buf); + + /// Updates |Buf| to point to nullptr, with size 0. + /// + /// Returns: + /// - ... + std::error_code releaseBuffer(Buffer &Buf); + + bool finalizing() const { return Finalizing.load(std::memory_order_acquire); } + + // Sets the state of the BufferQueue to finalizing, which ensures that: + // + // - All subsequent attempts to retrieve a Buffer will fail. + // - All releaseBuffer operations will not fail. + // + // After a call to finalize succeeds, all subsequent calls to finalize will + // fail with std::errc::state_not_recoverable. + std::error_code finalize(); + + // Cleans up allocated buffers. + ~BufferQueue(); +}; + +} // namespace __xray + +#endif // XRAY_BUFFER_QUEUE_H diff --git a/lib/xray/xray_defs.h b/lib/xray/xray_defs.h new file mode 100644 index 0000000000000..e5c37c0665db0 --- /dev/null +++ b/lib/xray/xray_defs.h @@ -0,0 +1,22 @@ +//===-- xray_defs.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Common definitions useful for XRay sources. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_XRAY_DEFS_H +#define XRAY_XRAY_DEFS_H + +#if XRAY_SUPPORTED +#define XRAY_NEVER_INSTRUMENT __attribute__((xray_never_instrument)) +#else +#define XRAY_NEVER_INSTRUMENT +#endif + +#endif // XRAY_XRAY_DEFS_H diff --git a/lib/xray/xray_emulate_tsc.h b/lib/xray/xray_emulate_tsc.h new file mode 100644 index 0000000000000..a3e8b1c92eb40 --- /dev/null +++ b/lib/xray/xray_emulate_tsc.h @@ -0,0 +1,40 @@ +//===-- xray_emulate_tsc.h --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_EMULATE_TSC_H +#define XRAY_EMULATE_TSC_H + +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_internal_defs.h" +#include "xray_defs.h" +#include <cerrno> +#include <cstdint> +#include <time.h> + +namespace __xray { + +static constexpr uint64_t NanosecondsPerSecond = 1000ULL * 1000 * 1000; + +ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT { + timespec TS; + int result = clock_gettime(CLOCK_REALTIME, &TS); + if (result != 0) { + Report("clock_gettime(2) returned %d, errno=%d.", result, int(errno)); + TS.tv_sec = 0; + TS.tv_nsec = 0; + } + CPU = 0; + return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; +} +} + +#endif // XRAY_EMULATE_TSC_H diff --git a/lib/xray/xray_flags.cc b/lib/xray/xray_flags.cc new file mode 100644 index 0000000000000..338c2378b8cd7 --- /dev/null +++ b/lib/xray/xray_flags.cc @@ -0,0 +1,62 @@ +//===-- xray_flags.cc -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// XRay flag parsing logic. +//===----------------------------------------------------------------------===// + +#include "xray_flags.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "xray_defs.h" + +using namespace __sanitizer; + +namespace __xray { + +Flags xray_flags_dont_use_directly; // use via flags(). + +void Flags::SetDefaults() XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; +#include "xray_flags.inc" +#undef XRAY_FLAG +} + +static void RegisterXRayFlags(FlagParser *P, Flags *F) XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) \ + RegisterFlag(P, #Name, Description, &F->Name); +#include "xray_flags.inc" +#undef XRAY_FLAG +} + +void InitializeFlags() XRAY_NEVER_INSTRUMENT { + SetCommonFlagsDefaults(); + auto *F = flags(); + F->SetDefaults(); + + FlagParser XRayParser; + RegisterXRayFlags(&XRayParser, F); + RegisterCommonFlags(&XRayParser); + + // Override from command line. + XRayParser.ParseString(GetEnv("XRAY_OPTIONS")); + + InitializeCommonFlags(); + + if (Verbosity()) + ReportUnrecognizedFlags(); + + if (common_flags()->help) { + XRayParser.PrintFlagDescriptions(); + } +} + +} // namespace __xray diff --git a/lib/xray/xray_flags.h b/lib/xray/xray_flags.h new file mode 100644 index 0000000000000..2ecf5fb9ba1d4 --- /dev/null +++ b/lib/xray/xray_flags.h @@ -0,0 +1,37 @@ +//===-- xray_flags.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instruementation system. +// +// XRay runtime flags. +//===----------------------------------------------------------------------===// + +#ifndef XRAY_FLAGS_H +#define XRAY_FLAGS_H + +#include "sanitizer_common/sanitizer_flag_parser.h" + +namespace __xray { + +struct Flags { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Type Name; +#include "xray_flags.inc" +#undef XRAY_FLAG + + void SetDefaults(); +}; + +extern Flags xray_flags_dont_use_directly; +inline Flags *flags() { return &xray_flags_dont_use_directly; } + +void InitializeFlags(); + +} // namespace __xray + +#endif // XRAY_FLAGS_H diff --git a/lib/xray/xray_flags.inc b/lib/xray/xray_flags.inc new file mode 100644 index 0000000000000..0f6ced8ead0c2 --- /dev/null +++ b/lib/xray/xray_flags.inc @@ -0,0 +1,22 @@ +//===-- xray_flags.inc ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// XRay runtime flags. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_FLAG +#error "Define XRAY_FLAG prior to including this file!" +#endif + +XRAY_FLAG(bool, patch_premain, true, + "Whether to patch instrumentation points before main.") +XRAY_FLAG(bool, xray_naive_log, true, + "Whether to install the naive log implementation.") +XRAY_FLAG(const char *, xray_logfile_base, "xray-log.", + "Filename base for the xray logfile.") diff --git a/lib/xray/xray_init.cc b/lib/xray/xray_init.cc new file mode 100644 index 0000000000000..eb86182910cf7 --- /dev/null +++ b/lib/xray/xray_init.cc @@ -0,0 +1,70 @@ +//===-- xray_init.cc --------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// XRay initialisation logic. +//===----------------------------------------------------------------------===// + +#include <atomic> +#include <fcntl.h> +#include <strings.h> +#include <unistd.h> + +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_flags.h" +#include "xray_interface_internal.h" + +extern "C" { +void __xray_init(); +extern const XRaySledEntry __start_xray_instr_map[] __attribute__((weak)); +extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak)); +} + +using namespace __sanitizer; +using namespace __xray; + +// When set to 'true' this means the XRay runtime has been initialised. We use +// the weak symbols defined above (__start_xray_inst_map and +// __stop_xray_instr_map) to initialise the instrumentation map that XRay uses +// for runtime patching/unpatching of instrumentation points. +// +// FIXME: Support DSO instrumentation maps too. The current solution only works +// for statically linked executables. +std::atomic<bool> XRayInitialized{false}; + +// This should always be updated before XRayInitialized is updated. +std::atomic<__xray::XRaySledMap> XRayInstrMap{}; + +// __xray_init() will do the actual loading of the current process' memory map +// and then proceed to look for the .xray_instr_map section/segment. +void __xray_init() XRAY_NEVER_INSTRUMENT { + InitializeFlags(); + if (__start_xray_instr_map == nullptr) { + Report("XRay instrumentation map missing. Not initializing XRay.\n"); + return; + } + + // Now initialize the XRayInstrMap global struct with the address of the + // entries, reinterpreted as an array of XRaySledEntry objects. We use the + // virtual pointer we have from the section to provide us the correct + // information. + __xray::XRaySledMap SledMap{}; + SledMap.Sleds = __start_xray_instr_map; + SledMap.Entries = __stop_xray_instr_map - __start_xray_instr_map; + XRayInstrMap.store(SledMap, std::memory_order_release); + XRayInitialized.store(true, std::memory_order_release); + + if (flags()->patch_premain) + __xray_patch(); +} + +__attribute__((section(".preinit_array"), + used)) void (*__local_xray_preinit)(void) = __xray_init; diff --git a/lib/xray/xray_inmemory_log.cc b/lib/xray/xray_inmemory_log.cc new file mode 100644 index 0000000000000..7ec56f4867074 --- /dev/null +++ b/lib/xray/xray_inmemory_log.cc @@ -0,0 +1,185 @@ +//===-- xray_inmemory_log.cc ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of a simple in-memory log of XRay events. This defines a +// logging function that's compatible with the XRay handler interface, and +// routines for exporting data to files. +// +//===----------------------------------------------------------------------===// + +#include <cassert> +#include <cstdint> +#include <cstdio> +#include <fcntl.h> +#include <mutex> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <thread> +#include <unistd.h> + +#if defined(__x86_64__) +#include "xray_x86_64.h" +#elif defined(__arm__) || defined(__aarch64__) +#include "xray_emulate_tsc.h" +#else +#error "Unsupported CPU Architecture" +#endif /* Architecture-specific inline intrinsics */ + +#include "sanitizer_common/sanitizer_libc.h" +#include "xray/xray_records.h" +#include "xray_defs.h" +#include "xray_flags.h" +#include "xray_interface_internal.h" + +// __xray_InMemoryRawLog will use a thread-local aligned buffer capped to a +// certain size (32kb by default) and use it as if it were a circular buffer for +// events. We store simple fixed-sized entries in the log for external analysis. + +extern "C" { +void __xray_InMemoryRawLog(int32_t FuncId, + XRayEntryType Type) XRAY_NEVER_INSTRUMENT; +} + +namespace __xray { + +std::mutex LogMutex; + +static void retryingWriteAll(int Fd, char *Begin, + char *End) XRAY_NEVER_INSTRUMENT { + if (Begin == End) + return; + auto TotalBytes = std::distance(Begin, End); + while (auto Written = write(Fd, Begin, TotalBytes)) { + if (Written < 0) { + if (errno == EINTR) + continue; // Try again. + Report("Failed to write; errno = %d\n", errno); + return; + } + TotalBytes -= Written; + if (TotalBytes == 0) + break; + Begin += Written; + } +} + +class ThreadExitFlusher { + int Fd; + XRayRecord *Start; + size_t &Offset; + +public: + explicit ThreadExitFlusher(int Fd, XRayRecord *Start, + size_t &Offset) XRAY_NEVER_INSTRUMENT + : Fd(Fd), + Start(Start), + Offset(Offset) {} + + ~ThreadExitFlusher() XRAY_NEVER_INSTRUMENT { + std::lock_guard<std::mutex> L(LogMutex); + if (Fd > 0 && Start != nullptr) { + retryingWriteAll(Fd, reinterpret_cast<char *>(Start), + reinterpret_cast<char *>(Start + Offset)); + // Because this thread's exit could be the last one trying to write to the + // file and that we're not able to close out the file properly, we sync + // instead and hope that the pending writes are flushed as the thread + // exits. + fsync(Fd); + } + } +}; + +} // namespace __xray + +using namespace __xray; + +void PrintToStdErr(const char *Buffer) XRAY_NEVER_INSTRUMENT { + fprintf(stderr, "%s", Buffer); +} + +static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT { + // FIXME: Figure out how to make this less stderr-dependent. + SetPrintfAndReportCallback(PrintToStdErr); + // Open a temporary file once for the log. + static char TmpFilename[256] = {}; + static char TmpWildcardPattern[] = "XXXXXX"; + auto E = internal_strncat(TmpFilename, flags()->xray_logfile_base, + sizeof(TmpFilename) - 10); + if (static_cast<size_t>((E + 6) - TmpFilename) > (sizeof(TmpFilename) - 1)) { + Report("XRay log file base too long: %s\n", flags()->xray_logfile_base); + return -1; + } + internal_strncat(TmpFilename, TmpWildcardPattern, + sizeof(TmpWildcardPattern) - 1); + int Fd = mkstemp(TmpFilename); + if (Fd == -1) { + Report("XRay: Failed opening temporary file '%s'; not logging events.\n", + TmpFilename); + return -1; + } + if (Verbosity()) + fprintf(stderr, "XRay: Log file in '%s'\n", TmpFilename); + + // Since we're here, we get to write the header. We set it up so that the + // header will only be written once, at the start, and let the threads + // logging do writes which just append. + XRayFileHeader Header; + Header.Version = 1; + Header.Type = FileTypes::NAIVE_LOG; + Header.CycleFrequency = __xray::cycleFrequency(); + + // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' + // before setting the values in the header. + Header.ConstantTSC = 1; + Header.NonstopTSC = 1; + retryingWriteAll(Fd, reinterpret_cast<char *>(&Header), + reinterpret_cast<char *>(&Header) + sizeof(Header)); + return Fd; +} + +void __xray_InMemoryRawLog(int32_t FuncId, + XRayEntryType Type) XRAY_NEVER_INSTRUMENT { + using Buffer = + std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type; + static constexpr size_t BuffLen = 1024; + thread_local static Buffer InMemoryBuffer[BuffLen] = {}; + thread_local static size_t Offset = 0; + static int Fd = __xray_OpenLogFile(); + if (Fd == -1) + return; + thread_local __xray::ThreadExitFlusher Flusher( + Fd, reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer), Offset); + thread_local pid_t TId = syscall(SYS_gettid); + + // First we get the useful data, and stuff it into the already aligned buffer + // through a pointer offset. + auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset]; + R.RecordType = RecordTypes::NORMAL; + R.TSC = __xray::readTSC(R.CPU); + R.TId = TId; + R.Type = Type; + R.FuncId = FuncId; + ++Offset; + if (Offset == BuffLen) { + std::lock_guard<std::mutex> L(LogMutex); + auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer); + retryingWriteAll(Fd, reinterpret_cast<char *>(RecordBuffer), + reinterpret_cast<char *>(RecordBuffer + Offset)); + Offset = 0; + } +} + +static auto Unused = [] { + if (flags()->xray_naive_log) + __xray_set_handler(__xray_InMemoryRawLog); + return true; +}(); diff --git a/lib/xray/xray_interface.cc b/lib/xray/xray_interface.cc new file mode 100644 index 0000000000000..20a2b66c4401c --- /dev/null +++ b/lib/xray/xray_interface.cc @@ -0,0 +1,207 @@ +//===-- xray_interface.cpp --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of the API functions. +// +//===----------------------------------------------------------------------===// + +#include "xray_interface_internal.h" + +#include <atomic> +#include <cstdint> +#include <cstdio> +#include <errno.h> +#include <limits> +#include <sys/mman.h> + +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" + +namespace __xray { + +#if defined(__x86_64__) +// FIXME: The actual length is 11 bytes. Why was length 12 passed to mprotect() +// ? +static const int16_t cSledLength = 12; +#elif defined(__aarch64__) +static const int16_t cSledLength = 32; +#elif defined(__arm__) +static const int16_t cSledLength = 28; +#else +#error "Unsupported CPU Architecture" +#endif /* CPU architecture */ + +// This is the function to call when we encounter the entry or exit sleds. +std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction{nullptr}; + +// MProtectHelper is an RAII wrapper for calls to mprotect(...) that will undo +// any successful mprotect(...) changes. This is used to make a page writeable +// and executable, and upon destruction if it was successful in doing so returns +// the page into a read-only and executable page. +// +// This is only used specifically for runtime-patching of the XRay +// instrumentation points. This assumes that the executable pages are originally +// read-and-execute only. +class MProtectHelper { + void *PageAlignedAddr; + std::size_t MProtectLen; + bool MustCleanup; + +public: + explicit MProtectHelper(void *PageAlignedAddr, + std::size_t MProtectLen) XRAY_NEVER_INSTRUMENT + : PageAlignedAddr(PageAlignedAddr), + MProtectLen(MProtectLen), + MustCleanup(false) {} + + int MakeWriteable() XRAY_NEVER_INSTRUMENT { + auto R = mprotect(PageAlignedAddr, MProtectLen, + PROT_READ | PROT_WRITE | PROT_EXEC); + if (R != -1) + MustCleanup = true; + return R; + } + + ~MProtectHelper() XRAY_NEVER_INSTRUMENT { + if (MustCleanup) { + mprotect(PageAlignedAddr, MProtectLen, PROT_READ | PROT_EXEC); + } + } +}; + +} // namespace __xray + +extern std::atomic<bool> XRayInitialized; +extern std::atomic<__xray::XRaySledMap> XRayInstrMap; + +int __xray_set_handler(void (*entry)(int32_t, + XRayEntryType)) XRAY_NEVER_INSTRUMENT { + if (XRayInitialized.load(std::memory_order_acquire)) { + __xray::XRayPatchedFunction.store(entry, std::memory_order_release); + return 1; + } + return 0; +} + +int __xray_remove_handler() XRAY_NEVER_INSTRUMENT { + return __xray_set_handler(nullptr); +} + +std::atomic<bool> XRayPatching{false}; + +using namespace __xray; + +// FIXME: Figure out whether we can move this class to sanitizer_common instead +// as a generic "scope guard". +template <class Function> class CleanupInvoker { + Function Fn; + +public: + explicit CleanupInvoker(Function Fn) XRAY_NEVER_INSTRUMENT : Fn(Fn) {} + CleanupInvoker(const CleanupInvoker &) XRAY_NEVER_INSTRUMENT = default; + CleanupInvoker(CleanupInvoker &&) XRAY_NEVER_INSTRUMENT = default; + CleanupInvoker & + operator=(const CleanupInvoker &) XRAY_NEVER_INSTRUMENT = delete; + CleanupInvoker &operator=(CleanupInvoker &&) XRAY_NEVER_INSTRUMENT = delete; + ~CleanupInvoker() XRAY_NEVER_INSTRUMENT { Fn(); } +}; + +template <class Function> +CleanupInvoker<Function> ScopeCleanup(Function Fn) XRAY_NEVER_INSTRUMENT { + return CleanupInvoker<Function>{Fn}; +} + +// ControlPatching implements the common internals of the patching/unpatching +// implementation. |Enable| defines whether we're enabling or disabling the +// runtime XRay instrumentation. +XRayPatchingStatus ControlPatching(bool Enable) XRAY_NEVER_INSTRUMENT { + if (!XRayInitialized.load(std::memory_order_acquire)) + return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized. + + static bool NotPatching = false; + if (!XRayPatching.compare_exchange_strong(NotPatching, true, + std::memory_order_acq_rel, + std::memory_order_acquire)) { + return XRayPatchingStatus::ONGOING; // Already patching. + } + + bool PatchingSuccess = false; + auto XRayPatchingStatusResetter = ScopeCleanup([&PatchingSuccess] { + if (!PatchingSuccess) { + XRayPatching.store(false, std::memory_order_release); + } + }); + + // Step 1: Compute the function id, as a unique identifier per function in the + // instrumentation map. + XRaySledMap InstrMap = XRayInstrMap.load(std::memory_order_acquire); + if (InstrMap.Entries == 0) + return XRayPatchingStatus::NOT_INITIALIZED; + + const uint64_t PageSize = GetPageSizeCached(); + if ((PageSize == 0) || ((PageSize & (PageSize - 1)) != 0)) { + Report("System page size is not a power of two: %lld\n", PageSize); + return XRayPatchingStatus::FAILED; + } + + uint32_t FuncId = 1; + uint64_t CurFun = 0; + for (std::size_t I = 0; I < InstrMap.Entries; I++) { + auto Sled = InstrMap.Sleds[I]; + auto F = Sled.Function; + if (CurFun == 0) + CurFun = F; + if (F != CurFun) { + ++FuncId; + CurFun = F; + } + + // While we're here, we should patch the nop sled. To do that we mprotect + // the page containing the function to be writeable. + void *PageAlignedAddr = + reinterpret_cast<void *>(Sled.Address & ~(PageSize - 1)); + std::size_t MProtectLen = (Sled.Address + cSledLength) - + reinterpret_cast<uint64_t>(PageAlignedAddr); + MProtectHelper Protector(PageAlignedAddr, MProtectLen); + if (Protector.MakeWriteable() == -1) { + printf("Failed mprotect: %d\n", errno); + return XRayPatchingStatus::FAILED; + } + + bool Success = false; + switch (Sled.Kind) { + case XRayEntryType::ENTRY: + Success = patchFunctionEntry(Enable, FuncId, Sled); + break; + case XRayEntryType::EXIT: + Success = patchFunctionExit(Enable, FuncId, Sled); + break; + case XRayEntryType::TAIL: + Success = patchFunctionTailExit(Enable, FuncId, Sled); + break; + default: + Report("Unsupported sled kind: %d\n", int(Sled.Kind)); + continue; + } + (void)Success; + } + XRayPatching.store(false, std::memory_order_release); + PatchingSuccess = true; + return XRayPatchingStatus::SUCCESS; +} + +XRayPatchingStatus __xray_patch() XRAY_NEVER_INSTRUMENT { + return ControlPatching(true); +} + +XRayPatchingStatus __xray_unpatch() XRAY_NEVER_INSTRUMENT { + return ControlPatching(false); +} diff --git a/lib/xray/xray_interface_internal.h b/lib/xray/xray_interface_internal.h new file mode 100644 index 0000000000000..a8434a699f865 --- /dev/null +++ b/lib/xray/xray_interface_internal.h @@ -0,0 +1,69 @@ +//===-- xray_interface_internal.h -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of the API functions. See also include/xray/xray_interface.h. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_INTERFACE_INTERNAL_H +#define XRAY_INTERFACE_INTERNAL_H + +#include "sanitizer_common/sanitizer_platform.h" +#include "xray/xray_interface.h" +#include <cstddef> +#include <cstdint> + +extern "C" { + +struct XRaySledEntry { +#if SANITIZER_WORDSIZE == 64 + uint64_t Address; + uint64_t Function; + unsigned char Kind; + unsigned char AlwaysInstrument; + unsigned char Padding[14]; // Need 32 bytes +#elif SANITIZER_WORDSIZE == 32 + uint32_t Address; + uint32_t Function; + unsigned char Kind; + unsigned char AlwaysInstrument; + unsigned char Padding[6]; // Need 16 bytes +#else +#error "Unsupported word size." +#endif +}; +} + +namespace __xray { + +struct XRaySledMap { + const XRaySledEntry *Sleds; + size_t Entries; +}; + +uint64_t cycleFrequency(); + +bool patchFunctionEntry(bool Enable, uint32_t FuncId, + const XRaySledEntry &Sled); +bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled); +bool patchFunctionTailExit(bool Enable, uint32_t FuncId, + const XRaySledEntry &Sled); + +} // namespace __xray + +extern "C" { +// The following functions have to be defined in assembler, on a per-platform +// basis. See xray_trampoline_*.S files for implementations. +extern void __xray_FunctionEntry(); +extern void __xray_FunctionExit(); +extern void __xray_FunctionTailExit(); +} + +#endif diff --git a/lib/xray/xray_trampoline_AArch64.S b/lib/xray/xray_trampoline_AArch64.S new file mode 100644 index 0000000000000..f1a471c041f9a --- /dev/null +++ b/lib/xray/xray_trampoline_AArch64.S @@ -0,0 +1,89 @@ + .text + /* The variable containing the handler function pointer */ + .global _ZN6__xray19XRayPatchedFunctionE + /* Word-aligned function entry point */ + .p2align 2 + /* Let C/C++ see the symbol */ + .global __xray_FunctionEntry + .type __xray_FunctionEntry, %function + /* In C++ it is void extern "C" __xray_FunctionEntry(uint32_t FuncId) with + FuncId passed in W0 register. */ +__xray_FunctionEntry: + /* Move the return address beyond the end of sled data. The 12 bytes of + data are inserted in the code of the runtime patch, between the call + instruction and the instruction returned into. The data contains 32 + bits of instrumented function ID and 64 bits of the address of + the current trampoline. */ + ADD X30, X30, #12 + /* Push the registers which may be modified by the handler function */ + STP X1, X2, [SP, #-16]! + STP X3, X4, [SP, #-16]! + STP X5, X6, [SP, #-16]! + STP X7, X30, [SP, #-16]! + STP Q0, Q1, [SP, #-32]! + STP Q2, Q3, [SP, #-32]! + STP Q4, Q5, [SP, #-32]! + STP Q6, Q7, [SP, #-32]! + /* Load the address of _ZN6__xray19XRayPatchedFunctionE into X1 */ + LDR X1, =_ZN6__xray19XRayPatchedFunctionE + /* Load the handler function pointer into X2 */ + LDR X2, [X1] + /* Handler address is nullptr if handler is not set */ + CMP X2, #0 + BEQ FunctionEntry_restore + /* Function ID is already in W0 (the first parameter). + X1=0 means that we are tracing an entry event */ + MOV X1, #0 + /* Call the handler with 2 parameters in W0 and X1 */ + BLR X2 +FunctionEntry_restore: + /* Pop the saved registers */ + LDP Q6, Q7, [SP], #32 + LDP Q4, Q5, [SP], #32 + LDP Q2, Q3, [SP], #32 + LDP Q0, Q1, [SP], #32 + LDP X7, X30, [SP], #16 + LDP X5, X6, [SP], #16 + LDP X3, X4, [SP], #16 + LDP X1, X2, [SP], #16 + RET + + /* Word-aligned function entry point */ + .p2align 2 + /* Let C/C++ see the symbol */ + .global __xray_FunctionExit + .type __xray_FunctionExit, %function + /* In C++ it is void extern "C" __xray_FunctionExit(uint32_t FuncId) with + FuncId passed in W0 register. */ +__xray_FunctionExit: + /* Move the return address beyond the end of sled data. The 12 bytes of + data are inserted in the code of the runtime patch, between the call + instruction and the instruction returned into. The data contains 32 + bits of instrumented function ID and 64 bits of the address of + the current trampoline. */ + ADD X30, X30, #12 + /* Push the registers which may be modified by the handler function */ + STP X1, X2, [SP, #-16]! + STP X3, X4, [SP, #-16]! + STP X5, X6, [SP, #-16]! + STP X7, X30, [SP, #-16]! + STR Q0, [SP, #-16]! + /* Load the address of _ZN6__xray19XRayPatchedFunctionE into X1 */ + LDR X1, =_ZN6__xray19XRayPatchedFunctionE + /* Load the handler function pointer into X2 */ + LDR X2, [X1] + /* Handler address is nullptr if handler is not set */ + CMP X2, #0 + BEQ FunctionExit_restore + /* Function ID is already in W0 (the first parameter). + X1=1 means that we are tracing an exit event */ + MOV X1, #1 + /* Call the handler with 2 parameters in W0 and X1 */ + BLR X2 +FunctionExit_restore: + LDR Q0, [SP], #16 + LDP X7, X30, [SP], #16 + LDP X5, X6, [SP], #16 + LDP X3, X4, [SP], #16 + LDP X1, X2, [SP], #16 + RET diff --git a/lib/xray/xray_trampoline_arm.S b/lib/xray/xray_trampoline_arm.S new file mode 100644 index 0000000000000..5d87c971364dd --- /dev/null +++ b/lib/xray/xray_trampoline_arm.S @@ -0,0 +1,65 @@ + .syntax unified + .arch armv6t2 + .fpu vfpv2 + .code 32 + .global _ZN6__xray19XRayPatchedFunctionE + @ Word-aligned function entry point + .p2align 2 + @ Let C/C++ see the symbol + .global __xray_FunctionEntry + @ It preserves all registers except r0, r12(ip), r14(lr) and r15(pc) + @ Assume that "q" part of the floating-point registers is not used + @ for passing parameters to C/C++ functions. + .type __xray_FunctionEntry, %function + @ In C++ it is void extern "C" __xray_FunctionEntry(uint32_t FuncId) with + @ FuncId passed in r0 register. +__xray_FunctionEntry: + PUSH {r1-r3,lr} + @ Save floating-point parameters of the instrumented function + VPUSH {d0-d7} + MOVW r1,#:lower16:_ZN6__xray19XRayPatchedFunctionE + MOVT r1,#:upper16:_ZN6__xray19XRayPatchedFunctionE + LDR r2, [r1] + @ Handler address is nullptr if handler is not set + CMP r2, #0 + BEQ FunctionEntry_restore + @ Function ID is already in r0 (the first parameter). + @ r1=0 means that we are tracing an entry event + MOV r1, #0 + @ Call the handler with 2 parameters in r0 and r1 + BLX r2 +FunctionEntry_restore: + @ Restore floating-point parameters of the instrumented function + VPOP {d0-d7} + POP {r1-r3,pc} + + @ Word-aligned function entry point + .p2align 2 + @ Let C/C++ see the symbol + .global __xray_FunctionExit + @ Assume that d1-d7 are not used for the return value. + @ Assume that "q" part of the floating-point registers is not used for the + @ return value in C/C++. + .type __xray_FunctionExit, %function + @ In C++ it is extern "C" void __xray_FunctionExit(uint32_t FuncId) with + @ FuncId passed in r0 register. +__xray_FunctionExit: + PUSH {r1-r3,lr} + @ Save the floating-point return value of the instrumented function + VPUSH {d0} + @ Load the handler address + MOVW r1,#:lower16:_ZN6__xray19XRayPatchedFunctionE + MOVT r1,#:upper16:_ZN6__xray19XRayPatchedFunctionE + LDR r2, [r1] + @ Handler address is nullptr if handler is not set + CMP r2, #0 + BEQ FunctionExit_restore + @ Function ID is already in r0 (the first parameter). + @ 1 means that we are tracing an exit event + MOV r1, #1 + @ Call the handler with 2 parameters in r0 and r1 + BLX r2 +FunctionExit_restore: + @ Restore the floating-point return value of the instrumented function + VPOP {d0} + POP {r1-r3,pc} diff --git a/lib/xray/xray_trampoline_x86_64.S b/lib/xray/xray_trampoline_x86_64.S new file mode 100644 index 0000000000000..d90c30cd98e96 --- /dev/null +++ b/lib/xray/xray_trampoline_x86_64.S @@ -0,0 +1,147 @@ +//===-- xray_trampoline_x86.s -----------------------------------*- ASM -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the X86-specific assembler for the trampolines. +// +//===----------------------------------------------------------------------===// + +.macro SAVE_REGISTERS + subq $200, %rsp + movupd %xmm0, 184(%rsp) + movupd %xmm1, 168(%rsp) + movupd %xmm2, 152(%rsp) + movupd %xmm3, 136(%rsp) + movupd %xmm4, 120(%rsp) + movupd %xmm5, 104(%rsp) + movupd %xmm6, 88(%rsp) + movupd %xmm7, 72(%rsp) + movq %rdi, 64(%rsp) + movq %rax, 56(%rsp) + movq %rdx, 48(%rsp) + movq %rsi, 40(%rsp) + movq %rcx, 32(%rsp) + movq %r8, 24(%rsp) + movq %r9, 16(%rsp) +.endm + +.macro RESTORE_REGISTERS + movupd 184(%rsp), %xmm0 + movupd 168(%rsp), %xmm1 + movupd 152(%rsp), %xmm2 + movupd 136(%rsp), %xmm3 + movupd 120(%rsp), %xmm4 + movupd 104(%rsp), %xmm5 + movupd 88(%rsp) , %xmm6 + movupd 72(%rsp) , %xmm7 + movq 64(%rsp), %rdi + movq 56(%rsp), %rax + movq 48(%rsp), %rdx + movq 40(%rsp), %rsi + movq 32(%rsp), %rcx + movq 24(%rsp), %r8 + movq 16(%rsp), %r9 + addq $200, %rsp +.endm + + .text + .file "xray_trampoline_x86.S" + .globl __xray_FunctionEntry + .align 16, 0x90 + .type __xray_FunctionEntry,@function + +__xray_FunctionEntry: + .cfi_startproc + pushq %rbp + .cfi_def_cfa_offset 16 + SAVE_REGISTERS + + // This load has to be atomic, it's concurrent with __xray_patch(). + // On x86/amd64, a simple (type-aligned) MOV instruction is enough. + movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax + testq %rax, %rax + je .Ltmp0 + + // The patched function prolog puts its xray_instr_map index into %r10d. + movl %r10d, %edi + xor %esi,%esi + callq *%rax +.Ltmp0: + RESTORE_REGISTERS + popq %rbp + retq +.Ltmp1: + .size __xray_FunctionEntry, .Ltmp1-__xray_FunctionEntry + .cfi_endproc + + .globl __xray_FunctionExit + .align 16, 0x90 + .type __xray_FunctionExit,@function +__xray_FunctionExit: + .cfi_startproc + // Save the important registers first. Since we're assuming that this + // function is only jumped into, we only preserve the registers for + // returning. + pushq %rbp + .cfi_def_cfa_offset 16 + subq $56, %rsp + .cfi_def_cfa_offset 32 + movupd %xmm0, 40(%rsp) + movupd %xmm1, 24(%rsp) + movq %rax, 16(%rsp) + movq %rdx, 8(%rsp) + movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax + testq %rax,%rax + je .Ltmp2 + + movl %r10d, %edi + movl $1, %esi + callq *%rax +.Ltmp2: + // Restore the important registers. + movupd 40(%rsp), %xmm0 + movupd 24(%rsp), %xmm1 + movq 16(%rsp), %rax + movq 8(%rsp), %rdx + addq $56, %rsp + popq %rbp + retq +.Ltmp3: + .size __xray_FunctionExit, .Ltmp3-__xray_FunctionExit + .cfi_endproc + + .global __xray_FunctionTailExit + .align 16, 0x90 + .type __xray_FunctionTailExit,@function +__xray_FunctionTailExit: + .cfi_startproc + // Save the important registers as in the entry trampoline, but indicate that + // this is an exit. In the future, we will introduce a new entry type that + // differentiates between a normal exit and a tail exit, but we'd have to do + // this and increment the version number for the header. + pushq %rbp + .cfi_def_cfa_offset 16 + SAVE_REGISTERS + + movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax + testq %rax,%rax + je .Ltmp4 + + movl %r10d, %edi + movl $1, %esi + callq *%rax + +.Ltmp4: + RESTORE_REGISTERS + popq %rbp + retq +.Ltmp5: + .size __xray_FunctionTailExit, .Ltmp5-__xray_FunctionTailExit + .cfi_endproc diff --git a/lib/xray/xray_x86_64.cc b/lib/xray/xray_x86_64.cc new file mode 100644 index 0000000000000..3ee91896c6e0a --- /dev/null +++ b/lib/xray/xray_x86_64.cc @@ -0,0 +1,202 @@ +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include <atomic> +#include <cstdint> +#include <errno.h> +#include <fcntl.h> +#include <iterator> +#include <limits> +#include <tuple> +#include <unistd.h> + +namespace __xray { + +static std::pair<ssize_t, bool> +retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT { + auto BytesToRead = std::distance(Begin, End); + ssize_t BytesRead; + ssize_t TotalBytesRead = 0; + while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) { + if (BytesRead == -1) { + if (errno == EINTR) + continue; + Report("Read error; errno = %d\n", errno); + return std::make_pair(TotalBytesRead, false); + } + + TotalBytesRead += BytesRead; + BytesToRead -= BytesRead; + Begin += BytesRead; + } + return std::make_pair(TotalBytesRead, true); +} + +static bool readValueFromFile(const char *Filename, + long long *Value) XRAY_NEVER_INSTRUMENT { + int Fd = open(Filename, O_RDONLY | O_CLOEXEC); + if (Fd == -1) + return false; + static constexpr size_t BufSize = 256; + char Line[BufSize] = {}; + ssize_t BytesRead; + bool Success; + std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize); + if (!Success) + return false; + close(Fd); + char *End = nullptr; + long long Tmp = internal_simple_strtoll(Line, &End, 10); + bool Result = false; + if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) { + *Value = Tmp; + Result = true; + } + return Result; +} + +uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT { + long long CPUFrequency = -1; + if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", + &CPUFrequency)) { + CPUFrequency *= 1000; + } else if (readValueFromFile( + "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", + &CPUFrequency)) { + CPUFrequency *= 1000; + } else { + Report("Unable to determine CPU frequency for TSC accounting.\n"); + } + return CPUFrequency == -1 ? 0 : static_cast<uint64_t>(CPUFrequency); +} + +static constexpr uint8_t CallOpCode = 0xe8; +static constexpr uint16_t MovR10Seq = 0xba41; +static constexpr uint16_t Jmp9Seq = 0x09eb; +static constexpr uint8_t JmpOpCode = 0xe9; +static constexpr uint8_t RetOpCode = 0xc3; + +static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()}; +static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()}; + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // Here we do the dance of replacing the following sled: + // + // xray_sled_n: + // jmp +9 + // <9 byte nop> + // + // With the following: + // + // mov r10d, <function id> + // call <relative 32bit offset to entry trampoline> + // + // We need to do this in the following order: + // + // 1. Put the function id first, 2 bytes from the start of the sled (just + // after the 2-byte jmp instruction). + // 2. Put the call opcode 6 bytes from the start of the sled. + // 3. Put the relative offset 7 bytes from the start of the sled. + // 4. Do an atomic write over the jmp instruction for the "mov r10d" + // opcode and first operand. + // + // Prerequisite is to compute the relative offset to the + // __xray_FunctionEntry function's address. + int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionEntry) - + (static_cast<int64_t>(Sled.Address) + 11); + if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { + Report("XRay Entry trampoline (%p) too far from sled (%p)\n", + __xray_FunctionEntry, reinterpret_cast<void *>(Sled.Address)); + return false; + } + if (Enable) { + *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId; + *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode; + *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset; + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq, + std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq, + std::memory_order_release); + // FIXME: Write out the nops still? + } + return true; +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // Here we do the dance of replacing the following sled: + // + // xray_sled_n: + // ret + // <10 byte nop> + // + // With the following: + // + // mov r10d, <function id> + // jmp <relative 32bit offset to exit trampoline> + // + // 1. Put the function id first, 2 bytes from the start of the sled (just + // after the 1-byte ret instruction). + // 2. Put the jmp opcode 6 bytes from the start of the sled. + // 3. Put the relative offset 7 bytes from the start of the sled. + // 4. Do an atomic write over the jmp instruction for the "mov r10d" + // opcode and first operand. + // + // Prerequisite is to compute the relative offset fo the + // __xray_FunctionExit function's address. + int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) - + (static_cast<int64_t>(Sled.Address) + 11); + if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { + Report("XRay Exit trampoline (%p) too far from sled (%p)\n", + __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address)); + return false; + } + if (Enable) { + *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId; + *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode; + *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset; + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq, + std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode, + std::memory_order_release); + // FIXME: Write out the nops still? + } + return true; +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // Here we do the dance of replacing the tail call sled with a similar + // sequence as the entry sled, but calls the tail exit sled instead. + int64_t TrampolineOffset = + reinterpret_cast<int64_t>(__xray_FunctionTailExit) - + (static_cast<int64_t>(Sled.Address) + 11); + if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { + Report("XRay Exit trampoline (%p) too far from sled (%p)\n", + __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address)); + return false; + } + if (Enable) { + *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId; + *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode; + *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset; + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq, + std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq, + std::memory_order_release); + // FIXME: Write out the nops still? + } + return true; +} + +} // namespace __xray diff --git a/lib/xray/xray_x86_64.h b/lib/xray/xray_x86_64.h new file mode 100644 index 0000000000000..52d2dea8f0d93 --- /dev/null +++ b/lib/xray/xray_x86_64.h @@ -0,0 +1,32 @@ +//===-- xray_x86_64.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_X86_64_H +#define XRAY_X86_64_H + +#include <cstdint> +#include <x86intrin.h> + +#include "sanitizer_common/sanitizer_internal_defs.h" +#include "xray_defs.h" + +namespace __xray { + +ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT { + unsigned LongCPU; + uint64_t TSC = __rdtscp(&LongCPU); + CPU = LongCPU; + return TSC; +} +} + +#endif // XRAY_X86_64_H |