From 3a1720af1d7f43edc5b214cde0be11bfb94d077e Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 23 Oct 2019 17:52:22 +0000 Subject: Vendor import of stripped compiler-rt trunk r375505, the last commit before the upstream Subversion repository was made read-only, and the LLVM project migrated to GitHub: https://llvm.org/svn/llvm-project/compiler-rt/trunk@375505 --- lib/xray/xray_AArch64.cc | 127 ------ lib/xray/xray_AArch64.cpp | 127 ++++++ lib/xray/xray_arm.cc | 164 ------- lib/xray/xray_arm.cpp | 164 +++++++ lib/xray/xray_basic_flags.cc | 49 --- lib/xray/xray_basic_flags.cpp | 49 +++ lib/xray/xray_basic_logging.cc | 515 ---------------------- lib/xray/xray_basic_logging.cpp | 515 ++++++++++++++++++++++ lib/xray/xray_buffer_queue.cc | 237 ----------- lib/xray/xray_buffer_queue.cpp | 237 +++++++++++ lib/xray/xray_fdr_flags.cc | 47 -- lib/xray/xray_fdr_flags.cpp | 47 ++ lib/xray/xray_fdr_logging.cc | 757 --------------------------------- lib/xray/xray_fdr_logging.cpp | 757 +++++++++++++++++++++++++++++++++ lib/xray/xray_flags.cc | 84 ---- lib/xray/xray_flags.cpp | 84 ++++ lib/xray/xray_init.cc | 115 ----- lib/xray/xray_init.cpp | 115 +++++ lib/xray/xray_interface.cc | 480 --------------------- lib/xray/xray_interface.cpp | 480 +++++++++++++++++++++ lib/xray/xray_log_interface.cc | 209 --------- lib/xray/xray_log_interface.cpp | 209 +++++++++ lib/xray/xray_mips.cc | 170 -------- lib/xray/xray_mips.cpp | 170 ++++++++ lib/xray/xray_mips64.cc | 178 -------- lib/xray/xray_mips64.cpp | 178 ++++++++ lib/xray/xray_powerpc64.cc | 111 ----- lib/xray/xray_powerpc64.cpp | 111 +++++ lib/xray/xray_profile_collector.cc | 414 ------------------ lib/xray/xray_profile_collector.cpp | 414 ++++++++++++++++++ lib/xray/xray_profiling.cc | 519 ---------------------- lib/xray/xray_profiling.cpp | 519 ++++++++++++++++++++++ lib/xray/xray_profiling_flags.cc | 39 -- lib/xray/xray_profiling_flags.cpp | 39 ++ lib/xray/xray_trampoline_powerpc64.cc | 15 - 
lib/xray/xray_trampoline_powerpc64.cpp | 15 + lib/xray/xray_utils.cc | 195 --------- lib/xray/xray_utils.cpp | 195 +++++++++ lib/xray/xray_x86_64.cc | 353 --------------- lib/xray/xray_x86_64.cpp | 353 +++++++++++++++ 40 files changed, 4778 insertions(+), 4778 deletions(-) delete mode 100644 lib/xray/xray_AArch64.cc create mode 100644 lib/xray/xray_AArch64.cpp delete mode 100644 lib/xray/xray_arm.cc create mode 100644 lib/xray/xray_arm.cpp delete mode 100644 lib/xray/xray_basic_flags.cc create mode 100644 lib/xray/xray_basic_flags.cpp delete mode 100644 lib/xray/xray_basic_logging.cc create mode 100644 lib/xray/xray_basic_logging.cpp delete mode 100644 lib/xray/xray_buffer_queue.cc create mode 100644 lib/xray/xray_buffer_queue.cpp delete mode 100644 lib/xray/xray_fdr_flags.cc create mode 100644 lib/xray/xray_fdr_flags.cpp delete mode 100644 lib/xray/xray_fdr_logging.cc create mode 100644 lib/xray/xray_fdr_logging.cpp delete mode 100644 lib/xray/xray_flags.cc create mode 100644 lib/xray/xray_flags.cpp delete mode 100644 lib/xray/xray_init.cc create mode 100644 lib/xray/xray_init.cpp delete mode 100644 lib/xray/xray_interface.cc create mode 100644 lib/xray/xray_interface.cpp delete mode 100644 lib/xray/xray_log_interface.cc create mode 100644 lib/xray/xray_log_interface.cpp delete mode 100644 lib/xray/xray_mips.cc create mode 100644 lib/xray/xray_mips.cpp delete mode 100644 lib/xray/xray_mips64.cc create mode 100644 lib/xray/xray_mips64.cpp delete mode 100644 lib/xray/xray_powerpc64.cc create mode 100644 lib/xray/xray_powerpc64.cpp delete mode 100644 lib/xray/xray_profile_collector.cc create mode 100644 lib/xray/xray_profile_collector.cpp delete mode 100644 lib/xray/xray_profiling.cc create mode 100644 lib/xray/xray_profiling.cpp delete mode 100644 lib/xray/xray_profiling_flags.cc create mode 100644 lib/xray/xray_profiling_flags.cpp delete mode 100644 lib/xray/xray_trampoline_powerpc64.cc create mode 100644 lib/xray/xray_trampoline_powerpc64.cpp delete mode 100644 
lib/xray/xray_utils.cc create mode 100644 lib/xray/xray_utils.cpp delete mode 100644 lib/xray/xray_x86_64.cc create mode 100644 lib/xray/xray_x86_64.cpp (limited to 'lib/xray') diff --git a/lib/xray/xray_AArch64.cc b/lib/xray/xray_AArch64.cc deleted file mode 100644 index 4c7805488ab8..000000000000 --- a/lib/xray/xray_AArch64.cc +++ /dev/null @@ -1,127 +0,0 @@ -//===-- xray_AArch64.cc -----------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instrumentation system. -// -// Implementation of AArch64-specific routines (64-bit). -// -//===----------------------------------------------------------------------===// -#include "sanitizer_common/sanitizer_common.h" -#include "xray_defs.h" -#include "xray_interface_internal.h" -#include -#include - -extern "C" void __clear_cache(void *start, void *end); - -namespace __xray { - -// The machine codes for some instructions used in runtime patching. -enum class PatchOpcodes : uint32_t { - PO_StpX0X30SP_m16e = 0xA9BF7BE0, // STP X0, X30, [SP, #-16]! - PO_LdrW0_12 = 0x18000060, // LDR W0, #12 - PO_LdrX16_12 = 0x58000070, // LDR X16, #12 - PO_BlrX16 = 0xD63F0200, // BLR X16 - PO_LdpX0X30SP_16 = 0xA8C17BE0, // LDP X0, X30, [SP], #16 - PO_B32 = 0x14000008 // B #32 -}; - -inline static bool patchSled(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled, - void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { - // When |Enable| == true, - // We replace the following compile-time stub (sled): - // - // xray_sled_n: - // B #32 - // 7 NOPs (24 bytes) - // - // With the following runtime patch: - // - // xray_sled_n: - // STP X0, X30, [SP, #-16]! 
; PUSH {r0, lr} - // LDR W0, #12 ; W0 := function ID - // LDR X16,#12 ; X16 := address of the trampoline - // BLR X16 - // ;DATA: 32 bits of function ID - // ;DATA: lower 32 bits of the address of the trampoline - // ;DATA: higher 32 bits of the address of the trampoline - // LDP X0, X30, [SP], #16 ; POP {r0, lr} - // - // Replacement of the first 4-byte instruction should be the last and atomic - // operation, so that the user code which reaches the sled concurrently - // either jumps over the whole sled, or executes the whole sled when the - // latter is ready. - // - // When |Enable|==false, we set back the first instruction in the sled to be - // B #32 - - uint32_t *FirstAddress = reinterpret_cast(Sled.Address); - uint32_t *CurAddress = FirstAddress + 1; - if (Enable) { - *CurAddress = uint32_t(PatchOpcodes::PO_LdrW0_12); - CurAddress++; - *CurAddress = uint32_t(PatchOpcodes::PO_LdrX16_12); - CurAddress++; - *CurAddress = uint32_t(PatchOpcodes::PO_BlrX16); - CurAddress++; - *CurAddress = FuncId; - CurAddress++; - *reinterpret_cast(CurAddress) = TracingHook; - CurAddress += 2; - *CurAddress = uint32_t(PatchOpcodes::PO_LdpX0X30SP_16); - CurAddress++; - std::atomic_store_explicit( - reinterpret_cast *>(FirstAddress), - uint32_t(PatchOpcodes::PO_StpX0X30SP_m16e), std::memory_order_release); - } else { - std::atomic_store_explicit( - reinterpret_cast *>(FirstAddress), - uint32_t(PatchOpcodes::PO_B32), std::memory_order_release); - } - __clear_cache(reinterpret_cast(FirstAddress), - reinterpret_cast(CurAddress)); - return true; -} - -bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled, - void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { - return patchSled(Enable, FuncId, Sled, Trampoline); -} - -bool patchFunctionExit(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); -} - -bool patchFunctionTailExit(const bool Enable, const 
uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit); -} - -bool patchCustomEvent(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) - XRAY_NEVER_INSTRUMENT { // FIXME: Implement in aarch64? - return false; -} - -bool patchTypedEvent(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - // FIXME: Implement in aarch64? - return false; -} - -// FIXME: Maybe implement this better? -bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } - -} // namespace __xray - -extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { - // FIXME: this will have to be implemented in the trampoline assembly file -} diff --git a/lib/xray/xray_AArch64.cpp b/lib/xray/xray_AArch64.cpp new file mode 100644 index 000000000000..081941b70375 --- /dev/null +++ b/lib/xray/xray_AArch64.cpp @@ -0,0 +1,127 @@ +//===-- xray_AArch64.cpp ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of AArch64-specific routines (64-bit). +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include +#include + +extern "C" void __clear_cache(void *start, void *end); + +namespace __xray { + +// The machine codes for some instructions used in runtime patching. +enum class PatchOpcodes : uint32_t { + PO_StpX0X30SP_m16e = 0xA9BF7BE0, // STP X0, X30, [SP, #-16]! 
+ PO_LdrW0_12 = 0x18000060, // LDR W0, #12 + PO_LdrX16_12 = 0x58000070, // LDR X16, #12 + PO_BlrX16 = 0xD63F0200, // BLR X16 + PO_LdpX0X30SP_16 = 0xA8C17BE0, // LDP X0, X30, [SP], #16 + PO_B32 = 0x14000008 // B #32 +}; + +inline static bool patchSled(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { + // When |Enable| == true, + // We replace the following compile-time stub (sled): + // + // xray_sled_n: + // B #32 + // 7 NOPs (24 bytes) + // + // With the following runtime patch: + // + // xray_sled_n: + // STP X0, X30, [SP, #-16]! ; PUSH {r0, lr} + // LDR W0, #12 ; W0 := function ID + // LDR X16,#12 ; X16 := address of the trampoline + // BLR X16 + // ;DATA: 32 bits of function ID + // ;DATA: lower 32 bits of the address of the trampoline + // ;DATA: higher 32 bits of the address of the trampoline + // LDP X0, X30, [SP], #16 ; POP {r0, lr} + // + // Replacement of the first 4-byte instruction should be the last and atomic + // operation, so that the user code which reaches the sled concurrently + // either jumps over the whole sled, or executes the whole sled when the + // latter is ready. 
+ // + // When |Enable|==false, we set back the first instruction in the sled to be + // B #32 + + uint32_t *FirstAddress = reinterpret_cast(Sled.Address); + uint32_t *CurAddress = FirstAddress + 1; + if (Enable) { + *CurAddress = uint32_t(PatchOpcodes::PO_LdrW0_12); + CurAddress++; + *CurAddress = uint32_t(PatchOpcodes::PO_LdrX16_12); + CurAddress++; + *CurAddress = uint32_t(PatchOpcodes::PO_BlrX16); + CurAddress++; + *CurAddress = FuncId; + CurAddress++; + *reinterpret_cast(CurAddress) = TracingHook; + CurAddress += 2; + *CurAddress = uint32_t(PatchOpcodes::PO_LdpX0X30SP_16); + CurAddress++; + std::atomic_store_explicit( + reinterpret_cast *>(FirstAddress), + uint32_t(PatchOpcodes::PO_StpX0X30SP_m16e), std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast *>(FirstAddress), + uint32_t(PatchOpcodes::PO_B32), std::memory_order_release); + } + __clear_cache(reinterpret_cast(FirstAddress), + reinterpret_cast(CurAddress)); + return true; +} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, Trampoline); +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit); +} + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) + XRAY_NEVER_INSTRUMENT { // FIXME: Implement in aarch64? + return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in aarch64? + return false; +} + +// FIXME: Maybe implement this better? 
+bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } + +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { + // FIXME: this will have to be implemented in the trampoline assembly file +} diff --git a/lib/xray/xray_arm.cc b/lib/xray/xray_arm.cc deleted file mode 100644 index db26efaa782a..000000000000 --- a/lib/xray/xray_arm.cc +++ /dev/null @@ -1,164 +0,0 @@ -//===-- xray_arm.cc ---------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instrumentation system. -// -// Implementation of ARM-specific routines (32-bit). -// -//===----------------------------------------------------------------------===// -#include "sanitizer_common/sanitizer_common.h" -#include "xray_defs.h" -#include "xray_interface_internal.h" -#include -#include - -extern "C" void __clear_cache(void *start, void *end); - -namespace __xray { - -// The machine codes for some instructions used in runtime patching. 
-enum class PatchOpcodes : uint32_t { - PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr} - PO_BlxIp = 0xE12FFF3C, // BLX ip - PO_PopR0Lr = 0xE8BD4001, // POP {r0, lr} - PO_B20 = 0xEA000005 // B #20 -}; - -// 0xUUUUWXYZ -> 0x000W0XYZ -inline static uint32_t getMovwMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT { - return (Value & 0xfff) | ((Value & 0xf000) << 4); -} - -// 0xWXYZUUUU -> 0x000W0XYZ -inline static uint32_t getMovtMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT { - return getMovwMask(Value >> 16); -} - -// Writes the following instructions: -// MOVW R, # -// MOVT R, # -inline static uint32_t * -write32bitLoadReg(uint8_t regNo, uint32_t *Address, - const uint32_t Value) XRAY_NEVER_INSTRUMENT { - // This is a fatal error: we cannot just report it and continue execution. - assert(regNo <= 15 && "Register number must be 0 to 15."); - // MOVW R, #0xWXYZ in machine code is 0xE30WRXYZ - *Address = (0xE3000000 | (uint32_t(regNo) << 12) | getMovwMask(Value)); - Address++; - // MOVT R, #0xWXYZ in machine code is 0xE34WRXYZ - *Address = (0xE3400000 | (uint32_t(regNo) << 12) | getMovtMask(Value)); - return Address + 1; -} - -// Writes the following instructions: -// MOVW r0, # -// MOVT r0, # -inline static uint32_t * -write32bitLoadR0(uint32_t *Address, - const uint32_t Value) XRAY_NEVER_INSTRUMENT { - return write32bitLoadReg(0, Address, Value); -} - -// Writes the following instructions: -// MOVW ip, # -// MOVT ip, # -inline static uint32_t * -write32bitLoadIP(uint32_t *Address, - const uint32_t Value) XRAY_NEVER_INSTRUMENT { - return write32bitLoadReg(12, Address, Value); -} - -inline static bool patchSled(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled, - void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { - // When |Enable| == true, - // We replace the following compile-time stub (sled): - // - // xray_sled_n: - // B #20 - // 6 NOPs (24 bytes) - // - // With the following runtime patch: - // - // xray_sled_n: - // PUSH {r0, lr} - // MOVW r0, # - 
// MOVT r0, # - // MOVW ip, # - // MOVT ip, # - // BLX ip - // POP {r0, lr} - // - // Replacement of the first 4-byte instruction should be the last and atomic - // operation, so that the user code which reaches the sled concurrently - // either jumps over the whole sled, or executes the whole sled when the - // latter is ready. - // - // When |Enable|==false, we set back the first instruction in the sled to be - // B #20 - - uint32_t *FirstAddress = reinterpret_cast(Sled.Address); - uint32_t *CurAddress = FirstAddress + 1; - if (Enable) { - CurAddress = - write32bitLoadR0(CurAddress, reinterpret_cast(FuncId)); - CurAddress = - write32bitLoadIP(CurAddress, reinterpret_cast(TracingHook)); - *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp); - CurAddress++; - *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr); - CurAddress++; - std::atomic_store_explicit( - reinterpret_cast *>(FirstAddress), - uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release); - } else { - std::atomic_store_explicit( - reinterpret_cast *>(FirstAddress), - uint32_t(PatchOpcodes::PO_B20), std::memory_order_release); - } - __clear_cache(reinterpret_cast(FirstAddress), - reinterpret_cast(CurAddress)); - return true; -} - -bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled, - void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { - return patchSled(Enable, FuncId, Sled, Trampoline); -} - -bool patchFunctionExit(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); -} - -bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit); -} - -bool patchCustomEvent(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) - XRAY_NEVER_INSTRUMENT { // FIXME: Implement in arm? 
- return false; -} - -bool patchTypedEvent(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - // FIXME: Implement in arm? - return false; -} - -// FIXME: Maybe implement this better? -bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } - -} // namespace __xray - -extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { - // FIXME: this will have to be implemented in the trampoline assembly file -} diff --git a/lib/xray/xray_arm.cpp b/lib/xray/xray_arm.cpp new file mode 100644 index 000000000000..9ad8065eb886 --- /dev/null +++ b/lib/xray/xray_arm.cpp @@ -0,0 +1,164 @@ +//===-- xray_arm.cpp --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of ARM-specific routines (32-bit). +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include +#include + +extern "C" void __clear_cache(void *start, void *end); + +namespace __xray { + +// The machine codes for some instructions used in runtime patching. 
+enum class PatchOpcodes : uint32_t { + PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr} + PO_BlxIp = 0xE12FFF3C, // BLX ip + PO_PopR0Lr = 0xE8BD4001, // POP {r0, lr} + PO_B20 = 0xEA000005 // B #20 +}; + +// 0xUUUUWXYZ -> 0x000W0XYZ +inline static uint32_t getMovwMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT { + return (Value & 0xfff) | ((Value & 0xf000) << 4); +} + +// 0xWXYZUUUU -> 0x000W0XYZ +inline static uint32_t getMovtMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT { + return getMovwMask(Value >> 16); +} + +// Writes the following instructions: +// MOVW R, # +// MOVT R, # +inline static uint32_t * +write32bitLoadReg(uint8_t regNo, uint32_t *Address, + const uint32_t Value) XRAY_NEVER_INSTRUMENT { + // This is a fatal error: we cannot just report it and continue execution. + assert(regNo <= 15 && "Register number must be 0 to 15."); + // MOVW R, #0xWXYZ in machine code is 0xE30WRXYZ + *Address = (0xE3000000 | (uint32_t(regNo) << 12) | getMovwMask(Value)); + Address++; + // MOVT R, #0xWXYZ in machine code is 0xE34WRXYZ + *Address = (0xE3400000 | (uint32_t(regNo) << 12) | getMovtMask(Value)); + return Address + 1; +} + +// Writes the following instructions: +// MOVW r0, # +// MOVT r0, # +inline static uint32_t * +write32bitLoadR0(uint32_t *Address, + const uint32_t Value) XRAY_NEVER_INSTRUMENT { + return write32bitLoadReg(0, Address, Value); +} + +// Writes the following instructions: +// MOVW ip, # +// MOVT ip, # +inline static uint32_t * +write32bitLoadIP(uint32_t *Address, + const uint32_t Value) XRAY_NEVER_INSTRUMENT { + return write32bitLoadReg(12, Address, Value); +} + +inline static bool patchSled(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { + // When |Enable| == true, + // We replace the following compile-time stub (sled): + // + // xray_sled_n: + // B #20 + // 6 NOPs (24 bytes) + // + // With the following runtime patch: + // + // xray_sled_n: + // PUSH {r0, lr} + // MOVW r0, # + 
// MOVT r0, # + // MOVW ip, # + // MOVT ip, # + // BLX ip + // POP {r0, lr} + // + // Replacement of the first 4-byte instruction should be the last and atomic + // operation, so that the user code which reaches the sled concurrently + // either jumps over the whole sled, or executes the whole sled when the + // latter is ready. + // + // When |Enable|==false, we set back the first instruction in the sled to be + // B #20 + + uint32_t *FirstAddress = reinterpret_cast(Sled.Address); + uint32_t *CurAddress = FirstAddress + 1; + if (Enable) { + CurAddress = + write32bitLoadR0(CurAddress, reinterpret_cast(FuncId)); + CurAddress = + write32bitLoadIP(CurAddress, reinterpret_cast(TracingHook)); + *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp); + CurAddress++; + *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr); + CurAddress++; + std::atomic_store_explicit( + reinterpret_cast *>(FirstAddress), + uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast *>(FirstAddress), + uint32_t(PatchOpcodes::PO_B20), std::memory_order_release); + } + __clear_cache(reinterpret_cast(FirstAddress), + reinterpret_cast(CurAddress)); + return true; +} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, Trampoline); +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit); +} + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) + XRAY_NEVER_INSTRUMENT { // FIXME: Implement in arm? 
+ return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in arm? + return false; +} + +// FIXME: Maybe implement this better? +bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } + +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { + // FIXME: this will have to be implemented in the trampoline assembly file +} diff --git a/lib/xray/xray_basic_flags.cc b/lib/xray/xray_basic_flags.cc deleted file mode 100644 index 75b674c85656..000000000000 --- a/lib/xray/xray_basic_flags.cc +++ /dev/null @@ -1,49 +0,0 @@ -//===-- xray_basic_flags.cc -------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instrumentation system. -// -// XRay Basic flag parsing logic. -//===----------------------------------------------------------------------===// - -#include "xray_basic_flags.h" -#include "sanitizer_common/sanitizer_common.h" -#include "sanitizer_common/sanitizer_flag_parser.h" -#include "sanitizer_common/sanitizer_libc.h" -#include "xray_defs.h" - -using namespace __sanitizer; - -namespace __xray { - -/// Use via basicFlags(). 
-BasicFlags xray_basic_flags_dont_use_directly; - -void BasicFlags::setDefaults() XRAY_NEVER_INSTRUMENT { -#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; -#include "xray_basic_flags.inc" -#undef XRAY_FLAG -} - -void registerXRayBasicFlags(FlagParser *P, - BasicFlags *F) XRAY_NEVER_INSTRUMENT { -#define XRAY_FLAG(Type, Name, DefaultValue, Description) \ - RegisterFlag(P, #Name, Description, &F->Name); -#include "xray_basic_flags.inc" -#undef XRAY_FLAG -} - -const char *useCompilerDefinedBasicFlags() XRAY_NEVER_INSTRUMENT { -#ifdef XRAY_BASIC_OPTIONS - return SANITIZER_STRINGIFY(XRAY_BASIC_OPTIONS); -#else - return ""; -#endif -} - -} // namespace __xray diff --git a/lib/xray/xray_basic_flags.cpp b/lib/xray/xray_basic_flags.cpp new file mode 100644 index 000000000000..e0a5e7bb29ee --- /dev/null +++ b/lib/xray/xray_basic_flags.cpp @@ -0,0 +1,49 @@ +//===-- xray_basic_flags.cpp ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// XRay Basic flag parsing logic. +//===----------------------------------------------------------------------===// + +#include "xray_basic_flags.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "xray_defs.h" + +using namespace __sanitizer; + +namespace __xray { + +/// Use via basicFlags(). 
+BasicFlags xray_basic_flags_dont_use_directly; + +void BasicFlags::setDefaults() XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; +#include "xray_basic_flags.inc" +#undef XRAY_FLAG +} + +void registerXRayBasicFlags(FlagParser *P, + BasicFlags *F) XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) \ + RegisterFlag(P, #Name, Description, &F->Name); +#include "xray_basic_flags.inc" +#undef XRAY_FLAG +} + +const char *useCompilerDefinedBasicFlags() XRAY_NEVER_INSTRUMENT { +#ifdef XRAY_BASIC_OPTIONS + return SANITIZER_STRINGIFY(XRAY_BASIC_OPTIONS); +#else + return ""; +#endif +} + +} // namespace __xray diff --git a/lib/xray/xray_basic_logging.cc b/lib/xray/xray_basic_logging.cc deleted file mode 100644 index 553041ce0c31..000000000000 --- a/lib/xray/xray_basic_logging.cc +++ /dev/null @@ -1,515 +0,0 @@ -//===-- xray_basic_logging.cc -----------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instrumentation system. -// -// Implementation of a simple in-memory log of XRay events. This defines a -// logging function that's compatible with the XRay handler interface, and -// routines for exporting data to files. 
-// -//===----------------------------------------------------------------------===// - -#include -#include -#include -#include -#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC -#include -#endif -#include -#include -#include - -#include "sanitizer_common/sanitizer_allocator_internal.h" -#include "sanitizer_common/sanitizer_libc.h" -#include "xray/xray_records.h" -#include "xray_recursion_guard.h" -#include "xray_basic_flags.h" -#include "xray_basic_logging.h" -#include "xray_defs.h" -#include "xray_flags.h" -#include "xray_interface_internal.h" -#include "xray_tsc.h" -#include "xray_utils.h" - -namespace __xray { - -static SpinMutex LogMutex; - -namespace { -// We use elements of this type to record the entry TSC of every function ID we -// see as we're tracing a particular thread's execution. -struct alignas(16) StackEntry { - int32_t FuncId; - uint16_t Type; - uint8_t CPU; - uint8_t Padding; - uint64_t TSC; -}; - -static_assert(sizeof(StackEntry) == 16, "Wrong size for StackEntry"); - -struct XRAY_TLS_ALIGNAS(64) ThreadLocalData { - void *InMemoryBuffer = nullptr; - size_t BufferSize = 0; - size_t BufferOffset = 0; - void *ShadowStack = nullptr; - size_t StackSize = 0; - size_t StackEntries = 0; - __xray::LogWriter *LogWriter = nullptr; -}; - -struct BasicLoggingOptions { - int DurationFilterMicros = 0; - size_t MaxStackDepth = 0; - size_t ThreadBufferSize = 0; -}; -} // namespace - -static pthread_key_t PThreadKey; - -static atomic_uint8_t BasicInitialized{0}; - -struct BasicLoggingOptions GlobalOptions; - -thread_local atomic_uint8_t Guard{0}; - -static atomic_uint8_t UseRealTSC{0}; -static atomic_uint64_t ThresholdTicks{0}; -static atomic_uint64_t TicksPerSec{0}; -static atomic_uint64_t CycleFrequency{NanosecondsPerSecond}; - -static LogWriter *getLog() XRAY_NEVER_INSTRUMENT { - LogWriter* LW = LogWriter::Open(); - if (LW == nullptr) - return LW; - - static pthread_once_t DetectOnce = PTHREAD_ONCE_INIT; - 
pthread_once(&DetectOnce, +[] { - if (atomic_load(&UseRealTSC, memory_order_acquire)) - atomic_store(&CycleFrequency, getTSCFrequency(), memory_order_release); - }); - - // Since we're here, we get to write the header. We set it up so that the - // header will only be written once, at the start, and let the threads - // logging do writes which just append. - XRayFileHeader Header; - // Version 2 includes tail exit records. - // Version 3 includes pid inside records. - Header.Version = 3; - Header.Type = FileTypes::NAIVE_LOG; - Header.CycleFrequency = atomic_load(&CycleFrequency, memory_order_acquire); - - // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' - // before setting the values in the header. - Header.ConstantTSC = 1; - Header.NonstopTSC = 1; - LW->WriteAll(reinterpret_cast(&Header), - reinterpret_cast(&Header) + sizeof(Header)); - return LW; -} - -static LogWriter *getGlobalLog() XRAY_NEVER_INSTRUMENT { - static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; - static LogWriter *LW = nullptr; - pthread_once(&OnceInit, +[] { LW = getLog(); }); - return LW; -} - -static ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT { - thread_local ThreadLocalData TLD; - thread_local bool UNUSED TOnce = [] { - if (GlobalOptions.ThreadBufferSize == 0) { - if (Verbosity()) - Report("Not initializing TLD since ThreadBufferSize == 0.\n"); - return false; - } - pthread_setspecific(PThreadKey, &TLD); - TLD.LogWriter = getGlobalLog(); - TLD.InMemoryBuffer = reinterpret_cast( - InternalAlloc(sizeof(XRayRecord) * GlobalOptions.ThreadBufferSize, - nullptr, alignof(XRayRecord))); - TLD.BufferSize = GlobalOptions.ThreadBufferSize; - TLD.BufferOffset = 0; - if (GlobalOptions.MaxStackDepth == 0) { - if (Verbosity()) - Report("Not initializing the ShadowStack since MaxStackDepth == 0.\n"); - TLD.StackSize = 0; - TLD.StackEntries = 0; - TLD.ShadowStack = nullptr; - return false; - } - TLD.ShadowStack = reinterpret_cast( - InternalAlloc(sizeof(StackEntry) * 
GlobalOptions.MaxStackDepth, nullptr, - alignof(StackEntry))); - TLD.StackSize = GlobalOptions.MaxStackDepth; - TLD.StackEntries = 0; - return false; - }(); - return TLD; -} - -template -void InMemoryRawLog(int32_t FuncId, XRayEntryType Type, - RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { - auto &TLD = getThreadLocalData(); - LogWriter *LW = getGlobalLog(); - if (LW == nullptr) - return; - - // Use a simple recursion guard, to handle cases where we're already logging - // and for one reason or another, this function gets called again in the same - // thread. - RecursionGuard G(Guard); - if (!G) - return; - - uint8_t CPU = 0; - uint64_t TSC = ReadTSC(CPU); - - switch (Type) { - case XRayEntryType::ENTRY: - case XRayEntryType::LOG_ARGS_ENTRY: { - // Short circuit if we've reached the maximum depth of the stack. - if (TLD.StackEntries++ >= TLD.StackSize) - return; - - // When we encounter an entry event, we keep track of the TSC and the CPU, - // and put it in the stack. - StackEntry E; - E.FuncId = FuncId; - E.CPU = CPU; - E.Type = Type; - E.TSC = TSC; - auto StackEntryPtr = static_cast(TLD.ShadowStack) + - (sizeof(StackEntry) * (TLD.StackEntries - 1)); - internal_memcpy(StackEntryPtr, &E, sizeof(StackEntry)); - break; - } - case XRayEntryType::EXIT: - case XRayEntryType::TAIL: { - if (TLD.StackEntries == 0) - break; - - if (--TLD.StackEntries >= TLD.StackSize) - return; - - // When we encounter an exit event, we check whether all the following are - // true: - // - // - The Function ID is the same as the most recent entry in the stack. - // - The CPU is the same as the most recent entry in the stack. - // - The Delta of the TSCs is less than the threshold amount of time we're - // looking to record. - // - // If all of these conditions are true, we pop the stack and don't write a - // record and move the record offset back. 
- StackEntry StackTop; - auto StackEntryPtr = static_cast(TLD.ShadowStack) + - (sizeof(StackEntry) * TLD.StackEntries); - internal_memcpy(&StackTop, StackEntryPtr, sizeof(StackEntry)); - if (StackTop.FuncId == FuncId && StackTop.CPU == CPU && - StackTop.TSC < TSC) { - auto Delta = TSC - StackTop.TSC; - if (Delta < atomic_load(&ThresholdTicks, memory_order_relaxed)) { - DCHECK(TLD.BufferOffset > 0); - TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2; - return; - } - } - break; - } - default: - // Should be unreachable. - DCHECK(false && "Unsupported XRayEntryType encountered."); - break; - } - - // First determine whether the delta between the function's enter record and - // the exit record is higher than the threshold. - XRayRecord R; - R.RecordType = RecordTypes::NORMAL; - R.CPU = CPU; - R.TSC = TSC; - R.TId = GetTid(); - R.PId = internal_getpid(); - R.Type = Type; - R.FuncId = FuncId; - auto FirstEntry = reinterpret_cast(TLD.InMemoryBuffer); - internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R)); - if (++TLD.BufferOffset == TLD.BufferSize) { - SpinMutexLock Lock(&LogMutex); - LW->WriteAll(reinterpret_cast(FirstEntry), - reinterpret_cast(FirstEntry + TLD.BufferOffset)); - TLD.BufferOffset = 0; - TLD.StackEntries = 0; - } -} - -template -void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1, - RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { - auto &TLD = getThreadLocalData(); - auto FirstEntry = - reinterpret_cast(TLD.InMemoryBuffer); - const auto &BuffLen = TLD.BufferSize; - LogWriter *LW = getGlobalLog(); - if (LW == nullptr) - return; - - // First we check whether there's enough space to write the data consecutively - // in the thread-local buffer. If not, we first flush the buffer before - // attempting to write the two records that must be consecutive. 
- if (TLD.BufferOffset + 2 > BuffLen) { - SpinMutexLock Lock(&LogMutex); - LW->WriteAll(reinterpret_cast(FirstEntry), - reinterpret_cast(FirstEntry + TLD.BufferOffset)); - TLD.BufferOffset = 0; - TLD.StackEntries = 0; - } - - // Then we write the "we have an argument" record. - InMemoryRawLog(FuncId, Type, ReadTSC); - - RecursionGuard G(Guard); - if (!G) - return; - - // And, from here on write the arg payload. - XRayArgPayload R; - R.RecordType = RecordTypes::ARG_PAYLOAD; - R.FuncId = FuncId; - R.TId = GetTid(); - R.PId = internal_getpid(); - R.Arg = Arg1; - internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R)); - if (++TLD.BufferOffset == BuffLen) { - SpinMutexLock Lock(&LogMutex); - LW->WriteAll(reinterpret_cast(FirstEntry), - reinterpret_cast(FirstEntry + TLD.BufferOffset)); - TLD.BufferOffset = 0; - TLD.StackEntries = 0; - } -} - -void basicLoggingHandleArg0RealTSC(int32_t FuncId, - XRayEntryType Type) XRAY_NEVER_INSTRUMENT { - InMemoryRawLog(FuncId, Type, readTSC); -} - -void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Type) - XRAY_NEVER_INSTRUMENT { - InMemoryRawLog(FuncId, Type, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT { - timespec TS; - int result = clock_gettime(CLOCK_REALTIME, &TS); - if (result != 0) { - Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno)); - TS = {0, 0}; - } - CPU = 0; - return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; - }); -} - -void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Type, - uint64_t Arg1) XRAY_NEVER_INSTRUMENT { - InMemoryRawLogWithArg(FuncId, Type, Arg1, readTSC); -} - -void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Type, - uint64_t Arg1) XRAY_NEVER_INSTRUMENT { - InMemoryRawLogWithArg( - FuncId, Type, Arg1, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT { - timespec TS; - int result = clock_gettime(CLOCK_REALTIME, &TS); - if (result != 0) { - Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno)); - TS = {0, 0}; - } - CPU = 0; - return 
TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; - }); -} - -static void TLDDestructor(void *P) XRAY_NEVER_INSTRUMENT { - ThreadLocalData &TLD = *reinterpret_cast(P); - auto ExitGuard = at_scope_exit([&TLD] { - // Clean up dynamic resources. - if (TLD.InMemoryBuffer) - InternalFree(TLD.InMemoryBuffer); - if (TLD.ShadowStack) - InternalFree(TLD.ShadowStack); - if (Verbosity()) - Report("Cleaned up log for TID: %d\n", GetTid()); - }); - - if (TLD.LogWriter == nullptr || TLD.BufferOffset == 0) { - if (Verbosity()) - Report("Skipping buffer for TID: %d; Offset = %llu\n", GetTid(), - TLD.BufferOffset); - return; - } - - { - SpinMutexLock L(&LogMutex); - TLD.LogWriter->WriteAll(reinterpret_cast(TLD.InMemoryBuffer), - reinterpret_cast(TLD.InMemoryBuffer) + - (sizeof(XRayRecord) * TLD.BufferOffset)); - } - - // Because this thread's exit could be the last one trying to write to - // the file and that we're not able to close out the file properly, we - // sync instead and hope that the pending writes are flushed as the - // thread exits. - TLD.LogWriter->Flush(); -} - -XRayLogInitStatus basicLoggingInit(UNUSED size_t BufferSize, - UNUSED size_t BufferMax, void *Options, - size_t OptionsSize) XRAY_NEVER_INSTRUMENT { - uint8_t Expected = 0; - if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 1, - memory_order_acq_rel)) { - if (Verbosity()) - Report("Basic logging already initialized.\n"); - return XRayLogInitStatus::XRAY_LOG_INITIALIZED; - } - - static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; - pthread_once(&OnceInit, +[] { - pthread_key_create(&PThreadKey, TLDDestructor); - atomic_store(&UseRealTSC, probeRequiredCPUFeatures(), memory_order_release); - // Initialize the global TicksPerSec value. - atomic_store(&TicksPerSec, - probeRequiredCPUFeatures() ? 
getTSCFrequency() - : NanosecondsPerSecond, - memory_order_release); - if (!atomic_load(&UseRealTSC, memory_order_relaxed) && Verbosity()) - Report("WARNING: Required CPU features missing for XRay instrumentation, " - "using emulation instead.\n"); - }); - - FlagParser P; - BasicFlags F; - F.setDefaults(); - registerXRayBasicFlags(&P, &F); - P.ParseString(useCompilerDefinedBasicFlags()); - auto *EnvOpts = GetEnv("XRAY_BASIC_OPTIONS"); - if (EnvOpts == nullptr) - EnvOpts = ""; - - P.ParseString(EnvOpts); - - // If XRAY_BASIC_OPTIONS was not defined, then we use the deprecated options - // set through XRAY_OPTIONS instead. - if (internal_strlen(EnvOpts) == 0) { - F.func_duration_threshold_us = - flags()->xray_naive_log_func_duration_threshold_us; - F.max_stack_depth = flags()->xray_naive_log_max_stack_depth; - F.thread_buffer_size = flags()->xray_naive_log_thread_buffer_size; - } - - P.ParseString(static_cast(Options)); - GlobalOptions.ThreadBufferSize = F.thread_buffer_size; - GlobalOptions.DurationFilterMicros = F.func_duration_threshold_us; - GlobalOptions.MaxStackDepth = F.max_stack_depth; - *basicFlags() = F; - - atomic_store(&ThresholdTicks, - atomic_load(&TicksPerSec, memory_order_acquire) * - GlobalOptions.DurationFilterMicros / 1000000, - memory_order_release); - __xray_set_handler_arg1(atomic_load(&UseRealTSC, memory_order_acquire) - ? basicLoggingHandleArg1RealTSC - : basicLoggingHandleArg1EmulateTSC); - __xray_set_handler(atomic_load(&UseRealTSC, memory_order_acquire) - ? basicLoggingHandleArg0RealTSC - : basicLoggingHandleArg0EmulateTSC); - - // TODO: Implement custom event and typed event handling support in Basic - // Mode. 
- __xray_remove_customevent_handler(); - __xray_remove_typedevent_handler(); - - return XRayLogInitStatus::XRAY_LOG_INITIALIZED; -} - -XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT { - uint8_t Expected = 0; - if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 0, - memory_order_acq_rel) && - Verbosity()) - Report("Basic logging already finalized.\n"); - - // Nothing really to do aside from marking state of the global to be - // uninitialized. - - return XRayLogInitStatus::XRAY_LOG_FINALIZED; -} - -XRayLogFlushStatus basicLoggingFlush() XRAY_NEVER_INSTRUMENT { - // This really does nothing, since flushing the logs happen at the end of a - // thread's lifetime, or when the buffers are full. - return XRayLogFlushStatus::XRAY_LOG_FLUSHED; -} - -// This is a handler that, effectively, does nothing. -void basicLoggingHandleArg0Empty(int32_t, XRayEntryType) XRAY_NEVER_INSTRUMENT { -} - -bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT { - XRayLogImpl Impl{ - basicLoggingInit, - basicLoggingFinalize, - basicLoggingHandleArg0Empty, - basicLoggingFlush, - }; - auto RegistrationResult = __xray_log_register_mode("xray-basic", Impl); - if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && - Verbosity()) - Report("Cannot register XRay Basic Mode to 'xray-basic'; error = %d\n", - RegistrationResult); - if (flags()->xray_naive_log || - !internal_strcmp(flags()->xray_mode, "xray-basic")) { - auto SelectResult = __xray_log_select_mode("xray-basic"); - if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) { - if (Verbosity()) - Report("Failed selecting XRay Basic Mode; error = %d\n", SelectResult); - return false; - } - - // We initialize the implementation using the data we get from the - // XRAY_BASIC_OPTIONS environment variable, at this point of the - // implementation. - auto *Env = GetEnv("XRAY_BASIC_OPTIONS"); - auto InitResult = - __xray_log_init_mode("xray-basic", Env == nullptr ? 
"" : Env); - if (InitResult != XRayLogInitStatus::XRAY_LOG_INITIALIZED) { - if (Verbosity()) - Report("Failed initializing XRay Basic Mode; error = %d\n", InitResult); - return false; - } - - // At this point we know that we've successfully initialized Basic mode - // tracing, and the only chance we're going to get for the current thread to - // clean-up may be at thread/program exit. To ensure that we're going to get - // the cleanup even without calling the finalization routines, we're - // registering a program exit function that will do the cleanup. - static pthread_once_t DynamicOnce = PTHREAD_ONCE_INIT; - pthread_once(&DynamicOnce, +[] { - static void *FakeTLD = nullptr; - FakeTLD = &getThreadLocalData(); - Atexit(+[] { TLDDestructor(FakeTLD); }); - }); - } - return true; -} - -} // namespace __xray - -static auto UNUSED Unused = __xray::basicLogDynamicInitializer(); diff --git a/lib/xray/xray_basic_logging.cpp b/lib/xray/xray_basic_logging.cpp new file mode 100644 index 000000000000..6e8e93131451 --- /dev/null +++ b/lib/xray/xray_basic_logging.cpp @@ -0,0 +1,515 @@ +//===-- xray_basic_logging.cpp ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of a simple in-memory log of XRay events. This defines a +// logging function that's compatible with the XRay handler interface, and +// routines for exporting data to files. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC +#include +#endif +#include +#include +#include + +#include "sanitizer_common/sanitizer_allocator_internal.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "xray/xray_records.h" +#include "xray_recursion_guard.h" +#include "xray_basic_flags.h" +#include "xray_basic_logging.h" +#include "xray_defs.h" +#include "xray_flags.h" +#include "xray_interface_internal.h" +#include "xray_tsc.h" +#include "xray_utils.h" + +namespace __xray { + +static SpinMutex LogMutex; + +namespace { +// We use elements of this type to record the entry TSC of every function ID we +// see as we're tracing a particular thread's execution. +struct alignas(16) StackEntry { + int32_t FuncId; + uint16_t Type; + uint8_t CPU; + uint8_t Padding; + uint64_t TSC; +}; + +static_assert(sizeof(StackEntry) == 16, "Wrong size for StackEntry"); + +struct XRAY_TLS_ALIGNAS(64) ThreadLocalData { + void *InMemoryBuffer = nullptr; + size_t BufferSize = 0; + size_t BufferOffset = 0; + void *ShadowStack = nullptr; + size_t StackSize = 0; + size_t StackEntries = 0; + __xray::LogWriter *LogWriter = nullptr; +}; + +struct BasicLoggingOptions { + int DurationFilterMicros = 0; + size_t MaxStackDepth = 0; + size_t ThreadBufferSize = 0; +}; +} // namespace + +static pthread_key_t PThreadKey; + +static atomic_uint8_t BasicInitialized{0}; + +struct BasicLoggingOptions GlobalOptions; + +thread_local atomic_uint8_t Guard{0}; + +static atomic_uint8_t UseRealTSC{0}; +static atomic_uint64_t ThresholdTicks{0}; +static atomic_uint64_t TicksPerSec{0}; +static atomic_uint64_t CycleFrequency{NanosecondsPerSecond}; + +static LogWriter *getLog() XRAY_NEVER_INSTRUMENT { + LogWriter* LW = LogWriter::Open(); + if (LW == nullptr) + return LW; + + static pthread_once_t DetectOnce = PTHREAD_ONCE_INIT; + 
pthread_once(&DetectOnce, +[] { + if (atomic_load(&UseRealTSC, memory_order_acquire)) + atomic_store(&CycleFrequency, getTSCFrequency(), memory_order_release); + }); + + // Since we're here, we get to write the header. We set it up so that the + // header will only be written once, at the start, and let the threads + // logging do writes which just append. + XRayFileHeader Header; + // Version 2 includes tail exit records. + // Version 3 includes pid inside records. + Header.Version = 3; + Header.Type = FileTypes::NAIVE_LOG; + Header.CycleFrequency = atomic_load(&CycleFrequency, memory_order_acquire); + + // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' + // before setting the values in the header. + Header.ConstantTSC = 1; + Header.NonstopTSC = 1; + LW->WriteAll(reinterpret_cast(&Header), + reinterpret_cast(&Header) + sizeof(Header)); + return LW; +} + +static LogWriter *getGlobalLog() XRAY_NEVER_INSTRUMENT { + static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; + static LogWriter *LW = nullptr; + pthread_once(&OnceInit, +[] { LW = getLog(); }); + return LW; +} + +static ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT { + thread_local ThreadLocalData TLD; + thread_local bool UNUSED TOnce = [] { + if (GlobalOptions.ThreadBufferSize == 0) { + if (Verbosity()) + Report("Not initializing TLD since ThreadBufferSize == 0.\n"); + return false; + } + pthread_setspecific(PThreadKey, &TLD); + TLD.LogWriter = getGlobalLog(); + TLD.InMemoryBuffer = reinterpret_cast( + InternalAlloc(sizeof(XRayRecord) * GlobalOptions.ThreadBufferSize, + nullptr, alignof(XRayRecord))); + TLD.BufferSize = GlobalOptions.ThreadBufferSize; + TLD.BufferOffset = 0; + if (GlobalOptions.MaxStackDepth == 0) { + if (Verbosity()) + Report("Not initializing the ShadowStack since MaxStackDepth == 0.\n"); + TLD.StackSize = 0; + TLD.StackEntries = 0; + TLD.ShadowStack = nullptr; + return false; + } + TLD.ShadowStack = reinterpret_cast( + InternalAlloc(sizeof(StackEntry) * 
GlobalOptions.MaxStackDepth, nullptr, + alignof(StackEntry))); + TLD.StackSize = GlobalOptions.MaxStackDepth; + TLD.StackEntries = 0; + return false; + }(); + return TLD; +} + +template +void InMemoryRawLog(int32_t FuncId, XRayEntryType Type, + RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { + auto &TLD = getThreadLocalData(); + LogWriter *LW = getGlobalLog(); + if (LW == nullptr) + return; + + // Use a simple recursion guard, to handle cases where we're already logging + // and for one reason or another, this function gets called again in the same + // thread. + RecursionGuard G(Guard); + if (!G) + return; + + uint8_t CPU = 0; + uint64_t TSC = ReadTSC(CPU); + + switch (Type) { + case XRayEntryType::ENTRY: + case XRayEntryType::LOG_ARGS_ENTRY: { + // Short circuit if we've reached the maximum depth of the stack. + if (TLD.StackEntries++ >= TLD.StackSize) + return; + + // When we encounter an entry event, we keep track of the TSC and the CPU, + // and put it in the stack. + StackEntry E; + E.FuncId = FuncId; + E.CPU = CPU; + E.Type = Type; + E.TSC = TSC; + auto StackEntryPtr = static_cast(TLD.ShadowStack) + + (sizeof(StackEntry) * (TLD.StackEntries - 1)); + internal_memcpy(StackEntryPtr, &E, sizeof(StackEntry)); + break; + } + case XRayEntryType::EXIT: + case XRayEntryType::TAIL: { + if (TLD.StackEntries == 0) + break; + + if (--TLD.StackEntries >= TLD.StackSize) + return; + + // When we encounter an exit event, we check whether all the following are + // true: + // + // - The Function ID is the same as the most recent entry in the stack. + // - The CPU is the same as the most recent entry in the stack. + // - The Delta of the TSCs is less than the threshold amount of time we're + // looking to record. + // + // If all of these conditions are true, we pop the stack and don't write a + // record and move the record offset back. 
+ StackEntry StackTop; + auto StackEntryPtr = static_cast(TLD.ShadowStack) + + (sizeof(StackEntry) * TLD.StackEntries); + internal_memcpy(&StackTop, StackEntryPtr, sizeof(StackEntry)); + if (StackTop.FuncId == FuncId && StackTop.CPU == CPU && + StackTop.TSC < TSC) { + auto Delta = TSC - StackTop.TSC; + if (Delta < atomic_load(&ThresholdTicks, memory_order_relaxed)) { + DCHECK(TLD.BufferOffset > 0); + TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2; + return; + } + } + break; + } + default: + // Should be unreachable. + DCHECK(false && "Unsupported XRayEntryType encountered."); + break; + } + + // First determine whether the delta between the function's enter record and + // the exit record is higher than the threshold. + XRayRecord R; + R.RecordType = RecordTypes::NORMAL; + R.CPU = CPU; + R.TSC = TSC; + R.TId = GetTid(); + R.PId = internal_getpid(); + R.Type = Type; + R.FuncId = FuncId; + auto FirstEntry = reinterpret_cast(TLD.InMemoryBuffer); + internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R)); + if (++TLD.BufferOffset == TLD.BufferSize) { + SpinMutexLock Lock(&LogMutex); + LW->WriteAll(reinterpret_cast(FirstEntry), + reinterpret_cast(FirstEntry + TLD.BufferOffset)); + TLD.BufferOffset = 0; + TLD.StackEntries = 0; + } +} + +template +void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1, + RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { + auto &TLD = getThreadLocalData(); + auto FirstEntry = + reinterpret_cast(TLD.InMemoryBuffer); + const auto &BuffLen = TLD.BufferSize; + LogWriter *LW = getGlobalLog(); + if (LW == nullptr) + return; + + // First we check whether there's enough space to write the data consecutively + // in the thread-local buffer. If not, we first flush the buffer before + // attempting to write the two records that must be consecutive. 
+ if (TLD.BufferOffset + 2 > BuffLen) { + SpinMutexLock Lock(&LogMutex); + LW->WriteAll(reinterpret_cast(FirstEntry), + reinterpret_cast(FirstEntry + TLD.BufferOffset)); + TLD.BufferOffset = 0; + TLD.StackEntries = 0; + } + + // Then we write the "we have an argument" record. + InMemoryRawLog(FuncId, Type, ReadTSC); + + RecursionGuard G(Guard); + if (!G) + return; + + // And, from here on write the arg payload. + XRayArgPayload R; + R.RecordType = RecordTypes::ARG_PAYLOAD; + R.FuncId = FuncId; + R.TId = GetTid(); + R.PId = internal_getpid(); + R.Arg = Arg1; + internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R)); + if (++TLD.BufferOffset == BuffLen) { + SpinMutexLock Lock(&LogMutex); + LW->WriteAll(reinterpret_cast(FirstEntry), + reinterpret_cast(FirstEntry + TLD.BufferOffset)); + TLD.BufferOffset = 0; + TLD.StackEntries = 0; + } +} + +void basicLoggingHandleArg0RealTSC(int32_t FuncId, + XRayEntryType Type) XRAY_NEVER_INSTRUMENT { + InMemoryRawLog(FuncId, Type, readTSC); +} + +void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Type) + XRAY_NEVER_INSTRUMENT { + InMemoryRawLog(FuncId, Type, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT { + timespec TS; + int result = clock_gettime(CLOCK_REALTIME, &TS); + if (result != 0) { + Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno)); + TS = {0, 0}; + } + CPU = 0; + return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; + }); +} + +void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Type, + uint64_t Arg1) XRAY_NEVER_INSTRUMENT { + InMemoryRawLogWithArg(FuncId, Type, Arg1, readTSC); +} + +void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Type, + uint64_t Arg1) XRAY_NEVER_INSTRUMENT { + InMemoryRawLogWithArg( + FuncId, Type, Arg1, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT { + timespec TS; + int result = clock_gettime(CLOCK_REALTIME, &TS); + if (result != 0) { + Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno)); + TS = {0, 0}; + } + CPU = 0; + return 
TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; + }); +} + +static void TLDDestructor(void *P) XRAY_NEVER_INSTRUMENT { + ThreadLocalData &TLD = *reinterpret_cast(P); + auto ExitGuard = at_scope_exit([&TLD] { + // Clean up dynamic resources. + if (TLD.InMemoryBuffer) + InternalFree(TLD.InMemoryBuffer); + if (TLD.ShadowStack) + InternalFree(TLD.ShadowStack); + if (Verbosity()) + Report("Cleaned up log for TID: %d\n", GetTid()); + }); + + if (TLD.LogWriter == nullptr || TLD.BufferOffset == 0) { + if (Verbosity()) + Report("Skipping buffer for TID: %d; Offset = %llu\n", GetTid(), + TLD.BufferOffset); + return; + } + + { + SpinMutexLock L(&LogMutex); + TLD.LogWriter->WriteAll(reinterpret_cast(TLD.InMemoryBuffer), + reinterpret_cast(TLD.InMemoryBuffer) + + (sizeof(XRayRecord) * TLD.BufferOffset)); + } + + // Because this thread's exit could be the last one trying to write to + // the file and that we're not able to close out the file properly, we + // sync instead and hope that the pending writes are flushed as the + // thread exits. + TLD.LogWriter->Flush(); +} + +XRayLogInitStatus basicLoggingInit(UNUSED size_t BufferSize, + UNUSED size_t BufferMax, void *Options, + size_t OptionsSize) XRAY_NEVER_INSTRUMENT { + uint8_t Expected = 0; + if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 1, + memory_order_acq_rel)) { + if (Verbosity()) + Report("Basic logging already initialized.\n"); + return XRayLogInitStatus::XRAY_LOG_INITIALIZED; + } + + static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; + pthread_once(&OnceInit, +[] { + pthread_key_create(&PThreadKey, TLDDestructor); + atomic_store(&UseRealTSC, probeRequiredCPUFeatures(), memory_order_release); + // Initialize the global TicksPerSec value. + atomic_store(&TicksPerSec, + probeRequiredCPUFeatures() ? 
getTSCFrequency() + : NanosecondsPerSecond, + memory_order_release); + if (!atomic_load(&UseRealTSC, memory_order_relaxed) && Verbosity()) + Report("WARNING: Required CPU features missing for XRay instrumentation, " + "using emulation instead.\n"); + }); + + FlagParser P; + BasicFlags F; + F.setDefaults(); + registerXRayBasicFlags(&P, &F); + P.ParseString(useCompilerDefinedBasicFlags()); + auto *EnvOpts = GetEnv("XRAY_BASIC_OPTIONS"); + if (EnvOpts == nullptr) + EnvOpts = ""; + + P.ParseString(EnvOpts); + + // If XRAY_BASIC_OPTIONS was not defined, then we use the deprecated options + // set through XRAY_OPTIONS instead. + if (internal_strlen(EnvOpts) == 0) { + F.func_duration_threshold_us = + flags()->xray_naive_log_func_duration_threshold_us; + F.max_stack_depth = flags()->xray_naive_log_max_stack_depth; + F.thread_buffer_size = flags()->xray_naive_log_thread_buffer_size; + } + + P.ParseString(static_cast(Options)); + GlobalOptions.ThreadBufferSize = F.thread_buffer_size; + GlobalOptions.DurationFilterMicros = F.func_duration_threshold_us; + GlobalOptions.MaxStackDepth = F.max_stack_depth; + *basicFlags() = F; + + atomic_store(&ThresholdTicks, + atomic_load(&TicksPerSec, memory_order_acquire) * + GlobalOptions.DurationFilterMicros / 1000000, + memory_order_release); + __xray_set_handler_arg1(atomic_load(&UseRealTSC, memory_order_acquire) + ? basicLoggingHandleArg1RealTSC + : basicLoggingHandleArg1EmulateTSC); + __xray_set_handler(atomic_load(&UseRealTSC, memory_order_acquire) + ? basicLoggingHandleArg0RealTSC + : basicLoggingHandleArg0EmulateTSC); + + // TODO: Implement custom event and typed event handling support in Basic + // Mode. 
+ __xray_remove_customevent_handler(); + __xray_remove_typedevent_handler(); + + return XRayLogInitStatus::XRAY_LOG_INITIALIZED; +} + +XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT { + uint8_t Expected = 0; + if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 0, + memory_order_acq_rel) && + Verbosity()) + Report("Basic logging already finalized.\n"); + + // Nothing really to do aside from marking state of the global to be + // uninitialized. + + return XRayLogInitStatus::XRAY_LOG_FINALIZED; +} + +XRayLogFlushStatus basicLoggingFlush() XRAY_NEVER_INSTRUMENT { + // This really does nothing, since flushing the logs happen at the end of a + // thread's lifetime, or when the buffers are full. + return XRayLogFlushStatus::XRAY_LOG_FLUSHED; +} + +// This is a handler that, effectively, does nothing. +void basicLoggingHandleArg0Empty(int32_t, XRayEntryType) XRAY_NEVER_INSTRUMENT { +} + +bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT { + XRayLogImpl Impl{ + basicLoggingInit, + basicLoggingFinalize, + basicLoggingHandleArg0Empty, + basicLoggingFlush, + }; + auto RegistrationResult = __xray_log_register_mode("xray-basic", Impl); + if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && + Verbosity()) + Report("Cannot register XRay Basic Mode to 'xray-basic'; error = %d\n", + RegistrationResult); + if (flags()->xray_naive_log || + !internal_strcmp(flags()->xray_mode, "xray-basic")) { + auto SelectResult = __xray_log_select_mode("xray-basic"); + if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) { + if (Verbosity()) + Report("Failed selecting XRay Basic Mode; error = %d\n", SelectResult); + return false; + } + + // We initialize the implementation using the data we get from the + // XRAY_BASIC_OPTIONS environment variable, at this point of the + // implementation. + auto *Env = GetEnv("XRAY_BASIC_OPTIONS"); + auto InitResult = + __xray_log_init_mode("xray-basic", Env == nullptr ? 
"" : Env); + if (InitResult != XRayLogInitStatus::XRAY_LOG_INITIALIZED) { + if (Verbosity()) + Report("Failed initializing XRay Basic Mode; error = %d\n", InitResult); + return false; + } + + // At this point we know that we've successfully initialized Basic mode + // tracing, and the only chance we're going to get for the current thread to + // clean-up may be at thread/program exit. To ensure that we're going to get + // the cleanup even without calling the finalization routines, we're + // registering a program exit function that will do the cleanup. + static pthread_once_t DynamicOnce = PTHREAD_ONCE_INIT; + pthread_once(&DynamicOnce, +[] { + static void *FakeTLD = nullptr; + FakeTLD = &getThreadLocalData(); + Atexit(+[] { TLDDestructor(FakeTLD); }); + }); + } + return true; +} + +} // namespace __xray + +static auto UNUSED Unused = __xray::basicLogDynamicInitializer(); diff --git a/lib/xray/xray_buffer_queue.cc b/lib/xray/xray_buffer_queue.cc deleted file mode 100644 index 4cfa717de208..000000000000 --- a/lib/xray/xray_buffer_queue.cc +++ /dev/null @@ -1,237 +0,0 @@ -//===-- xray_buffer_queue.cc -----------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instruementation system. -// -// Defines the interface for a buffer queue implementation. 
-// -//===----------------------------------------------------------------------===// -#include "xray_buffer_queue.h" -#include "sanitizer_common/sanitizer_atomic.h" -#include "sanitizer_common/sanitizer_common.h" -#include "sanitizer_common/sanitizer_libc.h" -#if !SANITIZER_FUCHSIA -#include "sanitizer_common/sanitizer_posix.h" -#endif -#include "xray_allocator.h" -#include "xray_defs.h" -#include -#include - -using namespace __xray; - -namespace { - -BufferQueue::ControlBlock *allocControlBlock(size_t Size, size_t Count) { - auto B = - allocateBuffer((sizeof(BufferQueue::ControlBlock) - 1) + (Size * Count)); - return B == nullptr ? nullptr - : reinterpret_cast(B); -} - -void deallocControlBlock(BufferQueue::ControlBlock *C, size_t Size, - size_t Count) { - deallocateBuffer(reinterpret_cast(C), - (sizeof(BufferQueue::ControlBlock) - 1) + (Size * Count)); -} - -void decRefCount(BufferQueue::ControlBlock *C, size_t Size, size_t Count) { - if (C == nullptr) - return; - if (atomic_fetch_sub(&C->RefCount, 1, memory_order_acq_rel) == 1) - deallocControlBlock(C, Size, Count); -} - -void incRefCount(BufferQueue::ControlBlock *C) { - if (C == nullptr) - return; - atomic_fetch_add(&C->RefCount, 1, memory_order_acq_rel); -} - -// We use a struct to ensure that we are allocating one atomic_uint64_t per -// cache line. This allows us to not worry about false-sharing among atomic -// objects being updated (constantly) by different threads. 
-struct ExtentsPadded { - union { - atomic_uint64_t Extents; - unsigned char Storage[kCacheLineSize]; - }; -}; - -constexpr size_t kExtentsSize = sizeof(ExtentsPadded); - -} // namespace - -BufferQueue::ErrorCode BufferQueue::init(size_t BS, size_t BC) { - SpinMutexLock Guard(&Mutex); - - if (!finalizing()) - return BufferQueue::ErrorCode::AlreadyInitialized; - - cleanupBuffers(); - - bool Success = false; - BufferSize = BS; - BufferCount = BC; - - BackingStore = allocControlBlock(BufferSize, BufferCount); - if (BackingStore == nullptr) - return BufferQueue::ErrorCode::NotEnoughMemory; - - auto CleanupBackingStore = at_scope_exit([&, this] { - if (Success) - return; - deallocControlBlock(BackingStore, BufferSize, BufferCount); - BackingStore = nullptr; - }); - - // Initialize enough atomic_uint64_t instances, each - ExtentsBackingStore = allocControlBlock(kExtentsSize, BufferCount); - if (ExtentsBackingStore == nullptr) - return BufferQueue::ErrorCode::NotEnoughMemory; - - auto CleanupExtentsBackingStore = at_scope_exit([&, this] { - if (Success) - return; - deallocControlBlock(ExtentsBackingStore, kExtentsSize, BufferCount); - ExtentsBackingStore = nullptr; - }); - - Buffers = initArray(BufferCount); - if (Buffers == nullptr) - return BufferQueue::ErrorCode::NotEnoughMemory; - - // At this point we increment the generation number to associate the buffers - // to the new generation. - atomic_fetch_add(&Generation, 1, memory_order_acq_rel); - - // First, we initialize the refcount in the ControlBlock, which we treat as - // being at the start of the BackingStore pointer. - atomic_store(&BackingStore->RefCount, 1, memory_order_release); - atomic_store(&ExtentsBackingStore->RefCount, 1, memory_order_release); - - // Then we initialise the individual buffers that sub-divide the whole backing - // store. Each buffer will start at the `Data` member of the ControlBlock, and - // will be offsets from these locations. 
- for (size_t i = 0; i < BufferCount; ++i) { - auto &T = Buffers[i]; - auto &Buf = T.Buff; - auto *E = reinterpret_cast(&ExtentsBackingStore->Data + - (kExtentsSize * i)); - Buf.Extents = &E->Extents; - atomic_store(Buf.Extents, 0, memory_order_release); - Buf.Generation = generation(); - Buf.Data = &BackingStore->Data + (BufferSize * i); - Buf.Size = BufferSize; - Buf.BackingStore = BackingStore; - Buf.ExtentsBackingStore = ExtentsBackingStore; - Buf.Count = BufferCount; - T.Used = false; - } - - Next = Buffers; - First = Buffers; - LiveBuffers = 0; - atomic_store(&Finalizing, 0, memory_order_release); - Success = true; - return BufferQueue::ErrorCode::Ok; -} - -BufferQueue::BufferQueue(size_t B, size_t N, - bool &Success) XRAY_NEVER_INSTRUMENT - : BufferSize(B), - BufferCount(N), - Mutex(), - Finalizing{1}, - BackingStore(nullptr), - ExtentsBackingStore(nullptr), - Buffers(nullptr), - Next(Buffers), - First(Buffers), - LiveBuffers(0), - Generation{0} { - Success = init(B, N) == BufferQueue::ErrorCode::Ok; -} - -BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) { - if (atomic_load(&Finalizing, memory_order_acquire)) - return ErrorCode::QueueFinalizing; - - BufferRep *B = nullptr; - { - SpinMutexLock Guard(&Mutex); - if (LiveBuffers == BufferCount) - return ErrorCode::NotEnoughMemory; - B = Next++; - if (Next == (Buffers + BufferCount)) - Next = Buffers; - ++LiveBuffers; - } - - incRefCount(BackingStore); - incRefCount(ExtentsBackingStore); - Buf = B->Buff; - Buf.Generation = generation(); - B->Used = true; - return ErrorCode::Ok; -} - -BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) { - // Check whether the buffer being referred to is within the bounds of the - // backing store's range. 
- BufferRep *B = nullptr; - { - SpinMutexLock Guard(&Mutex); - if (Buf.Generation != generation() || LiveBuffers == 0) { - Buf = {}; - decRefCount(Buf.BackingStore, Buf.Size, Buf.Count); - decRefCount(Buf.ExtentsBackingStore, kExtentsSize, Buf.Count); - return BufferQueue::ErrorCode::Ok; - } - - if (Buf.Data < &BackingStore->Data || - Buf.Data > &BackingStore->Data + (BufferCount * BufferSize)) - return BufferQueue::ErrorCode::UnrecognizedBuffer; - - --LiveBuffers; - B = First++; - if (First == (Buffers + BufferCount)) - First = Buffers; - } - - // Now that the buffer has been released, we mark it as "used". - B->Buff = Buf; - B->Used = true; - decRefCount(Buf.BackingStore, Buf.Size, Buf.Count); - decRefCount(Buf.ExtentsBackingStore, kExtentsSize, Buf.Count); - atomic_store(B->Buff.Extents, atomic_load(Buf.Extents, memory_order_acquire), - memory_order_release); - Buf = {}; - return ErrorCode::Ok; -} - -BufferQueue::ErrorCode BufferQueue::finalize() { - if (atomic_exchange(&Finalizing, 1, memory_order_acq_rel)) - return ErrorCode::QueueFinalizing; - return ErrorCode::Ok; -} - -void BufferQueue::cleanupBuffers() { - for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B) - B->~BufferRep(); - deallocateBuffer(Buffers, BufferCount); - decRefCount(BackingStore, BufferSize, BufferCount); - decRefCount(ExtentsBackingStore, kExtentsSize, BufferCount); - BackingStore = nullptr; - ExtentsBackingStore = nullptr; - Buffers = nullptr; - BufferCount = 0; - BufferSize = 0; -} - -BufferQueue::~BufferQueue() { cleanupBuffers(); } diff --git a/lib/xray/xray_buffer_queue.cpp b/lib/xray/xray_buffer_queue.cpp new file mode 100644 index 000000000000..bad91e036cef --- /dev/null +++ b/lib/xray/xray_buffer_queue.cpp @@ -0,0 +1,237 @@ +//===-- xray_buffer_queue.cpp ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instruementation system. +// +// Defines the interface for a buffer queue implementation. +// +//===----------------------------------------------------------------------===// +#include "xray_buffer_queue.h" +#include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_libc.h" +#if !SANITIZER_FUCHSIA +#include "sanitizer_common/sanitizer_posix.h" +#endif +#include "xray_allocator.h" +#include "xray_defs.h" +#include +#include + +using namespace __xray; + +namespace { + +BufferQueue::ControlBlock *allocControlBlock(size_t Size, size_t Count) { + auto B = + allocateBuffer((sizeof(BufferQueue::ControlBlock) - 1) + (Size * Count)); + return B == nullptr ? nullptr + : reinterpret_cast(B); +} + +void deallocControlBlock(BufferQueue::ControlBlock *C, size_t Size, + size_t Count) { + deallocateBuffer(reinterpret_cast(C), + (sizeof(BufferQueue::ControlBlock) - 1) + (Size * Count)); +} + +void decRefCount(BufferQueue::ControlBlock *C, size_t Size, size_t Count) { + if (C == nullptr) + return; + if (atomic_fetch_sub(&C->RefCount, 1, memory_order_acq_rel) == 1) + deallocControlBlock(C, Size, Count); +} + +void incRefCount(BufferQueue::ControlBlock *C) { + if (C == nullptr) + return; + atomic_fetch_add(&C->RefCount, 1, memory_order_acq_rel); +} + +// We use a struct to ensure that we are allocating one atomic_uint64_t per +// cache line. This allows us to not worry about false-sharing among atomic +// objects being updated (constantly) by different threads. 
+struct ExtentsPadded { + union { + atomic_uint64_t Extents; + unsigned char Storage[kCacheLineSize]; + }; +}; + +constexpr size_t kExtentsSize = sizeof(ExtentsPadded); + +} // namespace + +BufferQueue::ErrorCode BufferQueue::init(size_t BS, size_t BC) { + SpinMutexLock Guard(&Mutex); + + if (!finalizing()) + return BufferQueue::ErrorCode::AlreadyInitialized; + + cleanupBuffers(); + + bool Success = false; + BufferSize = BS; + BufferCount = BC; + + BackingStore = allocControlBlock(BufferSize, BufferCount); + if (BackingStore == nullptr) + return BufferQueue::ErrorCode::NotEnoughMemory; + + auto CleanupBackingStore = at_scope_exit([&, this] { + if (Success) + return; + deallocControlBlock(BackingStore, BufferSize, BufferCount); + BackingStore = nullptr; + }); + + // Initialize enough atomic_uint64_t instances, each + ExtentsBackingStore = allocControlBlock(kExtentsSize, BufferCount); + if (ExtentsBackingStore == nullptr) + return BufferQueue::ErrorCode::NotEnoughMemory; + + auto CleanupExtentsBackingStore = at_scope_exit([&, this] { + if (Success) + return; + deallocControlBlock(ExtentsBackingStore, kExtentsSize, BufferCount); + ExtentsBackingStore = nullptr; + }); + + Buffers = initArray(BufferCount); + if (Buffers == nullptr) + return BufferQueue::ErrorCode::NotEnoughMemory; + + // At this point we increment the generation number to associate the buffers + // to the new generation. + atomic_fetch_add(&Generation, 1, memory_order_acq_rel); + + // First, we initialize the refcount in the ControlBlock, which we treat as + // being at the start of the BackingStore pointer. + atomic_store(&BackingStore->RefCount, 1, memory_order_release); + atomic_store(&ExtentsBackingStore->RefCount, 1, memory_order_release); + + // Then we initialise the individual buffers that sub-divide the whole backing + // store. Each buffer will start at the `Data` member of the ControlBlock, and + // will be offsets from these locations. 
+ for (size_t i = 0; i < BufferCount; ++i) { + auto &T = Buffers[i]; + auto &Buf = T.Buff; + auto *E = reinterpret_cast(&ExtentsBackingStore->Data + + (kExtentsSize * i)); + Buf.Extents = &E->Extents; + atomic_store(Buf.Extents, 0, memory_order_release); + Buf.Generation = generation(); + Buf.Data = &BackingStore->Data + (BufferSize * i); + Buf.Size = BufferSize; + Buf.BackingStore = BackingStore; + Buf.ExtentsBackingStore = ExtentsBackingStore; + Buf.Count = BufferCount; + T.Used = false; + } + + Next = Buffers; + First = Buffers; + LiveBuffers = 0; + atomic_store(&Finalizing, 0, memory_order_release); + Success = true; + return BufferQueue::ErrorCode::Ok; +} + +BufferQueue::BufferQueue(size_t B, size_t N, + bool &Success) XRAY_NEVER_INSTRUMENT + : BufferSize(B), + BufferCount(N), + Mutex(), + Finalizing{1}, + BackingStore(nullptr), + ExtentsBackingStore(nullptr), + Buffers(nullptr), + Next(Buffers), + First(Buffers), + LiveBuffers(0), + Generation{0} { + Success = init(B, N) == BufferQueue::ErrorCode::Ok; +} + +BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) { + if (atomic_load(&Finalizing, memory_order_acquire)) + return ErrorCode::QueueFinalizing; + + BufferRep *B = nullptr; + { + SpinMutexLock Guard(&Mutex); + if (LiveBuffers == BufferCount) + return ErrorCode::NotEnoughMemory; + B = Next++; + if (Next == (Buffers + BufferCount)) + Next = Buffers; + ++LiveBuffers; + } + + incRefCount(BackingStore); + incRefCount(ExtentsBackingStore); + Buf = B->Buff; + Buf.Generation = generation(); + B->Used = true; + return ErrorCode::Ok; +} + +BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) { + // Check whether the buffer being referred to is within the bounds of the + // backing store's range. 
+ BufferRep *B = nullptr; + { + SpinMutexLock Guard(&Mutex); + if (Buf.Generation != generation() || LiveBuffers == 0) { + Buf = {}; + decRefCount(Buf.BackingStore, Buf.Size, Buf.Count); + decRefCount(Buf.ExtentsBackingStore, kExtentsSize, Buf.Count); + return BufferQueue::ErrorCode::Ok; + } + + if (Buf.Data < &BackingStore->Data || + Buf.Data > &BackingStore->Data + (BufferCount * BufferSize)) + return BufferQueue::ErrorCode::UnrecognizedBuffer; + + --LiveBuffers; + B = First++; + if (First == (Buffers + BufferCount)) + First = Buffers; + } + + // Now that the buffer has been released, we mark it as "used". + B->Buff = Buf; + B->Used = true; + decRefCount(Buf.BackingStore, Buf.Size, Buf.Count); + decRefCount(Buf.ExtentsBackingStore, kExtentsSize, Buf.Count); + atomic_store(B->Buff.Extents, atomic_load(Buf.Extents, memory_order_acquire), + memory_order_release); + Buf = {}; + return ErrorCode::Ok; +} + +BufferQueue::ErrorCode BufferQueue::finalize() { + if (atomic_exchange(&Finalizing, 1, memory_order_acq_rel)) + return ErrorCode::QueueFinalizing; + return ErrorCode::Ok; +} + +void BufferQueue::cleanupBuffers() { + for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B) + B->~BufferRep(); + deallocateBuffer(Buffers, BufferCount); + decRefCount(BackingStore, BufferSize, BufferCount); + decRefCount(ExtentsBackingStore, kExtentsSize, BufferCount); + BackingStore = nullptr; + ExtentsBackingStore = nullptr; + Buffers = nullptr; + BufferCount = 0; + BufferSize = 0; +} + +BufferQueue::~BufferQueue() { cleanupBuffers(); } diff --git a/lib/xray/xray_fdr_flags.cc b/lib/xray/xray_fdr_flags.cc deleted file mode 100644 index 8d432d298d88..000000000000 --- a/lib/xray/xray_fdr_flags.cc +++ /dev/null @@ -1,47 +0,0 @@ -//===-- xray_fdr_flags.cc ---------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instrumentation system. -// -// XRay FDR flag parsing logic. -//===----------------------------------------------------------------------===// - -#include "xray_fdr_flags.h" -#include "sanitizer_common/sanitizer_common.h" -#include "sanitizer_common/sanitizer_flag_parser.h" -#include "sanitizer_common/sanitizer_libc.h" -#include "xray_defs.h" - -using namespace __sanitizer; - -namespace __xray { - -FDRFlags xray_fdr_flags_dont_use_directly; // use via fdrFlags(). - -void FDRFlags::setDefaults() XRAY_NEVER_INSTRUMENT { -#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; -#include "xray_fdr_flags.inc" -#undef XRAY_FLAG -} - -void registerXRayFDRFlags(FlagParser *P, FDRFlags *F) XRAY_NEVER_INSTRUMENT { -#define XRAY_FLAG(Type, Name, DefaultValue, Description) \ - RegisterFlag(P, #Name, Description, &F->Name); -#include "xray_fdr_flags.inc" -#undef XRAY_FLAG -} - -const char *useCompilerDefinedFDRFlags() XRAY_NEVER_INSTRUMENT { -#ifdef XRAY_FDR_OPTIONS - return SANITIZER_STRINGIFY(XRAY_FDR_OPTIONS); -#else - return ""; -#endif -} - -} // namespace __xray diff --git a/lib/xray/xray_fdr_flags.cpp b/lib/xray/xray_fdr_flags.cpp new file mode 100644 index 000000000000..272b0b7cb1f7 --- /dev/null +++ b/lib/xray/xray_fdr_flags.cpp @@ -0,0 +1,47 @@ +//===-- xray_fdr_flags.cpp --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// XRay FDR flag parsing logic. 
+//===----------------------------------------------------------------------===// + +#include "xray_fdr_flags.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "xray_defs.h" + +using namespace __sanitizer; + +namespace __xray { + +FDRFlags xray_fdr_flags_dont_use_directly; // use via fdrFlags(). + +void FDRFlags::setDefaults() XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; +#include "xray_fdr_flags.inc" +#undef XRAY_FLAG +} + +void registerXRayFDRFlags(FlagParser *P, FDRFlags *F) XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) \ + RegisterFlag(P, #Name, Description, &F->Name); +#include "xray_fdr_flags.inc" +#undef XRAY_FLAG +} + +const char *useCompilerDefinedFDRFlags() XRAY_NEVER_INSTRUMENT { +#ifdef XRAY_FDR_OPTIONS + return SANITIZER_STRINGIFY(XRAY_FDR_OPTIONS); +#else + return ""; +#endif +} + +} // namespace __xray diff --git a/lib/xray/xray_fdr_logging.cc b/lib/xray/xray_fdr_logging.cc deleted file mode 100644 index abba06576da1..000000000000 --- a/lib/xray/xray_fdr_logging.cc +++ /dev/null @@ -1,757 +0,0 @@ -//===-- xray_fdr_logging.cc ------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instrumentation system. -// -// Here we implement the Flight Data Recorder mode for XRay, where we use -// compact structures to store records in memory as well as when writing out the -// data to files. 
-// -//===----------------------------------------------------------------------===// -#include "xray_fdr_logging.h" -#include -#include -#include -#include -#include -#include -#include -#include - -#include "sanitizer_common/sanitizer_allocator_internal.h" -#include "sanitizer_common/sanitizer_atomic.h" -#include "sanitizer_common/sanitizer_common.h" -#include "xray/xray_interface.h" -#include "xray/xray_records.h" -#include "xray_allocator.h" -#include "xray_buffer_queue.h" -#include "xray_defs.h" -#include "xray_fdr_controller.h" -#include "xray_fdr_flags.h" -#include "xray_fdr_log_writer.h" -#include "xray_flags.h" -#include "xray_recursion_guard.h" -#include "xray_tsc.h" -#include "xray_utils.h" - -namespace __xray { - -static atomic_sint32_t LoggingStatus = { - XRayLogInitStatus::XRAY_LOG_UNINITIALIZED}; - -namespace { - -// Group together thread-local-data in a struct, then hide it behind a function -// call so that it can be initialized on first use instead of as a global. We -// force the alignment to 64-bytes for x86 cache line alignment, as this -// structure is used in the hot path of implementation. -struct XRAY_TLS_ALIGNAS(64) ThreadLocalData { - BufferQueue::Buffer Buffer{}; - BufferQueue *BQ = nullptr; - - using LogWriterStorage = - typename std::aligned_storage::type; - - LogWriterStorage LWStorage; - FDRLogWriter *Writer = nullptr; - - using ControllerStorage = - typename std::aligned_storage), - alignof(FDRController<>)>::type; - ControllerStorage CStorage; - FDRController<> *Controller = nullptr; -}; - -} // namespace - -static_assert(std::is_trivially_destructible::value, - "ThreadLocalData must be trivially destructible"); - -// Use a global pthread key to identify thread-local data for logging. -static pthread_key_t Key; - -// Global BufferQueue. -static std::aligned_storage::type BufferQueueStorage; -static BufferQueue *BQ = nullptr; - -// Global thresholds for function durations. 
-static atomic_uint64_t ThresholdTicks{0}; - -// Global for ticks per second. -static atomic_uint64_t TicksPerSec{0}; - -static atomic_sint32_t LogFlushStatus = { - XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; - -// This function will initialize the thread-local data structure used by the FDR -// logging implementation and return a reference to it. The implementation -// details require a bit of care to maintain. -// -// First, some requirements on the implementation in general: -// -// - XRay handlers should not call any memory allocation routines that may -// delegate to an instrumented implementation. This means functions like -// malloc() and free() should not be called while instrumenting. -// -// - We would like to use some thread-local data initialized on first-use of -// the XRay instrumentation. These allow us to implement unsynchronized -// routines that access resources associated with the thread. -// -// The implementation here uses a few mechanisms that allow us to provide both -// the requirements listed above. We do this by: -// -// 1. Using a thread-local aligned storage buffer for representing the -// ThreadLocalData struct. This data will be uninitialized memory by -// design. -// -// 2. Not requiring a thread exit handler/implementation, keeping the -// thread-local as purely a collection of references/data that do not -// require cleanup. -// -// We're doing this to avoid using a `thread_local` object that has a -// non-trivial destructor, because the C++ runtime might call std::malloc(...) -// to register calls to destructors. Deadlocks may arise when, for example, an -// externally provided malloc implementation is XRay instrumented, and -// initializing the thread-locals involves calling into malloc. 
A malloc -// implementation that does global synchronization might be holding a lock for a -// critical section, calling a function that might be XRay instrumented (and -// thus in turn calling into malloc by virtue of registration of the -// thread_local's destructor). -#if XRAY_HAS_TLS_ALIGNAS -static_assert(alignof(ThreadLocalData) >= 64, - "ThreadLocalData must be cache line aligned."); -#endif -static ThreadLocalData &getThreadLocalData() { - thread_local typename std::aligned_storage< - sizeof(ThreadLocalData), alignof(ThreadLocalData)>::type TLDStorage{}; - - if (pthread_getspecific(Key) == NULL) { - new (reinterpret_cast(&TLDStorage)) ThreadLocalData{}; - pthread_setspecific(Key, &TLDStorage); - } - - return *reinterpret_cast(&TLDStorage); -} - -static XRayFileHeader &fdrCommonHeaderInfo() { - static std::aligned_storage::type HStorage; - static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; - static bool TSCSupported = true; - static uint64_t CycleFrequency = NanosecondsPerSecond; - pthread_once( - &OnceInit, +[] { - XRayFileHeader &H = reinterpret_cast(HStorage); - // Version 2 of the log writes the extents of the buffer, instead of - // relying on an end-of-buffer record. - // Version 3 includes PID metadata record. - // Version 4 includes CPU data in the custom event records. - // Version 5 uses relative deltas for custom and typed event records, - // and removes the CPU data in custom event records (similar to how - // function records use deltas instead of full TSCs and rely on other - // metadata records for TSC wraparound and CPU migration). - H.Version = 5; - H.Type = FileTypes::FDR_LOG; - - // Test for required CPU features and cache the cycle frequency - TSCSupported = probeRequiredCPUFeatures(); - if (TSCSupported) - CycleFrequency = getTSCFrequency(); - H.CycleFrequency = CycleFrequency; - - // FIXME: Actually check whether we have 'constant_tsc' and - // 'nonstop_tsc' before setting the values in the header. 
- H.ConstantTSC = 1; - H.NonstopTSC = 1; - }); - return reinterpret_cast(HStorage); -} - -// This is the iterator implementation, which knows how to handle FDR-mode -// specific buffers. This is used as an implementation of the iterator function -// needed by __xray_set_buffer_iterator(...). It maintains a global state of the -// buffer iteration for the currently installed FDR mode buffers. In particular: -// -// - If the argument represents the initial state of XRayBuffer ({nullptr, 0}) -// then the iterator returns the header information. -// - If the argument represents the header information ({address of header -// info, size of the header info}) then it returns the first FDR buffer's -// address and extents. -// - It will keep returning the next buffer and extents as there are more -// buffers to process. When the input represents the last buffer, it will -// return the initial state to signal completion ({nullptr, 0}). -// -// See xray/xray_log_interface.h for more details on the requirements for the -// implementations of __xray_set_buffer_iterator(...) and -// __xray_log_process_buffers(...). -XRayBuffer fdrIterator(const XRayBuffer B) { - DCHECK(internal_strcmp(__xray_log_get_current_mode(), "xray-fdr") == 0); - DCHECK(BQ->finalizing()); - - if (BQ == nullptr || !BQ->finalizing()) { - if (Verbosity()) - Report( - "XRay FDR: Failed global buffer queue is null or not finalizing!\n"); - return {nullptr, 0}; - } - - // We use a global scratch-pad for the header information, which only gets - // initialized the first time this function is called. We'll update one part - // of this information with some relevant data (in particular the number of - // buffers to expect). - static std::aligned_storage::type HeaderStorage; - static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT; - pthread_once( - &HeaderOnce, +[] { - reinterpret_cast(HeaderStorage) = - fdrCommonHeaderInfo(); - }); - - // We use a convenience alias for code referring to Header from here on out. 
- auto &Header = reinterpret_cast(HeaderStorage); - if (B.Data == nullptr && B.Size == 0) { - Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()}; - return XRayBuffer{static_cast(&Header), sizeof(Header)}; - } - - static BufferQueue::const_iterator It{}; - static BufferQueue::const_iterator End{}; - static uint8_t *CurrentBuffer{nullptr}; - static size_t SerializedBufferSize = 0; - if (B.Data == static_cast(&Header) && B.Size == sizeof(Header)) { - // From this point on, we provide raw access to the raw buffer we're getting - // from the BufferQueue. We're relying on the iterators from the current - // Buffer queue. - It = BQ->cbegin(); - End = BQ->cend(); - } - - if (CurrentBuffer != nullptr) { - deallocateBuffer(CurrentBuffer, SerializedBufferSize); - CurrentBuffer = nullptr; - } - - if (It == End) - return {nullptr, 0}; - - // Set up the current buffer to contain the extents like we would when writing - // out to disk. The difference here would be that we still write "empty" - // buffers, or at least go through the iterators faithfully to let the - // handlers see the empty buffers in the queue. - // - // We need this atomic fence here to ensure that writes happening to the - // buffer have been committed before we load the extents atomically. Because - // the buffer is not explicitly synchronised across threads, we rely on the - // fence ordering to ensure that writes we expect to have been completed - // before the fence are fully committed before we read the extents. - atomic_thread_fence(memory_order_acquire); - auto BufferSize = atomic_load(It->Extents, memory_order_acquire); - SerializedBufferSize = BufferSize + sizeof(MetadataRecord); - CurrentBuffer = allocateBuffer(SerializedBufferSize); - if (CurrentBuffer == nullptr) - return {nullptr, 0}; - - // Write out the extents as a Metadata Record into the CurrentBuffer. 
- MetadataRecord ExtentsRecord; - ExtentsRecord.Type = uint8_t(RecordType::Metadata); - ExtentsRecord.RecordKind = - uint8_t(MetadataRecord::RecordKinds::BufferExtents); - internal_memcpy(ExtentsRecord.Data, &BufferSize, sizeof(BufferSize)); - auto AfterExtents = - static_cast(internal_memcpy(CurrentBuffer, &ExtentsRecord, - sizeof(MetadataRecord))) + - sizeof(MetadataRecord); - internal_memcpy(AfterExtents, It->Data, BufferSize); - - XRayBuffer Result; - Result.Data = CurrentBuffer; - Result.Size = SerializedBufferSize; - ++It; - return Result; -} - -// Must finalize before flushing. -XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { - if (atomic_load(&LoggingStatus, memory_order_acquire) != - XRayLogInitStatus::XRAY_LOG_FINALIZED) { - if (Verbosity()) - Report("Not flushing log, implementation is not finalized.\n"); - return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; - } - - s32 Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; - if (!atomic_compare_exchange_strong(&LogFlushStatus, &Result, - XRayLogFlushStatus::XRAY_LOG_FLUSHING, - memory_order_release)) { - if (Verbosity()) - Report("Not flushing log, implementation is still finalizing.\n"); - return static_cast(Result); - } - - if (BQ == nullptr) { - if (Verbosity()) - Report("Cannot flush when global buffer queue is null.\n"); - return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; - } - - // We wait a number of milliseconds to allow threads to see that we've - // finalised before attempting to flush the log. - SleepForMillis(fdrFlags()->grace_period_ms); - - // At this point, we're going to uninstall the iterator implementation, before - // we decide to do anything further with the global buffer queue. - __xray_log_remove_buffer_iterator(); - - // Once flushed, we should set the global status of the logging implementation - // to "uninitialized" to allow for FDR-logging multiple runs. 
- auto ResetToUnitialized = at_scope_exit([] { - atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, - memory_order_release); - }); - - auto CleanupBuffers = at_scope_exit([] { - auto &TLD = getThreadLocalData(); - if (TLD.Controller != nullptr) - TLD.Controller->flush(); - }); - - if (fdrFlags()->no_file_flush) { - if (Verbosity()) - Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n"); - - atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, - memory_order_release); - return XRayLogFlushStatus::XRAY_LOG_FLUSHED; - } - - // We write out the file in the following format: - // - // 1) We write down the XRay file header with version 1, type FDR_LOG. - // 2) Then we use the 'apply' member of the BufferQueue that's live, to - // ensure that at this point in time we write down the buffers that have - // been released (and marked "used") -- we dump the full buffer for now - // (fixed-sized) and let the tools reading the buffers deal with the data - // afterwards. - // - LogWriter *LW = LogWriter::Open(); - if (LW == nullptr) { - auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; - atomic_store(&LogFlushStatus, Result, memory_order_release); - return Result; - } - - XRayFileHeader Header = fdrCommonHeaderInfo(); - Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()}; - LW->WriteAll(reinterpret_cast(&Header), - reinterpret_cast(&Header) + sizeof(Header)); - - // Release the current thread's buffer before we attempt to write out all the - // buffers. This ensures that in case we had only a single thread going, that - // we are able to capture the data nonetheless. - auto &TLD = getThreadLocalData(); - if (TLD.Controller != nullptr) - TLD.Controller->flush(); - - BQ->apply([&](const BufferQueue::Buffer &B) { - // Starting at version 2 of the FDR logging implementation, we only write - // the records identified by the extents of the buffer. 
We use the Extents - // from the Buffer and write that out as the first record in the buffer. We - // still use a Metadata record, but fill in the extents instead for the - // data. - MetadataRecord ExtentsRecord; - auto BufferExtents = atomic_load(B.Extents, memory_order_acquire); - DCHECK(BufferExtents <= B.Size); - ExtentsRecord.Type = uint8_t(RecordType::Metadata); - ExtentsRecord.RecordKind = - uint8_t(MetadataRecord::RecordKinds::BufferExtents); - internal_memcpy(ExtentsRecord.Data, &BufferExtents, sizeof(BufferExtents)); - if (BufferExtents > 0) { - LW->WriteAll(reinterpret_cast(&ExtentsRecord), - reinterpret_cast(&ExtentsRecord) + - sizeof(MetadataRecord)); - LW->WriteAll(reinterpret_cast(B.Data), - reinterpret_cast(B.Data) + BufferExtents); - } - }); - - atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, - memory_order_release); - return XRayLogFlushStatus::XRAY_LOG_FLUSHED; -} - -XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT { - s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED; - if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus, - XRayLogInitStatus::XRAY_LOG_FINALIZING, - memory_order_release)) { - if (Verbosity()) - Report("Cannot finalize log, implementation not initialized.\n"); - return static_cast(CurrentStatus); - } - - // Do special things to make the log finalize itself, and not allow any more - // operations to be performed until re-initialized. - if (BQ == nullptr) { - if (Verbosity()) - Report("Attempting to finalize an uninitialized global buffer!\n"); - } else { - BQ->finalize(); - } - - atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED, - memory_order_release); - return XRayLogInitStatus::XRAY_LOG_FINALIZED; -} - -struct TSCAndCPU { - uint64_t TSC = 0; - unsigned char CPU = 0; -}; - -static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT { - // We want to get the TSC as early as possible, so that we can check whether - // we've seen this CPU before. 
We also do it before we load anything else, - // to allow for forward progress with the scheduling. - TSCAndCPU Result; - - // Test once for required CPU features - static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT; - static bool TSCSupported = true; - pthread_once( - &OnceProbe, +[] { TSCSupported = probeRequiredCPUFeatures(); }); - - if (TSCSupported) { - Result.TSC = __xray::readTSC(Result.CPU); - } else { - // FIXME: This code needs refactoring as it appears in multiple locations - timespec TS; - int result = clock_gettime(CLOCK_REALTIME, &TS); - if (result != 0) { - Report("clock_gettime(2) return %d, errno=%d", result, int(errno)); - TS = {0, 0}; - } - Result.CPU = 0; - Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec; - } - return Result; -} - -thread_local atomic_uint8_t Running{0}; - -static bool setupTLD(ThreadLocalData &TLD) XRAY_NEVER_INSTRUMENT { - // Check if we're finalizing, before proceeding. - { - auto Status = atomic_load(&LoggingStatus, memory_order_acquire); - if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING || - Status == XRayLogInitStatus::XRAY_LOG_FINALIZED) { - if (TLD.Controller != nullptr) { - TLD.Controller->flush(); - TLD.Controller = nullptr; - } - return false; - } - } - - if (UNLIKELY(TLD.Controller == nullptr)) { - // Set up the TLD buffer queue. - if (UNLIKELY(BQ == nullptr)) - return false; - TLD.BQ = BQ; - - // Check that we have a valid buffer. - if (TLD.Buffer.Generation != BQ->generation() && - TLD.BQ->releaseBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok) - return false; - - // Set up a buffer, before setting up the log writer. Bail out on failure. - if (TLD.BQ->getBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok) - return false; - - // Set up the Log Writer for this thread. 
- if (UNLIKELY(TLD.Writer == nullptr)) { - auto *LWStorage = reinterpret_cast(&TLD.LWStorage); - new (LWStorage) FDRLogWriter(TLD.Buffer); - TLD.Writer = LWStorage; - } else { - TLD.Writer->resetRecord(); - } - - auto *CStorage = reinterpret_cast *>(&TLD.CStorage); - new (CStorage) - FDRController<>(TLD.BQ, TLD.Buffer, *TLD.Writer, clock_gettime, - atomic_load_relaxed(&ThresholdTicks)); - TLD.Controller = CStorage; - } - - DCHECK_NE(TLD.Controller, nullptr); - return true; -} - -void fdrLoggingHandleArg0(int32_t FuncId, - XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { - auto TC = getTimestamp(); - auto &TSC = TC.TSC; - auto &CPU = TC.CPU; - RecursionGuard Guard{Running}; - if (!Guard) - return; - - auto &TLD = getThreadLocalData(); - if (!setupTLD(TLD)) - return; - - switch (Entry) { - case XRayEntryType::ENTRY: - case XRayEntryType::LOG_ARGS_ENTRY: - TLD.Controller->functionEnter(FuncId, TSC, CPU); - return; - case XRayEntryType::EXIT: - TLD.Controller->functionExit(FuncId, TSC, CPU); - return; - case XRayEntryType::TAIL: - TLD.Controller->functionTailExit(FuncId, TSC, CPU); - return; - case XRayEntryType::CUSTOM_EVENT: - case XRayEntryType::TYPED_EVENT: - break; - } -} - -void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, - uint64_t Arg) XRAY_NEVER_INSTRUMENT { - auto TC = getTimestamp(); - auto &TSC = TC.TSC; - auto &CPU = TC.CPU; - RecursionGuard Guard{Running}; - if (!Guard) - return; - - auto &TLD = getThreadLocalData(); - if (!setupTLD(TLD)) - return; - - switch (Entry) { - case XRayEntryType::ENTRY: - case XRayEntryType::LOG_ARGS_ENTRY: - TLD.Controller->functionEnterArg(FuncId, TSC, CPU, Arg); - return; - case XRayEntryType::EXIT: - TLD.Controller->functionExit(FuncId, TSC, CPU); - return; - case XRayEntryType::TAIL: - TLD.Controller->functionTailExit(FuncId, TSC, CPU); - return; - case XRayEntryType::CUSTOM_EVENT: - case XRayEntryType::TYPED_EVENT: - break; - } -} - -void fdrLoggingHandleCustomEvent(void *Event, - std::size_t EventSize) 
XRAY_NEVER_INSTRUMENT { - auto TC = getTimestamp(); - auto &TSC = TC.TSC; - auto &CPU = TC.CPU; - RecursionGuard Guard{Running}; - if (!Guard) - return; - - // Complain when we ever get at least one custom event that's larger than what - // we can possibly support. - if (EventSize > - static_cast(std::numeric_limits::max())) { - static pthread_once_t Once = PTHREAD_ONCE_INIT; - pthread_once( - &Once, +[] { - Report("Custom event size too large; truncating to %d.\n", - std::numeric_limits::max()); - }); - } - - auto &TLD = getThreadLocalData(); - if (!setupTLD(TLD)) - return; - - int32_t ReducedEventSize = static_cast(EventSize); - TLD.Controller->customEvent(TSC, CPU, Event, ReducedEventSize); -} - -void fdrLoggingHandleTypedEvent( - uint16_t EventType, const void *Event, - std::size_t EventSize) noexcept XRAY_NEVER_INSTRUMENT { - auto TC = getTimestamp(); - auto &TSC = TC.TSC; - auto &CPU = TC.CPU; - RecursionGuard Guard{Running}; - if (!Guard) - return; - - // Complain when we ever get at least one typed event that's larger than what - // we can possibly support. 
- if (EventSize > - static_cast(std::numeric_limits::max())) { - static pthread_once_t Once = PTHREAD_ONCE_INIT; - pthread_once( - &Once, +[] { - Report("Typed event size too large; truncating to %d.\n", - std::numeric_limits::max()); - }); - } - - auto &TLD = getThreadLocalData(); - if (!setupTLD(TLD)) - return; - - int32_t ReducedEventSize = static_cast(EventSize); - TLD.Controller->typedEvent(TSC, CPU, EventType, Event, ReducedEventSize); -} - -XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options, - size_t OptionsSize) XRAY_NEVER_INSTRUMENT { - if (Options == nullptr) - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - - s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus, - XRayLogInitStatus::XRAY_LOG_INITIALIZING, - memory_order_release)) { - if (Verbosity()) - Report("Cannot initialize already initialized implementation.\n"); - return static_cast(CurrentStatus); - } - - if (Verbosity()) - Report("Initializing FDR mode with options: %s\n", - static_cast(Options)); - - // TODO: Factor out the flags specific to the FDR mode implementation. For - // now, use the global/single definition of the flags, since the FDR mode - // flags are already defined there. - FlagParser FDRParser; - FDRFlags FDRFlags; - registerXRayFDRFlags(&FDRParser, &FDRFlags); - FDRFlags.setDefaults(); - - // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided - // options until we migrate everyone to use the XRAY_FDR_OPTIONS - // compiler-provided options. - FDRParser.ParseString(useCompilerDefinedFlags()); - FDRParser.ParseString(useCompilerDefinedFDRFlags()); - auto *EnvOpts = GetEnv("XRAY_FDR_OPTIONS"); - if (EnvOpts == nullptr) - EnvOpts = ""; - FDRParser.ParseString(EnvOpts); - - // FIXME: Remove this when we fully remove the deprecated flags. 
- if (internal_strlen(EnvOpts) == 0) { - FDRFlags.func_duration_threshold_us = - flags()->xray_fdr_log_func_duration_threshold_us; - FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms; - } - - // The provided options should always override the compiler-provided and - // environment-variable defined options. - FDRParser.ParseString(static_cast(Options)); - *fdrFlags() = FDRFlags; - auto BufferSize = FDRFlags.buffer_size; - auto BufferMax = FDRFlags.buffer_max; - - if (BQ == nullptr) { - bool Success = false; - BQ = reinterpret_cast(&BufferQueueStorage); - new (BQ) BufferQueue(BufferSize, BufferMax, Success); - if (!Success) { - Report("BufferQueue init failed.\n"); - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - } - } else { - if (BQ->init(BufferSize, BufferMax) != BufferQueue::ErrorCode::Ok) { - if (Verbosity()) - Report("Failed to re-initialize global buffer queue. Init failed.\n"); - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - } - } - - static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; - pthread_once( - &OnceInit, +[] { - atomic_store(&TicksPerSec, - probeRequiredCPUFeatures() ? getTSCFrequency() - : __xray::NanosecondsPerSecond, - memory_order_release); - pthread_key_create( - &Key, +[](void *TLDPtr) { - if (TLDPtr == nullptr) - return; - auto &TLD = *reinterpret_cast(TLDPtr); - if (TLD.BQ == nullptr) - return; - if (TLD.Buffer.Data == nullptr) - return; - auto EC = TLD.BQ->releaseBuffer(TLD.Buffer); - if (EC != BufferQueue::ErrorCode::Ok) - Report("At thread exit, failed to release buffer at %p; " - "error=%s\n", - TLD.Buffer.Data, BufferQueue::getErrorString(EC)); - }); - }); - - atomic_store(&ThresholdTicks, - atomic_load_relaxed(&TicksPerSec) * - fdrFlags()->func_duration_threshold_us / 1000000, - memory_order_release); - // Arg1 handler should go in first to avoid concurrent code accidentally - // falling back to arg0 when it should have ran arg1. 
- __xray_set_handler_arg1(fdrLoggingHandleArg1); - // Install the actual handleArg0 handler after initialising the buffers. - __xray_set_handler(fdrLoggingHandleArg0); - __xray_set_customevent_handler(fdrLoggingHandleCustomEvent); - __xray_set_typedevent_handler(fdrLoggingHandleTypedEvent); - - // Install the buffer iterator implementation. - __xray_log_set_buffer_iterator(fdrIterator); - - atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED, - memory_order_release); - - if (Verbosity()) - Report("XRay FDR init successful.\n"); - return XRayLogInitStatus::XRAY_LOG_INITIALIZED; -} - -bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT { - XRayLogImpl Impl{ - fdrLoggingInit, - fdrLoggingFinalize, - fdrLoggingHandleArg0, - fdrLoggingFlush, - }; - auto RegistrationResult = __xray_log_register_mode("xray-fdr", Impl); - if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && - Verbosity()) { - Report("Cannot register XRay FDR mode to 'xray-fdr'; error = %d\n", - RegistrationResult); - return false; - } - - if (flags()->xray_fdr_log || - !internal_strcmp(flags()->xray_mode, "xray-fdr")) { - auto SelectResult = __xray_log_select_mode("xray-fdr"); - if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && - Verbosity()) { - Report("Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n", - SelectResult); - return false; - } - } - return true; -} - -} // namespace __xray - -static auto UNUSED Unused = __xray::fdrLogDynamicInitializer(); diff --git a/lib/xray/xray_fdr_logging.cpp b/lib/xray/xray_fdr_logging.cpp new file mode 100644 index 000000000000..16ce483502f0 --- /dev/null +++ b/lib/xray/xray_fdr_logging.cpp @@ -0,0 +1,757 @@ +//===-- xray_fdr_logging.cpp -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Here we implement the Flight Data Recorder mode for XRay, where we use +// compact structures to store records in memory as well as when writing out the +// data to files. +// +//===----------------------------------------------------------------------===// +#include "xray_fdr_logging.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sanitizer_common/sanitizer_allocator_internal.h" +#include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_common.h" +#include "xray/xray_interface.h" +#include "xray/xray_records.h" +#include "xray_allocator.h" +#include "xray_buffer_queue.h" +#include "xray_defs.h" +#include "xray_fdr_controller.h" +#include "xray_fdr_flags.h" +#include "xray_fdr_log_writer.h" +#include "xray_flags.h" +#include "xray_recursion_guard.h" +#include "xray_tsc.h" +#include "xray_utils.h" + +namespace __xray { + +static atomic_sint32_t LoggingStatus = { + XRayLogInitStatus::XRAY_LOG_UNINITIALIZED}; + +namespace { + +// Group together thread-local-data in a struct, then hide it behind a function +// call so that it can be initialized on first use instead of as a global. We +// force the alignment to 64-bytes for x86 cache line alignment, as this +// structure is used in the hot path of implementation. 
+struct XRAY_TLS_ALIGNAS(64) ThreadLocalData { + BufferQueue::Buffer Buffer{}; + BufferQueue *BQ = nullptr; + + using LogWriterStorage = + typename std::aligned_storage::type; + + LogWriterStorage LWStorage; + FDRLogWriter *Writer = nullptr; + + using ControllerStorage = + typename std::aligned_storage), + alignof(FDRController<>)>::type; + ControllerStorage CStorage; + FDRController<> *Controller = nullptr; +}; + +} // namespace + +static_assert(std::is_trivially_destructible::value, + "ThreadLocalData must be trivially destructible"); + +// Use a global pthread key to identify thread-local data for logging. +static pthread_key_t Key; + +// Global BufferQueue. +static std::aligned_storage::type BufferQueueStorage; +static BufferQueue *BQ = nullptr; + +// Global thresholds for function durations. +static atomic_uint64_t ThresholdTicks{0}; + +// Global for ticks per second. +static atomic_uint64_t TicksPerSec{0}; + +static atomic_sint32_t LogFlushStatus = { + XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; + +// This function will initialize the thread-local data structure used by the FDR +// logging implementation and return a reference to it. The implementation +// details require a bit of care to maintain. +// +// First, some requirements on the implementation in general: +// +// - XRay handlers should not call any memory allocation routines that may +// delegate to an instrumented implementation. This means functions like +// malloc() and free() should not be called while instrumenting. +// +// - We would like to use some thread-local data initialized on first-use of +// the XRay instrumentation. These allow us to implement unsynchronized +// routines that access resources associated with the thread. +// +// The implementation here uses a few mechanisms that allow us to provide both +// the requirements listed above. We do this by: +// +// 1. Using a thread-local aligned storage buffer for representing the +// ThreadLocalData struct. 
This data will be uninitialized memory by +// design. +// +// 2. Not requiring a thread exit handler/implementation, keeping the +// thread-local as purely a collection of references/data that do not +// require cleanup. +// +// We're doing this to avoid using a `thread_local` object that has a +// non-trivial destructor, because the C++ runtime might call std::malloc(...) +// to register calls to destructors. Deadlocks may arise when, for example, an +// externally provided malloc implementation is XRay instrumented, and +// initializing the thread-locals involves calling into malloc. A malloc +// implementation that does global synchronization might be holding a lock for a +// critical section, calling a function that might be XRay instrumented (and +// thus in turn calling into malloc by virtue of registration of the +// thread_local's destructor). +#if XRAY_HAS_TLS_ALIGNAS +static_assert(alignof(ThreadLocalData) >= 64, + "ThreadLocalData must be cache line aligned."); +#endif +static ThreadLocalData &getThreadLocalData() { + thread_local typename std::aligned_storage< + sizeof(ThreadLocalData), alignof(ThreadLocalData)>::type TLDStorage{}; + + if (pthread_getspecific(Key) == NULL) { + new (reinterpret_cast(&TLDStorage)) ThreadLocalData{}; + pthread_setspecific(Key, &TLDStorage); + } + + return *reinterpret_cast(&TLDStorage); +} + +static XRayFileHeader &fdrCommonHeaderInfo() { + static std::aligned_storage::type HStorage; + static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; + static bool TSCSupported = true; + static uint64_t CycleFrequency = NanosecondsPerSecond; + pthread_once( + &OnceInit, +[] { + XRayFileHeader &H = reinterpret_cast(HStorage); + // Version 2 of the log writes the extents of the buffer, instead of + // relying on an end-of-buffer record. + // Version 3 includes PID metadata record. + // Version 4 includes CPU data in the custom event records. 
+ // Version 5 uses relative deltas for custom and typed event records, + // and removes the CPU data in custom event records (similar to how + // function records use deltas instead of full TSCs and rely on other + // metadata records for TSC wraparound and CPU migration). + H.Version = 5; + H.Type = FileTypes::FDR_LOG; + + // Test for required CPU features and cache the cycle frequency + TSCSupported = probeRequiredCPUFeatures(); + if (TSCSupported) + CycleFrequency = getTSCFrequency(); + H.CycleFrequency = CycleFrequency; + + // FIXME: Actually check whether we have 'constant_tsc' and + // 'nonstop_tsc' before setting the values in the header. + H.ConstantTSC = 1; + H.NonstopTSC = 1; + }); + return reinterpret_cast(HStorage); +} + +// This is the iterator implementation, which knows how to handle FDR-mode +// specific buffers. This is used as an implementation of the iterator function +// needed by __xray_set_buffer_iterator(...). It maintains a global state of the +// buffer iteration for the currently installed FDR mode buffers. In particular: +// +// - If the argument represents the initial state of XRayBuffer ({nullptr, 0}) +// then the iterator returns the header information. +// - If the argument represents the header information ({address of header +// info, size of the header info}) then it returns the first FDR buffer's +// address and extents. +// - It will keep returning the next buffer and extents as there are more +// buffers to process. When the input represents the last buffer, it will +// return the initial state to signal completion ({nullptr, 0}). +// +// See xray/xray_log_interface.h for more details on the requirements for the +// implementations of __xray_set_buffer_iterator(...) and +// __xray_log_process_buffers(...). 
+XRayBuffer fdrIterator(const XRayBuffer B) { + DCHECK(internal_strcmp(__xray_log_get_current_mode(), "xray-fdr") == 0); + DCHECK(BQ->finalizing()); + + if (BQ == nullptr || !BQ->finalizing()) { + if (Verbosity()) + Report( + "XRay FDR: Failed global buffer queue is null or not finalizing!\n"); + return {nullptr, 0}; + } + + // We use a global scratch-pad for the header information, which only gets + // initialized the first time this function is called. We'll update one part + // of this information with some relevant data (in particular the number of + // buffers to expect). + static std::aligned_storage::type HeaderStorage; + static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT; + pthread_once( + &HeaderOnce, +[] { + reinterpret_cast(HeaderStorage) = + fdrCommonHeaderInfo(); + }); + + // We use a convenience alias for code referring to Header from here on out. + auto &Header = reinterpret_cast(HeaderStorage); + if (B.Data == nullptr && B.Size == 0) { + Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()}; + return XRayBuffer{static_cast(&Header), sizeof(Header)}; + } + + static BufferQueue::const_iterator It{}; + static BufferQueue::const_iterator End{}; + static uint8_t *CurrentBuffer{nullptr}; + static size_t SerializedBufferSize = 0; + if (B.Data == static_cast(&Header) && B.Size == sizeof(Header)) { + // From this point on, we provide raw access to the raw buffer we're getting + // from the BufferQueue. We're relying on the iterators from the current + // Buffer queue. + It = BQ->cbegin(); + End = BQ->cend(); + } + + if (CurrentBuffer != nullptr) { + deallocateBuffer(CurrentBuffer, SerializedBufferSize); + CurrentBuffer = nullptr; + } + + if (It == End) + return {nullptr, 0}; + + // Set up the current buffer to contain the extents like we would when writing + // out to disk. 
The difference here would be that we still write "empty" + // buffers, or at least go through the iterators faithfully to let the + // handlers see the empty buffers in the queue. + // + // We need this atomic fence here to ensure that writes happening to the + // buffer have been committed before we load the extents atomically. Because + // the buffer is not explicitly synchronised across threads, we rely on the + // fence ordering to ensure that writes we expect to have been completed + // before the fence are fully committed before we read the extents. + atomic_thread_fence(memory_order_acquire); + auto BufferSize = atomic_load(It->Extents, memory_order_acquire); + SerializedBufferSize = BufferSize + sizeof(MetadataRecord); + CurrentBuffer = allocateBuffer(SerializedBufferSize); + if (CurrentBuffer == nullptr) + return {nullptr, 0}; + + // Write out the extents as a Metadata Record into the CurrentBuffer. + MetadataRecord ExtentsRecord; + ExtentsRecord.Type = uint8_t(RecordType::Metadata); + ExtentsRecord.RecordKind = + uint8_t(MetadataRecord::RecordKinds::BufferExtents); + internal_memcpy(ExtentsRecord.Data, &BufferSize, sizeof(BufferSize)); + auto AfterExtents = + static_cast(internal_memcpy(CurrentBuffer, &ExtentsRecord, + sizeof(MetadataRecord))) + + sizeof(MetadataRecord); + internal_memcpy(AfterExtents, It->Data, BufferSize); + + XRayBuffer Result; + Result.Data = CurrentBuffer; + Result.Size = SerializedBufferSize; + ++It; + return Result; +} + +// Must finalize before flushing. 
+XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { + if (atomic_load(&LoggingStatus, memory_order_acquire) != + XRayLogInitStatus::XRAY_LOG_FINALIZED) { + if (Verbosity()) + Report("Not flushing log, implementation is not finalized.\n"); + return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; + } + + s32 Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; + if (!atomic_compare_exchange_strong(&LogFlushStatus, &Result, + XRayLogFlushStatus::XRAY_LOG_FLUSHING, + memory_order_release)) { + if (Verbosity()) + Report("Not flushing log, implementation is still finalizing.\n"); + return static_cast(Result); + } + + if (BQ == nullptr) { + if (Verbosity()) + Report("Cannot flush when global buffer queue is null.\n"); + return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; + } + + // We wait a number of milliseconds to allow threads to see that we've + // finalised before attempting to flush the log. + SleepForMillis(fdrFlags()->grace_period_ms); + + // At this point, we're going to uninstall the iterator implementation, before + // we decide to do anything further with the global buffer queue. + __xray_log_remove_buffer_iterator(); + + // Once flushed, we should set the global status of the logging implementation + // to "uninitialized" to allow for FDR-logging multiple runs. + auto ResetToUnitialized = at_scope_exit([] { + atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, + memory_order_release); + }); + + auto CleanupBuffers = at_scope_exit([] { + auto &TLD = getThreadLocalData(); + if (TLD.Controller != nullptr) + TLD.Controller->flush(); + }); + + if (fdrFlags()->no_file_flush) { + if (Verbosity()) + Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n"); + + atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, + memory_order_release); + return XRayLogFlushStatus::XRAY_LOG_FLUSHED; + } + + // We write out the file in the following format: + // + // 1) We write down the XRay file header with version 1, type FDR_LOG. 
+ // 2) Then we use the 'apply' member of the BufferQueue that's live, to + // ensure that at this point in time we write down the buffers that have + // been released (and marked "used") -- we dump the full buffer for now + // (fixed-sized) and let the tools reading the buffers deal with the data + // afterwards. + // + LogWriter *LW = LogWriter::Open(); + if (LW == nullptr) { + auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; + atomic_store(&LogFlushStatus, Result, memory_order_release); + return Result; + } + + XRayFileHeader Header = fdrCommonHeaderInfo(); + Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()}; + LW->WriteAll(reinterpret_cast(&Header), + reinterpret_cast(&Header) + sizeof(Header)); + + // Release the current thread's buffer before we attempt to write out all the + // buffers. This ensures that in case we had only a single thread going, that + // we are able to capture the data nonetheless. + auto &TLD = getThreadLocalData(); + if (TLD.Controller != nullptr) + TLD.Controller->flush(); + + BQ->apply([&](const BufferQueue::Buffer &B) { + // Starting at version 2 of the FDR logging implementation, we only write + // the records identified by the extents of the buffer. We use the Extents + // from the Buffer and write that out as the first record in the buffer. We + // still use a Metadata record, but fill in the extents instead for the + // data. 
+ MetadataRecord ExtentsRecord; + auto BufferExtents = atomic_load(B.Extents, memory_order_acquire); + DCHECK(BufferExtents <= B.Size); + ExtentsRecord.Type = uint8_t(RecordType::Metadata); + ExtentsRecord.RecordKind = + uint8_t(MetadataRecord::RecordKinds::BufferExtents); + internal_memcpy(ExtentsRecord.Data, &BufferExtents, sizeof(BufferExtents)); + if (BufferExtents > 0) { + LW->WriteAll(reinterpret_cast(&ExtentsRecord), + reinterpret_cast(&ExtentsRecord) + + sizeof(MetadataRecord)); + LW->WriteAll(reinterpret_cast(B.Data), + reinterpret_cast(B.Data) + BufferExtents); + } + }); + + atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, + memory_order_release); + return XRayLogFlushStatus::XRAY_LOG_FLUSHED; +} + +XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT { + s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED; + if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus, + XRayLogInitStatus::XRAY_LOG_FINALIZING, + memory_order_release)) { + if (Verbosity()) + Report("Cannot finalize log, implementation not initialized.\n"); + return static_cast(CurrentStatus); + } + + // Do special things to make the log finalize itself, and not allow any more + // operations to be performed until re-initialized. + if (BQ == nullptr) { + if (Verbosity()) + Report("Attempting to finalize an uninitialized global buffer!\n"); + } else { + BQ->finalize(); + } + + atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED, + memory_order_release); + return XRayLogInitStatus::XRAY_LOG_FINALIZED; +} + +struct TSCAndCPU { + uint64_t TSC = 0; + unsigned char CPU = 0; +}; + +static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT { + // We want to get the TSC as early as possible, so that we can check whether + // we've seen this CPU before. We also do it before we load anything else, + // to allow for forward progress with the scheduling. 
+ TSCAndCPU Result; + + // Test once for required CPU features + static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT; + static bool TSCSupported = true; + pthread_once( + &OnceProbe, +[] { TSCSupported = probeRequiredCPUFeatures(); }); + + if (TSCSupported) { + Result.TSC = __xray::readTSC(Result.CPU); + } else { + // FIXME: This code needs refactoring as it appears in multiple locations + timespec TS; + int result = clock_gettime(CLOCK_REALTIME, &TS); + if (result != 0) { + Report("clock_gettime(2) return %d, errno=%d", result, int(errno)); + TS = {0, 0}; + } + Result.CPU = 0; + Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec; + } + return Result; +} + +thread_local atomic_uint8_t Running{0}; + +static bool setupTLD(ThreadLocalData &TLD) XRAY_NEVER_INSTRUMENT { + // Check if we're finalizing, before proceeding. + { + auto Status = atomic_load(&LoggingStatus, memory_order_acquire); + if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING || + Status == XRayLogInitStatus::XRAY_LOG_FINALIZED) { + if (TLD.Controller != nullptr) { + TLD.Controller->flush(); + TLD.Controller = nullptr; + } + return false; + } + } + + if (UNLIKELY(TLD.Controller == nullptr)) { + // Set up the TLD buffer queue. + if (UNLIKELY(BQ == nullptr)) + return false; + TLD.BQ = BQ; + + // Check that we have a valid buffer. + if (TLD.Buffer.Generation != BQ->generation() && + TLD.BQ->releaseBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok) + return false; + + // Set up a buffer, before setting up the log writer. Bail out on failure. + if (TLD.BQ->getBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok) + return false; + + // Set up the Log Writer for this thread. 
+ if (UNLIKELY(TLD.Writer == nullptr)) { + auto *LWStorage = reinterpret_cast(&TLD.LWStorage); + new (LWStorage) FDRLogWriter(TLD.Buffer); + TLD.Writer = LWStorage; + } else { + TLD.Writer->resetRecord(); + } + + auto *CStorage = reinterpret_cast *>(&TLD.CStorage); + new (CStorage) + FDRController<>(TLD.BQ, TLD.Buffer, *TLD.Writer, clock_gettime, + atomic_load_relaxed(&ThresholdTicks)); + TLD.Controller = CStorage; + } + + DCHECK_NE(TLD.Controller, nullptr); + return true; +} + +void fdrLoggingHandleArg0(int32_t FuncId, + XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { + auto TC = getTimestamp(); + auto &TSC = TC.TSC; + auto &CPU = TC.CPU; + RecursionGuard Guard{Running}; + if (!Guard) + return; + + auto &TLD = getThreadLocalData(); + if (!setupTLD(TLD)) + return; + + switch (Entry) { + case XRayEntryType::ENTRY: + case XRayEntryType::LOG_ARGS_ENTRY: + TLD.Controller->functionEnter(FuncId, TSC, CPU); + return; + case XRayEntryType::EXIT: + TLD.Controller->functionExit(FuncId, TSC, CPU); + return; + case XRayEntryType::TAIL: + TLD.Controller->functionTailExit(FuncId, TSC, CPU); + return; + case XRayEntryType::CUSTOM_EVENT: + case XRayEntryType::TYPED_EVENT: + break; + } +} + +void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, + uint64_t Arg) XRAY_NEVER_INSTRUMENT { + auto TC = getTimestamp(); + auto &TSC = TC.TSC; + auto &CPU = TC.CPU; + RecursionGuard Guard{Running}; + if (!Guard) + return; + + auto &TLD = getThreadLocalData(); + if (!setupTLD(TLD)) + return; + + switch (Entry) { + case XRayEntryType::ENTRY: + case XRayEntryType::LOG_ARGS_ENTRY: + TLD.Controller->functionEnterArg(FuncId, TSC, CPU, Arg); + return; + case XRayEntryType::EXIT: + TLD.Controller->functionExit(FuncId, TSC, CPU); + return; + case XRayEntryType::TAIL: + TLD.Controller->functionTailExit(FuncId, TSC, CPU); + return; + case XRayEntryType::CUSTOM_EVENT: + case XRayEntryType::TYPED_EVENT: + break; + } +} + +void fdrLoggingHandleCustomEvent(void *Event, + std::size_t EventSize) 
XRAY_NEVER_INSTRUMENT { + auto TC = getTimestamp(); + auto &TSC = TC.TSC; + auto &CPU = TC.CPU; + RecursionGuard Guard{Running}; + if (!Guard) + return; + + // Complain when we ever get at least one custom event that's larger than what + // we can possibly support. + if (EventSize > + static_cast(std::numeric_limits::max())) { + static pthread_once_t Once = PTHREAD_ONCE_INIT; + pthread_once( + &Once, +[] { + Report("Custom event size too large; truncating to %d.\n", + std::numeric_limits::max()); + }); + } + + auto &TLD = getThreadLocalData(); + if (!setupTLD(TLD)) + return; + + int32_t ReducedEventSize = static_cast(EventSize); + TLD.Controller->customEvent(TSC, CPU, Event, ReducedEventSize); +} + +void fdrLoggingHandleTypedEvent( + uint16_t EventType, const void *Event, + std::size_t EventSize) noexcept XRAY_NEVER_INSTRUMENT { + auto TC = getTimestamp(); + auto &TSC = TC.TSC; + auto &CPU = TC.CPU; + RecursionGuard Guard{Running}; + if (!Guard) + return; + + // Complain when we ever get at least one typed event that's larger than what + // we can possibly support. 
+ if (EventSize > + static_cast(std::numeric_limits::max())) { + static pthread_once_t Once = PTHREAD_ONCE_INIT; + pthread_once( + &Once, +[] { + Report("Typed event size too large; truncating to %d.\n", + std::numeric_limits::max()); + }); + } + + auto &TLD = getThreadLocalData(); + if (!setupTLD(TLD)) + return; + + int32_t ReducedEventSize = static_cast(EventSize); + TLD.Controller->typedEvent(TSC, CPU, EventType, Event, ReducedEventSize); +} + +XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options, + size_t OptionsSize) XRAY_NEVER_INSTRUMENT { + if (Options == nullptr) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + + s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus, + XRayLogInitStatus::XRAY_LOG_INITIALIZING, + memory_order_release)) { + if (Verbosity()) + Report("Cannot initialize already initialized implementation.\n"); + return static_cast(CurrentStatus); + } + + if (Verbosity()) + Report("Initializing FDR mode with options: %s\n", + static_cast(Options)); + + // TODO: Factor out the flags specific to the FDR mode implementation. For + // now, use the global/single definition of the flags, since the FDR mode + // flags are already defined there. + FlagParser FDRParser; + FDRFlags FDRFlags; + registerXRayFDRFlags(&FDRParser, &FDRFlags); + FDRFlags.setDefaults(); + + // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided + // options until we migrate everyone to use the XRAY_FDR_OPTIONS + // compiler-provided options. + FDRParser.ParseString(useCompilerDefinedFlags()); + FDRParser.ParseString(useCompilerDefinedFDRFlags()); + auto *EnvOpts = GetEnv("XRAY_FDR_OPTIONS"); + if (EnvOpts == nullptr) + EnvOpts = ""; + FDRParser.ParseString(EnvOpts); + + // FIXME: Remove this when we fully remove the deprecated flags. 
+ if (internal_strlen(EnvOpts) == 0) { + FDRFlags.func_duration_threshold_us = + flags()->xray_fdr_log_func_duration_threshold_us; + FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms; + } + + // The provided options should always override the compiler-provided and + // environment-variable defined options. + FDRParser.ParseString(static_cast(Options)); + *fdrFlags() = FDRFlags; + auto BufferSize = FDRFlags.buffer_size; + auto BufferMax = FDRFlags.buffer_max; + + if (BQ == nullptr) { + bool Success = false; + BQ = reinterpret_cast(&BufferQueueStorage); + new (BQ) BufferQueue(BufferSize, BufferMax, Success); + if (!Success) { + Report("BufferQueue init failed.\n"); + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + } + } else { + if (BQ->init(BufferSize, BufferMax) != BufferQueue::ErrorCode::Ok) { + if (Verbosity()) + Report("Failed to re-initialize global buffer queue. Init failed.\n"); + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + } + } + + static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; + pthread_once( + &OnceInit, +[] { + atomic_store(&TicksPerSec, + probeRequiredCPUFeatures() ? getTSCFrequency() + : __xray::NanosecondsPerSecond, + memory_order_release); + pthread_key_create( + &Key, +[](void *TLDPtr) { + if (TLDPtr == nullptr) + return; + auto &TLD = *reinterpret_cast(TLDPtr); + if (TLD.BQ == nullptr) + return; + if (TLD.Buffer.Data == nullptr) + return; + auto EC = TLD.BQ->releaseBuffer(TLD.Buffer); + if (EC != BufferQueue::ErrorCode::Ok) + Report("At thread exit, failed to release buffer at %p; " + "error=%s\n", + TLD.Buffer.Data, BufferQueue::getErrorString(EC)); + }); + }); + + atomic_store(&ThresholdTicks, + atomic_load_relaxed(&TicksPerSec) * + fdrFlags()->func_duration_threshold_us / 1000000, + memory_order_release); + // Arg1 handler should go in first to avoid concurrent code accidentally + // falling back to arg0 when it should have ran arg1. 
+  __xray_set_handler_arg1(fdrLoggingHandleArg1);
+  // Install the actual handleArg0 handler after initialising the buffers.
+  __xray_set_handler(fdrLoggingHandleArg0);
+  __xray_set_customevent_handler(fdrLoggingHandleCustomEvent);
+  __xray_set_typedevent_handler(fdrLoggingHandleTypedEvent);
+
+  // Install the buffer iterator implementation.
+  __xray_log_set_buffer_iterator(fdrIterator);
+
+  atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
+               memory_order_release);
+
+  if (Verbosity())
+    Report("XRay FDR init successful.\n");
+  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
+}
+
+bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT { // false on any failure
+  XRayLogImpl Impl{
+      fdrLoggingInit,
+      fdrLoggingFinalize,
+      fdrLoggingHandleArg0,
+      fdrLoggingFlush,
+  };
+  auto RegistrationResult = __xray_log_register_mode("xray-fdr", Impl);
+  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
+    if (Verbosity()) // only the diagnostic is verbosity-gated, not the failure
+      Report("Cannot register XRay FDR mode to 'xray-fdr'; error = %d\n",
+             RegistrationResult);
+    return false;
+  }
+
+  if (flags()->xray_fdr_log ||
+      !internal_strcmp(flags()->xray_mode, "xray-fdr")) {
+    auto SelectResult = __xray_log_select_mode("xray-fdr");
+    if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
+      if (Verbosity()) // as above: always fail, report only when verbose
+        Report("Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n",
+               SelectResult);
+      return false;
+    }
+  }
+  return true;
+}
+
+} // namespace __xray
+
+static auto UNUSED Unused = __xray::fdrLogDynamicInitializer();
diff --git a/lib/xray/xray_flags.cc b/lib/xray/xray_flags.cc
deleted file mode 100644
index b9e8324a7874..000000000000
--- a/lib/xray/xray_flags.cc
+++ /dev/null
@@ -1,84 +0,0 @@
-//===-- xray_flags.cc -------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instrumentation system. -// -// XRay flag parsing logic. -//===----------------------------------------------------------------------===// - -#include "xray_flags.h" -#include "sanitizer_common/sanitizer_common.h" -#include "sanitizer_common/sanitizer_flag_parser.h" -#include "sanitizer_common/sanitizer_libc.h" -#include "xray_defs.h" - -using namespace __sanitizer; - -namespace __xray { - -Flags xray_flags_dont_use_directly; // use via flags(). - -void Flags::setDefaults() XRAY_NEVER_INSTRUMENT { -#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; -#include "xray_flags.inc" -#undef XRAY_FLAG -} - -void registerXRayFlags(FlagParser *P, Flags *F) XRAY_NEVER_INSTRUMENT { -#define XRAY_FLAG(Type, Name, DefaultValue, Description) \ - RegisterFlag(P, #Name, Description, &F->Name); -#include "xray_flags.inc" -#undef XRAY_FLAG -} - -// This function, as defined with the help of a macro meant to be introduced at -// build time of the XRay runtime, passes in a statically defined list of -// options that control XRay. This means users/deployments can tweak the -// defaults that override the hard-coded defaults in the xray_flags.inc at -// compile-time using the XRAY_DEFAULT_OPTIONS macro. -const char *useCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT { -#ifdef XRAY_DEFAULT_OPTIONS - // Do the double-layered string conversion to prevent badly crafted strings - // provided through the XRAY_DEFAULT_OPTIONS from causing compilation issues - // (or changing the semantics of the implementation through the macro). This - // ensures that we convert whatever XRAY_DEFAULT_OPTIONS is defined as a - // string literal. 
- return SANITIZER_STRINGIFY(XRAY_DEFAULT_OPTIONS); -#else - return ""; -#endif -} - -void initializeFlags() XRAY_NEVER_INSTRUMENT { - SetCommonFlagsDefaults(); - auto *F = flags(); - F->setDefaults(); - - FlagParser XRayParser; - registerXRayFlags(&XRayParser, F); - RegisterCommonFlags(&XRayParser); - - // Use options defaulted at compile-time for the runtime. - const char *XRayCompileFlags = useCompilerDefinedFlags(); - XRayParser.ParseString(XRayCompileFlags); - - // Override from environment variables. - XRayParser.ParseStringFromEnv("XRAY_OPTIONS"); - - // Override from command line. - InitializeCommonFlags(); - - if (Verbosity()) - ReportUnrecognizedFlags(); - - if (common_flags()->help) { - XRayParser.PrintFlagDescriptions(); - } -} - -} // namespace __xray diff --git a/lib/xray/xray_flags.cpp b/lib/xray/xray_flags.cpp new file mode 100644 index 000000000000..e4c6906dc443 --- /dev/null +++ b/lib/xray/xray_flags.cpp @@ -0,0 +1,84 @@ +//===-- xray_flags.cpp ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// XRay flag parsing logic. +//===----------------------------------------------------------------------===// + +#include "xray_flags.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "xray_defs.h" + +using namespace __sanitizer; + +namespace __xray { + +Flags xray_flags_dont_use_directly; // use via flags(). 
+ +void Flags::setDefaults() XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; +#include "xray_flags.inc" +#undef XRAY_FLAG +} + +void registerXRayFlags(FlagParser *P, Flags *F) XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) \ + RegisterFlag(P, #Name, Description, &F->Name); +#include "xray_flags.inc" +#undef XRAY_FLAG +} + +// This function, as defined with the help of a macro meant to be introduced at +// build time of the XRay runtime, passes in a statically defined list of +// options that control XRay. This means users/deployments can tweak the +// defaults that override the hard-coded defaults in the xray_flags.inc at +// compile-time using the XRAY_DEFAULT_OPTIONS macro. +const char *useCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT { +#ifdef XRAY_DEFAULT_OPTIONS + // Do the double-layered string conversion to prevent badly crafted strings + // provided through the XRAY_DEFAULT_OPTIONS from causing compilation issues + // (or changing the semantics of the implementation through the macro). This + // ensures that we convert whatever XRAY_DEFAULT_OPTIONS is defined as a + // string literal. + return SANITIZER_STRINGIFY(XRAY_DEFAULT_OPTIONS); +#else + return ""; +#endif +} + +void initializeFlags() XRAY_NEVER_INSTRUMENT { + SetCommonFlagsDefaults(); + auto *F = flags(); + F->setDefaults(); + + FlagParser XRayParser; + registerXRayFlags(&XRayParser, F); + RegisterCommonFlags(&XRayParser); + + // Use options defaulted at compile-time for the runtime. + const char *XRayCompileFlags = useCompilerDefinedFlags(); + XRayParser.ParseString(XRayCompileFlags); + + // Override from environment variables. + XRayParser.ParseStringFromEnv("XRAY_OPTIONS"); + + // Override from command line. 
+ InitializeCommonFlags(); + + if (Verbosity()) + ReportUnrecognizedFlags(); + + if (common_flags()->help) { + XRayParser.PrintFlagDescriptions(); + } +} + +} // namespace __xray diff --git a/lib/xray/xray_init.cc b/lib/xray/xray_init.cc deleted file mode 100644 index b79bc08c5f4d..000000000000 --- a/lib/xray/xray_init.cc +++ /dev/null @@ -1,115 +0,0 @@ -//===-- xray_init.cc --------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instrumentation system. -// -// XRay initialisation logic. -//===----------------------------------------------------------------------===// - -#include -#include -#include - -#include "sanitizer_common/sanitizer_common.h" -#include "xray_defs.h" -#include "xray_flags.h" -#include "xray_interface_internal.h" - -extern "C" { -void __xray_init(); -extern const XRaySledEntry __start_xray_instr_map[] __attribute__((weak)); -extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak)); -extern const XRayFunctionSledIndex __start_xray_fn_idx[] __attribute__((weak)); -extern const XRayFunctionSledIndex __stop_xray_fn_idx[] __attribute__((weak)); - -#if SANITIZER_MAC -// HACK: This is a temporary workaround to make XRay build on -// Darwin, but it will probably not work at runtime. -const XRaySledEntry __start_xray_instr_map[] = {}; -extern const XRaySledEntry __stop_xray_instr_map[] = {}; -extern const XRayFunctionSledIndex __start_xray_fn_idx[] = {}; -extern const XRayFunctionSledIndex __stop_xray_fn_idx[] = {}; -#endif -} - -using namespace __xray; - -// When set to 'true' this means the XRay runtime has been initialised. 
We use -// the weak symbols defined above (__start_xray_inst_map and -// __stop_xray_instr_map) to initialise the instrumentation map that XRay uses -// for runtime patching/unpatching of instrumentation points. -// -// FIXME: Support DSO instrumentation maps too. The current solution only works -// for statically linked executables. -atomic_uint8_t XRayInitialized{0}; - -// This should always be updated before XRayInitialized is updated. -SpinMutex XRayInstrMapMutex; -XRaySledMap XRayInstrMap; - -// Global flag to determine whether the flags have been initialized. -atomic_uint8_t XRayFlagsInitialized{0}; - -// A mutex to allow only one thread to initialize the XRay data structures. -SpinMutex XRayInitMutex; - -// __xray_init() will do the actual loading of the current process' memory map -// and then proceed to look for the .xray_instr_map section/segment. -void __xray_init() XRAY_NEVER_INSTRUMENT { - SpinMutexLock Guard(&XRayInitMutex); - // Short-circuit if we've already initialized XRay before. - if (atomic_load(&XRayInitialized, memory_order_acquire)) - return; - - // XRAY is not compatible with PaX MPROTECT - CheckMPROTECT(); - - if (!atomic_load(&XRayFlagsInitialized, memory_order_acquire)) { - initializeFlags(); - atomic_store(&XRayFlagsInitialized, true, memory_order_release); - } - - if (__start_xray_instr_map == nullptr) { - if (Verbosity()) - Report("XRay instrumentation map missing. Not initializing XRay.\n"); - return; - } - - { - SpinMutexLock Guard(&XRayInstrMapMutex); - XRayInstrMap.Sleds = __start_xray_instr_map; - XRayInstrMap.Entries = __stop_xray_instr_map - __start_xray_instr_map; - XRayInstrMap.SledsIndex = __start_xray_fn_idx; - XRayInstrMap.Functions = __stop_xray_fn_idx - __start_xray_fn_idx; - } - atomic_store(&XRayInitialized, true, memory_order_release); - -#ifndef XRAY_NO_PREINIT - if (flags()->patch_premain) - __xray_patch(); -#endif -} - -// FIXME: Make check-xray tests work on FreeBSD without -// SANITIZER_CAN_USE_PREINIT_ARRAY. 
-// See sanitizer_internal_defs.h where the macro is defined. -// Calling unresolved PLT functions in .preinit_array can lead to deadlock on -// FreeBSD but here it seems benign. -#if !defined(XRAY_NO_PREINIT) && \ - (SANITIZER_CAN_USE_PREINIT_ARRAY || SANITIZER_FREEBSD) -// Only add the preinit array initialization if the sanitizers can. -__attribute__((section(".preinit_array"), - used)) void (*__local_xray_preinit)(void) = __xray_init; -#else -// If we cannot use the .preinit_array section, we should instead use dynamic -// initialisation. -__attribute__ ((constructor (0))) -static void __local_xray_dyninit() { - __xray_init(); -} -#endif diff --git a/lib/xray/xray_init.cpp b/lib/xray/xray_init.cpp new file mode 100644 index 000000000000..408396477975 --- /dev/null +++ b/lib/xray/xray_init.cpp @@ -0,0 +1,115 @@ +//===-- xray_init.cpp -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// XRay initialisation logic. 
+//===----------------------------------------------------------------------===// + +#include +#include +#include + +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_flags.h" +#include "xray_interface_internal.h" + +extern "C" { +void __xray_init(); +extern const XRaySledEntry __start_xray_instr_map[] __attribute__((weak)); +extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak)); +extern const XRayFunctionSledIndex __start_xray_fn_idx[] __attribute__((weak)); +extern const XRayFunctionSledIndex __stop_xray_fn_idx[] __attribute__((weak)); + +#if SANITIZER_MAC +// HACK: This is a temporary workaround to make XRay build on +// Darwin, but it will probably not work at runtime. +const XRaySledEntry __start_xray_instr_map[] = {}; +extern const XRaySledEntry __stop_xray_instr_map[] = {}; +extern const XRayFunctionSledIndex __start_xray_fn_idx[] = {}; +extern const XRayFunctionSledIndex __stop_xray_fn_idx[] = {}; +#endif +} + +using namespace __xray; + +// When set to 'true' this means the XRay runtime has been initialised. We use +// the weak symbols defined above (__start_xray_inst_map and +// __stop_xray_instr_map) to initialise the instrumentation map that XRay uses +// for runtime patching/unpatching of instrumentation points. +// +// FIXME: Support DSO instrumentation maps too. The current solution only works +// for statically linked executables. +atomic_uint8_t XRayInitialized{0}; + +// This should always be updated before XRayInitialized is updated. +SpinMutex XRayInstrMapMutex; +XRaySledMap XRayInstrMap; + +// Global flag to determine whether the flags have been initialized. +atomic_uint8_t XRayFlagsInitialized{0}; + +// A mutex to allow only one thread to initialize the XRay data structures. +SpinMutex XRayInitMutex; + +// __xray_init() will do the actual loading of the current process' memory map +// and then proceed to look for the .xray_instr_map section/segment. 
+void __xray_init() XRAY_NEVER_INSTRUMENT { + SpinMutexLock Guard(&XRayInitMutex); + // Short-circuit if we've already initialized XRay before. + if (atomic_load(&XRayInitialized, memory_order_acquire)) + return; + + // XRAY is not compatible with PaX MPROTECT + CheckMPROTECT(); + + if (!atomic_load(&XRayFlagsInitialized, memory_order_acquire)) { + initializeFlags(); + atomic_store(&XRayFlagsInitialized, true, memory_order_release); + } + + if (__start_xray_instr_map == nullptr) { + if (Verbosity()) + Report("XRay instrumentation map missing. Not initializing XRay.\n"); + return; + } + + { + SpinMutexLock Guard(&XRayInstrMapMutex); + XRayInstrMap.Sleds = __start_xray_instr_map; + XRayInstrMap.Entries = __stop_xray_instr_map - __start_xray_instr_map; + XRayInstrMap.SledsIndex = __start_xray_fn_idx; + XRayInstrMap.Functions = __stop_xray_fn_idx - __start_xray_fn_idx; + } + atomic_store(&XRayInitialized, true, memory_order_release); + +#ifndef XRAY_NO_PREINIT + if (flags()->patch_premain) + __xray_patch(); +#endif +} + +// FIXME: Make check-xray tests work on FreeBSD without +// SANITIZER_CAN_USE_PREINIT_ARRAY. +// See sanitizer_internal_defs.h where the macro is defined. +// Calling unresolved PLT functions in .preinit_array can lead to deadlock on +// FreeBSD but here it seems benign. +#if !defined(XRAY_NO_PREINIT) && \ + (SANITIZER_CAN_USE_PREINIT_ARRAY || SANITIZER_FREEBSD) +// Only add the preinit array initialization if the sanitizers can. +__attribute__((section(".preinit_array"), + used)) void (*__local_xray_preinit)(void) = __xray_init; +#else +// If we cannot use the .preinit_array section, we should instead use dynamic +// initialisation. 
+__attribute__ ((constructor (0))) +static void __local_xray_dyninit() { + __xray_init(); +} +#endif diff --git a/lib/xray/xray_interface.cc b/lib/xray/xray_interface.cc deleted file mode 100644 index 0d22893eb30f..000000000000 --- a/lib/xray/xray_interface.cc +++ /dev/null @@ -1,480 +0,0 @@ -//===-- xray_interface.cpp --------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instrumentation system. -// -// Implementation of the API functions. -// -//===----------------------------------------------------------------------===// - -#include "xray_interface_internal.h" - -#include -#include -#include -#include -#include -#include - -#if SANITIZER_FUCHSIA -#include -#include -#include -#include -#endif - -#include "sanitizer_common/sanitizer_addrhashmap.h" -#include "sanitizer_common/sanitizer_common.h" - -#include "xray_defs.h" -#include "xray_flags.h" - -extern __sanitizer::SpinMutex XRayInstrMapMutex; -extern __sanitizer::atomic_uint8_t XRayInitialized; -extern __xray::XRaySledMap XRayInstrMap; - -namespace __xray { - -#if defined(__x86_64__) -static const int16_t cSledLength = 12; -#elif defined(__aarch64__) -static const int16_t cSledLength = 32; -#elif defined(__arm__) -static const int16_t cSledLength = 28; -#elif SANITIZER_MIPS32 -static const int16_t cSledLength = 48; -#elif SANITIZER_MIPS64 -static const int16_t cSledLength = 64; -#elif defined(__powerpc64__) -static const int16_t cSledLength = 8; -#else -#error "Unsupported CPU Architecture" -#endif /* CPU architecture */ - -// This is the function to call when we encounter the entry or exit sleds. 
-atomic_uintptr_t XRayPatchedFunction{0}; - -// This is the function to call from the arg1-enabled sleds/trampolines. -atomic_uintptr_t XRayArgLogger{0}; - -// This is the function to call when we encounter a custom event log call. -atomic_uintptr_t XRayPatchedCustomEvent{0}; - -// This is the function to call when we encounter a typed event log call. -atomic_uintptr_t XRayPatchedTypedEvent{0}; - -// This is the global status to determine whether we are currently -// patching/unpatching. -atomic_uint8_t XRayPatching{0}; - -struct TypeDescription { - uint32_t type_id; - std::size_t description_string_length; -}; - -using TypeDescriptorMapType = AddrHashMap; -// An address map from immutable descriptors to type ids. -TypeDescriptorMapType TypeDescriptorAddressMap{}; - -atomic_uint32_t TypeEventDescriptorCounter{0}; - -// MProtectHelper is an RAII wrapper for calls to mprotect(...) that will -// undo any successful mprotect(...) changes. This is used to make a page -// writeable and executable, and upon destruction if it was successful in -// doing so returns the page into a read-only and executable page. -// -// This is only used specifically for runtime-patching of the XRay -// instrumentation points. This assumes that the executable pages are -// originally read-and-execute only. 
-class MProtectHelper { - void *PageAlignedAddr; - std::size_t MProtectLen; - bool MustCleanup; - -public: - explicit MProtectHelper(void *PageAlignedAddr, - std::size_t MProtectLen, - std::size_t PageSize) XRAY_NEVER_INSTRUMENT - : PageAlignedAddr(PageAlignedAddr), - MProtectLen(MProtectLen), - MustCleanup(false) { -#if SANITIZER_FUCHSIA - MProtectLen = RoundUpTo(MProtectLen, PageSize); -#endif - } - - int MakeWriteable() XRAY_NEVER_INSTRUMENT { -#if SANITIZER_FUCHSIA - auto R = __sanitizer_change_code_protection( - reinterpret_cast(PageAlignedAddr), MProtectLen, true); - if (R != ZX_OK) { - Report("XRay: cannot change code protection: %s\n", - _zx_status_get_string(R)); - return -1; - } - MustCleanup = true; - return 0; -#else - auto R = mprotect(PageAlignedAddr, MProtectLen, - PROT_READ | PROT_WRITE | PROT_EXEC); - if (R != -1) - MustCleanup = true; - return R; -#endif - } - - ~MProtectHelper() XRAY_NEVER_INSTRUMENT { - if (MustCleanup) { -#if SANITIZER_FUCHSIA - auto R = __sanitizer_change_code_protection( - reinterpret_cast(PageAlignedAddr), MProtectLen, false); - if (R != ZX_OK) { - Report("XRay: cannot change code protection: %s\n", - _zx_status_get_string(R)); - } -#else - mprotect(PageAlignedAddr, MProtectLen, PROT_READ | PROT_EXEC); -#endif - } - } -}; - -namespace { - -bool patchSled(const XRaySledEntry &Sled, bool Enable, - int32_t FuncId) XRAY_NEVER_INSTRUMENT { - bool Success = false; - switch (Sled.Kind) { - case XRayEntryType::ENTRY: - Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_FunctionEntry); - break; - case XRayEntryType::EXIT: - Success = patchFunctionExit(Enable, FuncId, Sled); - break; - case XRayEntryType::TAIL: - Success = patchFunctionTailExit(Enable, FuncId, Sled); - break; - case XRayEntryType::LOG_ARGS_ENTRY: - Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_ArgLoggerEntry); - break; - case XRayEntryType::CUSTOM_EVENT: - Success = patchCustomEvent(Enable, FuncId, Sled); - break; - case XRayEntryType::TYPED_EVENT: 
- Success = patchTypedEvent(Enable, FuncId, Sled); - break; - default: - Report("Unsupported sled kind '%d' @%04x\n", Sled.Address, int(Sled.Kind)); - return false; - } - return Success; -} - -XRayPatchingStatus patchFunction(int32_t FuncId, - bool Enable) XRAY_NEVER_INSTRUMENT { - if (!atomic_load(&XRayInitialized, - memory_order_acquire)) - return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized. - - uint8_t NotPatching = false; - if (!atomic_compare_exchange_strong( - &XRayPatching, &NotPatching, true, memory_order_acq_rel)) - return XRayPatchingStatus::ONGOING; // Already patching. - - // Next, we look for the function index. - XRaySledMap InstrMap; - { - SpinMutexLock Guard(&XRayInstrMapMutex); - InstrMap = XRayInstrMap; - } - - // If we don't have an index, we can't patch individual functions. - if (InstrMap.Functions == 0) - return XRayPatchingStatus::NOT_INITIALIZED; - - // FuncId must be a positive number, less than the number of functions - // instrumented. - if (FuncId <= 0 || static_cast(FuncId) > InstrMap.Functions) { - Report("Invalid function id provided: %d\n", FuncId); - return XRayPatchingStatus::FAILED; - } - - // Now we patch ths sleds for this specific function. - auto SledRange = InstrMap.SledsIndex[FuncId - 1]; - auto *f = SledRange.Begin; - auto *e = SledRange.End; - - bool SucceedOnce = false; - while (f != e) - SucceedOnce |= patchSled(*f++, Enable, FuncId); - - atomic_store(&XRayPatching, false, - memory_order_release); - - if (!SucceedOnce) { - Report("Failed patching any sled for function '%d'.", FuncId); - return XRayPatchingStatus::FAILED; - } - - return XRayPatchingStatus::SUCCESS; -} - -// controlPatching implements the common internals of the patching/unpatching -// implementation. |Enable| defines whether we're enabling or disabling the -// runtime XRay instrumentation. 
-XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT { - if (!atomic_load(&XRayInitialized, - memory_order_acquire)) - return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized. - - uint8_t NotPatching = false; - if (!atomic_compare_exchange_strong( - &XRayPatching, &NotPatching, true, memory_order_acq_rel)) - return XRayPatchingStatus::ONGOING; // Already patching. - - uint8_t PatchingSuccess = false; - auto XRayPatchingStatusResetter = - at_scope_exit([&PatchingSuccess] { - if (!PatchingSuccess) - atomic_store(&XRayPatching, false, - memory_order_release); - }); - - XRaySledMap InstrMap; - { - SpinMutexLock Guard(&XRayInstrMapMutex); - InstrMap = XRayInstrMap; - } - if (InstrMap.Entries == 0) - return XRayPatchingStatus::NOT_INITIALIZED; - - uint32_t FuncId = 1; - uint64_t CurFun = 0; - - // First we want to find the bounds for which we have instrumentation points, - // and try to get as few calls to mprotect(...) as possible. We're assuming - // that all the sleds for the instrumentation map are contiguous as a single - // set of pages. When we do support dynamic shared object instrumentation, - // we'll need to do this for each set of page load offsets per DSO loaded. For - // now we're assuming we can mprotect the whole section of text between the - // minimum sled address and the maximum sled address (+ the largest sled - // size). - auto MinSled = InstrMap.Sleds[0]; - auto MaxSled = InstrMap.Sleds[InstrMap.Entries - 1]; - for (std::size_t I = 0; I < InstrMap.Entries; I++) { - const auto &Sled = InstrMap.Sleds[I]; - if (Sled.Address < MinSled.Address) - MinSled = Sled; - if (Sled.Address > MaxSled.Address) - MaxSled = Sled; - } - - const size_t PageSize = flags()->xray_page_size_override > 0 - ? 
flags()->xray_page_size_override - : GetPageSizeCached(); - if ((PageSize == 0) || ((PageSize & (PageSize - 1)) != 0)) { - Report("System page size is not a power of two: %lld\n", PageSize); - return XRayPatchingStatus::FAILED; - } - - void *PageAlignedAddr = - reinterpret_cast(MinSled.Address & ~(PageSize - 1)); - size_t MProtectLen = - (MaxSled.Address - reinterpret_cast(PageAlignedAddr)) + cSledLength; - MProtectHelper Protector(PageAlignedAddr, MProtectLen, PageSize); - if (Protector.MakeWriteable() == -1) { - Report("Failed mprotect: %d\n", errno); - return XRayPatchingStatus::FAILED; - } - - for (std::size_t I = 0; I < InstrMap.Entries; ++I) { - auto &Sled = InstrMap.Sleds[I]; - auto F = Sled.Function; - if (CurFun == 0) - CurFun = F; - if (F != CurFun) { - ++FuncId; - CurFun = F; - } - patchSled(Sled, Enable, FuncId); - } - atomic_store(&XRayPatching, false, - memory_order_release); - PatchingSuccess = true; - return XRayPatchingStatus::SUCCESS; -} - -XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId, - bool Enable) XRAY_NEVER_INSTRUMENT { - XRaySledMap InstrMap; - { - SpinMutexLock Guard(&XRayInstrMapMutex); - InstrMap = XRayInstrMap; - } - - // FuncId must be a positive number, less than the number of functions - // instrumented. - if (FuncId <= 0 || static_cast(FuncId) > InstrMap.Functions) { - Report("Invalid function id provided: %d\n", FuncId); - return XRayPatchingStatus::FAILED; - } - - const size_t PageSize = flags()->xray_page_size_override > 0 - ? flags()->xray_page_size_override - : GetPageSizeCached(); - if ((PageSize == 0) || ((PageSize & (PageSize - 1)) != 0)) { - Report("Provided page size is not a power of two: %lld\n", PageSize); - return XRayPatchingStatus::FAILED; - } - - // Here we compute the minumum sled and maximum sled associated with a - // particular function ID. 
- auto SledRange = InstrMap.SledsIndex[FuncId - 1]; - auto *f = SledRange.Begin; - auto *e = SledRange.End; - auto MinSled = *f; - auto MaxSled = *(SledRange.End - 1); - while (f != e) { - if (f->Address < MinSled.Address) - MinSled = *f; - if (f->Address > MaxSled.Address) - MaxSled = *f; - ++f; - } - - void *PageAlignedAddr = - reinterpret_cast(MinSled.Address & ~(PageSize - 1)); - size_t MProtectLen = - (MaxSled.Address - reinterpret_cast(PageAlignedAddr)) + cSledLength; - MProtectHelper Protector(PageAlignedAddr, MProtectLen, PageSize); - if (Protector.MakeWriteable() == -1) { - Report("Failed mprotect: %d\n", errno); - return XRayPatchingStatus::FAILED; - } - return patchFunction(FuncId, Enable); -} - -} // namespace - -} // namespace __xray - -using namespace __xray; - -// The following functions are declared `extern "C" {...}` in the header, hence -// they're defined in the global namespace. - -int __xray_set_handler(void (*entry)(int32_t, - XRayEntryType)) XRAY_NEVER_INSTRUMENT { - if (atomic_load(&XRayInitialized, - memory_order_acquire)) { - - atomic_store(&__xray::XRayPatchedFunction, - reinterpret_cast(entry), - memory_order_release); - return 1; - } - return 0; -} - -int __xray_set_customevent_handler(void (*entry)(void *, size_t)) - XRAY_NEVER_INSTRUMENT { - if (atomic_load(&XRayInitialized, - memory_order_acquire)) { - atomic_store(&__xray::XRayPatchedCustomEvent, - reinterpret_cast(entry), - memory_order_release); - return 1; - } - return 0; -} - -int __xray_set_typedevent_handler(void (*entry)( - uint16_t, const void *, size_t)) XRAY_NEVER_INSTRUMENT { - if (atomic_load(&XRayInitialized, - memory_order_acquire)) { - atomic_store(&__xray::XRayPatchedTypedEvent, - reinterpret_cast(entry), - memory_order_release); - return 1; - } - return 0; -} - -int __xray_remove_handler() XRAY_NEVER_INSTRUMENT { - return __xray_set_handler(nullptr); -} - -int __xray_remove_customevent_handler() XRAY_NEVER_INSTRUMENT { - return 
__xray_set_customevent_handler(nullptr); -} - -int __xray_remove_typedevent_handler() XRAY_NEVER_INSTRUMENT { - return __xray_set_typedevent_handler(nullptr); -} - -uint16_t __xray_register_event_type( - const char *const event_type) XRAY_NEVER_INSTRUMENT { - TypeDescriptorMapType::Handle h(&TypeDescriptorAddressMap, (uptr)event_type); - if (h.created()) { - h->type_id = atomic_fetch_add( - &TypeEventDescriptorCounter, 1, memory_order_acq_rel); - h->description_string_length = strnlen(event_type, 1024); - } - return h->type_id; -} - -XRayPatchingStatus __xray_patch() XRAY_NEVER_INSTRUMENT { - return controlPatching(true); -} - -XRayPatchingStatus __xray_unpatch() XRAY_NEVER_INSTRUMENT { - return controlPatching(false); -} - -XRayPatchingStatus __xray_patch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT { - return mprotectAndPatchFunction(FuncId, true); -} - -XRayPatchingStatus -__xray_unpatch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT { - return mprotectAndPatchFunction(FuncId, false); -} - -int __xray_set_handler_arg1(void (*entry)(int32_t, XRayEntryType, uint64_t)) { - if (!atomic_load(&XRayInitialized, - memory_order_acquire)) - return 0; - - // A relaxed write might not be visible even if the current thread gets - // scheduled on a different CPU/NUMA node. We need to wait for everyone to - // have this handler installed for consistency of collected data across CPUs. - atomic_store(&XRayArgLogger, reinterpret_cast(entry), - memory_order_release); - return 1; -} - -int __xray_remove_handler_arg1() { return __xray_set_handler_arg1(nullptr); } - -uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT { - SpinMutexLock Guard(&XRayInstrMapMutex); - if (FuncId <= 0 || static_cast(FuncId) > XRayInstrMap.Functions) - return 0; - return XRayInstrMap.SledsIndex[FuncId - 1].Begin->Function -// On PPC, function entries are always aligned to 16 bytes. The beginning of a -// sled might be a local entry, which is always +8 based on the global entry. 
-// Always return the global entry. -#ifdef __PPC__ - & ~0xf -#endif - ; -} - -size_t __xray_max_function_id() XRAY_NEVER_INSTRUMENT { - SpinMutexLock Guard(&XRayInstrMapMutex); - return XRayInstrMap.Functions; -} diff --git a/lib/xray/xray_interface.cpp b/lib/xray/xray_interface.cpp new file mode 100644 index 000000000000..0d22893eb30f --- /dev/null +++ b/lib/xray/xray_interface.cpp @@ -0,0 +1,480 @@ +//===-- xray_interface.cpp --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of the API functions. +// +//===----------------------------------------------------------------------===// + +#include "xray_interface_internal.h" + +#include +#include +#include +#include +#include +#include + +#if SANITIZER_FUCHSIA +#include +#include +#include +#include +#endif + +#include "sanitizer_common/sanitizer_addrhashmap.h" +#include "sanitizer_common/sanitizer_common.h" + +#include "xray_defs.h" +#include "xray_flags.h" + +extern __sanitizer::SpinMutex XRayInstrMapMutex; +extern __sanitizer::atomic_uint8_t XRayInitialized; +extern __xray::XRaySledMap XRayInstrMap; + +namespace __xray { + +#if defined(__x86_64__) +static const int16_t cSledLength = 12; +#elif defined(__aarch64__) +static const int16_t cSledLength = 32; +#elif defined(__arm__) +static const int16_t cSledLength = 28; +#elif SANITIZER_MIPS32 +static const int16_t cSledLength = 48; +#elif SANITIZER_MIPS64 +static const int16_t cSledLength = 64; +#elif defined(__powerpc64__) +static const int16_t cSledLength = 8; +#else +#error "Unsupported CPU Architecture" +#endif /* CPU architecture */ + +// This is the function to call 
when we encounter the entry or exit sleds. +atomic_uintptr_t XRayPatchedFunction{0}; + +// This is the function to call from the arg1-enabled sleds/trampolines. +atomic_uintptr_t XRayArgLogger{0}; + +// This is the function to call when we encounter a custom event log call. +atomic_uintptr_t XRayPatchedCustomEvent{0}; + +// This is the function to call when we encounter a typed event log call. +atomic_uintptr_t XRayPatchedTypedEvent{0}; + +// This is the global status to determine whether we are currently +// patching/unpatching. +atomic_uint8_t XRayPatching{0}; + +struct TypeDescription { + uint32_t type_id; + std::size_t description_string_length; +}; + +using TypeDescriptorMapType = AddrHashMap; +// An address map from immutable descriptors to type ids. +TypeDescriptorMapType TypeDescriptorAddressMap{}; + +atomic_uint32_t TypeEventDescriptorCounter{0}; + +// MProtectHelper is an RAII wrapper for calls to mprotect(...) that will +// undo any successful mprotect(...) changes. This is used to make a page +// writeable and executable, and upon destruction if it was successful in +// doing so returns the page into a read-only and executable page. +// +// This is only used specifically for runtime-patching of the XRay +// instrumentation points. This assumes that the executable pages are +// originally read-and-execute only. 
+class MProtectHelper { + void *PageAlignedAddr; + std::size_t MProtectLen; + bool MustCleanup; + +public: + explicit MProtectHelper(void *PageAlignedAddr, + std::size_t MProtectLen, + std::size_t PageSize) XRAY_NEVER_INSTRUMENT + : PageAlignedAddr(PageAlignedAddr), + MProtectLen(MProtectLen), + MustCleanup(false) { +#if SANITIZER_FUCHSIA + MProtectLen = RoundUpTo(MProtectLen, PageSize); +#endif + } + + int MakeWriteable() XRAY_NEVER_INSTRUMENT { +#if SANITIZER_FUCHSIA + auto R = __sanitizer_change_code_protection( + reinterpret_cast(PageAlignedAddr), MProtectLen, true); + if (R != ZX_OK) { + Report("XRay: cannot change code protection: %s\n", + _zx_status_get_string(R)); + return -1; + } + MustCleanup = true; + return 0; +#else + auto R = mprotect(PageAlignedAddr, MProtectLen, + PROT_READ | PROT_WRITE | PROT_EXEC); + if (R != -1) + MustCleanup = true; + return R; +#endif + } + + ~MProtectHelper() XRAY_NEVER_INSTRUMENT { + if (MustCleanup) { +#if SANITIZER_FUCHSIA + auto R = __sanitizer_change_code_protection( + reinterpret_cast(PageAlignedAddr), MProtectLen, false); + if (R != ZX_OK) { + Report("XRay: cannot change code protection: %s\n", + _zx_status_get_string(R)); + } +#else + mprotect(PageAlignedAddr, MProtectLen, PROT_READ | PROT_EXEC); +#endif + } + } +}; + +namespace { + +bool patchSled(const XRaySledEntry &Sled, bool Enable, + int32_t FuncId) XRAY_NEVER_INSTRUMENT { + bool Success = false; + switch (Sled.Kind) { + case XRayEntryType::ENTRY: + Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_FunctionEntry); + break; + case XRayEntryType::EXIT: + Success = patchFunctionExit(Enable, FuncId, Sled); + break; + case XRayEntryType::TAIL: + Success = patchFunctionTailExit(Enable, FuncId, Sled); + break; + case XRayEntryType::LOG_ARGS_ENTRY: + Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_ArgLoggerEntry); + break; + case XRayEntryType::CUSTOM_EVENT: + Success = patchCustomEvent(Enable, FuncId, Sled); + break; + case XRayEntryType::TYPED_EVENT: 
+ Success = patchTypedEvent(Enable, FuncId, Sled); + break; + default: + Report("Unsupported sled kind '%d' @%04x\n", Sled.Address, int(Sled.Kind)); + return false; + } + return Success; +} + +XRayPatchingStatus patchFunction(int32_t FuncId, + bool Enable) XRAY_NEVER_INSTRUMENT { + if (!atomic_load(&XRayInitialized, + memory_order_acquire)) + return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized. + + uint8_t NotPatching = false; + if (!atomic_compare_exchange_strong( + &XRayPatching, &NotPatching, true, memory_order_acq_rel)) + return XRayPatchingStatus::ONGOING; // Already patching. + + // Next, we look for the function index. + XRaySledMap InstrMap; + { + SpinMutexLock Guard(&XRayInstrMapMutex); + InstrMap = XRayInstrMap; + } + + // If we don't have an index, we can't patch individual functions. + if (InstrMap.Functions == 0) + return XRayPatchingStatus::NOT_INITIALIZED; + + // FuncId must be a positive number, less than the number of functions + // instrumented. + if (FuncId <= 0 || static_cast(FuncId) > InstrMap.Functions) { + Report("Invalid function id provided: %d\n", FuncId); + return XRayPatchingStatus::FAILED; + } + + // Now we patch ths sleds for this specific function. + auto SledRange = InstrMap.SledsIndex[FuncId - 1]; + auto *f = SledRange.Begin; + auto *e = SledRange.End; + + bool SucceedOnce = false; + while (f != e) + SucceedOnce |= patchSled(*f++, Enable, FuncId); + + atomic_store(&XRayPatching, false, + memory_order_release); + + if (!SucceedOnce) { + Report("Failed patching any sled for function '%d'.", FuncId); + return XRayPatchingStatus::FAILED; + } + + return XRayPatchingStatus::SUCCESS; +} + +// controlPatching implements the common internals of the patching/unpatching +// implementation. |Enable| defines whether we're enabling or disabling the +// runtime XRay instrumentation. 
+XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT { + if (!atomic_load(&XRayInitialized, + memory_order_acquire)) + return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized. + + uint8_t NotPatching = false; + if (!atomic_compare_exchange_strong( + &XRayPatching, &NotPatching, true, memory_order_acq_rel)) + return XRayPatchingStatus::ONGOING; // Already patching. + + uint8_t PatchingSuccess = false; + auto XRayPatchingStatusResetter = + at_scope_exit([&PatchingSuccess] { + if (!PatchingSuccess) + atomic_store(&XRayPatching, false, + memory_order_release); + }); + + XRaySledMap InstrMap; + { + SpinMutexLock Guard(&XRayInstrMapMutex); + InstrMap = XRayInstrMap; + } + if (InstrMap.Entries == 0) + return XRayPatchingStatus::NOT_INITIALIZED; + + uint32_t FuncId = 1; + uint64_t CurFun = 0; + + // First we want to find the bounds for which we have instrumentation points, + // and try to get as few calls to mprotect(...) as possible. We're assuming + // that all the sleds for the instrumentation map are contiguous as a single + // set of pages. When we do support dynamic shared object instrumentation, + // we'll need to do this for each set of page load offsets per DSO loaded. For + // now we're assuming we can mprotect the whole section of text between the + // minimum sled address and the maximum sled address (+ the largest sled + // size). + auto MinSled = InstrMap.Sleds[0]; + auto MaxSled = InstrMap.Sleds[InstrMap.Entries - 1]; + for (std::size_t I = 0; I < InstrMap.Entries; I++) { + const auto &Sled = InstrMap.Sleds[I]; + if (Sled.Address < MinSled.Address) + MinSled = Sled; + if (Sled.Address > MaxSled.Address) + MaxSled = Sled; + } + + const size_t PageSize = flags()->xray_page_size_override > 0 + ? 
flags()->xray_page_size_override + : GetPageSizeCached(); + if ((PageSize == 0) || ((PageSize & (PageSize - 1)) != 0)) { + Report("System page size is not a power of two: %lld\n", PageSize); + return XRayPatchingStatus::FAILED; + } + + void *PageAlignedAddr = + reinterpret_cast(MinSled.Address & ~(PageSize - 1)); + size_t MProtectLen = + (MaxSled.Address - reinterpret_cast(PageAlignedAddr)) + cSledLength; + MProtectHelper Protector(PageAlignedAddr, MProtectLen, PageSize); + if (Protector.MakeWriteable() == -1) { + Report("Failed mprotect: %d\n", errno); + return XRayPatchingStatus::FAILED; + } + + for (std::size_t I = 0; I < InstrMap.Entries; ++I) { + auto &Sled = InstrMap.Sleds[I]; + auto F = Sled.Function; + if (CurFun == 0) + CurFun = F; + if (F != CurFun) { + ++FuncId; + CurFun = F; + } + patchSled(Sled, Enable, FuncId); + } + atomic_store(&XRayPatching, false, + memory_order_release); + PatchingSuccess = true; + return XRayPatchingStatus::SUCCESS; +} + +XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId, + bool Enable) XRAY_NEVER_INSTRUMENT { + XRaySledMap InstrMap; + { + SpinMutexLock Guard(&XRayInstrMapMutex); + InstrMap = XRayInstrMap; + } + + // FuncId must be a positive number, less than the number of functions + // instrumented. + if (FuncId <= 0 || static_cast(FuncId) > InstrMap.Functions) { + Report("Invalid function id provided: %d\n", FuncId); + return XRayPatchingStatus::FAILED; + } + + const size_t PageSize = flags()->xray_page_size_override > 0 + ? flags()->xray_page_size_override + : GetPageSizeCached(); + if ((PageSize == 0) || ((PageSize & (PageSize - 1)) != 0)) { + Report("Provided page size is not a power of two: %lld\n", PageSize); + return XRayPatchingStatus::FAILED; + } + + // Here we compute the minumum sled and maximum sled associated with a + // particular function ID. 
+ auto SledRange = InstrMap.SledsIndex[FuncId - 1]; + auto *f = SledRange.Begin; + auto *e = SledRange.End; + auto MinSled = *f; + auto MaxSled = *(SledRange.End - 1); + while (f != e) { + if (f->Address < MinSled.Address) + MinSled = *f; + if (f->Address > MaxSled.Address) + MaxSled = *f; + ++f; + } + + void *PageAlignedAddr = + reinterpret_cast(MinSled.Address & ~(PageSize - 1)); + size_t MProtectLen = + (MaxSled.Address - reinterpret_cast(PageAlignedAddr)) + cSledLength; + MProtectHelper Protector(PageAlignedAddr, MProtectLen, PageSize); + if (Protector.MakeWriteable() == -1) { + Report("Failed mprotect: %d\n", errno); + return XRayPatchingStatus::FAILED; + } + return patchFunction(FuncId, Enable); +} + +} // namespace + +} // namespace __xray + +using namespace __xray; + +// The following functions are declared `extern "C" {...}` in the header, hence +// they're defined in the global namespace. + +int __xray_set_handler(void (*entry)(int32_t, + XRayEntryType)) XRAY_NEVER_INSTRUMENT { + if (atomic_load(&XRayInitialized, + memory_order_acquire)) { + + atomic_store(&__xray::XRayPatchedFunction, + reinterpret_cast(entry), + memory_order_release); + return 1; + } + return 0; +} + +int __xray_set_customevent_handler(void (*entry)(void *, size_t)) + XRAY_NEVER_INSTRUMENT { + if (atomic_load(&XRayInitialized, + memory_order_acquire)) { + atomic_store(&__xray::XRayPatchedCustomEvent, + reinterpret_cast(entry), + memory_order_release); + return 1; + } + return 0; +} + +int __xray_set_typedevent_handler(void (*entry)( + uint16_t, const void *, size_t)) XRAY_NEVER_INSTRUMENT { + if (atomic_load(&XRayInitialized, + memory_order_acquire)) { + atomic_store(&__xray::XRayPatchedTypedEvent, + reinterpret_cast(entry), + memory_order_release); + return 1; + } + return 0; +} + +int __xray_remove_handler() XRAY_NEVER_INSTRUMENT { + return __xray_set_handler(nullptr); +} + +int __xray_remove_customevent_handler() XRAY_NEVER_INSTRUMENT { + return 
__xray_set_customevent_handler(nullptr); +} + +int __xray_remove_typedevent_handler() XRAY_NEVER_INSTRUMENT { + return __xray_set_typedevent_handler(nullptr); +} + +uint16_t __xray_register_event_type( + const char *const event_type) XRAY_NEVER_INSTRUMENT { + TypeDescriptorMapType::Handle h(&TypeDescriptorAddressMap, (uptr)event_type); + if (h.created()) { + h->type_id = atomic_fetch_add( + &TypeEventDescriptorCounter, 1, memory_order_acq_rel); + h->description_string_length = strnlen(event_type, 1024); + } + return h->type_id; +} + +XRayPatchingStatus __xray_patch() XRAY_NEVER_INSTRUMENT { + return controlPatching(true); +} + +XRayPatchingStatus __xray_unpatch() XRAY_NEVER_INSTRUMENT { + return controlPatching(false); +} + +XRayPatchingStatus __xray_patch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT { + return mprotectAndPatchFunction(FuncId, true); +} + +XRayPatchingStatus +__xray_unpatch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT { + return mprotectAndPatchFunction(FuncId, false); +} + +int __xray_set_handler_arg1(void (*entry)(int32_t, XRayEntryType, uint64_t)) { + if (!atomic_load(&XRayInitialized, + memory_order_acquire)) + return 0; + + // A relaxed write might not be visible even if the current thread gets + // scheduled on a different CPU/NUMA node. We need to wait for everyone to + // have this handler installed for consistency of collected data across CPUs. + atomic_store(&XRayArgLogger, reinterpret_cast(entry), + memory_order_release); + return 1; +} + +int __xray_remove_handler_arg1() { return __xray_set_handler_arg1(nullptr); } + +uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT { + SpinMutexLock Guard(&XRayInstrMapMutex); + if (FuncId <= 0 || static_cast(FuncId) > XRayInstrMap.Functions) + return 0; + return XRayInstrMap.SledsIndex[FuncId - 1].Begin->Function +// On PPC, function entries are always aligned to 16 bytes. The beginning of a +// sled might be a local entry, which is always +8 based on the global entry. 
+// Always return the global entry. +#ifdef __PPC__ + & ~0xf +#endif + ; +} + +size_t __xray_max_function_id() XRAY_NEVER_INSTRUMENT { + SpinMutexLock Guard(&XRayInstrMapMutex); + return XRayInstrMap.Functions; +} diff --git a/lib/xray/xray_log_interface.cc b/lib/xray/xray_log_interface.cc deleted file mode 100644 index 7916a9e2b8ad..000000000000 --- a/lib/xray/xray_log_interface.cc +++ /dev/null @@ -1,209 +0,0 @@ -//===-- xray_log_interface.cc ---------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a function call tracing system. -// -//===----------------------------------------------------------------------===// -#include "xray/xray_log_interface.h" - -#include "sanitizer_common/sanitizer_allocator_internal.h" -#include "sanitizer_common/sanitizer_atomic.h" -#include "sanitizer_common/sanitizer_mutex.h" -#include "xray/xray_interface.h" -#include "xray_defs.h" - -namespace __xray { -static SpinMutex XRayImplMutex; -static XRayLogImpl CurrentXRayImpl{nullptr, nullptr, nullptr, nullptr}; -static XRayLogImpl *GlobalXRayImpl = nullptr; - -// This is the default implementation of a buffer iterator, which always yields -// a null buffer. -XRayBuffer NullBufferIterator(XRayBuffer) XRAY_NEVER_INSTRUMENT { - return {nullptr, 0}; -} - -// This is the global function responsible for iterating through given buffers. -atomic_uintptr_t XRayBufferIterator{ - reinterpret_cast(&NullBufferIterator)}; - -// We use a linked list of Mode to XRayLogImpl mappings. This is a linked list -// when it should be a map because we're avoiding having to depend on C++ -// standard library data structures at this level of the implementation. 
-struct ModeImpl { - ModeImpl *Next; - const char *Mode; - XRayLogImpl Impl; -}; - -static ModeImpl SentinelModeImpl{ - nullptr, nullptr, {nullptr, nullptr, nullptr, nullptr}}; -static ModeImpl *ModeImpls = &SentinelModeImpl; -static const ModeImpl *CurrentMode = nullptr; - -} // namespace __xray - -using namespace __xray; - -void __xray_log_set_buffer_iterator(XRayBuffer (*Iterator)(XRayBuffer)) - XRAY_NEVER_INSTRUMENT { - atomic_store(&__xray::XRayBufferIterator, - reinterpret_cast(Iterator), memory_order_release); -} - -void __xray_log_remove_buffer_iterator() XRAY_NEVER_INSTRUMENT { - __xray_log_set_buffer_iterator(&NullBufferIterator); -} - -XRayLogRegisterStatus -__xray_log_register_mode(const char *Mode, - XRayLogImpl Impl) XRAY_NEVER_INSTRUMENT { - if (Impl.flush_log == nullptr || Impl.handle_arg0 == nullptr || - Impl.log_finalize == nullptr || Impl.log_init == nullptr) - return XRayLogRegisterStatus::XRAY_INCOMPLETE_IMPL; - - SpinMutexLock Guard(&XRayImplMutex); - // First, look for whether the mode already has a registered implementation. 
- for (ModeImpl *it = ModeImpls; it != &SentinelModeImpl; it = it->Next) { - if (!internal_strcmp(Mode, it->Mode)) - return XRayLogRegisterStatus::XRAY_DUPLICATE_MODE; - } - auto *NewModeImpl = static_cast(InternalAlloc(sizeof(ModeImpl))); - NewModeImpl->Next = ModeImpls; - NewModeImpl->Mode = internal_strdup(Mode); - NewModeImpl->Impl = Impl; - ModeImpls = NewModeImpl; - return XRayLogRegisterStatus::XRAY_REGISTRATION_OK; -} - -XRayLogRegisterStatus -__xray_log_select_mode(const char *Mode) XRAY_NEVER_INSTRUMENT { - SpinMutexLock Guard(&XRayImplMutex); - for (ModeImpl *it = ModeImpls; it != &SentinelModeImpl; it = it->Next) { - if (!internal_strcmp(Mode, it->Mode)) { - CurrentMode = it; - CurrentXRayImpl = it->Impl; - GlobalXRayImpl = &CurrentXRayImpl; - __xray_set_handler(it->Impl.handle_arg0); - return XRayLogRegisterStatus::XRAY_REGISTRATION_OK; - } - } - return XRayLogRegisterStatus::XRAY_MODE_NOT_FOUND; -} - -const char *__xray_log_get_current_mode() XRAY_NEVER_INSTRUMENT { - SpinMutexLock Guard(&XRayImplMutex); - if (CurrentMode != nullptr) - return CurrentMode->Mode; - return nullptr; -} - -void __xray_set_log_impl(XRayLogImpl Impl) XRAY_NEVER_INSTRUMENT { - if (Impl.log_init == nullptr || Impl.log_finalize == nullptr || - Impl.handle_arg0 == nullptr || Impl.flush_log == nullptr) { - SpinMutexLock Guard(&XRayImplMutex); - GlobalXRayImpl = nullptr; - CurrentMode = nullptr; - __xray_remove_handler(); - __xray_remove_handler_arg1(); - return; - } - - SpinMutexLock Guard(&XRayImplMutex); - CurrentXRayImpl = Impl; - GlobalXRayImpl = &CurrentXRayImpl; - __xray_set_handler(Impl.handle_arg0); -} - -void __xray_remove_log_impl() XRAY_NEVER_INSTRUMENT { - SpinMutexLock Guard(&XRayImplMutex); - GlobalXRayImpl = nullptr; - __xray_remove_handler(); - __xray_remove_handler_arg1(); -} - -XRayLogInitStatus __xray_log_init(size_t BufferSize, size_t MaxBuffers, - void *Args, - size_t ArgsSize) XRAY_NEVER_INSTRUMENT { - SpinMutexLock Guard(&XRayImplMutex); - if 
(!GlobalXRayImpl) - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - return GlobalXRayImpl->log_init(BufferSize, MaxBuffers, Args, ArgsSize); -} - -XRayLogInitStatus __xray_log_init_mode(const char *Mode, const char *Config) - XRAY_NEVER_INSTRUMENT { - SpinMutexLock Guard(&XRayImplMutex); - if (!GlobalXRayImpl) - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - - if (Config == nullptr) - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - - // Check first whether the current mode is the same as what we expect. - if (CurrentMode == nullptr || internal_strcmp(CurrentMode->Mode, Mode) != 0) - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - - // Here we do some work to coerce the pointer we're provided, so that - // the implementations that still take void* pointers can handle the - // data provided in the Config argument. - return GlobalXRayImpl->log_init( - 0, 0, const_cast(static_cast(Config)), 0); -} - -XRayLogInitStatus -__xray_log_init_mode_bin(const char *Mode, const char *Config, - size_t ConfigSize) XRAY_NEVER_INSTRUMENT { - SpinMutexLock Guard(&XRayImplMutex); - if (!GlobalXRayImpl) - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - - if (Config == nullptr) - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - - // Check first whether the current mode is the same as what we expect. - if (CurrentMode == nullptr || internal_strcmp(CurrentMode->Mode, Mode) != 0) - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - - // Here we do some work to coerce the pointer we're provided, so that - // the implementations that still take void* pointers can handle the - // data provided in the Config argument. 
- return GlobalXRayImpl->log_init( - 0, 0, const_cast(static_cast(Config)), ConfigSize); -} - -XRayLogInitStatus __xray_log_finalize() XRAY_NEVER_INSTRUMENT { - SpinMutexLock Guard(&XRayImplMutex); - if (!GlobalXRayImpl) - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - return GlobalXRayImpl->log_finalize(); -} - -XRayLogFlushStatus __xray_log_flushLog() XRAY_NEVER_INSTRUMENT { - SpinMutexLock Guard(&XRayImplMutex); - if (!GlobalXRayImpl) - return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; - return GlobalXRayImpl->flush_log(); -} - -XRayLogFlushStatus __xray_log_process_buffers( - void (*Processor)(const char *, XRayBuffer)) XRAY_NEVER_INSTRUMENT { - // We want to make sure that there will be no changes to the global state for - // the log by synchronising on the XRayBufferIteratorMutex. - if (!GlobalXRayImpl) - return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; - auto Iterator = reinterpret_cast( - atomic_load(&XRayBufferIterator, memory_order_acquire)); - auto Buffer = (*Iterator)(XRayBuffer{nullptr, 0}); - auto Mode = CurrentMode ? CurrentMode->Mode : nullptr; - while (Buffer.Data != nullptr) { - (*Processor)(Mode, Buffer); - Buffer = (*Iterator)(Buffer); - } - return XRayLogFlushStatus::XRAY_LOG_FLUSHED; -} diff --git a/lib/xray/xray_log_interface.cpp b/lib/xray/xray_log_interface.cpp new file mode 100644 index 000000000000..fc70373f9dac --- /dev/null +++ b/lib/xray/xray_log_interface.cpp @@ -0,0 +1,209 @@ +//===-- xray_log_interface.cpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. 
+// +//===----------------------------------------------------------------------===// +#include "xray/xray_log_interface.h" + +#include "sanitizer_common/sanitizer_allocator_internal.h" +#include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_mutex.h" +#include "xray/xray_interface.h" +#include "xray_defs.h" + +namespace __xray { +static SpinMutex XRayImplMutex; +static XRayLogImpl CurrentXRayImpl{nullptr, nullptr, nullptr, nullptr}; +static XRayLogImpl *GlobalXRayImpl = nullptr; + +// This is the default implementation of a buffer iterator, which always yields +// a null buffer. +XRayBuffer NullBufferIterator(XRayBuffer) XRAY_NEVER_INSTRUMENT { + return {nullptr, 0}; +} + +// This is the global function responsible for iterating through given buffers. +atomic_uintptr_t XRayBufferIterator{ + reinterpret_cast(&NullBufferIterator)}; + +// We use a linked list of Mode to XRayLogImpl mappings. This is a linked list +// when it should be a map because we're avoiding having to depend on C++ +// standard library data structures at this level of the implementation. 
+struct ModeImpl { + ModeImpl *Next; + const char *Mode; + XRayLogImpl Impl; +}; + +static ModeImpl SentinelModeImpl{ + nullptr, nullptr, {nullptr, nullptr, nullptr, nullptr}}; +static ModeImpl *ModeImpls = &SentinelModeImpl; +static const ModeImpl *CurrentMode = nullptr; + +} // namespace __xray + +using namespace __xray; + +void __xray_log_set_buffer_iterator(XRayBuffer (*Iterator)(XRayBuffer)) + XRAY_NEVER_INSTRUMENT { + atomic_store(&__xray::XRayBufferIterator, + reinterpret_cast(Iterator), memory_order_release); +} + +void __xray_log_remove_buffer_iterator() XRAY_NEVER_INSTRUMENT { + __xray_log_set_buffer_iterator(&NullBufferIterator); +} + +XRayLogRegisterStatus +__xray_log_register_mode(const char *Mode, + XRayLogImpl Impl) XRAY_NEVER_INSTRUMENT { + if (Impl.flush_log == nullptr || Impl.handle_arg0 == nullptr || + Impl.log_finalize == nullptr || Impl.log_init == nullptr) + return XRayLogRegisterStatus::XRAY_INCOMPLETE_IMPL; + + SpinMutexLock Guard(&XRayImplMutex); + // First, look for whether the mode already has a registered implementation. 
+ for (ModeImpl *it = ModeImpls; it != &SentinelModeImpl; it = it->Next) { + if (!internal_strcmp(Mode, it->Mode)) + return XRayLogRegisterStatus::XRAY_DUPLICATE_MODE; + } + auto *NewModeImpl = static_cast(InternalAlloc(sizeof(ModeImpl))); + NewModeImpl->Next = ModeImpls; + NewModeImpl->Mode = internal_strdup(Mode); + NewModeImpl->Impl = Impl; + ModeImpls = NewModeImpl; + return XRayLogRegisterStatus::XRAY_REGISTRATION_OK; +} + +XRayLogRegisterStatus +__xray_log_select_mode(const char *Mode) XRAY_NEVER_INSTRUMENT { + SpinMutexLock Guard(&XRayImplMutex); + for (ModeImpl *it = ModeImpls; it != &SentinelModeImpl; it = it->Next) { + if (!internal_strcmp(Mode, it->Mode)) { + CurrentMode = it; + CurrentXRayImpl = it->Impl; + GlobalXRayImpl = &CurrentXRayImpl; + __xray_set_handler(it->Impl.handle_arg0); + return XRayLogRegisterStatus::XRAY_REGISTRATION_OK; + } + } + return XRayLogRegisterStatus::XRAY_MODE_NOT_FOUND; +} + +const char *__xray_log_get_current_mode() XRAY_NEVER_INSTRUMENT { + SpinMutexLock Guard(&XRayImplMutex); + if (CurrentMode != nullptr) + return CurrentMode->Mode; + return nullptr; +} + +void __xray_set_log_impl(XRayLogImpl Impl) XRAY_NEVER_INSTRUMENT { + if (Impl.log_init == nullptr || Impl.log_finalize == nullptr || + Impl.handle_arg0 == nullptr || Impl.flush_log == nullptr) { + SpinMutexLock Guard(&XRayImplMutex); + GlobalXRayImpl = nullptr; + CurrentMode = nullptr; + __xray_remove_handler(); + __xray_remove_handler_arg1(); + return; + } + + SpinMutexLock Guard(&XRayImplMutex); + CurrentXRayImpl = Impl; + GlobalXRayImpl = &CurrentXRayImpl; + __xray_set_handler(Impl.handle_arg0); +} + +void __xray_remove_log_impl() XRAY_NEVER_INSTRUMENT { + SpinMutexLock Guard(&XRayImplMutex); + GlobalXRayImpl = nullptr; + __xray_remove_handler(); + __xray_remove_handler_arg1(); +} + +XRayLogInitStatus __xray_log_init(size_t BufferSize, size_t MaxBuffers, + void *Args, + size_t ArgsSize) XRAY_NEVER_INSTRUMENT { + SpinMutexLock Guard(&XRayImplMutex); + if 
(!GlobalXRayImpl) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + return GlobalXRayImpl->log_init(BufferSize, MaxBuffers, Args, ArgsSize); +} + +XRayLogInitStatus __xray_log_init_mode(const char *Mode, const char *Config) + XRAY_NEVER_INSTRUMENT { + SpinMutexLock Guard(&XRayImplMutex); + if (!GlobalXRayImpl) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + + if (Config == nullptr) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + + // Check first whether the current mode is the same as what we expect. + if (CurrentMode == nullptr || internal_strcmp(CurrentMode->Mode, Mode) != 0) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + + // Here we do some work to coerce the pointer we're provided, so that + // the implementations that still take void* pointers can handle the + // data provided in the Config argument. + return GlobalXRayImpl->log_init( + 0, 0, const_cast(static_cast(Config)), 0); +} + +XRayLogInitStatus +__xray_log_init_mode_bin(const char *Mode, const char *Config, + size_t ConfigSize) XRAY_NEVER_INSTRUMENT { + SpinMutexLock Guard(&XRayImplMutex); + if (!GlobalXRayImpl) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + + if (Config == nullptr) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + + // Check first whether the current mode is the same as what we expect. + if (CurrentMode == nullptr || internal_strcmp(CurrentMode->Mode, Mode) != 0) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + + // Here we do some work to coerce the pointer we're provided, so that + // the implementations that still take void* pointers can handle the + // data provided in the Config argument. 
+ return GlobalXRayImpl->log_init( + 0, 0, const_cast(static_cast(Config)), ConfigSize); +} + +XRayLogInitStatus __xray_log_finalize() XRAY_NEVER_INSTRUMENT { + SpinMutexLock Guard(&XRayImplMutex); + if (!GlobalXRayImpl) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + return GlobalXRayImpl->log_finalize(); +} + +XRayLogFlushStatus __xray_log_flushLog() XRAY_NEVER_INSTRUMENT { + SpinMutexLock Guard(&XRayImplMutex); + if (!GlobalXRayImpl) + return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; + return GlobalXRayImpl->flush_log(); +} + +XRayLogFlushStatus __xray_log_process_buffers( + void (*Processor)(const char *, XRayBuffer)) XRAY_NEVER_INSTRUMENT { + // We want to make sure that there will be no changes to the global state for + // the log by synchronising on the XRayBufferIteratorMutex. + if (!GlobalXRayImpl) + return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; + auto Iterator = reinterpret_cast( + atomic_load(&XRayBufferIterator, memory_order_acquire)); + auto Buffer = (*Iterator)(XRayBuffer{nullptr, 0}); + auto Mode = CurrentMode ? CurrentMode->Mode : nullptr; + while (Buffer.Data != nullptr) { + (*Processor)(Mode, Buffer); + Buffer = (*Iterator)(Buffer); + } + return XRayLogFlushStatus::XRAY_LOG_FLUSHED; +} diff --git a/lib/xray/xray_mips.cc b/lib/xray/xray_mips.cc deleted file mode 100644 index 80990ab8d639..000000000000 --- a/lib/xray/xray_mips.cc +++ /dev/null @@ -1,170 +0,0 @@ -//===-- xray_mips.cc --------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instrumentation system. -// -// Implementation of MIPS-specific routines (32-bit). 
-// -//===----------------------------------------------------------------------===// -#include "sanitizer_common/sanitizer_common.h" -#include "xray_defs.h" -#include "xray_interface_internal.h" -#include - -namespace __xray { - -// The machine codes for some instructions used in runtime patching. -enum PatchOpcodes : uint32_t { - PO_ADDIU = 0x24000000, // addiu rt, rs, imm - PO_SW = 0xAC000000, // sw rt, offset(sp) - PO_LUI = 0x3C000000, // lui rs, %hi(address) - PO_ORI = 0x34000000, // ori rt, rs, %lo(address) - PO_JALR = 0x0000F809, // jalr rs - PO_LW = 0x8C000000, // lw rt, offset(address) - PO_B44 = 0x1000000b, // b #44 - PO_NOP = 0x0, // nop -}; - -enum RegNum : uint32_t { - RN_T0 = 0x8, - RN_T9 = 0x19, - RN_RA = 0x1F, - RN_SP = 0x1D, -}; - -inline static uint32_t encodeInstruction(uint32_t Opcode, uint32_t Rs, - uint32_t Rt, - uint32_t Imm) XRAY_NEVER_INSTRUMENT { - return (Opcode | Rs << 21 | Rt << 16 | Imm); -} - -inline static uint32_t -encodeSpecialInstruction(uint32_t Opcode, uint32_t Rs, uint32_t Rt, uint32_t Rd, - uint32_t Imm) XRAY_NEVER_INSTRUMENT { - return (Rs << 21 | Rt << 16 | Rd << 11 | Imm << 6 | Opcode); -} - -inline static bool patchSled(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled, - void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { - // When |Enable| == true, - // We replace the following compile-time stub (sled): - // - // xray_sled_n: - // B .tmpN - // 11 NOPs (44 bytes) - // .tmpN - // ADDIU T9, T9, 44 - // - // With the following runtime patch: - // - // xray_sled_n (32-bit): - // addiu sp, sp, -8 ;create stack frame - // nop - // sw ra, 4(sp) ;save return address - // sw t9, 0(sp) ;save register t9 - // lui t9, %hi(__xray_FunctionEntry/Exit) - // ori t9, t9, %lo(__xray_FunctionEntry/Exit) - // lui t0, %hi(function_id) - // jalr t9 ;call Tracing hook - // ori t0, t0, %lo(function_id) ;pass function id (delay slot) - // lw t9, 0(sp) ;restore register t9 - // lw ra, 4(sp) ;restore return address - // addiu sp, sp, 
8 ;delete stack frame - // - // We add 44 bytes to t9 because we want to adjust the function pointer to - // the actual start of function i.e. the address just after the noop sled. - // We do this because gp displacement relocation is emitted at the start of - // of the function i.e after the nop sled and to correctly calculate the - // global offset table address, t9 must hold the address of the instruction - // containing the gp displacement relocation. - // FIXME: Is this correct for the static relocation model? - // - // Replacement of the first 4-byte instruction should be the last and atomic - // operation, so that the user code which reaches the sled concurrently - // either jumps over the whole sled, or executes the whole sled when the - // latter is ready. - // - // When |Enable|==false, we set back the first instruction in the sled to be - // B #44 - - if (Enable) { - uint32_t LoTracingHookAddr = - reinterpret_cast(TracingHook) & 0xffff; - uint32_t HiTracingHookAddr = - (reinterpret_cast(TracingHook) >> 16) & 0xffff; - uint32_t LoFunctionID = FuncId & 0xffff; - uint32_t HiFunctionID = (FuncId >> 16) & 0xffff; - *reinterpret_cast(Sled.Address + 8) = encodeInstruction( - PatchOpcodes::PO_SW, RegNum::RN_SP, RegNum::RN_RA, 0x4); - *reinterpret_cast(Sled.Address + 12) = encodeInstruction( - PatchOpcodes::PO_SW, RegNum::RN_SP, RegNum::RN_T9, 0x0); - *reinterpret_cast(Sled.Address + 16) = encodeInstruction( - PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T9, HiTracingHookAddr); - *reinterpret_cast(Sled.Address + 20) = encodeInstruction( - PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9, LoTracingHookAddr); - *reinterpret_cast(Sled.Address + 24) = encodeInstruction( - PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T0, HiFunctionID); - *reinterpret_cast(Sled.Address + 28) = encodeSpecialInstruction( - PatchOpcodes::PO_JALR, RegNum::RN_T9, 0x0, RegNum::RN_RA, 0X0); - *reinterpret_cast(Sled.Address + 32) = encodeInstruction( - PatchOpcodes::PO_ORI, RegNum::RN_T0, RegNum::RN_T0, 
LoFunctionID); - *reinterpret_cast(Sled.Address + 36) = encodeInstruction( - PatchOpcodes::PO_LW, RegNum::RN_SP, RegNum::RN_T9, 0x0); - *reinterpret_cast(Sled.Address + 40) = encodeInstruction( - PatchOpcodes::PO_LW, RegNum::RN_SP, RegNum::RN_RA, 0x4); - *reinterpret_cast(Sled.Address + 44) = encodeInstruction( - PatchOpcodes::PO_ADDIU, RegNum::RN_SP, RegNum::RN_SP, 0x8); - uint32_t CreateStackSpaceInstr = encodeInstruction( - PatchOpcodes::PO_ADDIU, RegNum::RN_SP, RegNum::RN_SP, 0xFFF8); - std::atomic_store_explicit( - reinterpret_cast *>(Sled.Address), - uint32_t(CreateStackSpaceInstr), std::memory_order_release); - } else { - std::atomic_store_explicit( - reinterpret_cast *>(Sled.Address), - uint32_t(PatchOpcodes::PO_B44), std::memory_order_release); - } - return true; -} - -bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled, - void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { - return patchSled(Enable, FuncId, Sled, Trampoline); -} - -bool patchFunctionExit(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); -} - -bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - // FIXME: In the future we'd need to distinguish between non-tail exits and - // tail exits for better information preservation. - return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); -} - -bool patchCustomEvent(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - // FIXME: Implement in mips? - return false; -} - -bool patchTypedEvent(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - // FIXME: Implement in mips? 
- return false; -} - -} // namespace __xray - -extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { - // FIXME: this will have to be implemented in the trampoline assembly file -} diff --git a/lib/xray/xray_mips.cpp b/lib/xray/xray_mips.cpp new file mode 100644 index 000000000000..26fc50374471 --- /dev/null +++ b/lib/xray/xray_mips.cpp @@ -0,0 +1,170 @@ +//===-- xray_mips.cpp -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of MIPS-specific routines (32-bit). +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include + +namespace __xray { + +// The machine codes for some instructions used in runtime patching. 
+enum PatchOpcodes : uint32_t { + PO_ADDIU = 0x24000000, // addiu rt, rs, imm + PO_SW = 0xAC000000, // sw rt, offset(sp) + PO_LUI = 0x3C000000, // lui rs, %hi(address) + PO_ORI = 0x34000000, // ori rt, rs, %lo(address) + PO_JALR = 0x0000F809, // jalr rs + PO_LW = 0x8C000000, // lw rt, offset(address) + PO_B44 = 0x1000000b, // b #44 + PO_NOP = 0x0, // nop +}; + +enum RegNum : uint32_t { + RN_T0 = 0x8, + RN_T9 = 0x19, + RN_RA = 0x1F, + RN_SP = 0x1D, +}; + +inline static uint32_t encodeInstruction(uint32_t Opcode, uint32_t Rs, + uint32_t Rt, + uint32_t Imm) XRAY_NEVER_INSTRUMENT { + return (Opcode | Rs << 21 | Rt << 16 | Imm); +} + +inline static uint32_t +encodeSpecialInstruction(uint32_t Opcode, uint32_t Rs, uint32_t Rt, uint32_t Rd, + uint32_t Imm) XRAY_NEVER_INSTRUMENT { + return (Rs << 21 | Rt << 16 | Rd << 11 | Imm << 6 | Opcode); +} + +inline static bool patchSled(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { + // When |Enable| == true, + // We replace the following compile-time stub (sled): + // + // xray_sled_n: + // B .tmpN + // 11 NOPs (44 bytes) + // .tmpN + // ADDIU T9, T9, 44 + // + // With the following runtime patch: + // + // xray_sled_n (32-bit): + // addiu sp, sp, -8 ;create stack frame + // nop + // sw ra, 4(sp) ;save return address + // sw t9, 0(sp) ;save register t9 + // lui t9, %hi(__xray_FunctionEntry/Exit) + // ori t9, t9, %lo(__xray_FunctionEntry/Exit) + // lui t0, %hi(function_id) + // jalr t9 ;call Tracing hook + // ori t0, t0, %lo(function_id) ;pass function id (delay slot) + // lw t9, 0(sp) ;restore register t9 + // lw ra, 4(sp) ;restore return address + // addiu sp, sp, 8 ;delete stack frame + // + // We add 44 bytes to t9 because we want to adjust the function pointer to + // the actual start of function i.e. the address just after the noop sled. 
+ // We do this because gp displacement relocation is emitted at the start of + // of the function i.e after the nop sled and to correctly calculate the + // global offset table address, t9 must hold the address of the instruction + // containing the gp displacement relocation. + // FIXME: Is this correct for the static relocation model? + // + // Replacement of the first 4-byte instruction should be the last and atomic + // operation, so that the user code which reaches the sled concurrently + // either jumps over the whole sled, or executes the whole sled when the + // latter is ready. + // + // When |Enable|==false, we set back the first instruction in the sled to be + // B #44 + + if (Enable) { + uint32_t LoTracingHookAddr = + reinterpret_cast(TracingHook) & 0xffff; + uint32_t HiTracingHookAddr = + (reinterpret_cast(TracingHook) >> 16) & 0xffff; + uint32_t LoFunctionID = FuncId & 0xffff; + uint32_t HiFunctionID = (FuncId >> 16) & 0xffff; + *reinterpret_cast(Sled.Address + 8) = encodeInstruction( + PatchOpcodes::PO_SW, RegNum::RN_SP, RegNum::RN_RA, 0x4); + *reinterpret_cast(Sled.Address + 12) = encodeInstruction( + PatchOpcodes::PO_SW, RegNum::RN_SP, RegNum::RN_T9, 0x0); + *reinterpret_cast(Sled.Address + 16) = encodeInstruction( + PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T9, HiTracingHookAddr); + *reinterpret_cast(Sled.Address + 20) = encodeInstruction( + PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9, LoTracingHookAddr); + *reinterpret_cast(Sled.Address + 24) = encodeInstruction( + PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T0, HiFunctionID); + *reinterpret_cast(Sled.Address + 28) = encodeSpecialInstruction( + PatchOpcodes::PO_JALR, RegNum::RN_T9, 0x0, RegNum::RN_RA, 0X0); + *reinterpret_cast(Sled.Address + 32) = encodeInstruction( + PatchOpcodes::PO_ORI, RegNum::RN_T0, RegNum::RN_T0, LoFunctionID); + *reinterpret_cast(Sled.Address + 36) = encodeInstruction( + PatchOpcodes::PO_LW, RegNum::RN_SP, RegNum::RN_T9, 0x0); + *reinterpret_cast(Sled.Address + 40) = 
encodeInstruction( + PatchOpcodes::PO_LW, RegNum::RN_SP, RegNum::RN_RA, 0x4); + *reinterpret_cast(Sled.Address + 44) = encodeInstruction( + PatchOpcodes::PO_ADDIU, RegNum::RN_SP, RegNum::RN_SP, 0x8); + uint32_t CreateStackSpaceInstr = encodeInstruction( + PatchOpcodes::PO_ADDIU, RegNum::RN_SP, RegNum::RN_SP, 0xFFF8); + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), + uint32_t(CreateStackSpaceInstr), std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), + uint32_t(PatchOpcodes::PO_B44), std::memory_order_release); + } + return true; +} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, Trampoline); +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: In the future we'd need to distinguish between non-tail exits and + // tail exits for better information preservation. + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in mips? + return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in mips? 
+ return false; +} + +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { + // FIXME: this will have to be implemented in the trampoline assembly file +} diff --git a/lib/xray/xray_mips64.cc b/lib/xray/xray_mips64.cc deleted file mode 100644 index 73c8924f9a0b..000000000000 --- a/lib/xray/xray_mips64.cc +++ /dev/null @@ -1,178 +0,0 @@ -//===-- xray_mips64.cc ------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instrumentation system. -// -// Implementation of MIPS64-specific routines. -// -//===----------------------------------------------------------------------===// -#include "sanitizer_common/sanitizer_common.h" -#include "xray_defs.h" -#include "xray_interface_internal.h" -#include - -namespace __xray { - -// The machine codes for some instructions used in runtime patching. 
-enum PatchOpcodes : uint32_t { - PO_DADDIU = 0x64000000, // daddiu rt, rs, imm - PO_SD = 0xFC000000, // sd rt, base(offset) - PO_LUI = 0x3C000000, // lui rt, imm - PO_ORI = 0x34000000, // ori rt, rs, imm - PO_DSLL = 0x00000038, // dsll rd, rt, sa - PO_JALR = 0x00000009, // jalr rs - PO_LD = 0xDC000000, // ld rt, base(offset) - PO_B60 = 0x1000000f, // b #60 - PO_NOP = 0x0, // nop -}; - -enum RegNum : uint32_t { - RN_T0 = 0xC, - RN_T9 = 0x19, - RN_RA = 0x1F, - RN_SP = 0x1D, -}; - -inline static uint32_t encodeInstruction(uint32_t Opcode, uint32_t Rs, - uint32_t Rt, - uint32_t Imm) XRAY_NEVER_INSTRUMENT { - return (Opcode | Rs << 21 | Rt << 16 | Imm); -} - -inline static uint32_t -encodeSpecialInstruction(uint32_t Opcode, uint32_t Rs, uint32_t Rt, uint32_t Rd, - uint32_t Imm) XRAY_NEVER_INSTRUMENT { - return (Rs << 21 | Rt << 16 | Rd << 11 | Imm << 6 | Opcode); -} - -inline static bool patchSled(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled, - void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { - // When |Enable| == true, - // We replace the following compile-time stub (sled): - // - // xray_sled_n: - // B .tmpN - // 15 NOPs (60 bytes) - // .tmpN - // - // With the following runtime patch: - // - // xray_sled_n (64-bit): - // daddiu sp, sp, -16 ;create stack frame - // nop - // sd ra, 8(sp) ;save return address - // sd t9, 0(sp) ;save register t9 - // lui t9, %highest(__xray_FunctionEntry/Exit) - // ori t9, t9, %higher(__xray_FunctionEntry/Exit) - // dsll t9, t9, 16 - // ori t9, t9, %hi(__xray_FunctionEntry/Exit) - // dsll t9, t9, 16 - // ori t9, t9, %lo(__xray_FunctionEntry/Exit) - // lui t0, %hi(function_id) - // jalr t9 ;call Tracing hook - // ori t0, t0, %lo(function_id) ;pass function id (delay slot) - // ld t9, 0(sp) ;restore register t9 - // ld ra, 8(sp) ;restore return address - // daddiu sp, sp, 16 ;delete stack frame - // - // Replacement of the first 4-byte instruction should be the last and atomic - // operation, so that the user code 
which reaches the sled concurrently - // either jumps over the whole sled, or executes the whole sled when the - // latter is ready. - // - // When |Enable|==false, we set back the first instruction in the sled to be - // B #60 - - if (Enable) { - uint32_t LoTracingHookAddr = - reinterpret_cast(TracingHook) & 0xffff; - uint32_t HiTracingHookAddr = - (reinterpret_cast(TracingHook) >> 16) & 0xffff; - uint32_t HigherTracingHookAddr = - (reinterpret_cast(TracingHook) >> 32) & 0xffff; - uint32_t HighestTracingHookAddr = - (reinterpret_cast(TracingHook) >> 48) & 0xffff; - uint32_t LoFunctionID = FuncId & 0xffff; - uint32_t HiFunctionID = (FuncId >> 16) & 0xffff; - *reinterpret_cast(Sled.Address + 8) = encodeInstruction( - PatchOpcodes::PO_SD, RegNum::RN_SP, RegNum::RN_RA, 0x8); - *reinterpret_cast(Sled.Address + 12) = encodeInstruction( - PatchOpcodes::PO_SD, RegNum::RN_SP, RegNum::RN_T9, 0x0); - *reinterpret_cast(Sled.Address + 16) = encodeInstruction( - PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T9, HighestTracingHookAddr); - *reinterpret_cast(Sled.Address + 20) = - encodeInstruction(PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9, - HigherTracingHookAddr); - *reinterpret_cast(Sled.Address + 24) = encodeSpecialInstruction( - PatchOpcodes::PO_DSLL, 0x0, RegNum::RN_T9, RegNum::RN_T9, 0x10); - *reinterpret_cast(Sled.Address + 28) = encodeInstruction( - PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9, HiTracingHookAddr); - *reinterpret_cast(Sled.Address + 32) = encodeSpecialInstruction( - PatchOpcodes::PO_DSLL, 0x0, RegNum::RN_T9, RegNum::RN_T9, 0x10); - *reinterpret_cast(Sled.Address + 36) = encodeInstruction( - PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9, LoTracingHookAddr); - *reinterpret_cast(Sled.Address + 40) = encodeInstruction( - PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T0, HiFunctionID); - *reinterpret_cast(Sled.Address + 44) = encodeSpecialInstruction( - PatchOpcodes::PO_JALR, RegNum::RN_T9, 0x0, RegNum::RN_RA, 0X0); - *reinterpret_cast(Sled.Address + 48) = 
encodeInstruction( - PatchOpcodes::PO_ORI, RegNum::RN_T0, RegNum::RN_T0, LoFunctionID); - *reinterpret_cast(Sled.Address + 52) = encodeInstruction( - PatchOpcodes::PO_LD, RegNum::RN_SP, RegNum::RN_T9, 0x0); - *reinterpret_cast(Sled.Address + 56) = encodeInstruction( - PatchOpcodes::PO_LD, RegNum::RN_SP, RegNum::RN_RA, 0x8); - *reinterpret_cast(Sled.Address + 60) = encodeInstruction( - PatchOpcodes::PO_DADDIU, RegNum::RN_SP, RegNum::RN_SP, 0x10); - uint32_t CreateStackSpace = encodeInstruction( - PatchOpcodes::PO_DADDIU, RegNum::RN_SP, RegNum::RN_SP, 0xfff0); - std::atomic_store_explicit( - reinterpret_cast *>(Sled.Address), - CreateStackSpace, std::memory_order_release); - } else { - std::atomic_store_explicit( - reinterpret_cast *>(Sled.Address), - uint32_t(PatchOpcodes::PO_B60), std::memory_order_release); - } - return true; -} - -bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled, - void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { - return patchSled(Enable, FuncId, Sled, Trampoline); -} - -bool patchFunctionExit(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); -} - -bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - // FIXME: In the future we'd need to distinguish between non-tail exits and - // tail exits for better information preservation. - return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); -} - -bool patchCustomEvent(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - // FIXME: Implement in mips64? - return false; -} - -bool patchTypedEvent(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - // FIXME: Implement in mips64? 
- return false; -} -} // namespace __xray - -extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { - // FIXME: this will have to be implemented in the trampoline assembly file -} diff --git a/lib/xray/xray_mips64.cpp b/lib/xray/xray_mips64.cpp new file mode 100644 index 000000000000..62c67ff7376d --- /dev/null +++ b/lib/xray/xray_mips64.cpp @@ -0,0 +1,178 @@ +//===-- xray_mips64.cpp -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of MIPS64-specific routines. +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include + +namespace __xray { + +// The machine codes for some instructions used in runtime patching. 
+enum PatchOpcodes : uint32_t { + PO_DADDIU = 0x64000000, // daddiu rt, rs, imm + PO_SD = 0xFC000000, // sd rt, base(offset) + PO_LUI = 0x3C000000, // lui rt, imm + PO_ORI = 0x34000000, // ori rt, rs, imm + PO_DSLL = 0x00000038, // dsll rd, rt, sa + PO_JALR = 0x00000009, // jalr rs + PO_LD = 0xDC000000, // ld rt, base(offset) + PO_B60 = 0x1000000f, // b #60 + PO_NOP = 0x0, // nop +}; + +enum RegNum : uint32_t { + RN_T0 = 0xC, + RN_T9 = 0x19, + RN_RA = 0x1F, + RN_SP = 0x1D, +}; + +inline static uint32_t encodeInstruction(uint32_t Opcode, uint32_t Rs, + uint32_t Rt, + uint32_t Imm) XRAY_NEVER_INSTRUMENT { + return (Opcode | Rs << 21 | Rt << 16 | Imm); +} + +inline static uint32_t +encodeSpecialInstruction(uint32_t Opcode, uint32_t Rs, uint32_t Rt, uint32_t Rd, + uint32_t Imm) XRAY_NEVER_INSTRUMENT { + return (Rs << 21 | Rt << 16 | Rd << 11 | Imm << 6 | Opcode); +} + +inline static bool patchSled(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { + // When |Enable| == true, + // We replace the following compile-time stub (sled): + // + // xray_sled_n: + // B .tmpN + // 15 NOPs (60 bytes) + // .tmpN + // + // With the following runtime patch: + // + // xray_sled_n (64-bit): + // daddiu sp, sp, -16 ;create stack frame + // nop + // sd ra, 8(sp) ;save return address + // sd t9, 0(sp) ;save register t9 + // lui t9, %highest(__xray_FunctionEntry/Exit) + // ori t9, t9, %higher(__xray_FunctionEntry/Exit) + // dsll t9, t9, 16 + // ori t9, t9, %hi(__xray_FunctionEntry/Exit) + // dsll t9, t9, 16 + // ori t9, t9, %lo(__xray_FunctionEntry/Exit) + // lui t0, %hi(function_id) + // jalr t9 ;call Tracing hook + // ori t0, t0, %lo(function_id) ;pass function id (delay slot) + // ld t9, 0(sp) ;restore register t9 + // ld ra, 8(sp) ;restore return address + // daddiu sp, sp, 16 ;delete stack frame + // + // Replacement of the first 4-byte instruction should be the last and atomic + // operation, so that the user code 
which reaches the sled concurrently + // either jumps over the whole sled, or executes the whole sled when the + // latter is ready. + // + // When |Enable|==false, we set back the first instruction in the sled to be + // B #60 + + if (Enable) { + uint32_t LoTracingHookAddr = + reinterpret_cast(TracingHook) & 0xffff; + uint32_t HiTracingHookAddr = + (reinterpret_cast(TracingHook) >> 16) & 0xffff; + uint32_t HigherTracingHookAddr = + (reinterpret_cast(TracingHook) >> 32) & 0xffff; + uint32_t HighestTracingHookAddr = + (reinterpret_cast(TracingHook) >> 48) & 0xffff; + uint32_t LoFunctionID = FuncId & 0xffff; + uint32_t HiFunctionID = (FuncId >> 16) & 0xffff; + *reinterpret_cast(Sled.Address + 8) = encodeInstruction( + PatchOpcodes::PO_SD, RegNum::RN_SP, RegNum::RN_RA, 0x8); + *reinterpret_cast(Sled.Address + 12) = encodeInstruction( + PatchOpcodes::PO_SD, RegNum::RN_SP, RegNum::RN_T9, 0x0); + *reinterpret_cast(Sled.Address + 16) = encodeInstruction( + PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T9, HighestTracingHookAddr); + *reinterpret_cast(Sled.Address + 20) = + encodeInstruction(PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9, + HigherTracingHookAddr); + *reinterpret_cast(Sled.Address + 24) = encodeSpecialInstruction( + PatchOpcodes::PO_DSLL, 0x0, RegNum::RN_T9, RegNum::RN_T9, 0x10); + *reinterpret_cast(Sled.Address + 28) = encodeInstruction( + PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9, HiTracingHookAddr); + *reinterpret_cast(Sled.Address + 32) = encodeSpecialInstruction( + PatchOpcodes::PO_DSLL, 0x0, RegNum::RN_T9, RegNum::RN_T9, 0x10); + *reinterpret_cast(Sled.Address + 36) = encodeInstruction( + PatchOpcodes::PO_ORI, RegNum::RN_T9, RegNum::RN_T9, LoTracingHookAddr); + *reinterpret_cast(Sled.Address + 40) = encodeInstruction( + PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T0, HiFunctionID); + *reinterpret_cast(Sled.Address + 44) = encodeSpecialInstruction( + PatchOpcodes::PO_JALR, RegNum::RN_T9, 0x0, RegNum::RN_RA, 0X0); + *reinterpret_cast(Sled.Address + 48) = 
encodeInstruction( + PatchOpcodes::PO_ORI, RegNum::RN_T0, RegNum::RN_T0, LoFunctionID); + *reinterpret_cast(Sled.Address + 52) = encodeInstruction( + PatchOpcodes::PO_LD, RegNum::RN_SP, RegNum::RN_T9, 0x0); + *reinterpret_cast(Sled.Address + 56) = encodeInstruction( + PatchOpcodes::PO_LD, RegNum::RN_SP, RegNum::RN_RA, 0x8); + *reinterpret_cast(Sled.Address + 60) = encodeInstruction( + PatchOpcodes::PO_DADDIU, RegNum::RN_SP, RegNum::RN_SP, 0x10); + uint32_t CreateStackSpace = encodeInstruction( + PatchOpcodes::PO_DADDIU, RegNum::RN_SP, RegNum::RN_SP, 0xfff0); + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), + CreateStackSpace, std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), + uint32_t(PatchOpcodes::PO_B60), std::memory_order_release); + } + return true; +} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, Trampoline); +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: In the future we'd need to distinguish between non-tail exits and + // tail exits for better information preservation. + return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in mips64? + return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in mips64? 
+ return false; +} +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { + // FIXME: this will have to be implemented in the trampoline assembly file +} diff --git a/lib/xray/xray_powerpc64.cc b/lib/xray/xray_powerpc64.cc deleted file mode 100644 index abc2becf5b4d..000000000000 --- a/lib/xray/xray_powerpc64.cc +++ /dev/null @@ -1,111 +0,0 @@ -//===-- xray_powerpc64.cc ---------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instrumentation system. -// -// Implementation of powerpc64 and powerpc64le routines. -// -//===----------------------------------------------------------------------===// -#include "sanitizer_common/sanitizer_common.h" -#include "xray_defs.h" -#include "xray_interface_internal.h" -#include "xray_utils.h" -#include -#include -#include - -#ifndef __LITTLE_ENDIAN__ -#error powerpc64 big endian is not supported for now. 
-#endif - -namespace { - -constexpr unsigned long long JumpOverInstNum = 7; - -void clearCache(void *Addr, size_t Len) { - const size_t LineSize = 32; - - const intptr_t Mask = ~(LineSize - 1); - const intptr_t StartLine = ((intptr_t)Addr) & Mask; - const intptr_t EndLine = ((intptr_t)Addr + Len + LineSize - 1) & Mask; - - for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) - asm volatile("dcbf 0, %0" : : "r"(Line)); - asm volatile("sync"); - - for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) - asm volatile("icbi 0, %0" : : "r"(Line)); - asm volatile("isync"); -} - -} // namespace - -extern "C" void __clear_cache(void *start, void *end); - -namespace __xray { - -bool patchFunctionEntry(const bool Enable, uint32_t FuncId, - const XRaySledEntry &Sled, - void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { - if (Enable) { - // lis 0, FuncId[16..32] - // li 0, FuncId[0..15] - *reinterpret_cast(Sled.Address) = - (0x3c000000ull + (FuncId >> 16)) + - ((0x60000000ull + (FuncId & 0xffff)) << 32); - } else { - // b +JumpOverInstNum instructions. - *reinterpret_cast(Sled.Address) = - 0x48000000ull + (JumpOverInstNum << 2); - } - clearCache(reinterpret_cast(Sled.Address), 8); - return true; -} - -bool patchFunctionExit(const bool Enable, uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - if (Enable) { - // lis 0, FuncId[16..32] - // li 0, FuncId[0..15] - *reinterpret_cast(Sled.Address) = - (0x3c000000ull + (FuncId >> 16)) + - ((0x60000000ull + (FuncId & 0xffff)) << 32); - } else { - // Copy the blr/b instruction after JumpOverInstNum instructions. - *reinterpret_cast(Sled.Address) = - *(reinterpret_cast(Sled.Address) + JumpOverInstNum); - } - clearCache(reinterpret_cast(Sled.Address), 8); - return true; -} - -bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - return patchFunctionExit(Enable, FuncId, Sled); -} - -// FIXME: Maybe implement this better? 
-bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } - -bool patchCustomEvent(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - // FIXME: Implement in powerpc64? - return false; -} - -bool patchTypedEvent(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - // FIXME: Implement in powerpc64? - return false; -} - -} // namespace __xray - -extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { - // FIXME: this will have to be implemented in the trampoline assembly file -} diff --git a/lib/xray/xray_powerpc64.cpp b/lib/xray/xray_powerpc64.cpp new file mode 100644 index 000000000000..b41f1bce6f21 --- /dev/null +++ b/lib/xray/xray_powerpc64.cpp @@ -0,0 +1,111 @@ +//===-- xray_powerpc64.cpp --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of powerpc64 and powerpc64le routines. +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include "xray_utils.h" +#include +#include +#include + +#ifndef __LITTLE_ENDIAN__ +#error powerpc64 big endian is not supported for now. 
+#endif + +namespace { + +constexpr unsigned long long JumpOverInstNum = 7; + +void clearCache(void *Addr, size_t Len) { + const size_t LineSize = 32; + + const intptr_t Mask = ~(LineSize - 1); + const intptr_t StartLine = ((intptr_t)Addr) & Mask; + const intptr_t EndLine = ((intptr_t)Addr + Len + LineSize - 1) & Mask; + + for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) + asm volatile("dcbf 0, %0" : : "r"(Line)); + asm volatile("sync"); + + for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) + asm volatile("icbi 0, %0" : : "r"(Line)); + asm volatile("isync"); +} + +} // namespace + +extern "C" void __clear_cache(void *start, void *end); + +namespace __xray { + +bool patchFunctionEntry(const bool Enable, uint32_t FuncId, + const XRaySledEntry &Sled, + void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { + if (Enable) { + // lis 0, FuncId[16..32] + // li 0, FuncId[0..15] + *reinterpret_cast(Sled.Address) = + (0x3c000000ull + (FuncId >> 16)) + + ((0x60000000ull + (FuncId & 0xffff)) << 32); + } else { + // b +JumpOverInstNum instructions. + *reinterpret_cast(Sled.Address) = + 0x48000000ull + (JumpOverInstNum << 2); + } + clearCache(reinterpret_cast(Sled.Address), 8); + return true; +} + +bool patchFunctionExit(const bool Enable, uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + if (Enable) { + // lis 0, FuncId[16..32] + // li 0, FuncId[0..15] + *reinterpret_cast(Sled.Address) = + (0x3c000000ull + (FuncId >> 16)) + + ((0x60000000ull + (FuncId & 0xffff)) << 32); + } else { + // Copy the blr/b instruction after JumpOverInstNum instructions. + *reinterpret_cast(Sled.Address) = + *(reinterpret_cast(Sled.Address) + JumpOverInstNum); + } + clearCache(reinterpret_cast(Sled.Address), 8); + return true; +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + return patchFunctionExit(Enable, FuncId, Sled); +} + +// FIXME: Maybe implement this better? 
+bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in powerpc64? + return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in powerpc64? + return false; +} + +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { + // FIXME: this will have to be implemented in the trampoline assembly file +} diff --git a/lib/xray/xray_profile_collector.cc b/lib/xray/xray_profile_collector.cc deleted file mode 100644 index 97b52e1d9a22..000000000000 --- a/lib/xray/xray_profile_collector.cc +++ /dev/null @@ -1,414 +0,0 @@ -//===-- xray_profile_collector.cc ------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instrumentation system. -// -// This implements the interface for the profileCollectorService. -// -//===----------------------------------------------------------------------===// -#include "xray_profile_collector.h" -#include "sanitizer_common/sanitizer_common.h" -#include "xray_allocator.h" -#include "xray_defs.h" -#include "xray_profiling_flags.h" -#include "xray_segmented_array.h" -#include -#include -#include - -namespace __xray { -namespace profileCollectorService { - -namespace { - -SpinMutex GlobalMutex; -struct ThreadTrie { - tid_t TId; - typename std::aligned_storage::type TrieStorage; -}; - -struct ProfileBuffer { - void *Data; - size_t Size; -}; - -// Current version of the profile format. 
-constexpr u64 XRayProfilingVersion = 0x20180424; - -// Identifier for XRay profiling files 'xrayprof' in hex. -constexpr u64 XRayMagicBytes = 0x7872617970726f66; - -struct XRayProfilingFileHeader { - const u64 MagicBytes = XRayMagicBytes; - const u64 Version = XRayProfilingVersion; - u64 Timestamp = 0; // System time in nanoseconds. - u64 PID = 0; // Process ID. -}; - -struct BlockHeader { - u32 BlockSize; - u32 BlockNum; - u64 ThreadId; -}; - -struct ThreadData { - BufferQueue *BQ; - FunctionCallTrie::Allocators::Buffers Buffers; - FunctionCallTrie::Allocators Allocators; - FunctionCallTrie FCT; - tid_t TId; -}; - -using ThreadDataArray = Array; -using ThreadDataAllocator = ThreadDataArray::AllocatorType; - -// We use a separate buffer queue for the backing store for the allocator used -// by the ThreadData array. This lets us host the buffers, allocators, and tries -// associated with a thread by moving the data into the array instead of -// attempting to copy the data to a separately backed set of tries. -static typename std::aligned_storage< - sizeof(BufferQueue), alignof(BufferQueue)>::type BufferQueueStorage; -static BufferQueue *BQ = nullptr; -static BufferQueue::Buffer Buffer; -static typename std::aligned_storage::type - ThreadDataAllocatorStorage; -static typename std::aligned_storage::type - ThreadDataArrayStorage; - -static ThreadDataAllocator *TDAllocator = nullptr; -static ThreadDataArray *TDArray = nullptr; - -using ProfileBufferArray = Array; -using ProfileBufferArrayAllocator = typename ProfileBufferArray::AllocatorType; - -// These need to be global aligned storage to avoid dynamic initialization. We -// need these to be aligned to allow us to placement new objects into the -// storage, and have pointers to those objects be appropriately aligned. 
-static typename std::aligned_storage::type - ProfileBuffersStorage; -static typename std::aligned_storage::type - ProfileBufferArrayAllocatorStorage; - -static ProfileBufferArrayAllocator *ProfileBuffersAllocator = nullptr; -static ProfileBufferArray *ProfileBuffers = nullptr; - -// Use a global flag to determine whether the collector implementation has been -// initialized. -static atomic_uint8_t CollectorInitialized{0}; - -} // namespace - -void post(BufferQueue *Q, FunctionCallTrie &&T, - FunctionCallTrie::Allocators &&A, - FunctionCallTrie::Allocators::Buffers &&B, - tid_t TId) XRAY_NEVER_INSTRUMENT { - DCHECK_NE(Q, nullptr); - - // Bail out early if the collector has not been initialized. - if (!atomic_load(&CollectorInitialized, memory_order_acquire)) { - T.~FunctionCallTrie(); - A.~Allocators(); - Q->releaseBuffer(B.NodeBuffer); - Q->releaseBuffer(B.RootsBuffer); - Q->releaseBuffer(B.ShadowStackBuffer); - Q->releaseBuffer(B.NodeIdPairBuffer); - B.~Buffers(); - return; - } - - { - SpinMutexLock Lock(&GlobalMutex); - DCHECK_NE(TDAllocator, nullptr); - DCHECK_NE(TDArray, nullptr); - - if (TDArray->AppendEmplace(Q, std::move(B), std::move(A), std::move(T), - TId) == nullptr) { - // If we fail to add the data to the array, we should destroy the objects - // handed us. - T.~FunctionCallTrie(); - A.~Allocators(); - Q->releaseBuffer(B.NodeBuffer); - Q->releaseBuffer(B.RootsBuffer); - Q->releaseBuffer(B.ShadowStackBuffer); - Q->releaseBuffer(B.NodeIdPairBuffer); - B.~Buffers(); - } - } -} - -// A PathArray represents the function id's representing a stack trace. In this -// context a path is almost always represented from the leaf function in a call -// stack to a root of the call trie. -using PathArray = Array; - -struct ProfileRecord { - using PathAllocator = typename PathArray::AllocatorType; - - // The Path in this record is the function id's from the leaf to the root of - // the function call stack as represented from a FunctionCallTrie. 
- PathArray Path; - const FunctionCallTrie::Node *Node; -}; - -namespace { - -using ProfileRecordArray = Array; - -// Walk a depth-first traversal of each root of the FunctionCallTrie to generate -// the path(s) and the data associated with the path. -static void -populateRecords(ProfileRecordArray &PRs, ProfileRecord::PathAllocator &PA, - const FunctionCallTrie &Trie) XRAY_NEVER_INSTRUMENT { - using StackArray = Array; - using StackAllocator = typename StackArray::AllocatorType; - StackAllocator StackAlloc(profilingFlags()->stack_allocator_max); - StackArray DFSStack(StackAlloc); - for (const auto *R : Trie.getRoots()) { - DFSStack.Append(R); - while (!DFSStack.empty()) { - auto *Node = DFSStack.back(); - DFSStack.trim(1); - if (Node == nullptr) - continue; - auto Record = PRs.AppendEmplace(PathArray{PA}, Node); - if (Record == nullptr) - return; - DCHECK_NE(Record, nullptr); - - // Traverse the Node's parents and as we're doing so, get the FIds in - // the order they appear. - for (auto N = Node; N != nullptr; N = N->Parent) - Record->Path.Append(N->FId); - DCHECK(!Record->Path.empty()); - - for (const auto C : Node->Callees) - DFSStack.Append(C.NodePtr); - } - } -} - -static void serializeRecords(ProfileBuffer *Buffer, const BlockHeader &Header, - const ProfileRecordArray &ProfileRecords) - XRAY_NEVER_INSTRUMENT { - auto NextPtr = static_cast( - internal_memcpy(Buffer->Data, &Header, sizeof(Header))) + - sizeof(Header); - for (const auto &Record : ProfileRecords) { - // List of IDs follow: - for (const auto FId : Record.Path) - NextPtr = - static_cast(internal_memcpy(NextPtr, &FId, sizeof(FId))) + - sizeof(FId); - - // Add the sentinel here. - constexpr int32_t SentinelFId = 0; - NextPtr = static_cast( - internal_memset(NextPtr, SentinelFId, sizeof(SentinelFId))) + - sizeof(SentinelFId); - - // Add the node data here. 
- NextPtr = - static_cast(internal_memcpy( - NextPtr, &Record.Node->CallCount, sizeof(Record.Node->CallCount))) + - sizeof(Record.Node->CallCount); - NextPtr = static_cast( - internal_memcpy(NextPtr, &Record.Node->CumulativeLocalTime, - sizeof(Record.Node->CumulativeLocalTime))) + - sizeof(Record.Node->CumulativeLocalTime); - } - - DCHECK_EQ(NextPtr - static_cast(Buffer->Data), Buffer->Size); -} - -} // namespace - -void serialize() XRAY_NEVER_INSTRUMENT { - if (!atomic_load(&CollectorInitialized, memory_order_acquire)) - return; - - SpinMutexLock Lock(&GlobalMutex); - - // Clear out the global ProfileBuffers, if it's not empty. - for (auto &B : *ProfileBuffers) - deallocateBuffer(reinterpret_cast(B.Data), B.Size); - ProfileBuffers->trim(ProfileBuffers->size()); - - DCHECK_NE(TDArray, nullptr); - if (TDArray->empty()) - return; - - // Then repopulate the global ProfileBuffers. - u32 I = 0; - auto MaxSize = profilingFlags()->global_allocator_max; - auto ProfileArena = allocateBuffer(MaxSize); - if (ProfileArena == nullptr) - return; - - auto ProfileArenaCleanup = at_scope_exit( - [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(ProfileArena, MaxSize); }); - - auto PathArena = allocateBuffer(profilingFlags()->global_allocator_max); - if (PathArena == nullptr) - return; - - auto PathArenaCleanup = at_scope_exit( - [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(PathArena, MaxSize); }); - - for (const auto &ThreadTrie : *TDArray) { - using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType; - ProfileRecordAllocator PRAlloc(ProfileArena, - profilingFlags()->global_allocator_max); - ProfileRecord::PathAllocator PathAlloc( - PathArena, profilingFlags()->global_allocator_max); - ProfileRecordArray ProfileRecords(PRAlloc); - - // First, we want to compute the amount of space we're going to need. We'll - // use a local allocator and an __xray::Array<...> to store the intermediary - // data, then compute the size as we're going along. 
Then we'll allocate the - // contiguous space to contain the thread buffer data. - if (ThreadTrie.FCT.getRoots().empty()) - continue; - - populateRecords(ProfileRecords, PathAlloc, ThreadTrie.FCT); - DCHECK(!ThreadTrie.FCT.getRoots().empty()); - DCHECK(!ProfileRecords.empty()); - - // Go through each record, to compute the sizes. - // - // header size = block size (4 bytes) - // + block number (4 bytes) - // + thread id (8 bytes) - // record size = path ids (4 bytes * number of ids + sentinel 4 bytes) - // + call count (8 bytes) - // + local time (8 bytes) - // + end of record (8 bytes) - u32 CumulativeSizes = 0; - for (const auto &Record : ProfileRecords) - CumulativeSizes += 20 + (4 * Record.Path.size()); - - BlockHeader Header{16 + CumulativeSizes, I++, ThreadTrie.TId}; - auto B = ProfileBuffers->Append({}); - B->Size = sizeof(Header) + CumulativeSizes; - B->Data = allocateBuffer(B->Size); - DCHECK_NE(B->Data, nullptr); - serializeRecords(B, Header, ProfileRecords); - } -} - -void reset() XRAY_NEVER_INSTRUMENT { - atomic_store(&CollectorInitialized, 0, memory_order_release); - SpinMutexLock Lock(&GlobalMutex); - - if (ProfileBuffers != nullptr) { - // Clear out the profile buffers that have been serialized. - for (auto &B : *ProfileBuffers) - deallocateBuffer(reinterpret_cast(B.Data), B.Size); - ProfileBuffers->trim(ProfileBuffers->size()); - ProfileBuffers = nullptr; - } - - if (TDArray != nullptr) { - // Release the resources as required. - for (auto &TD : *TDArray) { - TD.BQ->releaseBuffer(TD.Buffers.NodeBuffer); - TD.BQ->releaseBuffer(TD.Buffers.RootsBuffer); - TD.BQ->releaseBuffer(TD.Buffers.ShadowStackBuffer); - TD.BQ->releaseBuffer(TD.Buffers.NodeIdPairBuffer); - } - // We don't bother destroying the array here because we've already - // potentially freed the backing store for the array. Instead we're going to - // reset the pointer to nullptr, and re-use the storage later instead - // (placement-new'ing into the storage as-is). 
- TDArray = nullptr; - } - - if (TDAllocator != nullptr) { - TDAllocator->~Allocator(); - TDAllocator = nullptr; - } - - if (Buffer.Data != nullptr) { - BQ->releaseBuffer(Buffer); - } - - if (BQ == nullptr) { - bool Success = false; - new (&BufferQueueStorage) - BufferQueue(profilingFlags()->global_allocator_max, 1, Success); - if (!Success) - return; - BQ = reinterpret_cast(&BufferQueueStorage); - } else { - BQ->finalize(); - - if (BQ->init(profilingFlags()->global_allocator_max, 1) != - BufferQueue::ErrorCode::Ok) - return; - } - - if (BQ->getBuffer(Buffer) != BufferQueue::ErrorCode::Ok) - return; - - new (&ProfileBufferArrayAllocatorStorage) - ProfileBufferArrayAllocator(profilingFlags()->global_allocator_max); - ProfileBuffersAllocator = reinterpret_cast( - &ProfileBufferArrayAllocatorStorage); - - new (&ProfileBuffersStorage) ProfileBufferArray(*ProfileBuffersAllocator); - ProfileBuffers = - reinterpret_cast(&ProfileBuffersStorage); - - new (&ThreadDataAllocatorStorage) - ThreadDataAllocator(Buffer.Data, Buffer.Size); - TDAllocator = - reinterpret_cast(&ThreadDataAllocatorStorage); - new (&ThreadDataArrayStorage) ThreadDataArray(*TDAllocator); - TDArray = reinterpret_cast(&ThreadDataArrayStorage); - - atomic_store(&CollectorInitialized, 1, memory_order_release); -} - -XRayBuffer nextBuffer(XRayBuffer B) XRAY_NEVER_INSTRUMENT { - SpinMutexLock Lock(&GlobalMutex); - - if (ProfileBuffers == nullptr || ProfileBuffers->size() == 0) - return {nullptr, 0}; - - static pthread_once_t Once = PTHREAD_ONCE_INIT; - static typename std::aligned_storage::type - FileHeaderStorage; - pthread_once( - &Once, +[]() XRAY_NEVER_INSTRUMENT { - new (&FileHeaderStorage) XRayProfilingFileHeader{}; - }); - - if (UNLIKELY(B.Data == nullptr)) { - // The first buffer should always contain the file header information. 
- auto &FileHeader = - *reinterpret_cast(&FileHeaderStorage); - FileHeader.Timestamp = NanoTime(); - FileHeader.PID = internal_getpid(); - return {&FileHeaderStorage, sizeof(XRayProfilingFileHeader)}; - } - - if (UNLIKELY(B.Data == &FileHeaderStorage)) - return {(*ProfileBuffers)[0].Data, (*ProfileBuffers)[0].Size}; - - BlockHeader Header; - internal_memcpy(&Header, B.Data, sizeof(BlockHeader)); - auto NextBlock = Header.BlockNum + 1; - if (NextBlock < ProfileBuffers->size()) - return {(*ProfileBuffers)[NextBlock].Data, - (*ProfileBuffers)[NextBlock].Size}; - return {nullptr, 0}; -} - -} // namespace profileCollectorService -} // namespace __xray diff --git a/lib/xray/xray_profile_collector.cpp b/lib/xray/xray_profile_collector.cpp new file mode 100644 index 000000000000..bef2504f2a16 --- /dev/null +++ b/lib/xray/xray_profile_collector.cpp @@ -0,0 +1,414 @@ +//===-- xray_profile_collector.cpp -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the interface for the profileCollectorService. 
+// +//===----------------------------------------------------------------------===// +#include "xray_profile_collector.h" +#include "sanitizer_common/sanitizer_common.h" +#include "xray_allocator.h" +#include "xray_defs.h" +#include "xray_profiling_flags.h" +#include "xray_segmented_array.h" +#include +#include +#include + +namespace __xray { +namespace profileCollectorService { + +namespace { + +SpinMutex GlobalMutex; +struct ThreadTrie { + tid_t TId; + typename std::aligned_storage::type TrieStorage; +}; + +struct ProfileBuffer { + void *Data; + size_t Size; +}; + +// Current version of the profile format. +constexpr u64 XRayProfilingVersion = 0x20180424; + +// Identifier for XRay profiling files 'xrayprof' in hex. +constexpr u64 XRayMagicBytes = 0x7872617970726f66; + +struct XRayProfilingFileHeader { + const u64 MagicBytes = XRayMagicBytes; + const u64 Version = XRayProfilingVersion; + u64 Timestamp = 0; // System time in nanoseconds. + u64 PID = 0; // Process ID. +}; + +struct BlockHeader { + u32 BlockSize; + u32 BlockNum; + u64 ThreadId; +}; + +struct ThreadData { + BufferQueue *BQ; + FunctionCallTrie::Allocators::Buffers Buffers; + FunctionCallTrie::Allocators Allocators; + FunctionCallTrie FCT; + tid_t TId; +}; + +using ThreadDataArray = Array; +using ThreadDataAllocator = ThreadDataArray::AllocatorType; + +// We use a separate buffer queue for the backing store for the allocator used +// by the ThreadData array. This lets us host the buffers, allocators, and tries +// associated with a thread by moving the data into the array instead of +// attempting to copy the data to a separately backed set of tries. 
+static typename std::aligned_storage< + sizeof(BufferQueue), alignof(BufferQueue)>::type BufferQueueStorage; +static BufferQueue *BQ = nullptr; +static BufferQueue::Buffer Buffer; +static typename std::aligned_storage::type + ThreadDataAllocatorStorage; +static typename std::aligned_storage::type + ThreadDataArrayStorage; + +static ThreadDataAllocator *TDAllocator = nullptr; +static ThreadDataArray *TDArray = nullptr; + +using ProfileBufferArray = Array; +using ProfileBufferArrayAllocator = typename ProfileBufferArray::AllocatorType; + +// These need to be global aligned storage to avoid dynamic initialization. We +// need these to be aligned to allow us to placement new objects into the +// storage, and have pointers to those objects be appropriately aligned. +static typename std::aligned_storage::type + ProfileBuffersStorage; +static typename std::aligned_storage::type + ProfileBufferArrayAllocatorStorage; + +static ProfileBufferArrayAllocator *ProfileBuffersAllocator = nullptr; +static ProfileBufferArray *ProfileBuffers = nullptr; + +// Use a global flag to determine whether the collector implementation has been +// initialized. +static atomic_uint8_t CollectorInitialized{0}; + +} // namespace + +void post(BufferQueue *Q, FunctionCallTrie &&T, + FunctionCallTrie::Allocators &&A, + FunctionCallTrie::Allocators::Buffers &&B, + tid_t TId) XRAY_NEVER_INSTRUMENT { + DCHECK_NE(Q, nullptr); + + // Bail out early if the collector has not been initialized. 
+ if (!atomic_load(&CollectorInitialized, memory_order_acquire)) { + T.~FunctionCallTrie(); + A.~Allocators(); + Q->releaseBuffer(B.NodeBuffer); + Q->releaseBuffer(B.RootsBuffer); + Q->releaseBuffer(B.ShadowStackBuffer); + Q->releaseBuffer(B.NodeIdPairBuffer); + B.~Buffers(); + return; + } + + { + SpinMutexLock Lock(&GlobalMutex); + DCHECK_NE(TDAllocator, nullptr); + DCHECK_NE(TDArray, nullptr); + + if (TDArray->AppendEmplace(Q, std::move(B), std::move(A), std::move(T), + TId) == nullptr) { + // If we fail to add the data to the array, we should destroy the objects + // handed us. + T.~FunctionCallTrie(); + A.~Allocators(); + Q->releaseBuffer(B.NodeBuffer); + Q->releaseBuffer(B.RootsBuffer); + Q->releaseBuffer(B.ShadowStackBuffer); + Q->releaseBuffer(B.NodeIdPairBuffer); + B.~Buffers(); + } + } +} + +// A PathArray represents the function id's representing a stack trace. In this +// context a path is almost always represented from the leaf function in a call +// stack to a root of the call trie. +using PathArray = Array; + +struct ProfileRecord { + using PathAllocator = typename PathArray::AllocatorType; + + // The Path in this record is the function id's from the leaf to the root of + // the function call stack as represented from a FunctionCallTrie. + PathArray Path; + const FunctionCallTrie::Node *Node; +}; + +namespace { + +using ProfileRecordArray = Array; + +// Walk a depth-first traversal of each root of the FunctionCallTrie to generate +// the path(s) and the data associated with the path. 
+static void +populateRecords(ProfileRecordArray &PRs, ProfileRecord::PathAllocator &PA, + const FunctionCallTrie &Trie) XRAY_NEVER_INSTRUMENT { + using StackArray = Array; + using StackAllocator = typename StackArray::AllocatorType; + StackAllocator StackAlloc(profilingFlags()->stack_allocator_max); + StackArray DFSStack(StackAlloc); + for (const auto *R : Trie.getRoots()) { + DFSStack.Append(R); + while (!DFSStack.empty()) { + auto *Node = DFSStack.back(); + DFSStack.trim(1); + if (Node == nullptr) + continue; + auto Record = PRs.AppendEmplace(PathArray{PA}, Node); + if (Record == nullptr) + return; + DCHECK_NE(Record, nullptr); + + // Traverse the Node's parents and as we're doing so, get the FIds in + // the order they appear. + for (auto N = Node; N != nullptr; N = N->Parent) + Record->Path.Append(N->FId); + DCHECK(!Record->Path.empty()); + + for (const auto C : Node->Callees) + DFSStack.Append(C.NodePtr); + } + } +} + +static void serializeRecords(ProfileBuffer *Buffer, const BlockHeader &Header, + const ProfileRecordArray &ProfileRecords) + XRAY_NEVER_INSTRUMENT { + auto NextPtr = static_cast( + internal_memcpy(Buffer->Data, &Header, sizeof(Header))) + + sizeof(Header); + for (const auto &Record : ProfileRecords) { + // List of IDs follow: + for (const auto FId : Record.Path) + NextPtr = + static_cast(internal_memcpy(NextPtr, &FId, sizeof(FId))) + + sizeof(FId); + + // Add the sentinel here. + constexpr int32_t SentinelFId = 0; + NextPtr = static_cast( + internal_memset(NextPtr, SentinelFId, sizeof(SentinelFId))) + + sizeof(SentinelFId); + + // Add the node data here. 
+ NextPtr = + static_cast(internal_memcpy( + NextPtr, &Record.Node->CallCount, sizeof(Record.Node->CallCount))) + + sizeof(Record.Node->CallCount); + NextPtr = static_cast( + internal_memcpy(NextPtr, &Record.Node->CumulativeLocalTime, + sizeof(Record.Node->CumulativeLocalTime))) + + sizeof(Record.Node->CumulativeLocalTime); + } + + DCHECK_EQ(NextPtr - static_cast(Buffer->Data), Buffer->Size); +} + +} // namespace + +void serialize() XRAY_NEVER_INSTRUMENT { + if (!atomic_load(&CollectorInitialized, memory_order_acquire)) + return; + + SpinMutexLock Lock(&GlobalMutex); + + // Clear out the global ProfileBuffers, if it's not empty. + for (auto &B : *ProfileBuffers) + deallocateBuffer(reinterpret_cast(B.Data), B.Size); + ProfileBuffers->trim(ProfileBuffers->size()); + + DCHECK_NE(TDArray, nullptr); + if (TDArray->empty()) + return; + + // Then repopulate the global ProfileBuffers. + u32 I = 0; + auto MaxSize = profilingFlags()->global_allocator_max; + auto ProfileArena = allocateBuffer(MaxSize); + if (ProfileArena == nullptr) + return; + + auto ProfileArenaCleanup = at_scope_exit( + [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(ProfileArena, MaxSize); }); + + auto PathArena = allocateBuffer(profilingFlags()->global_allocator_max); + if (PathArena == nullptr) + return; + + auto PathArenaCleanup = at_scope_exit( + [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(PathArena, MaxSize); }); + + for (const auto &ThreadTrie : *TDArray) { + using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType; + ProfileRecordAllocator PRAlloc(ProfileArena, + profilingFlags()->global_allocator_max); + ProfileRecord::PathAllocator PathAlloc( + PathArena, profilingFlags()->global_allocator_max); + ProfileRecordArray ProfileRecords(PRAlloc); + + // First, we want to compute the amount of space we're going to need. We'll + // use a local allocator and an __xray::Array<...> to store the intermediary + // data, then compute the size as we're going along. 
Then we'll allocate the + // contiguous space to contain the thread buffer data. + if (ThreadTrie.FCT.getRoots().empty()) + continue; + + populateRecords(ProfileRecords, PathAlloc, ThreadTrie.FCT); + DCHECK(!ThreadTrie.FCT.getRoots().empty()); + DCHECK(!ProfileRecords.empty()); + + // Go through each record, to compute the sizes. + // + // header size = block size (4 bytes) + // + block number (4 bytes) + // + thread id (8 bytes) + // record size = path ids (4 bytes * number of ids + sentinel 4 bytes) + // + call count (8 bytes) + // + local time (8 bytes) + // + end of record (8 bytes) + u32 CumulativeSizes = 0; + for (const auto &Record : ProfileRecords) + CumulativeSizes += 20 + (4 * Record.Path.size()); + + BlockHeader Header{16 + CumulativeSizes, I++, ThreadTrie.TId}; + auto B = ProfileBuffers->Append({}); + B->Size = sizeof(Header) + CumulativeSizes; + B->Data = allocateBuffer(B->Size); + DCHECK_NE(B->Data, nullptr); + serializeRecords(B, Header, ProfileRecords); + } +} + +void reset() XRAY_NEVER_INSTRUMENT { + atomic_store(&CollectorInitialized, 0, memory_order_release); + SpinMutexLock Lock(&GlobalMutex); + + if (ProfileBuffers != nullptr) { + // Clear out the profile buffers that have been serialized. + for (auto &B : *ProfileBuffers) + deallocateBuffer(reinterpret_cast(B.Data), B.Size); + ProfileBuffers->trim(ProfileBuffers->size()); + ProfileBuffers = nullptr; + } + + if (TDArray != nullptr) { + // Release the resources as required. + for (auto &TD : *TDArray) { + TD.BQ->releaseBuffer(TD.Buffers.NodeBuffer); + TD.BQ->releaseBuffer(TD.Buffers.RootsBuffer); + TD.BQ->releaseBuffer(TD.Buffers.ShadowStackBuffer); + TD.BQ->releaseBuffer(TD.Buffers.NodeIdPairBuffer); + } + // We don't bother destroying the array here because we've already + // potentially freed the backing store for the array. Instead we're going to + // reset the pointer to nullptr, and re-use the storage later instead + // (placement-new'ing into the storage as-is). 
+ TDArray = nullptr; + } + + if (TDAllocator != nullptr) { + TDAllocator->~Allocator(); + TDAllocator = nullptr; + } + + if (Buffer.Data != nullptr) { + BQ->releaseBuffer(Buffer); + } + + if (BQ == nullptr) { + bool Success = false; + new (&BufferQueueStorage) + BufferQueue(profilingFlags()->global_allocator_max, 1, Success); + if (!Success) + return; + BQ = reinterpret_cast(&BufferQueueStorage); + } else { + BQ->finalize(); + + if (BQ->init(profilingFlags()->global_allocator_max, 1) != + BufferQueue::ErrorCode::Ok) + return; + } + + if (BQ->getBuffer(Buffer) != BufferQueue::ErrorCode::Ok) + return; + + new (&ProfileBufferArrayAllocatorStorage) + ProfileBufferArrayAllocator(profilingFlags()->global_allocator_max); + ProfileBuffersAllocator = reinterpret_cast( + &ProfileBufferArrayAllocatorStorage); + + new (&ProfileBuffersStorage) ProfileBufferArray(*ProfileBuffersAllocator); + ProfileBuffers = + reinterpret_cast(&ProfileBuffersStorage); + + new (&ThreadDataAllocatorStorage) + ThreadDataAllocator(Buffer.Data, Buffer.Size); + TDAllocator = + reinterpret_cast(&ThreadDataAllocatorStorage); + new (&ThreadDataArrayStorage) ThreadDataArray(*TDAllocator); + TDArray = reinterpret_cast(&ThreadDataArrayStorage); + + atomic_store(&CollectorInitialized, 1, memory_order_release); +} + +XRayBuffer nextBuffer(XRayBuffer B) XRAY_NEVER_INSTRUMENT { + SpinMutexLock Lock(&GlobalMutex); + + if (ProfileBuffers == nullptr || ProfileBuffers->size() == 0) + return {nullptr, 0}; + + static pthread_once_t Once = PTHREAD_ONCE_INIT; + static typename std::aligned_storage::type + FileHeaderStorage; + pthread_once( + &Once, +[]() XRAY_NEVER_INSTRUMENT { + new (&FileHeaderStorage) XRayProfilingFileHeader{}; + }); + + if (UNLIKELY(B.Data == nullptr)) { + // The first buffer should always contain the file header information. 
+ auto &FileHeader = + *reinterpret_cast(&FileHeaderStorage); + FileHeader.Timestamp = NanoTime(); + FileHeader.PID = internal_getpid(); + return {&FileHeaderStorage, sizeof(XRayProfilingFileHeader)}; + } + + if (UNLIKELY(B.Data == &FileHeaderStorage)) + return {(*ProfileBuffers)[0].Data, (*ProfileBuffers)[0].Size}; + + BlockHeader Header; + internal_memcpy(&Header, B.Data, sizeof(BlockHeader)); + auto NextBlock = Header.BlockNum + 1; + if (NextBlock < ProfileBuffers->size()) + return {(*ProfileBuffers)[NextBlock].Data, + (*ProfileBuffers)[NextBlock].Size}; + return {nullptr, 0}; +} + +} // namespace profileCollectorService +} // namespace __xray diff --git a/lib/xray/xray_profiling.cc b/lib/xray/xray_profiling.cc deleted file mode 100644 index 66def6cf2485..000000000000 --- a/lib/xray/xray_profiling.cc +++ /dev/null @@ -1,519 +0,0 @@ -//===-- xray_profiling.cc ---------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instrumentation system. -// -// This is the implementation of a profiling handler. 
-// -//===----------------------------------------------------------------------===// -#include -#include - -#include "sanitizer_common/sanitizer_atomic.h" -#include "sanitizer_common/sanitizer_flags.h" -#include "xray/xray_interface.h" -#include "xray/xray_log_interface.h" -#include "xray_buffer_queue.h" -#include "xray_flags.h" -#include "xray_profile_collector.h" -#include "xray_profiling_flags.h" -#include "xray_recursion_guard.h" -#include "xray_tsc.h" -#include "xray_utils.h" -#include - -namespace __xray { - -namespace { - -static atomic_sint32_t ProfilerLogFlushStatus = { - XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; - -static atomic_sint32_t ProfilerLogStatus = { - XRayLogInitStatus::XRAY_LOG_UNINITIALIZED}; - -static SpinMutex ProfilerOptionsMutex; - -struct ProfilingData { - atomic_uintptr_t Allocators; - atomic_uintptr_t FCT; -}; - -static pthread_key_t ProfilingKey; - -// We use a global buffer queue, which gets initialized once at initialisation -// time, and gets reset when profiling is "done". -static std::aligned_storage::type - BufferQueueStorage; -static BufferQueue *BQ = nullptr; - -thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers; -thread_local std::aligned_storage::type - AllocatorsStorage; -thread_local std::aligned_storage::type - FunctionCallTrieStorage; -thread_local ProfilingData TLD{{0}, {0}}; -thread_local atomic_uint8_t ReentranceGuard{0}; - -// We use a separate guard for ensuring that for this thread, if we're already -// cleaning up, that any signal handlers don't attempt to cleanup nor -// initialise. -thread_local atomic_uint8_t TLDInitGuard{0}; - -// We also use a separate latch to signal that the thread is exiting, and -// non-essential work should be ignored (things like recording events, etc.). 
-thread_local atomic_uint8_t ThreadExitingLatch{0}; - -static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT { - thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT { - pthread_setspecific(ProfilingKey, &TLD); - return false; - }(); - (void)ThreadOnce; - - RecursionGuard TLDInit(TLDInitGuard); - if (!TLDInit) - return nullptr; - - if (atomic_load_relaxed(&ThreadExitingLatch)) - return nullptr; - - uptr Allocators = 0; - if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1, - memory_order_acq_rel)) { - bool Success = false; - auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { - if (!Success) - atomic_store(&TLD.Allocators, 0, memory_order_release); - }); - - // Acquire a set of buffers for this thread. - if (BQ == nullptr) - return nullptr; - - if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok) - return nullptr; - auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { - if (!Success) - BQ->releaseBuffer(ThreadBuffers.NodeBuffer); - }); - - if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok) - return nullptr; - auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { - if (!Success) - BQ->releaseBuffer(ThreadBuffers.RootsBuffer); - }); - - if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) != - BufferQueue::ErrorCode::Ok) - return nullptr; - auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { - if (!Success) - BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer); - }); - - if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) != - BufferQueue::ErrorCode::Ok) - return nullptr; - - Success = true; - new (&AllocatorsStorage) FunctionCallTrie::Allocators( - FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers)); - Allocators = reinterpret_cast( - reinterpret_cast(&AllocatorsStorage)); - atomic_store(&TLD.Allocators, Allocators, memory_order_release); - } - - if (Allocators == 1) - return nullptr; - - uptr FCT = 0; - if 
(atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) { - new (&FunctionCallTrieStorage) - FunctionCallTrie(*reinterpret_cast( - atomic_load_relaxed(&TLD.Allocators))); - FCT = reinterpret_cast( - reinterpret_cast(&FunctionCallTrieStorage)); - atomic_store(&TLD.FCT, FCT, memory_order_release); - } - - if (FCT == 1) - return nullptr; - - return &TLD; -} - -static void cleanupTLD() XRAY_NEVER_INSTRUMENT { - auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel); - if (FCT == reinterpret_cast(reinterpret_cast( - &FunctionCallTrieStorage))) - reinterpret_cast(FCT)->~FunctionCallTrie(); - - auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel); - if (Allocators == - reinterpret_cast( - reinterpret_cast(&AllocatorsStorage))) - reinterpret_cast(Allocators)->~Allocators(); -} - -static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT { - RecursionGuard TLDInit(TLDInitGuard); - if (!TLDInit) - return; - - uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel); - if (P != reinterpret_cast( - reinterpret_cast(&FunctionCallTrieStorage))) - return; - - auto FCT = reinterpret_cast(P); - DCHECK_NE(FCT, nullptr); - - uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel); - if (A != - reinterpret_cast( - reinterpret_cast(&AllocatorsStorage))) - return; - - auto Allocators = reinterpret_cast(A); - DCHECK_NE(Allocators, nullptr); - - // Always move the data into the profile collector. - profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators), - std::move(ThreadBuffers), GetTid()); - - // Re-initialize the ThreadBuffers object to a known "default" state. 
- ThreadBuffers = FunctionCallTrie::Allocators::Buffers{}; -} - -} // namespace - -const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT { -#ifdef XRAY_PROFILER_DEFAULT_OPTIONS - return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS); -#else - return ""; -#endif -} - -XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT { - if (atomic_load(&ProfilerLogStatus, memory_order_acquire) != - XRayLogInitStatus::XRAY_LOG_FINALIZED) { - if (Verbosity()) - Report("Not flushing profiles, profiling not been finalized.\n"); - return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; - } - - RecursionGuard SignalGuard(ReentranceGuard); - if (!SignalGuard) { - if (Verbosity()) - Report("Cannot finalize properly inside a signal handler!\n"); - atomic_store(&ProfilerLogFlushStatus, - XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING, - memory_order_release); - return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; - } - - s32 Previous = atomic_exchange(&ProfilerLogFlushStatus, - XRayLogFlushStatus::XRAY_LOG_FLUSHING, - memory_order_acq_rel); - if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) { - if (Verbosity()) - Report("Not flushing profiles, implementation still flushing.\n"); - return XRayLogFlushStatus::XRAY_LOG_FLUSHING; - } - - // At this point, we'll create the file that will contain the profile, but - // only if the options say so. - if (!profilingFlags()->no_flush) { - // First check whether we have data in the profile collector service - // before we try and write anything down. - XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0}); - if (B.Data == nullptr) { - if (Verbosity()) - Report("profiling: No data to flush.\n"); - } else { - LogWriter *LW = LogWriter::Open(); - if (LW == nullptr) { - if (Verbosity()) - Report("profiling: Failed to flush to file, dropping data.\n"); - } else { - // Now for each of the buffers, write out the profile data as we would - // see it in memory, verbatim. 
- while (B.Data != nullptr && B.Size != 0) { - LW->WriteAll(reinterpret_cast(B.Data), - reinterpret_cast(B.Data) + B.Size); - B = profileCollectorService::nextBuffer(B); - } - } - LogWriter::Close(LW); - } - } - - profileCollectorService::reset(); - - atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, - memory_order_release); - atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, - memory_order_release); - - return XRayLogFlushStatus::XRAY_LOG_FLUSHED; -} - -void profilingHandleArg0(int32_t FuncId, - XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { - unsigned char CPU; - auto TSC = readTSC(CPU); - RecursionGuard G(ReentranceGuard); - if (!G) - return; - - auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire); - if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED || - Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING)) - return; - - if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED || - Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) { - postCurrentThreadFCT(TLD); - return; - } - - auto T = getThreadLocalData(); - if (T == nullptr) - return; - - auto FCT = reinterpret_cast(atomic_load_relaxed(&T->FCT)); - switch (Entry) { - case XRayEntryType::ENTRY: - case XRayEntryType::LOG_ARGS_ENTRY: - FCT->enterFunction(FuncId, TSC, CPU); - break; - case XRayEntryType::EXIT: - case XRayEntryType::TAIL: - FCT->exitFunction(FuncId, TSC, CPU); - break; - default: - // FIXME: Handle bugs. 
- break; - } -} - -void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry, - uint64_t) XRAY_NEVER_INSTRUMENT { - return profilingHandleArg0(FuncId, Entry); -} - -XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT { - s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED; - if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus, - XRayLogInitStatus::XRAY_LOG_FINALIZING, - memory_order_release)) { - if (Verbosity()) - Report("Cannot finalize profile, the profiling is not initialized.\n"); - return static_cast(CurrentStatus); - } - - // Mark then finalize the current generation of buffers. This allows us to let - // the threads currently holding onto new buffers still use them, but let the - // last reference do the memory cleanup. - DCHECK_NE(BQ, nullptr); - BQ->finalize(); - - // Wait a grace period to allow threads to see that we're finalizing. - SleepForMillis(profilingFlags()->grace_period_ms); - - // If we for some reason are entering this function from an instrumented - // handler, we bail out. - RecursionGuard G(ReentranceGuard); - if (!G) - return static_cast(CurrentStatus); - - // Post the current thread's data if we have any. - postCurrentThreadFCT(TLD); - - // Then we force serialize the log data. 
- profileCollectorService::serialize(); - - atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED, - memory_order_release); - return XRayLogInitStatus::XRAY_LOG_FINALIZED; -} - -XRayLogInitStatus -profilingLoggingInit(size_t, size_t, void *Options, - size_t OptionsSize) XRAY_NEVER_INSTRUMENT { - RecursionGuard G(ReentranceGuard); - if (!G) - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - - s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus, - XRayLogInitStatus::XRAY_LOG_INITIALIZING, - memory_order_acq_rel)) { - if (Verbosity()) - Report("Cannot initialize already initialised profiling " - "implementation.\n"); - return static_cast(CurrentStatus); - } - - { - SpinMutexLock Lock(&ProfilerOptionsMutex); - FlagParser ConfigParser; - ProfilerFlags Flags; - Flags.setDefaults(); - registerProfilerFlags(&ConfigParser, &Flags); - ConfigParser.ParseString(profilingCompilerDefinedFlags()); - const char *Env = GetEnv("XRAY_PROFILING_OPTIONS"); - if (Env == nullptr) - Env = ""; - ConfigParser.ParseString(Env); - - // Then parse the configuration string provided. - ConfigParser.ParseString(static_cast(Options)); - if (Verbosity()) - ReportUnrecognizedFlags(); - *profilingFlags() = Flags; - } - - // We need to reset the profile data collection implementation now. - profileCollectorService::reset(); - - // Then also reset the buffer queue implementation. - if (BQ == nullptr) { - bool Success = false; - new (&BufferQueueStorage) - BufferQueue(profilingFlags()->per_thread_allocator_max, - profilingFlags()->buffers_max, Success); - if (!Success) { - if (Verbosity()) - Report("Failed to initialize preallocated memory buffers!"); - atomic_store(&ProfilerLogStatus, - XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, - memory_order_release); - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - } - - // If we've succeded, set the global pointer to the initialised storage. 
- BQ = reinterpret_cast(&BufferQueueStorage); - } else { - BQ->finalize(); - auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max, - profilingFlags()->buffers_max); - - if (InitStatus != BufferQueue::ErrorCode::Ok) { - if (Verbosity()) - Report("Failed to initialize preallocated memory buffers; error: %s", - BufferQueue::getErrorString(InitStatus)); - atomic_store(&ProfilerLogStatus, - XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, - memory_order_release); - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - } - - DCHECK(!BQ->finalizing()); - } - - // We need to set up the exit handlers. - static pthread_once_t Once = PTHREAD_ONCE_INIT; - pthread_once( - &Once, +[] { - pthread_key_create( - &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT { - if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel)) - return; - - if (P == nullptr) - return; - - auto T = reinterpret_cast(P); - if (atomic_load_relaxed(&T->Allocators) == 0) - return; - - { - // If we're somehow executing this while inside a - // non-reentrant-friendly context, we skip attempting to post - // the current thread's data. - RecursionGuard G(ReentranceGuard); - if (!G) - return; - - postCurrentThreadFCT(*T); - } - }); - - // We also need to set up an exit handler, so that we can get the - // profile information at exit time. We use the C API to do this, to not - // rely on C++ ABI functions for registering exit handlers. - Atexit(+[]() XRAY_NEVER_INSTRUMENT { - if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel)) - return; - - auto Cleanup = - at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); }); - - // Finalize and flush. 
- if (profilingFinalize() != XRAY_LOG_FINALIZED || - profilingFlush() != XRAY_LOG_FLUSHED) - return; - - if (Verbosity()) - Report("XRay Profile flushed at exit."); - }); - }); - - __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer); - __xray_set_handler(profilingHandleArg0); - __xray_set_handler_arg1(profilingHandleArg1); - - atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED, - memory_order_release); - if (Verbosity()) - Report("XRay Profiling init successful.\n"); - - return XRayLogInitStatus::XRAY_LOG_INITIALIZED; -} - -bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT { - // Set up the flag defaults from the static defaults and the - // compiler-provided defaults. - { - SpinMutexLock Lock(&ProfilerOptionsMutex); - auto *F = profilingFlags(); - F->setDefaults(); - FlagParser ProfilingParser; - registerProfilerFlags(&ProfilingParser, F); - ProfilingParser.ParseString(profilingCompilerDefinedFlags()); - } - - XRayLogImpl Impl{ - profilingLoggingInit, - profilingFinalize, - profilingHandleArg0, - profilingFlush, - }; - auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl); - if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) { - if (Verbosity()) - Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = " - "%d\n", - RegistrationResult); - return false; - } - - if (!internal_strcmp(flags()->xray_mode, "xray-profiling")) - __xray_log_select_mode("xray_profiling"); - return true; -} - -} // namespace __xray - -static auto UNUSED Unused = __xray::profilingDynamicInitializer(); diff --git a/lib/xray/xray_profiling.cpp b/lib/xray/xray_profiling.cpp new file mode 100644 index 000000000000..ef16691562cc --- /dev/null +++ b/lib/xray/xray_profiling.cpp @@ -0,0 +1,519 @@ +//===-- xray_profiling.cpp --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This is the implementation of a profiling handler. +// +//===----------------------------------------------------------------------===// +#include +#include + +#include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_flags.h" +#include "xray/xray_interface.h" +#include "xray/xray_log_interface.h" +#include "xray_buffer_queue.h" +#include "xray_flags.h" +#include "xray_profile_collector.h" +#include "xray_profiling_flags.h" +#include "xray_recursion_guard.h" +#include "xray_tsc.h" +#include "xray_utils.h" +#include + +namespace __xray { + +namespace { + +static atomic_sint32_t ProfilerLogFlushStatus = { + XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; + +static atomic_sint32_t ProfilerLogStatus = { + XRayLogInitStatus::XRAY_LOG_UNINITIALIZED}; + +static SpinMutex ProfilerOptionsMutex; + +struct ProfilingData { + atomic_uintptr_t Allocators; + atomic_uintptr_t FCT; +}; + +static pthread_key_t ProfilingKey; + +// We use a global buffer queue, which gets initialized once at initialisation +// time, and gets reset when profiling is "done". +static std::aligned_storage::type + BufferQueueStorage; +static BufferQueue *BQ = nullptr; + +thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers; +thread_local std::aligned_storage::type + AllocatorsStorage; +thread_local std::aligned_storage::type + FunctionCallTrieStorage; +thread_local ProfilingData TLD{{0}, {0}}; +thread_local atomic_uint8_t ReentranceGuard{0}; + +// We use a separate guard for ensuring that for this thread, if we're already +// cleaning up, that any signal handlers don't attempt to cleanup nor +// initialise. 
+thread_local atomic_uint8_t TLDInitGuard{0}; + +// We also use a separate latch to signal that the thread is exiting, and +// non-essential work should be ignored (things like recording events, etc.). +thread_local atomic_uint8_t ThreadExitingLatch{0}; + +static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT { + thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT { + pthread_setspecific(ProfilingKey, &TLD); + return false; + }(); + (void)ThreadOnce; + + RecursionGuard TLDInit(TLDInitGuard); + if (!TLDInit) + return nullptr; + + if (atomic_load_relaxed(&ThreadExitingLatch)) + return nullptr; + + uptr Allocators = 0; + if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1, + memory_order_acq_rel)) { + bool Success = false; + auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { + if (!Success) + atomic_store(&TLD.Allocators, 0, memory_order_release); + }); + + // Acquire a set of buffers for this thread. + if (BQ == nullptr) + return nullptr; + + if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok) + return nullptr; + auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { + if (!Success) + BQ->releaseBuffer(ThreadBuffers.NodeBuffer); + }); + + if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok) + return nullptr; + auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { + if (!Success) + BQ->releaseBuffer(ThreadBuffers.RootsBuffer); + }); + + if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) != + BufferQueue::ErrorCode::Ok) + return nullptr; + auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { + if (!Success) + BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer); + }); + + if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) != + BufferQueue::ErrorCode::Ok) + return nullptr; + + Success = true; + new (&AllocatorsStorage) FunctionCallTrie::Allocators( + FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers)); + Allocators = 
reinterpret_cast( + reinterpret_cast(&AllocatorsStorage)); + atomic_store(&TLD.Allocators, Allocators, memory_order_release); + } + + if (Allocators == 1) + return nullptr; + + uptr FCT = 0; + if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) { + new (&FunctionCallTrieStorage) + FunctionCallTrie(*reinterpret_cast( + atomic_load_relaxed(&TLD.Allocators))); + FCT = reinterpret_cast( + reinterpret_cast(&FunctionCallTrieStorage)); + atomic_store(&TLD.FCT, FCT, memory_order_release); + } + + if (FCT == 1) + return nullptr; + + return &TLD; +} + +static void cleanupTLD() XRAY_NEVER_INSTRUMENT { + auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel); + if (FCT == reinterpret_cast(reinterpret_cast( + &FunctionCallTrieStorage))) + reinterpret_cast(FCT)->~FunctionCallTrie(); + + auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel); + if (Allocators == + reinterpret_cast( + reinterpret_cast(&AllocatorsStorage))) + reinterpret_cast(Allocators)->~Allocators(); +} + +static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT { + RecursionGuard TLDInit(TLDInitGuard); + if (!TLDInit) + return; + + uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel); + if (P != reinterpret_cast( + reinterpret_cast(&FunctionCallTrieStorage))) + return; + + auto FCT = reinterpret_cast(P); + DCHECK_NE(FCT, nullptr); + + uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel); + if (A != + reinterpret_cast( + reinterpret_cast(&AllocatorsStorage))) + return; + + auto Allocators = reinterpret_cast(A); + DCHECK_NE(Allocators, nullptr); + + // Always move the data into the profile collector. + profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators), + std::move(ThreadBuffers), GetTid()); + + // Re-initialize the ThreadBuffers object to a known "default" state. 
+ ThreadBuffers = FunctionCallTrie::Allocators::Buffers{}; +} + +} // namespace + +const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT { +#ifdef XRAY_PROFILER_DEFAULT_OPTIONS + return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS); +#else + return ""; +#endif +} + +XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT { + if (atomic_load(&ProfilerLogStatus, memory_order_acquire) != + XRayLogInitStatus::XRAY_LOG_FINALIZED) { + if (Verbosity()) + Report("Not flushing profiles, profiling not been finalized.\n"); + return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; + } + + RecursionGuard SignalGuard(ReentranceGuard); + if (!SignalGuard) { + if (Verbosity()) + Report("Cannot finalize properly inside a signal handler!\n"); + atomic_store(&ProfilerLogFlushStatus, + XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING, + memory_order_release); + return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; + } + + s32 Previous = atomic_exchange(&ProfilerLogFlushStatus, + XRayLogFlushStatus::XRAY_LOG_FLUSHING, + memory_order_acq_rel); + if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) { + if (Verbosity()) + Report("Not flushing profiles, implementation still flushing.\n"); + return XRayLogFlushStatus::XRAY_LOG_FLUSHING; + } + + // At this point, we'll create the file that will contain the profile, but + // only if the options say so. + if (!profilingFlags()->no_flush) { + // First check whether we have data in the profile collector service + // before we try and write anything down. + XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0}); + if (B.Data == nullptr) { + if (Verbosity()) + Report("profiling: No data to flush.\n"); + } else { + LogWriter *LW = LogWriter::Open(); + if (LW == nullptr) { + if (Verbosity()) + Report("profiling: Failed to flush to file, dropping data.\n"); + } else { + // Now for each of the buffers, write out the profile data as we would + // see it in memory, verbatim. 
+ while (B.Data != nullptr && B.Size != 0) { + LW->WriteAll(reinterpret_cast(B.Data), + reinterpret_cast(B.Data) + B.Size); + B = profileCollectorService::nextBuffer(B); + } + } + LogWriter::Close(LW); + } + } + + profileCollectorService::reset(); + + atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, + memory_order_release); + atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, + memory_order_release); + + return XRayLogFlushStatus::XRAY_LOG_FLUSHED; +} + +void profilingHandleArg0(int32_t FuncId, + XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { + unsigned char CPU; + auto TSC = readTSC(CPU); + RecursionGuard G(ReentranceGuard); + if (!G) + return; + + auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire); + if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED || + Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING)) + return; + + if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED || + Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) { + postCurrentThreadFCT(TLD); + return; + } + + auto T = getThreadLocalData(); + if (T == nullptr) + return; + + auto FCT = reinterpret_cast(atomic_load_relaxed(&T->FCT)); + switch (Entry) { + case XRayEntryType::ENTRY: + case XRayEntryType::LOG_ARGS_ENTRY: + FCT->enterFunction(FuncId, TSC, CPU); + break; + case XRayEntryType::EXIT: + case XRayEntryType::TAIL: + FCT->exitFunction(FuncId, TSC, CPU); + break; + default: + // FIXME: Handle bugs. 
+ break; + } +} + +void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry, + uint64_t) XRAY_NEVER_INSTRUMENT { + return profilingHandleArg0(FuncId, Entry); +} + +XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT { + s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED; + if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus, + XRayLogInitStatus::XRAY_LOG_FINALIZING, + memory_order_release)) { + if (Verbosity()) + Report("Cannot finalize profile, the profiling is not initialized.\n"); + return static_cast(CurrentStatus); + } + + // Mark then finalize the current generation of buffers. This allows us to let + // the threads currently holding onto new buffers still use them, but let the + // last reference do the memory cleanup. + DCHECK_NE(BQ, nullptr); + BQ->finalize(); + + // Wait a grace period to allow threads to see that we're finalizing. + SleepForMillis(profilingFlags()->grace_period_ms); + + // If we for some reason are entering this function from an instrumented + // handler, we bail out. + RecursionGuard G(ReentranceGuard); + if (!G) + return static_cast(CurrentStatus); + + // Post the current thread's data if we have any. + postCurrentThreadFCT(TLD); + + // Then we force serialize the log data. 
+ profileCollectorService::serialize(); + + atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED, + memory_order_release); + return XRayLogInitStatus::XRAY_LOG_FINALIZED; +} + +XRayLogInitStatus +profilingLoggingInit(size_t, size_t, void *Options, + size_t OptionsSize) XRAY_NEVER_INSTRUMENT { + RecursionGuard G(ReentranceGuard); + if (!G) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + + s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus, + XRayLogInitStatus::XRAY_LOG_INITIALIZING, + memory_order_acq_rel)) { + if (Verbosity()) + Report("Cannot initialize already initialised profiling " + "implementation.\n"); + return static_cast(CurrentStatus); + } + + { + SpinMutexLock Lock(&ProfilerOptionsMutex); + FlagParser ConfigParser; + ProfilerFlags Flags; + Flags.setDefaults(); + registerProfilerFlags(&ConfigParser, &Flags); + ConfigParser.ParseString(profilingCompilerDefinedFlags()); + const char *Env = GetEnv("XRAY_PROFILING_OPTIONS"); + if (Env == nullptr) + Env = ""; + ConfigParser.ParseString(Env); + + // Then parse the configuration string provided. + ConfigParser.ParseString(static_cast(Options)); + if (Verbosity()) + ReportUnrecognizedFlags(); + *profilingFlags() = Flags; + } + + // We need to reset the profile data collection implementation now. + profileCollectorService::reset(); + + // Then also reset the buffer queue implementation. + if (BQ == nullptr) { + bool Success = false; + new (&BufferQueueStorage) + BufferQueue(profilingFlags()->per_thread_allocator_max, + profilingFlags()->buffers_max, Success); + if (!Success) { + if (Verbosity()) + Report("Failed to initialize preallocated memory buffers!"); + atomic_store(&ProfilerLogStatus, + XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, + memory_order_release); + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + } + + // If we've succeded, set the global pointer to the initialised storage. 
+ BQ = reinterpret_cast(&BufferQueueStorage); + } else { + BQ->finalize(); + auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max, + profilingFlags()->buffers_max); + + if (InitStatus != BufferQueue::ErrorCode::Ok) { + if (Verbosity()) + Report("Failed to initialize preallocated memory buffers; error: %s", + BufferQueue::getErrorString(InitStatus)); + atomic_store(&ProfilerLogStatus, + XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, + memory_order_release); + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + } + + DCHECK(!BQ->finalizing()); + } + + // We need to set up the exit handlers. + static pthread_once_t Once = PTHREAD_ONCE_INIT; + pthread_once( + &Once, +[] { + pthread_key_create( + &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT { + if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel)) + return; + + if (P == nullptr) + return; + + auto T = reinterpret_cast(P); + if (atomic_load_relaxed(&T->Allocators) == 0) + return; + + { + // If we're somehow executing this while inside a + // non-reentrant-friendly context, we skip attempting to post + // the current thread's data. + RecursionGuard G(ReentranceGuard); + if (!G) + return; + + postCurrentThreadFCT(*T); + } + }); + + // We also need to set up an exit handler, so that we can get the + // profile information at exit time. We use the C API to do this, to not + // rely on C++ ABI functions for registering exit handlers. + Atexit(+[]() XRAY_NEVER_INSTRUMENT { + if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel)) + return; + + auto Cleanup = + at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); }); + + // Finalize and flush. 
+ if (profilingFinalize() != XRAY_LOG_FINALIZED || + profilingFlush() != XRAY_LOG_FLUSHED) + return; + + if (Verbosity()) + Report("XRay Profile flushed at exit."); + }); + }); + + __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer); + __xray_set_handler(profilingHandleArg0); + __xray_set_handler_arg1(profilingHandleArg1); + + atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED, + memory_order_release); + if (Verbosity()) + Report("XRay Profiling init successful.\n"); + + return XRayLogInitStatus::XRAY_LOG_INITIALIZED; +} + +bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT { + // Set up the flag defaults from the static defaults and the + // compiler-provided defaults. + { + SpinMutexLock Lock(&ProfilerOptionsMutex); + auto *F = profilingFlags(); + F->setDefaults(); + FlagParser ProfilingParser; + registerProfilerFlags(&ProfilingParser, F); + ProfilingParser.ParseString(profilingCompilerDefinedFlags()); + } + + XRayLogImpl Impl{ + profilingLoggingInit, + profilingFinalize, + profilingHandleArg0, + profilingFlush, + }; + auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl); + if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) { + if (Verbosity()) + Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = " + "%d\n", + RegistrationResult); + return false; + } + + if (!internal_strcmp(flags()->xray_mode, "xray-profiling")) + __xray_log_select_mode("xray_profiling"); + return true; +} + +} // namespace __xray + +static auto UNUSED Unused = __xray::profilingDynamicInitializer(); diff --git a/lib/xray/xray_profiling_flags.cc b/lib/xray/xray_profiling_flags.cc deleted file mode 100644 index 0e89b7420f8c..000000000000 --- a/lib/xray/xray_profiling_flags.cc +++ /dev/null @@ -1,39 +0,0 @@ -//===-- xray_flags.h -------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instrumentation system. -// -// XRay runtime flags. -//===----------------------------------------------------------------------===// - -#include "xray_profiling_flags.h" -#include "sanitizer_common/sanitizer_common.h" -#include "sanitizer_common/sanitizer_flag_parser.h" -#include "sanitizer_common/sanitizer_libc.h" -#include "xray_defs.h" - -namespace __xray { - -// Storage for the profiling flags. -ProfilerFlags xray_profiling_flags_dont_use_directly; - -void ProfilerFlags::setDefaults() XRAY_NEVER_INSTRUMENT { -#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; -#include "xray_profiling_flags.inc" -#undef XRAY_FLAG -} - -void registerProfilerFlags(FlagParser *P, - ProfilerFlags *F) XRAY_NEVER_INSTRUMENT { -#define XRAY_FLAG(Type, Name, DefaultValue, Description) \ - RegisterFlag(P, #Name, Description, &F->Name); -#include "xray_profiling_flags.inc" -#undef XRAY_FLAG -} - -} // namespace __xray diff --git a/lib/xray/xray_profiling_flags.cpp b/lib/xray/xray_profiling_flags.cpp new file mode 100644 index 000000000000..0e89b7420f8c --- /dev/null +++ b/lib/xray/xray_profiling_flags.cpp @@ -0,0 +1,39 @@ +//===-- xray_flags.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// XRay runtime flags. 
+//===----------------------------------------------------------------------===// + +#include "xray_profiling_flags.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "xray_defs.h" + +namespace __xray { + +// Storage for the profiling flags. +ProfilerFlags xray_profiling_flags_dont_use_directly; + +void ProfilerFlags::setDefaults() XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; +#include "xray_profiling_flags.inc" +#undef XRAY_FLAG +} + +void registerProfilerFlags(FlagParser *P, + ProfilerFlags *F) XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) \ + RegisterFlag(P, #Name, Description, &F->Name); +#include "xray_profiling_flags.inc" +#undef XRAY_FLAG +} + +} // namespace __xray diff --git a/lib/xray/xray_trampoline_powerpc64.cc b/lib/xray/xray_trampoline_powerpc64.cc deleted file mode 100644 index 878c46930fee..000000000000 --- a/lib/xray/xray_trampoline_powerpc64.cc +++ /dev/null @@ -1,15 +0,0 @@ -#include -#include - -namespace __xray { - -extern std::atomic XRayPatchedFunction; - -// Implement this in C++ instead of assembly, to avoid dealing with ToC by hand. -void CallXRayPatchedFunction(int32_t FuncId, XRayEntryType Type) { - auto fptr = __xray::XRayPatchedFunction.load(); - if (fptr != nullptr) - (*fptr)(FuncId, Type); -} - -} // namespace __xray diff --git a/lib/xray/xray_trampoline_powerpc64.cpp b/lib/xray/xray_trampoline_powerpc64.cpp new file mode 100644 index 000000000000..878c46930fee --- /dev/null +++ b/lib/xray/xray_trampoline_powerpc64.cpp @@ -0,0 +1,15 @@ +#include +#include + +namespace __xray { + +extern std::atomic XRayPatchedFunction; + +// Implement this in C++ instead of assembly, to avoid dealing with ToC by hand. 
+void CallXRayPatchedFunction(int32_t FuncId, XRayEntryType Type) { + auto fptr = __xray::XRayPatchedFunction.load(); + if (fptr != nullptr) + (*fptr)(FuncId, Type); +} + +} // namespace __xray diff --git a/lib/xray/xray_utils.cc b/lib/xray/xray_utils.cc deleted file mode 100644 index 82674baa5a0c..000000000000 --- a/lib/xray/xray_utils.cc +++ /dev/null @@ -1,195 +0,0 @@ -//===-- xray_utils.cc -------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instrumentation system. -// -//===----------------------------------------------------------------------===// -#include "xray_utils.h" - -#include "sanitizer_common/sanitizer_allocator_internal.h" -#include "sanitizer_common/sanitizer_common.h" -#include "xray_allocator.h" -#include "xray_defs.h" -#include "xray_flags.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if SANITIZER_FUCHSIA -#include "sanitizer_common/sanitizer_symbolizer_fuchsia.h" - -#include -#include -#include -#include -#include -#endif - -namespace __xray { - -#if SANITIZER_FUCHSIA -constexpr const char* ProfileSinkName = "llvm-xray"; - -LogWriter::~LogWriter() { - _zx_handle_close(Vmo); -} - -void LogWriter::WriteAll(const char *Begin, const char *End) XRAY_NEVER_INSTRUMENT { - if (Begin == End) - return; - auto TotalBytes = std::distance(Begin, End); - - const size_t PageSize = flags()->xray_page_size_override > 0 - ? flags()->xray_page_size_override - : GetPageSizeCached(); - if (RoundUpTo(Offset, PageSize) != RoundUpTo(Offset + TotalBytes, PageSize)) { - // Resize the VMO to ensure there's sufficient space for the data. 
- zx_status_t Status = _zx_vmo_set_size(Vmo, Offset + TotalBytes); - if (Status != ZX_OK) { - Report("Failed to resize VMO: %s\n", _zx_status_get_string(Status)); - return; - } - } - - // Write the data into VMO. - zx_status_t Status = _zx_vmo_write(Vmo, Begin, Offset, TotalBytes); - if (Status != ZX_OK) { - Report("Failed to write: %s\n", _zx_status_get_string(Status)); - return; - } - Offset += TotalBytes; -} - -void LogWriter::Flush() XRAY_NEVER_INSTRUMENT { - // Nothing to do here since WriteAll writes directly into the VMO. -} - -LogWriter *LogWriter::Open() XRAY_NEVER_INSTRUMENT { - // Create VMO to hold the profile data. - zx_handle_t Vmo; - zx_status_t Status = _zx_vmo_create(0, ZX_VMO_RESIZABLE, &Vmo); - if (Status != ZX_OK) { - Report("XRay: cannot create VMO: %s\n", _zx_status_get_string(Status)); - return nullptr; - } - - // Get the KOID of the current process to use in the VMO name. - zx_info_handle_basic_t Info; - Status = _zx_object_get_info(_zx_process_self(), ZX_INFO_HANDLE_BASIC, &Info, - sizeof(Info), NULL, NULL); - if (Status != ZX_OK) { - Report("XRay: cannot get basic info about current process handle: %s\n", - _zx_status_get_string(Status)); - return nullptr; - } - - // Give the VMO a name including our process KOID so it's easy to spot. - char VmoName[ZX_MAX_NAME_LEN]; - internal_snprintf(VmoName, sizeof(VmoName), "%s.%zu", ProfileSinkName, - Info.koid); - _zx_object_set_property(Vmo, ZX_PROP_NAME, VmoName, strlen(VmoName)); - - // Duplicate the handle since __sanitizer_publish_data consumes it and - // LogWriter needs to hold onto it. - zx_handle_t Handle; - Status =_zx_handle_duplicate(Vmo, ZX_RIGHT_SAME_RIGHTS, &Handle); - if (Status != ZX_OK) { - Report("XRay: cannot duplicate VMO handle: %s\n", - _zx_status_get_string(Status)); - return nullptr; - } - - // Publish the VMO that receives the logging. Note the VMO's contents can - // grow and change after publication. The contents won't be read out until - // after the process exits. 
- __sanitizer_publish_data(ProfileSinkName, Handle); - - // Use the dumpfile symbolizer markup element to write the name of the VMO. - Report("XRay: " FORMAT_DUMPFILE "\n", ProfileSinkName, VmoName); - - LogWriter *LW = reinterpret_cast(InternalAlloc(sizeof(LogWriter))); - new (LW) LogWriter(Vmo); - return LW; -} - -void LogWriter::Close(LogWriter *LW) { - LW->~LogWriter(); - InternalFree(LW); -} -#else // SANITIZER_FUCHSIA -LogWriter::~LogWriter() { - internal_close(Fd); -} - -void LogWriter::WriteAll(const char *Begin, const char *End) XRAY_NEVER_INSTRUMENT { - if (Begin == End) - return; - auto TotalBytes = std::distance(Begin, End); - while (auto Written = write(Fd, Begin, TotalBytes)) { - if (Written < 0) { - if (errno == EINTR) - continue; // Try again. - Report("Failed to write; errno = %d\n", errno); - return; - } - TotalBytes -= Written; - if (TotalBytes == 0) - break; - Begin += Written; - } -} - -void LogWriter::Flush() XRAY_NEVER_INSTRUMENT { - fsync(Fd); -} - -LogWriter *LogWriter::Open() XRAY_NEVER_INSTRUMENT { - // Open a temporary file once for the log. - char TmpFilename[256] = {}; - char TmpWildcardPattern[] = "XXXXXX"; - auto **Argv = GetArgv(); - const char *Progname = !Argv ? 
"(unknown)" : Argv[0]; - const char *LastSlash = internal_strrchr(Progname, '/'); - - if (LastSlash != nullptr) - Progname = LastSlash + 1; - - int NeededLength = internal_snprintf( - TmpFilename, sizeof(TmpFilename), "%s%s.%s", - flags()->xray_logfile_base, Progname, TmpWildcardPattern); - if (NeededLength > int(sizeof(TmpFilename))) { - Report("XRay log file name too long (%d): %s\n", NeededLength, TmpFilename); - return nullptr; - } - int Fd = mkstemp(TmpFilename); - if (Fd == -1) { - Report("XRay: Failed opening temporary file '%s'; not logging events.\n", - TmpFilename); - return nullptr; - } - if (Verbosity()) - Report("XRay: Log file in '%s'\n", TmpFilename); - - LogWriter *LW = allocate(); - new (LW) LogWriter(Fd); - return LW; -} - -void LogWriter::Close(LogWriter *LW) { - LW->~LogWriter(); - deallocate(LW); -} -#endif // SANITIZER_FUCHSIA - -} // namespace __xray diff --git a/lib/xray/xray_utils.cpp b/lib/xray/xray_utils.cpp new file mode 100644 index 000000000000..1036d17a7725 --- /dev/null +++ b/lib/xray/xray_utils.cpp @@ -0,0 +1,195 @@ +//===-- xray_utils.cpp ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. 
+// +//===----------------------------------------------------------------------===// +#include "xray_utils.h" + +#include "sanitizer_common/sanitizer_allocator_internal.h" +#include "sanitizer_common/sanitizer_common.h" +#include "xray_allocator.h" +#include "xray_defs.h" +#include "xray_flags.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if SANITIZER_FUCHSIA +#include "sanitizer_common/sanitizer_symbolizer_fuchsia.h" + +#include +#include +#include +#include +#include +#endif + +namespace __xray { + +#if SANITIZER_FUCHSIA +constexpr const char* ProfileSinkName = "llvm-xray"; + +LogWriter::~LogWriter() { + _zx_handle_close(Vmo); +} + +void LogWriter::WriteAll(const char *Begin, const char *End) XRAY_NEVER_INSTRUMENT { + if (Begin == End) + return; + auto TotalBytes = std::distance(Begin, End); + + const size_t PageSize = flags()->xray_page_size_override > 0 + ? flags()->xray_page_size_override + : GetPageSizeCached(); + if (RoundUpTo(Offset, PageSize) != RoundUpTo(Offset + TotalBytes, PageSize)) { + // Resize the VMO to ensure there's sufficient space for the data. + zx_status_t Status = _zx_vmo_set_size(Vmo, Offset + TotalBytes); + if (Status != ZX_OK) { + Report("Failed to resize VMO: %s\n", _zx_status_get_string(Status)); + return; + } + } + + // Write the data into VMO. + zx_status_t Status = _zx_vmo_write(Vmo, Begin, Offset, TotalBytes); + if (Status != ZX_OK) { + Report("Failed to write: %s\n", _zx_status_get_string(Status)); + return; + } + Offset += TotalBytes; +} + +void LogWriter::Flush() XRAY_NEVER_INSTRUMENT { + // Nothing to do here since WriteAll writes directly into the VMO. +} + +LogWriter *LogWriter::Open() XRAY_NEVER_INSTRUMENT { + // Create VMO to hold the profile data. 
+ zx_handle_t Vmo; + zx_status_t Status = _zx_vmo_create(0, ZX_VMO_RESIZABLE, &Vmo); + if (Status != ZX_OK) { + Report("XRay: cannot create VMO: %s\n", _zx_status_get_string(Status)); + return nullptr; + } + + // Get the KOID of the current process to use in the VMO name. + zx_info_handle_basic_t Info; + Status = _zx_object_get_info(_zx_process_self(), ZX_INFO_HANDLE_BASIC, &Info, + sizeof(Info), NULL, NULL); + if (Status != ZX_OK) { + Report("XRay: cannot get basic info about current process handle: %s\n", + _zx_status_get_string(Status)); + return nullptr; + } + + // Give the VMO a name including our process KOID so it's easy to spot. + char VmoName[ZX_MAX_NAME_LEN]; + internal_snprintf(VmoName, sizeof(VmoName), "%s.%zu", ProfileSinkName, + Info.koid); + _zx_object_set_property(Vmo, ZX_PROP_NAME, VmoName, strlen(VmoName)); + + // Duplicate the handle since __sanitizer_publish_data consumes it and + // LogWriter needs to hold onto it. + zx_handle_t Handle; + Status =_zx_handle_duplicate(Vmo, ZX_RIGHT_SAME_RIGHTS, &Handle); + if (Status != ZX_OK) { + Report("XRay: cannot duplicate VMO handle: %s\n", + _zx_status_get_string(Status)); + return nullptr; + } + + // Publish the VMO that receives the logging. Note the VMO's contents can + // grow and change after publication. The contents won't be read out until + // after the process exits. + __sanitizer_publish_data(ProfileSinkName, Handle); + + // Use the dumpfile symbolizer markup element to write the name of the VMO. 
+ Report("XRay: " FORMAT_DUMPFILE "\n", ProfileSinkName, VmoName); + + LogWriter *LW = reinterpret_cast(InternalAlloc(sizeof(LogWriter))); + new (LW) LogWriter(Vmo); + return LW; +} + +void LogWriter::Close(LogWriter *LW) { + LW->~LogWriter(); + InternalFree(LW); +} +#else // SANITIZER_FUCHSIA +LogWriter::~LogWriter() { + internal_close(Fd); +} + +void LogWriter::WriteAll(const char *Begin, const char *End) XRAY_NEVER_INSTRUMENT { + if (Begin == End) + return; + auto TotalBytes = std::distance(Begin, End); + while (auto Written = write(Fd, Begin, TotalBytes)) { + if (Written < 0) { + if (errno == EINTR) + continue; // Try again. + Report("Failed to write; errno = %d\n", errno); + return; + } + TotalBytes -= Written; + if (TotalBytes == 0) + break; + Begin += Written; + } +} + +void LogWriter::Flush() XRAY_NEVER_INSTRUMENT { + fsync(Fd); +} + +LogWriter *LogWriter::Open() XRAY_NEVER_INSTRUMENT { + // Open a temporary file once for the log. + char TmpFilename[256] = {}; + char TmpWildcardPattern[] = "XXXXXX"; + auto **Argv = GetArgv(); + const char *Progname = !Argv ? 
"(unknown)" : Argv[0]; + const char *LastSlash = internal_strrchr(Progname, '/'); + + if (LastSlash != nullptr) + Progname = LastSlash + 1; + + int NeededLength = internal_snprintf( + TmpFilename, sizeof(TmpFilename), "%s%s.%s", + flags()->xray_logfile_base, Progname, TmpWildcardPattern); + if (NeededLength > int(sizeof(TmpFilename))) { + Report("XRay log file name too long (%d): %s\n", NeededLength, TmpFilename); + return nullptr; + } + int Fd = mkstemp(TmpFilename); + if (Fd == -1) { + Report("XRay: Failed opening temporary file '%s'; not logging events.\n", + TmpFilename); + return nullptr; + } + if (Verbosity()) + Report("XRay: Log file in '%s'\n", TmpFilename); + + LogWriter *LW = allocate(); + new (LW) LogWriter(Fd); + return LW; +} + +void LogWriter::Close(LogWriter *LW) { + LW->~LogWriter(); + deallocate(LW); +} +#endif // SANITIZER_FUCHSIA + +} // namespace __xray diff --git a/lib/xray/xray_x86_64.cc b/lib/xray/xray_x86_64.cc deleted file mode 100644 index e63ee1b3bd02..000000000000 --- a/lib/xray/xray_x86_64.cc +++ /dev/null @@ -1,353 +0,0 @@ -#include "cpuid.h" -#include "sanitizer_common/sanitizer_common.h" -#if !SANITIZER_FUCHSIA -#include "sanitizer_common/sanitizer_posix.h" -#endif -#include "xray_defs.h" -#include "xray_interface_internal.h" - -#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC -#include -#if SANITIZER_OPENBSD -#include -#include -#endif -#include -#elif SANITIZER_FUCHSIA -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace __xray { - -#if SANITIZER_LINUX -static std::pair -retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT { - auto BytesToRead = std::distance(Begin, End); - ssize_t BytesRead; - ssize_t TotalBytesRead = 0; - while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) { - if (BytesRead == -1) { - if (errno == EINTR) - continue; - Report("Read error; errno = %d\n", errno); - return 
std::make_pair(TotalBytesRead, false); - } - - TotalBytesRead += BytesRead; - BytesToRead -= BytesRead; - Begin += BytesRead; - } - return std::make_pair(TotalBytesRead, true); -} - -static bool readValueFromFile(const char *Filename, - long long *Value) XRAY_NEVER_INSTRUMENT { - int Fd = open(Filename, O_RDONLY | O_CLOEXEC); - if (Fd == -1) - return false; - static constexpr size_t BufSize = 256; - char Line[BufSize] = {}; - ssize_t BytesRead; - bool Success; - std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize); - close(Fd); - if (!Success) - return false; - const char *End = nullptr; - long long Tmp = internal_simple_strtoll(Line, &End, 10); - bool Result = false; - if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) { - *Value = Tmp; - Result = true; - } - return Result; -} - -uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { - long long TSCFrequency = -1; - if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", - &TSCFrequency)) { - TSCFrequency *= 1000; - } else if (readValueFromFile( - "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", - &TSCFrequency)) { - TSCFrequency *= 1000; - } else { - Report("Unable to determine CPU frequency for TSC accounting.\n"); - } - return TSCFrequency == -1 ? 
0 : static_cast(TSCFrequency); -} -#elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC -uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { - long long TSCFrequency = -1; - size_t tscfreqsz = sizeof(TSCFrequency); -#if SANITIZER_OPENBSD - int Mib[2] = { CTL_MACHDEP, CPU_TSCFREQ }; - if (internal_sysctl(Mib, 2, &TSCFrequency, &tscfreqsz, NULL, 0) != -1) { -#elif SANITIZER_MAC - if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency, - &tscfreqsz, NULL, 0) != -1) { - -#else - if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz, - NULL, 0) != -1) { -#endif - return static_cast(TSCFrequency); - } else { - Report("Unable to determine CPU frequency for TSC accounting.\n"); - } - - return 0; -} -#elif !SANITIZER_FUCHSIA -uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { - /* Not supported */ - return 0; -} -#endif - -static constexpr uint8_t CallOpCode = 0xe8; -static constexpr uint16_t MovR10Seq = 0xba41; -static constexpr uint16_t Jmp9Seq = 0x09eb; -static constexpr uint16_t Jmp20Seq = 0x14eb; -static constexpr uint16_t Jmp15Seq = 0x0feb; -static constexpr uint8_t JmpOpCode = 0xe9; -static constexpr uint8_t RetOpCode = 0xc3; -static constexpr uint16_t NopwSeq = 0x9066; - -static constexpr int64_t MinOffset{std::numeric_limits::min()}; -static constexpr int64_t MaxOffset{std::numeric_limits::max()}; - -bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled, - void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { - // Here we do the dance of replacing the following sled: - // - // xray_sled_n: - // jmp +9 - // <9 byte nop> - // - // With the following: - // - // mov r10d, - // call - // - // We need to do this in the following order: - // - // 1. Put the function id first, 2 bytes from the start of the sled (just - // after the 2-byte jmp instruction). - // 2. Put the call opcode 6 bytes from the start of the sled. - // 3. Put the relative offset 7 bytes from the start of the sled. 
- // 4. Do an atomic write over the jmp instruction for the "mov r10d" - // opcode and first operand. - // - // Prerequisite is to compute the relative offset to the trampoline's address. - int64_t TrampolineOffset = reinterpret_cast(Trampoline) - - (static_cast(Sled.Address) + 11); - if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { - Report("XRay Entry trampoline (%p) too far from sled (%p)\n", - Trampoline, reinterpret_cast(Sled.Address)); - return false; - } - if (Enable) { - *reinterpret_cast(Sled.Address + 2) = FuncId; - *reinterpret_cast(Sled.Address + 6) = CallOpCode; - *reinterpret_cast(Sled.Address + 7) = TrampolineOffset; - std::atomic_store_explicit( - reinterpret_cast *>(Sled.Address), MovR10Seq, - std::memory_order_release); - } else { - std::atomic_store_explicit( - reinterpret_cast *>(Sled.Address), Jmp9Seq, - std::memory_order_release); - // FIXME: Write out the nops still? - } - return true; -} - -bool patchFunctionExit(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - // Here we do the dance of replacing the following sled: - // - // xray_sled_n: - // ret - // <10 byte nop> - // - // With the following: - // - // mov r10d, - // jmp - // - // 1. Put the function id first, 2 bytes from the start of the sled (just - // after the 1-byte ret instruction). - // 2. Put the jmp opcode 6 bytes from the start of the sled. - // 3. Put the relative offset 7 bytes from the start of the sled. - // 4. Do an atomic write over the jmp instruction for the "mov r10d" - // opcode and first operand. - // - // Prerequisite is to compute the relative offset fo the - // __xray_FunctionExit function's address. 
- int64_t TrampolineOffset = reinterpret_cast(__xray_FunctionExit) - - (static_cast(Sled.Address) + 11); - if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { - Report("XRay Exit trampoline (%p) too far from sled (%p)\n", - __xray_FunctionExit, reinterpret_cast(Sled.Address)); - return false; - } - if (Enable) { - *reinterpret_cast(Sled.Address + 2) = FuncId; - *reinterpret_cast(Sled.Address + 6) = JmpOpCode; - *reinterpret_cast(Sled.Address + 7) = TrampolineOffset; - std::atomic_store_explicit( - reinterpret_cast *>(Sled.Address), MovR10Seq, - std::memory_order_release); - } else { - std::atomic_store_explicit( - reinterpret_cast *>(Sled.Address), RetOpCode, - std::memory_order_release); - // FIXME: Write out the nops still? - } - return true; -} - -bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - // Here we do the dance of replacing the tail call sled with a similar - // sequence as the entry sled, but calls the tail exit sled instead. - int64_t TrampolineOffset = - reinterpret_cast(__xray_FunctionTailExit) - - (static_cast(Sled.Address) + 11); - if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { - Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n", - __xray_FunctionTailExit, reinterpret_cast(Sled.Address)); - return false; - } - if (Enable) { - *reinterpret_cast(Sled.Address + 2) = FuncId; - *reinterpret_cast(Sled.Address + 6) = CallOpCode; - *reinterpret_cast(Sled.Address + 7) = TrampolineOffset; - std::atomic_store_explicit( - reinterpret_cast *>(Sled.Address), MovR10Seq, - std::memory_order_release); - } else { - std::atomic_store_explicit( - reinterpret_cast *>(Sled.Address), Jmp9Seq, - std::memory_order_release); - // FIXME: Write out the nops still? 
- } - return true; -} - -bool patchCustomEvent(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - // Here we do the dance of replacing the following sled: - // - // In Version 0: - // - // xray_sled_n: - // jmp +20 // 2 bytes - // ... - // - // With the following: - // - // nopw // 2 bytes* - // ... - // - // - // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'. - // - // --- - // - // In Version 1: - // - // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back - // to a jmp, use 15 bytes instead. - // - if (Enable) { - std::atomic_store_explicit( - reinterpret_cast *>(Sled.Address), NopwSeq, - std::memory_order_release); - } else { - switch (Sled.Version) { - case 1: - std::atomic_store_explicit( - reinterpret_cast *>(Sled.Address), Jmp15Seq, - std::memory_order_release); - break; - case 0: - default: - std::atomic_store_explicit( - reinterpret_cast *>(Sled.Address), Jmp20Seq, - std::memory_order_release); - break; - } - } - return false; -} - -bool patchTypedEvent(const bool Enable, const uint32_t FuncId, - const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { - // Here we do the dance of replacing the following sled: - // - // xray_sled_n: - // jmp +20 // 2 byte instruction - // ... - // - // With the following: - // - // nopw // 2 bytes - // ... - // - // - // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'. - // The 20 byte sled stashes three argument registers, calls the trampoline, - // unstashes the registers and returns. If the arguments are already in - // the correct registers, the stashing and unstashing become equivalently - // sized nops. 
- if (Enable) { - std::atomic_store_explicit( - reinterpret_cast *>(Sled.Address), NopwSeq, - std::memory_order_release); - } else { - std::atomic_store_explicit( - reinterpret_cast *>(Sled.Address), Jmp20Seq, - std::memory_order_release); - } - return false; -} - -#if !SANITIZER_FUCHSIA -// We determine whether the CPU we're running on has the correct features we -// need. In x86_64 this will be rdtscp support. -bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { - unsigned int EAX, EBX, ECX, EDX; - - // We check whether rdtscp support is enabled. According to the x86_64 manual, - // level should be set at 0x80000001, and we should have a look at bit 27 in - // EDX. That's 0x8000000 (or 1u << 27). - __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX) - : "0"(0x80000001)); - if (!(EDX & (1u << 27))) { - Report("Missing rdtscp support.\n"); - return false; - } - // Also check whether we can determine the CPU frequency, since if we cannot, - // we should use the emulated TSC instead. 
- if (!getTSCFrequency()) { - Report("Unable to determine CPU frequency.\n"); - return false; - } - return true; -} -#endif - -} // namespace __xray diff --git a/lib/xray/xray_x86_64.cpp b/lib/xray/xray_x86_64.cpp new file mode 100644 index 000000000000..e63ee1b3bd02 --- /dev/null +++ b/lib/xray/xray_x86_64.cpp @@ -0,0 +1,353 @@ +#include "cpuid.h" +#include "sanitizer_common/sanitizer_common.h" +#if !SANITIZER_FUCHSIA +#include "sanitizer_common/sanitizer_posix.h" +#endif +#include "xray_defs.h" +#include "xray_interface_internal.h" + +#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC +#include +#if SANITIZER_OPENBSD +#include +#include +#endif +#include +#elif SANITIZER_FUCHSIA +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace __xray { + +#if SANITIZER_LINUX +static std::pair +retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT { + auto BytesToRead = std::distance(Begin, End); + ssize_t BytesRead; + ssize_t TotalBytesRead = 0; + while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) { + if (BytesRead == -1) { + if (errno == EINTR) + continue; + Report("Read error; errno = %d\n", errno); + return std::make_pair(TotalBytesRead, false); + } + + TotalBytesRead += BytesRead; + BytesToRead -= BytesRead; + Begin += BytesRead; + } + return std::make_pair(TotalBytesRead, true); +} + +static bool readValueFromFile(const char *Filename, + long long *Value) XRAY_NEVER_INSTRUMENT { + int Fd = open(Filename, O_RDONLY | O_CLOEXEC); + if (Fd == -1) + return false; + static constexpr size_t BufSize = 256; + char Line[BufSize] = {}; + ssize_t BytesRead; + bool Success; + std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize); + close(Fd); + if (!Success) + return false; + const char *End = nullptr; + long long Tmp = internal_simple_strtoll(Line, &End, 10); + bool Result = false; + if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) { + 
*Value = Tmp; + Result = true; + } + return Result; +} + +uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { + long long TSCFrequency = -1; + if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", + &TSCFrequency)) { + TSCFrequency *= 1000; + } else if (readValueFromFile( + "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", + &TSCFrequency)) { + TSCFrequency *= 1000; + } else { + Report("Unable to determine CPU frequency for TSC accounting.\n"); + } + return TSCFrequency == -1 ? 0 : static_cast(TSCFrequency); +} +#elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC +uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { + long long TSCFrequency = -1; + size_t tscfreqsz = sizeof(TSCFrequency); +#if SANITIZER_OPENBSD + int Mib[2] = { CTL_MACHDEP, CPU_TSCFREQ }; + if (internal_sysctl(Mib, 2, &TSCFrequency, &tscfreqsz, NULL, 0) != -1) { +#elif SANITIZER_MAC + if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency, + &tscfreqsz, NULL, 0) != -1) { + +#else + if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz, + NULL, 0) != -1) { +#endif + return static_cast(TSCFrequency); + } else { + Report("Unable to determine CPU frequency for TSC accounting.\n"); + } + + return 0; +} +#elif !SANITIZER_FUCHSIA +uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { + /* Not supported */ + return 0; +} +#endif + +static constexpr uint8_t CallOpCode = 0xe8; +static constexpr uint16_t MovR10Seq = 0xba41; +static constexpr uint16_t Jmp9Seq = 0x09eb; +static constexpr uint16_t Jmp20Seq = 0x14eb; +static constexpr uint16_t Jmp15Seq = 0x0feb; +static constexpr uint8_t JmpOpCode = 0xe9; +static constexpr uint8_t RetOpCode = 0xc3; +static constexpr uint16_t NopwSeq = 0x9066; + +static constexpr int64_t MinOffset{std::numeric_limits::min()}; +static constexpr int64_t MaxOffset{std::numeric_limits::max()}; + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled, + void (*Trampoline)()) 
XRAY_NEVER_INSTRUMENT { + // Here we do the dance of replacing the following sled: + // + // xray_sled_n: + // jmp +9 + // <9 byte nop> + // + // With the following: + // + // mov r10d, + // call + // + // We need to do this in the following order: + // + // 1. Put the function id first, 2 bytes from the start of the sled (just + // after the 2-byte jmp instruction). + // 2. Put the call opcode 6 bytes from the start of the sled. + // 3. Put the relative offset 7 bytes from the start of the sled. + // 4. Do an atomic write over the jmp instruction for the "mov r10d" + // opcode and first operand. + // + // Prerequisite is to compute the relative offset to the trampoline's address. + int64_t TrampolineOffset = reinterpret_cast(Trampoline) - + (static_cast(Sled.Address) + 11); + if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { + Report("XRay Entry trampoline (%p) too far from sled (%p)\n", + Trampoline, reinterpret_cast(Sled.Address)); + return false; + } + if (Enable) { + *reinterpret_cast(Sled.Address + 2) = FuncId; + *reinterpret_cast(Sled.Address + 6) = CallOpCode; + *reinterpret_cast(Sled.Address + 7) = TrampolineOffset; + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), MovR10Seq, + std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), Jmp9Seq, + std::memory_order_release); + // FIXME: Write out the nops still? + } + return true; +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // Here we do the dance of replacing the following sled: + // + // xray_sled_n: + // ret + // <10 byte nop> + // + // With the following: + // + // mov r10d, + // jmp + // + // 1. Put the function id first, 2 bytes from the start of the sled (just + // after the 1-byte ret instruction). + // 2. Put the jmp opcode 6 bytes from the start of the sled. + // 3. Put the relative offset 7 bytes from the start of the sled. + // 4. 
Do an atomic write over the jmp instruction for the "mov r10d" + // opcode and first operand. + // + // Prerequisite is to compute the relative offset fo the + // __xray_FunctionExit function's address. + int64_t TrampolineOffset = reinterpret_cast(__xray_FunctionExit) - + (static_cast(Sled.Address) + 11); + if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { + Report("XRay Exit trampoline (%p) too far from sled (%p)\n", + __xray_FunctionExit, reinterpret_cast(Sled.Address)); + return false; + } + if (Enable) { + *reinterpret_cast(Sled.Address + 2) = FuncId; + *reinterpret_cast(Sled.Address + 6) = JmpOpCode; + *reinterpret_cast(Sled.Address + 7) = TrampolineOffset; + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), MovR10Seq, + std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), RetOpCode, + std::memory_order_release); + // FIXME: Write out the nops still? + } + return true; +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // Here we do the dance of replacing the tail call sled with a similar + // sequence as the entry sled, but calls the tail exit sled instead. 
+ int64_t TrampolineOffset = + reinterpret_cast(__xray_FunctionTailExit) - + (static_cast(Sled.Address) + 11); + if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { + Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n", + __xray_FunctionTailExit, reinterpret_cast(Sled.Address)); + return false; + } + if (Enable) { + *reinterpret_cast(Sled.Address + 2) = FuncId; + *reinterpret_cast(Sled.Address + 6) = CallOpCode; + *reinterpret_cast(Sled.Address + 7) = TrampolineOffset; + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), MovR10Seq, + std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), Jmp9Seq, + std::memory_order_release); + // FIXME: Write out the nops still? + } + return true; +} + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // Here we do the dance of replacing the following sled: + // + // In Version 0: + // + // xray_sled_n: + // jmp +20 // 2 bytes + // ... + // + // With the following: + // + // nopw // 2 bytes* + // ... + // + // + // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'. + // + // --- + // + // In Version 1: + // + // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back + // to a jmp, use 15 bytes instead. 
+ // + if (Enable) { + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), NopwSeq, + std::memory_order_release); + } else { + switch (Sled.Version) { + case 1: + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), Jmp15Seq, + std::memory_order_release); + break; + case 0: + default: + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), Jmp20Seq, + std::memory_order_release); + break; + } + } + return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // Here we do the dance of replacing the following sled: + // + // xray_sled_n: + // jmp +20 // 2 byte instruction + // ... + // + // With the following: + // + // nopw // 2 bytes + // ... + // + // + // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'. + // The 20 byte sled stashes three argument registers, calls the trampoline, + // unstashes the registers and returns. If the arguments are already in + // the correct registers, the stashing and unstashing become equivalently + // sized nops. + if (Enable) { + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), NopwSeq, + std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), Jmp20Seq, + std::memory_order_release); + } + return false; +} + +#if !SANITIZER_FUCHSIA +// We determine whether the CPU we're running on has the correct features we +// need. In x86_64 this will be rdtscp support. +bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { + unsigned int EAX, EBX, ECX, EDX; + + // We check whether rdtscp support is enabled. According to the x86_64 manual, + // level should be set at 0x80000001, and we should have a look at bit 27 in + // EDX. That's 0x8000000 (or 1u << 27). 
+ __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX) + : "0"(0x80000001)); + if (!(EDX & (1u << 27))) { + Report("Missing rdtscp support.\n"); + return false; + } + // Also check whether we can determine the CPU frequency, since if we cannot, + // we should use the emulated TSC instead. + if (!getTSCFrequency()) { + Report("Unable to determine CPU frequency.\n"); + return false; + } + return true; +} +#endif + +} // namespace __xray -- cgit v1.2.3