Diffstat (limited to 'contrib/llvm-project/compiler-rt/lib/xray')
58 files changed, 9848 insertions, 0 deletions
| diff --git a/contrib/llvm-project/compiler-rt/lib/xray/weak_symbols.txt b/contrib/llvm-project/compiler-rt/lib/xray/weak_symbols.txt new file mode 100644 index 000000000000..963fff2d697e --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/weak_symbols.txt @@ -0,0 +1,4 @@ +___start_xray_fn_idx +___start_xray_instr_map +___stop_xray_fn_idx +___stop_xray_instr_map diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_AArch64.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_AArch64.cpp new file mode 100644 index 000000000000..c1d77758946e --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_AArch64.cpp @@ -0,0 +1,143 @@ +//===-- xray_AArch64.cpp ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of AArch64-specific routines (64-bit). +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include <atomic> +#include <cassert> + +extern "C" void __clear_cache(void *start, void *end); + +namespace __xray { + +// The machine codes for some instructions used in runtime patching. +enum class PatchOpcodes : uint32_t { +  PO_StpX0X30SP_m16e = 0xA9BF7BE0, // STP X0, X30, [SP, #-16]! +  PO_LdrX16_12 = 0x58000070,       // LDR X16, #12 +  PO_BlrX16 = 0xD63F0200,          // BLR X16 +  PO_LdpX0X30SP_16 = 0xA8C17BE0,   // LDP X0, X30, [SP], #16 +  PO_B32 = 0x14000008              // B #32 +}; + +inline static bool patchSled(const bool Enable, const uint32_t FuncId, +                             const XRaySledEntry &Sled, +                             void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { +  // When |Enable| == true, +  // We replace the following compile-time stub (sled): +  // +  // xray_sled_n: +  //   B #32 +  //   7 NOPs (24 bytes) +  // +  // With the following runtime patch: +  // +  // xray_sled_n: +  //   STP X0, X30, [SP, #-16]! ; PUSH {r0, lr} +  //   LDR W17, #12 ; W17 := function ID +  //   LDR X16,#12 ; X16 := address of the trampoline +  //   BLR X16 +  //   ;DATA: 32 bits of function ID +  //   ;DATA: lower 32 bits of the address of the trampoline +  //   ;DATA: higher 32 bits of the address of the trampoline +  //   LDP X0, X30, [SP], #16 ; POP {r0, lr} +  // +  // Replacement of the first 4-byte instruction should be the last and atomic +  // operation, so that the user code which reaches the sled concurrently +  // either jumps over the whole sled, or executes the whole sled when the +  // latter is ready. 
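+  //
+  // (A naturally aligned 4-byte store is single-copy atomic on AArch64,
+  // which is why the first word is written last with release semantics;
+  // __clear_cache then brings the instruction cache back in sync with the
+  // patched data cache lines.)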
+  // +  // When |Enable|==false, we set back the first instruction in the sled to be +  //   B #32 + +  uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.address()); +  uint32_t *CurAddress = FirstAddress + 1; +  if (Enable) { +    *CurAddress++ = 0x18000071; // ldr w17, #12 +    *CurAddress = uint32_t(PatchOpcodes::PO_LdrX16_12); +    CurAddress++; +    *CurAddress = uint32_t(PatchOpcodes::PO_BlrX16); +    CurAddress++; +    *CurAddress = FuncId; +    CurAddress++; +    *reinterpret_cast<void (**)()>(CurAddress) = TracingHook; +    CurAddress += 2; +    *CurAddress = uint32_t(PatchOpcodes::PO_LdpX0X30SP_16); +    CurAddress++; +    std::atomic_store_explicit( +        reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), +        uint32_t(PatchOpcodes::PO_StpX0X30SP_m16e), std::memory_order_release); +  } else { +    std::atomic_store_explicit( +        reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), +        uint32_t(PatchOpcodes::PO_B32), std::memory_order_release); +  } +  __clear_cache(reinterpret_cast<char *>(FirstAddress), +                reinterpret_cast<char *>(CurAddress)); +  return true; +} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, +                        const XRaySledEntry &Sled, +                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { +  return patchSled(Enable, FuncId, Sled, Trampoline); +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, +                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, +                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit); +} + +// AArch64AsmPrinter::LowerPATCHABLE_EVENT_CALL generates this code sequence: +// +// .Lxray_event_sled_N: +//   b 1f +//   save x0 and x1 (and also x2 for TYPED_EVENT_CALL) +//   set up x0 and x1 (and also x2 for TYPED_EVENT_CALL) +//   bl __xray_CustomEvent or __xray_TypedEvent +//   restore x0 and x1 (and also x2 for TYPED_EVENT_CALL) +// 1f +// +// There are 6 instructions for EVENT_CALL and 9 for TYPED_EVENT_CALL. +// +// Enable: b .+24 => nop +// Disable: nop => b .+24 +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, +                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  uint32_t Inst = Enable ? 0xd503201f : 0x14000006; +  std::atomic_store_explicit( +      reinterpret_cast<std::atomic<uint32_t> *>(Sled.address()), Inst, +      std::memory_order_release); +  return false; +} + +// Enable: b +36 => nop +// Disable: nop => b +36 +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, +                     const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  uint32_t Inst = Enable ? 0xd503201f : 0x14000009; +  std::atomic_store_explicit( +      reinterpret_cast<std::atomic<uint32_t> *>(Sled.address()), Inst, +      std::memory_order_release); +  return false; +} + +// FIXME: Maybe implement this better? 
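+// (AArch64 mandates the generic timer, so there is no optional CPU feature
+// this runtime strictly needs before reading the counter for timestamps;
+// returning true unconditionally is coarse but safe.)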
+bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } + +} // namespace __xray diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_allocator.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_allocator.h new file mode 100644 index 000000000000..0284f4299fb1 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_allocator.h @@ -0,0 +1,288 @@ +//===-- xray_allocator.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Defines the allocator interface for an arena allocator, used primarily for +// the profiling runtime. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_ALLOCATOR_H +#define XRAY_ALLOCATOR_H + +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_internal_defs.h" +#include "sanitizer_common/sanitizer_mutex.h" +#if SANITIZER_FUCHSIA +#include <zircon/process.h> +#include <zircon/status.h> +#include <zircon/syscalls.h> +#else +#include "sanitizer_common/sanitizer_posix.h" +#endif +#include "xray_defs.h" +#include "xray_utils.h" +#include <cstddef> +#include <cstdint> +#include <sys/mman.h> + +namespace __xray { + +// We implement our own memory allocation routine which will bypass the +// internal allocator. This allows us to manage the memory directly, using +// mmap'ed memory to back the allocators. +template <class T> T *allocate() XRAY_NEVER_INSTRUMENT { +  uptr RoundedSize = RoundUpTo(sizeof(T), GetPageSizeCached()); +#if SANITIZER_FUCHSIA +  zx_handle_t Vmo; +  zx_status_t Status = _zx_vmo_create(RoundedSize, 0, &Vmo); +  if (Status != ZX_OK) { +    if (Verbosity()) +      Report("XRay Profiling: Failed to create VMO of size %zu: %s\n", +             sizeof(T), _zx_status_get_string(Status)); +    return nullptr; +  } +  uintptr_t B; +  Status = +      _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, +                   Vmo, 0, sizeof(T), &B); +  _zx_handle_close(Vmo); +  if (Status != ZX_OK) { +    if (Verbosity()) +      Report("XRay Profiling: Failed to map VMAR of size %zu: %s\n", sizeof(T), +             _zx_status_get_string(Status)); +    return nullptr; +  } +  return reinterpret_cast<T *>(B); +#else +  uptr B = internal_mmap(NULL, RoundedSize, PROT_READ | PROT_WRITE, +                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +  int ErrNo = 0; +  if (UNLIKELY(internal_iserror(B, &ErrNo))) { +    if (Verbosity()) +      Report("XRay Profiling: Failed to allocate memory of size %zu; Error = " +             "%zu\n", +             RoundedSize, B); +    return nullptr; +  } +#endif +  return reinterpret_cast<T *>(B); +} + +template <class T> void deallocate(T *B) XRAY_NEVER_INSTRUMENT { +  if (B == nullptr) +    return; +  uptr RoundedSize = RoundUpTo(sizeof(T), GetPageSizeCached()); +#if SANITIZER_FUCHSIA +  _zx_vmar_unmap(_zx_vmar_root_self(), reinterpret_cast<uintptr_t>(B), +                 RoundedSize); +#else +  internal_munmap(B, RoundedSize); +#endif +} + +template <class T = unsigned char> +T *allocateBuffer(size_t S) XRAY_NEVER_INSTRUMENT { +  uptr RoundedSize = RoundUpTo(S * sizeof(T), GetPageSizeCached()); +#if SANITIZER_FUCHSIA +  
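+  // (On Fuchsia the buffer is backed by a VMO: create one of the requested
+  // size, map it read/write into the root VMAR, and close the now-unneeded
+  // handle.)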
zx_handle_t Vmo; +  zx_status_t Status = _zx_vmo_create(RoundedSize, 0, &Vmo); +  if (Status != ZX_OK) { +    if (Verbosity()) +      Report("XRay Profiling: Failed to create VMO of size %zu: %s\n", S, +             _zx_status_get_string(Status)); +    return nullptr; +  } +  uintptr_t B; +  Status = _zx_vmar_map(_zx_vmar_root_self(), +                        ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, Vmo, 0, S, &B); +  _zx_handle_close(Vmo); +  if (Status != ZX_OK) { +    if (Verbosity()) +      Report("XRay Profiling: Failed to map VMAR of size %zu: %s\n", S, +             _zx_status_get_string(Status)); +    return nullptr; +  } +#else +  uptr B = internal_mmap(NULL, RoundedSize, PROT_READ | PROT_WRITE, +                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +  int ErrNo = 0; +  if (UNLIKELY(internal_iserror(B, &ErrNo))) { +    if (Verbosity()) +      Report("XRay Profiling: Failed to allocate memory of size %zu; Error = " +             "%zu\n", +             RoundedSize, B); +    return nullptr; +  } +#endif +  return reinterpret_cast<T *>(B); +} + +template <class T> void deallocateBuffer(T *B, size_t S) XRAY_NEVER_INSTRUMENT { +  if (B == nullptr) +    return; +  uptr RoundedSize = RoundUpTo(S * sizeof(T), GetPageSizeCached()); +#if SANITIZER_FUCHSIA +  _zx_vmar_unmap(_zx_vmar_root_self(), reinterpret_cast<uintptr_t>(B), +                 RoundedSize); +#else +  internal_munmap(B, RoundedSize); +#endif +} + +template <class T, class... U> +T *initArray(size_t N, U &&... Us) XRAY_NEVER_INSTRUMENT { +  auto A = allocateBuffer<T>(N); +  if (A != nullptr) +    while (N > 0) +      new (A + (--N)) T(std::forward<U>(Us)...); +  return A; +} + +/// The Allocator type hands out fixed-sized chunks of memory that are +/// cache-line aligned and sized. This is useful for placement of +/// performance-sensitive data in memory that's frequently accessed. The +/// allocator also self-limits the peak memory usage to a dynamically defined +/// maximum. +/// +/// N is the lower-bound size of the block of memory to return from the +/// allocation function. N is used to compute the size of a block, which is +/// cache-line-size multiples worth of memory. We compute the size of a block by +/// determining how many cache lines worth of memory is required to subsume N. +/// +/// The Allocator instance will manage its own memory acquired through mmap. +/// This severely constrains the platforms on which this can be used to POSIX +/// systems where mmap semantics are well-defined. +/// +/// FIXME: Isolate the lower-level memory management to a different abstraction +/// that can be platform-specific. +template <size_t N> struct Allocator { +  // The Allocator returns memory as Block instances. +  struct Block { +    /// Compute the minimum cache-line size multiple that is >= N. +    static constexpr auto Size = nearest_boundary(N, kCacheLineSize); +    void *Data; +  }; + +private: +  size_t MaxMemory{0}; +  unsigned char *BackingStore = nullptr; +  unsigned char *AlignedNextBlock = nullptr; +  size_t AllocatedBlocks = 0; +  bool Owned; +  SpinMutex Mutex{}; + +  void *Alloc() XRAY_NEVER_INSTRUMENT { +    SpinMutexLock Lock(&Mutex); +    if (UNLIKELY(BackingStore == nullptr)) { +      BackingStore = allocateBuffer(MaxMemory); +      if (BackingStore == nullptr) { +        if (Verbosity()) +          Report("XRay Profiling: Failed to allocate memory for allocator\n"); +        return nullptr; +      } + +      AlignedNextBlock = BackingStore; + +      // Ensure that NextBlock is aligned appropriately. 
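+      // (nearest_boundary rounds up to the next multiple of its second
+      // argument; with 64-byte cache lines, nearest_boundary(70, 64) == 128.)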
+      auto BackingStoreNum = reinterpret_cast<uintptr_t>(BackingStore); +      auto AlignedNextBlockNum = nearest_boundary( +          reinterpret_cast<uintptr_t>(AlignedNextBlock), kCacheLineSize); +      if (diff(AlignedNextBlockNum, BackingStoreNum) > ptrdiff_t(MaxMemory)) { +        deallocateBuffer(BackingStore, MaxMemory); +        AlignedNextBlock = BackingStore = nullptr; +        if (Verbosity()) +          Report("XRay Profiling: Cannot obtain enough memory from " +                 "preallocated region\n"); +        return nullptr; +      } + +      AlignedNextBlock = reinterpret_cast<unsigned char *>(AlignedNextBlockNum); + +      // Assert that AlignedNextBlock is cache-line aligned. +      DCHECK_EQ(reinterpret_cast<uintptr_t>(AlignedNextBlock) % kCacheLineSize, +                0); +    } + +    if (((AllocatedBlocks + 1) * Block::Size) > MaxMemory) +      return nullptr; + +    // Align the pointer we'd like to return to an appropriate alignment, then +    // advance the pointer from where to start allocations. +    void *Result = AlignedNextBlock; +    AlignedNextBlock = +        reinterpret_cast<unsigned char *>(AlignedNextBlock) + Block::Size; +    ++AllocatedBlocks; +    return Result; +  } + +public: +  explicit Allocator(size_t M) XRAY_NEVER_INSTRUMENT +      : MaxMemory(RoundUpTo(M, kCacheLineSize)), +        BackingStore(nullptr), +        AlignedNextBlock(nullptr), +        AllocatedBlocks(0), +        Owned(true), +        Mutex() {} + +  explicit Allocator(void *P, size_t M) XRAY_NEVER_INSTRUMENT +      : MaxMemory(M), +        BackingStore(reinterpret_cast<unsigned char *>(P)), +        AlignedNextBlock(reinterpret_cast<unsigned char *>(P)), +        AllocatedBlocks(0), +        Owned(false), +        Mutex() {} + +  Allocator(const Allocator &) = delete; +  Allocator &operator=(const Allocator &) = delete; + +  Allocator(Allocator &&O) XRAY_NEVER_INSTRUMENT { +    SpinMutexLock L0(&Mutex); +    SpinMutexLock L1(&O.Mutex); +    MaxMemory = O.MaxMemory; +    O.MaxMemory = 0; +    BackingStore = O.BackingStore; +    O.BackingStore = nullptr; +    AlignedNextBlock = O.AlignedNextBlock; +    O.AlignedNextBlock = nullptr; +    AllocatedBlocks = O.AllocatedBlocks; +    O.AllocatedBlocks = 0; +    Owned = O.Owned; +    O.Owned = false; +  } + +  Allocator &operator=(Allocator &&O) XRAY_NEVER_INSTRUMENT { +    SpinMutexLock L0(&Mutex); +    SpinMutexLock L1(&O.Mutex); +    MaxMemory = O.MaxMemory; +    O.MaxMemory = 0; +    if (BackingStore != nullptr) +      deallocateBuffer(BackingStore, MaxMemory); +    BackingStore = O.BackingStore; +    O.BackingStore = nullptr; +    AlignedNextBlock = O.AlignedNextBlock; +    O.AlignedNextBlock = nullptr; +    AllocatedBlocks = O.AllocatedBlocks; +    O.AllocatedBlocks = 0; +    Owned = O.Owned; +    O.Owned = false; +    return *this; +  } + +  Block Allocate() XRAY_NEVER_INSTRUMENT { return {Alloc()}; } + +  ~Allocator() NOEXCEPT XRAY_NEVER_INSTRUMENT { +    if (Owned && BackingStore != nullptr) { +      deallocateBuffer(BackingStore, MaxMemory); +    } +  } +}; + +} // namespace __xray + +#endif // XRAY_ALLOCATOR_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_always_instrument.txt b/contrib/llvm-project/compiler-rt/lib/xray/xray_always_instrument.txt new file mode 100644 index 000000000000..151ed703dd56 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_always_instrument.txt @@ -0,0 +1,6 @@ +# List of function matchers common to C/C++ applications that make sense to +# always instrument. 
You can use this as an argument to +# -fxray-always-instrument=<path> along with your project-specific lists. + +# Always instrument the main function. +fun:main diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_arm.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_arm.cpp new file mode 100644 index 000000000000..e1818555906c --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_arm.cpp @@ -0,0 +1,164 @@ +//===-- xray_arm.cpp --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of ARM-specific routines (32-bit). +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include <atomic> +#include <cassert> + +extern "C" void __clear_cache(void *start, void *end); + +namespace __xray { + +// The machine codes for some instructions used in runtime patching. +enum class PatchOpcodes : uint32_t { +  PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr} +  PO_BlxIp = 0xE12FFF3C,    // BLX ip +  PO_PopR0Lr = 0xE8BD4001,  // POP {r0, lr} +  PO_B20 = 0xEA000005       // B #20 +}; + +// 0xUUUUWXYZ -> 0x000W0XYZ +inline static uint32_t getMovwMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT { +  return (Value & 0xfff) | ((Value & 0xf000) << 4); +} + +// 0xWXYZUUUU -> 0x000W0XYZ +inline static uint32_t getMovtMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT { +  return getMovwMask(Value >> 16); +} + +// Writes the following instructions: +//   MOVW R<regNo>, #<lower 16 bits of the |Value|> +//   MOVT R<regNo>, #<higher 16 bits of the |Value|> +inline static uint32_t * +write32bitLoadReg(uint8_t regNo, uint32_t *Address, +                  const uint32_t Value) XRAY_NEVER_INSTRUMENT { +  // This is a fatal error: we cannot just report it and continue execution. 
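+  // (Illustrative: write32bitLoadReg(0, Address, 0x12345678) emits
+  // 0xE3050678, i.e. MOVW r0, #0x5678, then 0xE3410234, i.e. MOVT r0,
+  // #0x1234.)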
+  assert(regNo <= 15 && "Register number must be 0 to 15."); +  // MOVW R, #0xWXYZ in machine code is 0xE30WRXYZ +  *Address = (0xE3000000 | (uint32_t(regNo) << 12) | getMovwMask(Value)); +  Address++; +  // MOVT R, #0xWXYZ in machine code is 0xE34WRXYZ +  *Address = (0xE3400000 | (uint32_t(regNo) << 12) | getMovtMask(Value)); +  return Address + 1; +} + +// Writes the following instructions: +//   MOVW r0, #<lower 16 bits of the |Value|> +//   MOVT r0, #<higher 16 bits of the |Value|> +inline static uint32_t * +write32bitLoadR0(uint32_t *Address, +                 const uint32_t Value) XRAY_NEVER_INSTRUMENT { +  return write32bitLoadReg(0, Address, Value); +} + +// Writes the following instructions: +//   MOVW ip, #<lower 16 bits of the |Value|> +//   MOVT ip, #<higher 16 bits of the |Value|> +inline static uint32_t * +write32bitLoadIP(uint32_t *Address, +                 const uint32_t Value) XRAY_NEVER_INSTRUMENT { +  return write32bitLoadReg(12, Address, Value); +} + +inline static bool patchSled(const bool Enable, const uint32_t FuncId, +                             const XRaySledEntry &Sled, +                             void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { +  // When |Enable| == true, +  // We replace the following compile-time stub (sled): +  // +  // xray_sled_n: +  //   B #20 +  //   6 NOPs (24 bytes) +  // +  // With the following runtime patch: +  // +  // xray_sled_n: +  //   PUSH {r0, lr} +  //   MOVW r0, #<lower 16 bits of function ID> +  //   MOVT r0, #<higher 16 bits of function ID> +  //   MOVW ip, #<lower 16 bits of address of TracingHook> +  //   MOVT ip, #<higher 16 bits of address of TracingHook> +  //   BLX ip +  //   POP {r0, lr} +  // +  // Replacement of the first 4-byte instruction should be the last and atomic +  // operation, so that the user code which reaches the sled concurrently +  // either jumps over the whole sled, or executes the whole sled when the +  // latter is ready. 
+  // +  // When |Enable|==false, we set back the first instruction in the sled to be +  //   B #20 + +  uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.address()); +  uint32_t *CurAddress = FirstAddress + 1; +  if (Enable) { +    CurAddress = +        write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId)); +    CurAddress = +        write32bitLoadIP(CurAddress, reinterpret_cast<uint32_t>(TracingHook)); +    *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp); +    CurAddress++; +    *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr); +    CurAddress++; +    std::atomic_store_explicit( +        reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), +        uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release); +  } else { +    std::atomic_store_explicit( +        reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), +        uint32_t(PatchOpcodes::PO_B20), std::memory_order_release); +  } +  __clear_cache(reinterpret_cast<char *>(FirstAddress), +                reinterpret_cast<char *>(CurAddress)); +  return true; +} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, +                        const XRaySledEntry &Sled, +                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { +  return patchSled(Enable, FuncId, Sled, Trampoline); +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, +                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, +                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit); +} + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, +                      const XRaySledEntry &Sled) +    XRAY_NEVER_INSTRUMENT { // FIXME: Implement in arm? +  return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, +                     const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  // FIXME: Implement in arm? +  return false; +} + +// FIXME: Maybe implement this better? +bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } + +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { +  // FIXME: this will have to be implemented in the trampoline assembly file +} diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.cpp new file mode 100644 index 000000000000..e0a5e7bb29ee --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.cpp @@ -0,0 +1,49 @@ +//===-- xray_basic_flags.cpp ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// XRay Basic flag parsing logic. 
+//===----------------------------------------------------------------------===// + +#include "xray_basic_flags.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "xray_defs.h" + +using namespace __sanitizer; + +namespace __xray { + +/// Use via basicFlags(). +BasicFlags xray_basic_flags_dont_use_directly; + +void BasicFlags::setDefaults() XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; +#include "xray_basic_flags.inc" +#undef XRAY_FLAG +} + +void registerXRayBasicFlags(FlagParser *P, +                            BasicFlags *F) XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description)                       \ +  RegisterFlag(P, #Name, Description, &F->Name); +#include "xray_basic_flags.inc" +#undef XRAY_FLAG +} + +const char *useCompilerDefinedBasicFlags() XRAY_NEVER_INSTRUMENT { +#ifdef XRAY_BASIC_OPTIONS +  return SANITIZER_STRINGIFY(XRAY_BASIC_OPTIONS); +#else +  return ""; +#endif +} + +} // namespace __xray diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.h new file mode 100644 index 000000000000..b846c1233e8a --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.h @@ -0,0 +1,37 @@ +//===-- xray_basic_flags.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// XRay Basic Mode runtime flags. +//===----------------------------------------------------------------------===// + +#ifndef XRAY_BASIC_FLAGS_H +#define XRAY_BASIC_FLAGS_H + +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_internal_defs.h" + +namespace __xray { + +struct BasicFlags { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Type Name; +#include "xray_basic_flags.inc" +#undef XRAY_FLAG + +  void setDefaults(); +}; + +extern BasicFlags xray_basic_flags_dont_use_directly; +extern void registerXRayBasicFlags(FlagParser *P, BasicFlags *F); +const char *useCompilerDefinedBasicFlags(); +inline BasicFlags *basicFlags() { return &xray_basic_flags_dont_use_directly; } + +} // namespace __xray + +#endif // XRAY_BASIC_FLAGS_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.inc b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.inc new file mode 100644 index 000000000000..fb38c540d356 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_flags.inc @@ -0,0 +1,23 @@ +//===-- xray_basic_flags.inc ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// XRay runtime flags. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_FLAG +#error "Define XRAY_FLAG prior to including this file!" 
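+// (XRAY_FLAG is an X-macro: each includer defines it to expand the entries
+// below for its own purpose. xray_basic_flags.h declares the fields via
+//   #define XRAY_FLAG(Type, Name, DefaultValue, Description) Type Name;
+// while xray_basic_flags.cpp redefines it to set defaults and to register
+// each flag with the parser.)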
+#endif + +XRAY_FLAG(int, func_duration_threshold_us, 5, +          "Basic logging will try to skip functions that execute for fewer " +          "microseconds than this threshold.") +XRAY_FLAG(int, max_stack_depth, 64, +          "Basic logging will keep track of at most this deep a call stack, " +          "any more and the recordings will be dropped.") +XRAY_FLAG(int, thread_buffer_size, 1024, +          "The number of entries to keep on a per-thread buffer.") diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.cpp new file mode 100644 index 000000000000..6ac5417bef75 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.cpp @@ -0,0 +1,515 @@ +//===-- xray_basic_logging.cpp ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of a simple in-memory log of XRay events. This defines a +// logging function that's compatible with the XRay handler interface, and +// routines for exporting data to files. +// +//===----------------------------------------------------------------------===// + +#include <errno.h> +#include <fcntl.h> +#include <pthread.h> +#include <sys/stat.h> +#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE +#include <sys/syscall.h> +#endif +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include "sanitizer_common/sanitizer_allocator_internal.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "xray/xray_records.h" +#include "xray_recursion_guard.h" +#include "xray_basic_flags.h" +#include "xray_basic_logging.h" +#include "xray_defs.h" +#include "xray_flags.h" +#include "xray_interface_internal.h" +#include "xray_tsc.h" +#include "xray_utils.h" + +namespace __xray { + +static SpinMutex LogMutex; + +namespace { +// We use elements of this type to record the entry TSC of every function ID we +// see as we're tracing a particular thread's execution. 
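+// The members pack to exactly 16 bytes (4 + 2 + 1 + 1 + 8); the
+// static_assert below guards that layout.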
+struct alignas(16) StackEntry { +  int32_t FuncId; +  uint16_t Type; +  uint8_t CPU; +  uint8_t Padding; +  uint64_t TSC; +}; + +static_assert(sizeof(StackEntry) == 16, "Wrong size for StackEntry"); + +struct XRAY_TLS_ALIGNAS(64) ThreadLocalData { +  void *InMemoryBuffer = nullptr; +  size_t BufferSize = 0; +  size_t BufferOffset = 0; +  void *ShadowStack = nullptr; +  size_t StackSize = 0; +  size_t StackEntries = 0; +  __xray::LogWriter *LogWriter = nullptr; +}; + +struct BasicLoggingOptions { +  int DurationFilterMicros = 0; +  size_t MaxStackDepth = 0; +  size_t ThreadBufferSize = 0; +}; +} // namespace + +static pthread_key_t PThreadKey; + +static atomic_uint8_t BasicInitialized{0}; + +struct BasicLoggingOptions GlobalOptions; + +thread_local atomic_uint8_t Guard{0}; + +static atomic_uint8_t UseRealTSC{0}; +static atomic_uint64_t ThresholdTicks{0}; +static atomic_uint64_t TicksPerSec{0}; +static atomic_uint64_t CycleFrequency{NanosecondsPerSecond}; + +static LogWriter *getLog() XRAY_NEVER_INSTRUMENT { +  LogWriter* LW = LogWriter::Open(); +  if (LW == nullptr) +    return LW; + +  static pthread_once_t DetectOnce = PTHREAD_ONCE_INIT; +  pthread_once(&DetectOnce, +[] { +    if (atomic_load(&UseRealTSC, memory_order_acquire)) +      atomic_store(&CycleFrequency, getTSCFrequency(), memory_order_release); +  }); + +  // Since we're here, we get to write the header. We set it up so that the +  // header will only be written once, at the start, and let the threads +  // logging do writes which just append. +  XRayFileHeader Header; +  // Version 2 includes tail exit records. +  // Version 3 includes pid inside records. +  Header.Version = 3; +  Header.Type = FileTypes::NAIVE_LOG; +  Header.CycleFrequency = atomic_load(&CycleFrequency, memory_order_acquire); + +  // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' +  // before setting the values in the header. 
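+  // (On x86 that information comes from CPUID leaf 0x80000007, EDX bit 8,
+  // the "invariant TSC" bit, or from the constant_tsc/nonstop_tsc flags in
+  // /proc/cpuinfo on Linux; both are assumed here rather than verified.)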
+  Header.ConstantTSC = 1; +  Header.NonstopTSC = 1; +  LW->WriteAll(reinterpret_cast<char *>(&Header), +               reinterpret_cast<char *>(&Header) + sizeof(Header)); +  return LW; +} + +static LogWriter *getGlobalLog() XRAY_NEVER_INSTRUMENT { +  static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; +  static LogWriter *LW = nullptr; +  pthread_once(&OnceInit, +[] { LW = getLog(); }); +  return LW; +} + +static ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT { +  thread_local ThreadLocalData TLD; +  thread_local bool UNUSED TOnce = [] { +    if (GlobalOptions.ThreadBufferSize == 0) { +      if (Verbosity()) +        Report("Not initializing TLD since ThreadBufferSize == 0.\n"); +      return false; +    } +    pthread_setspecific(PThreadKey, &TLD); +    TLD.LogWriter = getGlobalLog(); +    TLD.InMemoryBuffer = reinterpret_cast<XRayRecord *>( +        InternalAlloc(sizeof(XRayRecord) * GlobalOptions.ThreadBufferSize, +                      nullptr, alignof(XRayRecord))); +    TLD.BufferSize = GlobalOptions.ThreadBufferSize; +    TLD.BufferOffset = 0; +    if (GlobalOptions.MaxStackDepth == 0) { +      if (Verbosity()) +        Report("Not initializing the ShadowStack since MaxStackDepth == 0.\n"); +      TLD.StackSize = 0; +      TLD.StackEntries = 0; +      TLD.ShadowStack = nullptr; +      return false; +    } +    TLD.ShadowStack = reinterpret_cast<StackEntry *>( +        InternalAlloc(sizeof(StackEntry) * GlobalOptions.MaxStackDepth, nullptr, +                      alignof(StackEntry))); +    TLD.StackSize = GlobalOptions.MaxStackDepth; +    TLD.StackEntries = 0; +    return false; +  }(); +  return TLD; +} + +template <class RDTSC> +void InMemoryRawLog(int32_t FuncId, XRayEntryType Type, +                    RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { +  auto &TLD = getThreadLocalData(); +  LogWriter *LW = getGlobalLog(); +  if (LW == nullptr) +    return; + +  // Use a simple recursion guard, to handle cases where we're already logging +  // and for one reason or another, this function gets called again in the same +  // thread. +  RecursionGuard G(Guard); +  if (!G) +    return; + +  uint8_t CPU = 0; +  uint64_t TSC = ReadTSC(CPU); + +  switch (Type) { +  case XRayEntryType::ENTRY: +  case XRayEntryType::LOG_ARGS_ENTRY: { +    // Short circuit if we've reached the maximum depth of the stack. +    if (TLD.StackEntries++ >= TLD.StackSize) +      return; + +    // When we encounter an entry event, we keep track of the TSC and the CPU, +    // and put it in the stack. +    StackEntry E; +    E.FuncId = FuncId; +    E.CPU = CPU; +    E.Type = Type; +    E.TSC = TSC; +    auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) + +                         (sizeof(StackEntry) * (TLD.StackEntries - 1)); +    internal_memcpy(StackEntryPtr, &E, sizeof(StackEntry)); +    break; +  } +  case XRayEntryType::EXIT: +  case XRayEntryType::TAIL: { +    if (TLD.StackEntries == 0) +      break; + +    if (--TLD.StackEntries >= TLD.StackSize) +      return; + +    // When we encounter an exit event, we check whether all the following are +    // true: +    // +    // - The Function ID is the same as the most recent entry in the stack. +    // - The CPU is the same as the most recent entry in the stack. +    // - The Delta of the TSCs is less than the threshold amount of time we're +    //   looking to record. +    // +    // If all of these conditions are true, we pop the stack and don't write a +    // record and move the record offset back. 
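+    //
+    // (Illustrative: with func_duration_threshold_us = 5 on a 2 GHz TSC,
+    // ThresholdTicks is 10,000, so an entry/exit pair only 4,000 ticks apart
+    // is elided by rewinding BufferOffset.)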
+    StackEntry StackTop; +    auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) + +                         (sizeof(StackEntry) * TLD.StackEntries); +    internal_memcpy(&StackTop, StackEntryPtr, sizeof(StackEntry)); +    if (StackTop.FuncId == FuncId && StackTop.CPU == CPU && +        StackTop.TSC < TSC) { +      auto Delta = TSC - StackTop.TSC; +      if (Delta < atomic_load(&ThresholdTicks, memory_order_relaxed)) { +        DCHECK(TLD.BufferOffset > 0); +        TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2; +        return; +      } +    } +    break; +  } +  default: +    // Should be unreachable. +    DCHECK(false && "Unsupported XRayEntryType encountered."); +    break; +  } + +  // First determine whether the delta between the function's enter record and +  // the exit record is higher than the threshold. +  XRayRecord R; +  R.RecordType = RecordTypes::NORMAL; +  R.CPU = CPU; +  R.TSC = TSC; +  R.TId = GetTid();  +  R.PId = internal_getpid();  +  R.Type = Type; +  R.FuncId = FuncId; +  auto FirstEntry = reinterpret_cast<XRayRecord *>(TLD.InMemoryBuffer); +  internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R)); +  if (++TLD.BufferOffset == TLD.BufferSize) { +    SpinMutexLock Lock(&LogMutex); +    LW->WriteAll(reinterpret_cast<char *>(FirstEntry), +                 reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset)); +    TLD.BufferOffset = 0; +    TLD.StackEntries = 0; +  } +} + +template <class RDTSC> +void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1, +                           RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { +  auto &TLD = getThreadLocalData(); +  auto FirstEntry = +      reinterpret_cast<XRayArgPayload *>(TLD.InMemoryBuffer); +  const auto &BuffLen = TLD.BufferSize; +  LogWriter *LW = getGlobalLog(); +  if (LW == nullptr) +    return; + +  // First we check whether there's enough space to write the data consecutively +  // in the thread-local buffer. If not, we first flush the buffer before +  // attempting to write the two records that must be consecutive. +  if (TLD.BufferOffset + 2 > BuffLen) { +    SpinMutexLock Lock(&LogMutex); +    LW->WriteAll(reinterpret_cast<char *>(FirstEntry), +                 reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset)); +    TLD.BufferOffset = 0; +    TLD.StackEntries = 0; +  } + +  // Then we write the "we have an argument" record. +  InMemoryRawLog(FuncId, Type, ReadTSC); + +  RecursionGuard G(Guard); +  if (!G) +    return; + +  // And, from here on write the arg payload. 
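+  // (The ARG_PAYLOAD record below must land directly after the entry record
+  // emitted by InMemoryRawLog above; the flush at the top of this function
+  // guarantees that both fit consecutively in the buffer.)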
+  XRayArgPayload R; +  R.RecordType = RecordTypes::ARG_PAYLOAD; +  R.FuncId = FuncId; +  R.TId = GetTid();  +  R.PId = internal_getpid();  +  R.Arg = Arg1; +  internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R)); +  if (++TLD.BufferOffset == BuffLen) { +    SpinMutexLock Lock(&LogMutex); +    LW->WriteAll(reinterpret_cast<char *>(FirstEntry), +                 reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset)); +    TLD.BufferOffset = 0; +    TLD.StackEntries = 0; +  } +} + +void basicLoggingHandleArg0RealTSC(int32_t FuncId, +                                   XRayEntryType Type) XRAY_NEVER_INSTRUMENT { +  InMemoryRawLog(FuncId, Type, readTSC); +} + +void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Type) +    XRAY_NEVER_INSTRUMENT { +  InMemoryRawLog(FuncId, Type, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT { +    timespec TS; +    int result = clock_gettime(CLOCK_REALTIME, &TS); +    if (result != 0) { +      Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno)); +      TS = {0, 0}; +    } +    CPU = 0; +    return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; +  }); +} + +void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Type, +                                   uint64_t Arg1) XRAY_NEVER_INSTRUMENT { +  InMemoryRawLogWithArg(FuncId, Type, Arg1, readTSC); +} + +void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Type, +                                      uint64_t Arg1) XRAY_NEVER_INSTRUMENT { +  InMemoryRawLogWithArg( +      FuncId, Type, Arg1, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT { +        timespec TS; +        int result = clock_gettime(CLOCK_REALTIME, &TS); +        if (result != 0) { +          Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno)); +          TS = {0, 0}; +        } +        CPU = 0; +        return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; +      }); +} + +static void TLDDestructor(void *P) XRAY_NEVER_INSTRUMENT { +  ThreadLocalData &TLD = *reinterpret_cast<ThreadLocalData *>(P); +  auto ExitGuard = at_scope_exit([&TLD] { +    // Clean up dynamic resources. +    if (TLD.InMemoryBuffer) +      InternalFree(TLD.InMemoryBuffer); +    if (TLD.ShadowStack) +      InternalFree(TLD.ShadowStack); +    if (Verbosity()) +      Report("Cleaned up log for TID: %llu\n", GetTid()); +  }); + +  if (TLD.LogWriter == nullptr || TLD.BufferOffset == 0) { +    if (Verbosity()) +      Report("Skipping buffer for TID: %llu; Offset = %zu\n", GetTid(), +             TLD.BufferOffset); +    return; +  } + +  { +    SpinMutexLock L(&LogMutex); +    TLD.LogWriter->WriteAll(reinterpret_cast<char *>(TLD.InMemoryBuffer), +                            reinterpret_cast<char *>(TLD.InMemoryBuffer) + +                            (sizeof(XRayRecord) * TLD.BufferOffset)); +  } + +  // Because this thread's exit could be the last one trying to write to +  // the file and that we're not able to close out the file properly, we +  // sync instead and hope that the pending writes are flushed as the +  // thread exits. 
+  TLD.LogWriter->Flush(); +} + +XRayLogInitStatus basicLoggingInit(UNUSED size_t BufferSize, +                                   UNUSED size_t BufferMax, void *Options, +                                   size_t OptionsSize) XRAY_NEVER_INSTRUMENT { +  uint8_t Expected = 0; +  if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 1, +                                      memory_order_acq_rel)) { +    if (Verbosity()) +      Report("Basic logging already initialized.\n"); +    return XRayLogInitStatus::XRAY_LOG_INITIALIZED; +  } + +  static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; +  pthread_once(&OnceInit, +[] { +    pthread_key_create(&PThreadKey, TLDDestructor); +    atomic_store(&UseRealTSC, probeRequiredCPUFeatures(), memory_order_release); +    // Initialize the global TicksPerSec value. +    atomic_store(&TicksPerSec, +                 probeRequiredCPUFeatures() ? getTSCFrequency() +                                            : NanosecondsPerSecond, +                 memory_order_release); +    if (!atomic_load(&UseRealTSC, memory_order_relaxed) && Verbosity()) +      Report("WARNING: Required CPU features missing for XRay instrumentation, " +             "using emulation instead.\n"); +  }); + +  FlagParser P; +  BasicFlags F; +  F.setDefaults(); +  registerXRayBasicFlags(&P, &F); +  P.ParseString(useCompilerDefinedBasicFlags()); +  auto *EnvOpts = GetEnv("XRAY_BASIC_OPTIONS"); +  if (EnvOpts == nullptr) +    EnvOpts = ""; + +  P.ParseString(EnvOpts); + +  // If XRAY_BASIC_OPTIONS was not defined, then we use the deprecated options +  // set through XRAY_OPTIONS instead. +  if (internal_strlen(EnvOpts) == 0) { +    F.func_duration_threshold_us = +        flags()->xray_naive_log_func_duration_threshold_us; +    F.max_stack_depth = flags()->xray_naive_log_max_stack_depth; +    F.thread_buffer_size = flags()->xray_naive_log_thread_buffer_size; +  } + +  P.ParseString(static_cast<const char *>(Options)); +  GlobalOptions.ThreadBufferSize = F.thread_buffer_size; +  GlobalOptions.DurationFilterMicros = F.func_duration_threshold_us; +  GlobalOptions.MaxStackDepth = F.max_stack_depth; +  *basicFlags() = F; + +  atomic_store(&ThresholdTicks, +               atomic_load(&TicksPerSec, memory_order_acquire) * +                   GlobalOptions.DurationFilterMicros / 1000000, +               memory_order_release); +  __xray_set_handler_arg1(atomic_load(&UseRealTSC, memory_order_acquire) +                              ? basicLoggingHandleArg1RealTSC +                              : basicLoggingHandleArg1EmulateTSC); +  __xray_set_handler(atomic_load(&UseRealTSC, memory_order_acquire) +                         ? basicLoggingHandleArg0RealTSC +                         : basicLoggingHandleArg0EmulateTSC); + +  // TODO: Implement custom event and typed event handling support in Basic +  // Mode. +  __xray_remove_customevent_handler(); +  __xray_remove_typedevent_handler(); + +  return XRayLogInitStatus::XRAY_LOG_INITIALIZED; +} + +XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT { +  uint8_t Expected = 0; +  if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 0, +                                      memory_order_acq_rel) && +      Verbosity()) +    Report("Basic logging already finalized.\n"); + +  // Nothing really to do aside from marking state of the global to be +  // uninitialized. 
+ +  return XRayLogInitStatus::XRAY_LOG_FINALIZED; +} + +XRayLogFlushStatus basicLoggingFlush() XRAY_NEVER_INSTRUMENT { +  // This really does nothing, since flushing the logs happen at the end of a +  // thread's lifetime, or when the buffers are full. +  return XRayLogFlushStatus::XRAY_LOG_FLUSHED; +} + +// This is a handler that, effectively, does nothing. +void basicLoggingHandleArg0Empty(int32_t, XRayEntryType) XRAY_NEVER_INSTRUMENT { +} + +bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT { +  XRayLogImpl Impl{ +      basicLoggingInit, +      basicLoggingFinalize, +      basicLoggingHandleArg0Empty, +      basicLoggingFlush, +  }; +  auto RegistrationResult = __xray_log_register_mode("xray-basic", Impl); +  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && +      Verbosity()) +    Report("Cannot register XRay Basic Mode to 'xray-basic'; error = %d\n", +           RegistrationResult); +  if (flags()->xray_naive_log || +      !internal_strcmp(flags()->xray_mode, "xray-basic")) { +    auto SelectResult = __xray_log_select_mode("xray-basic"); +    if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) { +      if (Verbosity()) +        Report("Failed selecting XRay Basic Mode; error = %d\n", SelectResult); +      return false; +    } + +    // We initialize the implementation using the data we get from the +    // XRAY_BASIC_OPTIONS environment variable, at this point of the +    // implementation. +    auto *Env = GetEnv("XRAY_BASIC_OPTIONS"); +    auto InitResult = +        __xray_log_init_mode("xray-basic", Env == nullptr ? "" : Env); +    if (InitResult != XRayLogInitStatus::XRAY_LOG_INITIALIZED) { +      if (Verbosity()) +        Report("Failed initializing XRay Basic Mode; error = %d\n", InitResult); +      return false; +    } + +    // At this point we know that we've successfully initialized Basic mode +    // tracing, and the only chance we're going to get for the current thread to +    // clean-up may be at thread/program exit. To ensure that we're going to get +    // the cleanup even without calling the finalization routines, we're +    // registering a program exit function that will do the cleanup. +    static pthread_once_t DynamicOnce = PTHREAD_ONCE_INIT; +    pthread_once(&DynamicOnce, +[] { +      static void *FakeTLD = nullptr; +      FakeTLD = &getThreadLocalData(); +      Atexit(+[] { TLDDestructor(FakeTLD); }); +    }); +  } +  return true; +} + +} // namespace __xray + +static auto UNUSED Unused = __xray::basicLogDynamicInitializer(); diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.h new file mode 100644 index 000000000000..89caca66b585 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_basic_logging.h @@ -0,0 +1,42 @@ +//===-- xray_basic_logging.h ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. 
+// +//===----------------------------------------------------------------------===// +#ifndef XRAY_XRAY_INMEMORY_LOG_H +#define XRAY_XRAY_INMEMORY_LOG_H + +#include "xray/xray_log_interface.h" + +/// Basic (Naive) Mode +/// ================== +/// +/// This implementation hooks in through the XRay logging implementation +/// framework. The Basic Mode implementation will keep appending to a file as +/// soon as the thread-local buffers are full. It keeps minimal in-memory state +/// and does the minimum filtering required to keep log files smaller. + +namespace __xray { + +XRayLogInitStatus basicLoggingInit(size_t BufferSize, size_t BufferMax, +                                   void *Options, size_t OptionsSize); +XRayLogInitStatus basicLoggingFinalize(); + +void basicLoggingHandleArg0RealTSC(int32_t FuncId, XRayEntryType Entry); +void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Entry); +void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Entry, +                                   uint64_t Arg1); +void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Entry, +                                      uint64_t Arg1); +XRayLogFlushStatus basicLoggingFlush(); +XRayLogInitStatus basicLoggingReset(); + +} // namespace __xray + +#endif // XRAY_XRAY_INMEMORY_LOG_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.cpp new file mode 100644 index 000000000000..748708ccd0f4 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.cpp @@ -0,0 +1,237 @@ +//===-- xray_buffer_queue.cpp ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Defines the interface for a buffer queue implementation. +// +//===----------------------------------------------------------------------===// +#include "xray_buffer_queue.h" +#include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_libc.h" +#if !SANITIZER_FUCHSIA +#include "sanitizer_common/sanitizer_posix.h" +#endif +#include "xray_allocator.h" +#include "xray_defs.h" +#include <memory> +#include <sys/mman.h> + +using namespace __xray; + +namespace { + +BufferQueue::ControlBlock *allocControlBlock(size_t Size, size_t Count) { +  auto B = +      allocateBuffer((sizeof(BufferQueue::ControlBlock) - 1) + (Size * Count)); +  return B == nullptr ? 
nullptr +                      : reinterpret_cast<BufferQueue::ControlBlock *>(B); +} + +void deallocControlBlock(BufferQueue::ControlBlock *C, size_t Size, +                         size_t Count) { +  deallocateBuffer(reinterpret_cast<unsigned char *>(C), +                   (sizeof(BufferQueue::ControlBlock) - 1) + (Size * Count)); +} + +void decRefCount(BufferQueue::ControlBlock *C, size_t Size, size_t Count) { +  if (C == nullptr) +    return; +  if (atomic_fetch_sub(&C->RefCount, 1, memory_order_acq_rel) == 1) +    deallocControlBlock(C, Size, Count); +} + +void incRefCount(BufferQueue::ControlBlock *C) { +  if (C == nullptr) +    return; +  atomic_fetch_add(&C->RefCount, 1, memory_order_acq_rel); +} + +// We use a struct to ensure that we are allocating one atomic_uint64_t per +// cache line. This allows us to not worry about false-sharing among atomic +// objects being updated (constantly) by different threads. +struct ExtentsPadded { +  union { +    atomic_uint64_t Extents; +    unsigned char Storage[kCacheLineSize]; +  }; +}; + +constexpr size_t kExtentsSize = sizeof(ExtentsPadded); + +} // namespace + +BufferQueue::ErrorCode BufferQueue::init(size_t BS, size_t BC) { +  SpinMutexLock Guard(&Mutex); + +  if (!finalizing()) +    return BufferQueue::ErrorCode::AlreadyInitialized; + +  cleanupBuffers(); + +  bool Success = false; +  BufferSize = BS; +  BufferCount = BC; + +  BackingStore = allocControlBlock(BufferSize, BufferCount); +  if (BackingStore == nullptr) +    return BufferQueue::ErrorCode::NotEnoughMemory; + +  auto CleanupBackingStore = at_scope_exit([&, this] { +    if (Success) +      return; +    deallocControlBlock(BackingStore, BufferSize, BufferCount); +    BackingStore = nullptr; +  }); + +  // Initialize enough atomic_uint64_t instances, each +  ExtentsBackingStore = allocControlBlock(kExtentsSize, BufferCount); +  if (ExtentsBackingStore == nullptr) +    return BufferQueue::ErrorCode::NotEnoughMemory; + +  auto CleanupExtentsBackingStore = at_scope_exit([&, this] { +    if (Success) +      return; +    deallocControlBlock(ExtentsBackingStore, kExtentsSize, BufferCount); +    ExtentsBackingStore = nullptr; +  }); + +  Buffers = initArray<BufferRep>(BufferCount); +  if (Buffers == nullptr) +    return BufferQueue::ErrorCode::NotEnoughMemory; + +  // At this point we increment the generation number to associate the buffers +  // to the new generation. +  atomic_fetch_add(&Generation, 1, memory_order_acq_rel); + +  // First, we initialize the refcount in the ControlBlock, which we treat as +  // being at the start of the BackingStore pointer. +  atomic_store(&BackingStore->RefCount, 1, memory_order_release); +  atomic_store(&ExtentsBackingStore->RefCount, 1, memory_order_release); + +  // Then we initialise the individual buffers that sub-divide the whole backing +  // store. Each buffer will start at the `Data` member of the ControlBlock, and +  // will be offsets from these locations. 
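+  //
+  // (Illustrative: with BufferSize = 4096 and BufferCount = 2, Buffers[1]
+  // gets Data == &BackingStore->Data + 4096 and its extents word at
+  // &ExtentsBackingStore->Data + kExtentsSize.)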
+  for (size_t i = 0; i < BufferCount; ++i) { +    auto &T = Buffers[i]; +    auto &Buf = T.Buff; +    auto *E = reinterpret_cast<ExtentsPadded *>(&ExtentsBackingStore->Data + +                                                (kExtentsSize * i)); +    Buf.Extents = &E->Extents; +    atomic_store(Buf.Extents, 0, memory_order_release); +    Buf.Generation = generation(); +    Buf.Data = &BackingStore->Data + (BufferSize * i); +    Buf.Size = BufferSize; +    Buf.BackingStore = BackingStore; +    Buf.ExtentsBackingStore = ExtentsBackingStore; +    Buf.Count = BufferCount; +    T.Used = false; +  } + +  Next = Buffers; +  First = Buffers; +  LiveBuffers = 0; +  atomic_store(&Finalizing, 0, memory_order_release); +  Success = true; +  return BufferQueue::ErrorCode::Ok; +} + +BufferQueue::BufferQueue(size_t B, size_t N, +                         bool &Success) XRAY_NEVER_INSTRUMENT +    : BufferSize(B), +      BufferCount(N), +      Mutex(), +      Finalizing{1}, +      BackingStore(nullptr), +      ExtentsBackingStore(nullptr), +      Buffers(nullptr), +      Next(Buffers), +      First(Buffers), +      LiveBuffers(0), +      Generation{0} { +  Success = init(B, N) == BufferQueue::ErrorCode::Ok; +} + +BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) { +  if (atomic_load(&Finalizing, memory_order_acquire)) +    return ErrorCode::QueueFinalizing; + +  BufferRep *B = nullptr; +  { +    SpinMutexLock Guard(&Mutex); +    if (LiveBuffers == BufferCount) +      return ErrorCode::NotEnoughMemory; +    B = Next++; +    if (Next == (Buffers + BufferCount)) +      Next = Buffers; +    ++LiveBuffers; +  } + +  incRefCount(BackingStore); +  incRefCount(ExtentsBackingStore); +  Buf = B->Buff; +  Buf.Generation = generation(); +  B->Used = true; +  return ErrorCode::Ok; +} + +BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) { +  // Check whether the buffer being referred to is within the bounds of the +  // backing store's range. +  BufferRep *B = nullptr; +  { +    SpinMutexLock Guard(&Mutex); +    if (Buf.Generation != generation() || LiveBuffers == 0) { +      Buf = {}; +      decRefCount(Buf.BackingStore, Buf.Size, Buf.Count); +      decRefCount(Buf.ExtentsBackingStore, kExtentsSize, Buf.Count); +      return BufferQueue::ErrorCode::Ok; +    } + +    if (Buf.Data < &BackingStore->Data || +        Buf.Data > &BackingStore->Data + (BufferCount * BufferSize)) +      return BufferQueue::ErrorCode::UnrecognizedBuffer; + +    --LiveBuffers; +    B = First++; +    if (First == (Buffers + BufferCount)) +      First = Buffers; +  } + +  // Now that the buffer has been released, we mark it as "used". 
+  B->Buff = Buf; +  B->Used = true; +  decRefCount(Buf.BackingStore, Buf.Size, Buf.Count); +  decRefCount(Buf.ExtentsBackingStore, kExtentsSize, Buf.Count); +  atomic_store(B->Buff.Extents, atomic_load(Buf.Extents, memory_order_acquire), +               memory_order_release); +  Buf = {}; +  return ErrorCode::Ok; +} + +BufferQueue::ErrorCode BufferQueue::finalize() { +  if (atomic_exchange(&Finalizing, 1, memory_order_acq_rel)) +    return ErrorCode::QueueFinalizing; +  return ErrorCode::Ok; +} + +void BufferQueue::cleanupBuffers() { +  for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B) +    B->~BufferRep(); +  deallocateBuffer(Buffers, BufferCount); +  decRefCount(BackingStore, BufferSize, BufferCount); +  decRefCount(ExtentsBackingStore, kExtentsSize, BufferCount); +  BackingStore = nullptr; +  ExtentsBackingStore = nullptr; +  Buffers = nullptr; +  BufferCount = 0; +  BufferSize = 0; +} + +BufferQueue::~BufferQueue() { cleanupBuffers(); } diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.h new file mode 100644 index 000000000000..8d33f73576b5 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_buffer_queue.h @@ -0,0 +1,280 @@ +//===-- xray_buffer_queue.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Defines the interface for a buffer queue implementation. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_BUFFER_QUEUE_H +#define XRAY_BUFFER_QUEUE_H + +#include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_mutex.h" +#include "xray_defs.h" +#include <cstddef> +#include <cstdint> + +namespace __xray { + +/// BufferQueue implements a circular queue of fixed sized buffers (much like a +/// freelist) but is concerned with making it quick to initialise, finalise, and +/// get from or return buffers to the queue. This is one key component of the +/// "flight data recorder" (FDR) mode to support ongoing XRay function call +/// trace collection. +class BufferQueue { +public: +  /// ControlBlock represents the memory layout of how we interpret the backing +  /// store for all buffers and extents managed by a BufferQueue instance. The +  /// ControlBlock has the reference count as the first member, sized according +  /// to platform-specific cache-line size. We never use the Buffer member of +  /// the union, which is only there for compiler-supported alignment and +  /// sizing. +  /// +  /// This ensures that the `Data` member will be placed at least kCacheLineSize +  /// bytes from the beginning of the structure. +  struct ControlBlock { +    union { +      atomic_uint64_t RefCount; +      char Buffer[kCacheLineSize]; +    }; + +    /// We need to make this size 1, to conform to the C++ rules for array data +    /// members. Typically, we want to subtract this 1 byte for sizing +    /// information. 
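+    /// (Illustrative: allocControlBlock(Size, Count) in xray_buffer_queue.cpp
+    /// allocates (sizeof(ControlBlock) - 1) + Size * Count bytes, so Data
+    /// addresses the whole co-allocated region.)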
+    char Data[1]; +  }; + +  struct Buffer { +    atomic_uint64_t *Extents = nullptr; +    uint64_t Generation{0}; +    void *Data = nullptr; +    size_t Size = 0; + +  private: +    friend class BufferQueue; +    ControlBlock *BackingStore = nullptr; +    ControlBlock *ExtentsBackingStore = nullptr; +    size_t Count = 0; +  }; + +  struct BufferRep { +    // The managed buffer. +    Buffer Buff; + +    // This is true if the buffer has been returned to the available queue, and +    // is considered "used" by another thread. +    bool Used = false; +  }; + +private: +  // This models a ForwardIterator. |T| Must be either a `Buffer` or `const +  // Buffer`. Note that we only advance to the "used" buffers, when +  // incrementing, so that at dereference we're always at a valid point. +  template <class T> class Iterator { +  public: +    BufferRep *Buffers = nullptr; +    size_t Offset = 0; +    size_t Max = 0; + +    Iterator &operator++() { +      DCHECK_NE(Offset, Max); +      do { +        ++Offset; +      } while (Offset != Max && !Buffers[Offset].Used); +      return *this; +    } + +    Iterator operator++(int) { +      Iterator C = *this; +      ++(*this); +      return C; +    } + +    T &operator*() const { return Buffers[Offset].Buff; } + +    T *operator->() const { return &(Buffers[Offset].Buff); } + +    Iterator(BufferRep *Root, size_t O, size_t M) XRAY_NEVER_INSTRUMENT +        : Buffers(Root), +          Offset(O), +          Max(M) { +      // We want to advance to the first Offset where the 'Used' property is +      // true, or to the end of the list/queue. +      while (Offset != Max && !Buffers[Offset].Used) { +        ++Offset; +      } +    } + +    Iterator() = default; +    Iterator(const Iterator &) = default; +    Iterator(Iterator &&) = default; +    Iterator &operator=(const Iterator &) = default; +    Iterator &operator=(Iterator &&) = default; +    ~Iterator() = default; + +    template <class V> +    friend bool operator==(const Iterator &L, const Iterator<V> &R) { +      DCHECK_EQ(L.Max, R.Max); +      return L.Buffers == R.Buffers && L.Offset == R.Offset; +    } + +    template <class V> +    friend bool operator!=(const Iterator &L, const Iterator<V> &R) { +      return !(L == R); +    } +  }; + +  // Size of each individual Buffer. +  size_t BufferSize; + +  // Amount of pre-allocated buffers. +  size_t BufferCount; + +  SpinMutex Mutex; +  atomic_uint8_t Finalizing; + +  // The collocated ControlBlock and buffer storage. +  ControlBlock *BackingStore; + +  // The collocated ControlBlock and extents storage. +  ControlBlock *ExtentsBackingStore; + +  // A dynamically allocated array of BufferRep instances. +  BufferRep *Buffers; + +  // Pointer to the next buffer to be handed out. +  BufferRep *Next; + +  // Pointer to the entry in the array where the next released buffer will be +  // placed. +  BufferRep *First; + +  // Count of buffers that have been handed out through 'getBuffer'. +  size_t LiveBuffers; + +  // We use a generation number to identify buffers and which generation they're +  // associated with. +  atomic_uint64_t Generation; + +  /// Releases references to the buffers backed by the current buffer queue. 
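The release path described above, together with the incRefCount/decRefCount calls in getBuffer/releaseBuffer earlier, is a plain atomic reference count over the control blocks; a self-contained sketch of that idiom, with hypothetical names:

    #include <atomic>
    #include <cstdio>
    #include <cstdlib>
    #include <new>

    struct Counted {
      std::atomic<unsigned long> RefCount{0};
    };

    // Hypothetical analogues of the queue's incRefCount/decRefCount helpers:
    // whoever drops the last reference frees the block.
    void incRef(Counted *C) {
      if (C != nullptr)
        C->RefCount.fetch_add(1, std::memory_order_acq_rel);
    }

    void decRef(Counted *C) {
      if (C == nullptr)
        return;
      if (C->RefCount.fetch_sub(1, std::memory_order_acq_rel) == 1) {
        std::puts("last reference dropped; freeing backing store");
        C->~Counted();
        std::free(C);
      }
    }

    int main() {
      auto *C = new (std::malloc(sizeof(Counted))) Counted;
      incRef(C);  // the queue's own reference
      incRef(C);  // a handed-out buffer's reference
      decRef(C);  // buffer released
      decRef(C);  // queue torn down: storage is freed here
    }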
+  void cleanupBuffers(); + +public: +  enum class ErrorCode : unsigned { +    Ok, +    NotEnoughMemory, +    QueueFinalizing, +    UnrecognizedBuffer, +    AlreadyFinalized, +    AlreadyInitialized, +  }; + +  static const char *getErrorString(ErrorCode E) { +    switch (E) { +    case ErrorCode::Ok: +      return "(none)"; +    case ErrorCode::NotEnoughMemory: +      return "no available buffers in the queue"; +    case ErrorCode::QueueFinalizing: +      return "queue already finalizing"; +    case ErrorCode::UnrecognizedBuffer: +      return "buffer being returned not owned by buffer queue"; +    case ErrorCode::AlreadyFinalized: +      return "queue already finalized"; +    case ErrorCode::AlreadyInitialized: +      return "queue already initialized"; +    } +    return "unknown error"; +  } + +  /// Initialise a queue of size |N| with buffers of size |B|. We report success +  /// through |Success|. +  BufferQueue(size_t B, size_t N, bool &Success); + +  /// Updates |Buf| to contain the pointer to an appropriate buffer. Returns an +  /// error in case there are no available buffers to return when we will run +  /// over the upper bound for the total buffers. +  /// +  /// Requirements: +  ///   - BufferQueue is not finalising. +  /// +  /// Returns: +  ///   - ErrorCode::NotEnoughMemory on exceeding MaxSize. +  ///   - ErrorCode::Ok when we find a Buffer. +  ///   - ErrorCode::QueueFinalizing or ErrorCode::AlreadyFinalized on +  ///     a finalizing/finalized BufferQueue. +  ErrorCode getBuffer(Buffer &Buf); + +  /// Updates |Buf| to point to nullptr, with size 0. +  /// +  /// Returns: +  ///   - ErrorCode::Ok when we successfully release the buffer. +  ///   - ErrorCode::UnrecognizedBuffer for when this BufferQueue does not own +  ///     the buffer being released. +  ErrorCode releaseBuffer(Buffer &Buf); + +  /// Initializes the buffer queue, starting a new generation. We can re-set the +  /// size of buffers with |BS| along with the buffer count with |BC|. +  /// +  /// Returns: +  ///   - ErrorCode::Ok when we successfully initialize the buffer. This +  ///   requires that the buffer queue is previously finalized. +  ///   - ErrorCode::AlreadyInitialized when the buffer queue is not finalized. +  ErrorCode init(size_t BS, size_t BC); + +  bool finalizing() const { +    return atomic_load(&Finalizing, memory_order_acquire); +  } + +  uint64_t generation() const { +    return atomic_load(&Generation, memory_order_acquire); +  } + +  /// Returns the configured size of the buffers in the buffer queue. +  size_t ConfiguredBufferSize() const { return BufferSize; } + +  /// Sets the state of the BufferQueue to finalizing, which ensures that: +  /// +  ///   - All subsequent attempts to retrieve a Buffer will fail. +  ///   - All releaseBuffer operations will not fail. +  /// +  /// After a call to finalize succeeds, all subsequent calls to finalize will +  /// fail with ErrorCode::QueueFinalizing. +  ErrorCode finalize(); + +  /// Applies the provided function F to each Buffer in the queue, only if the +  /// Buffer is marked 'used' (i.e. has been the result of getBuffer(...) and a +  /// releaseBuffer(...) operation). +  template <class F> void apply(F Fn) XRAY_NEVER_INSTRUMENT { +    SpinMutexLock G(&Mutex); +    for (auto I = begin(), E = end(); I != E; ++I) +      Fn(*I); +  } + +  using const_iterator = Iterator<const Buffer>; +  using iterator = Iterator<Buffer>; + +  /// Provides iterator access to the raw Buffer instances. 
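For example, apply(...) above is how the flush path later walks only the "used" slots while holding the queue lock; a cut-down, standalone analogue of that traversal (a hypothetical MiniQueue, not the real class):

    #include <cstdio>
    #include <mutex>
    #include <vector>

    // Cut-down analogue of BufferQueue::apply: run a callback over every slot
    // marked 'used', holding the queue lock for the whole traversal.
    struct MiniQueue {
      struct Rep {
        int Payload = 0;
        bool Used = false;
      };
      std::vector<Rep> Slots;
      std::mutex Mu;

      template <class F> void apply(F Fn) {
        std::lock_guard<std::mutex> G(Mu);
        for (auto &R : Slots)
          if (R.Used)
            Fn(R.Payload);
      }
    };

    int main() {
      MiniQueue Q;
      Q.Slots.resize(4);
      Q.Slots[1] = {42, true};
      Q.Slots[3] = {7, true};
      Q.apply([](int P) { std::printf("flushing payload %d\n", P); });
    }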
+  iterator begin() const { return iterator(Buffers, 0, BufferCount); } +  const_iterator cbegin() const { +    return const_iterator(Buffers, 0, BufferCount); +  } +  iterator end() const { return iterator(Buffers, BufferCount, BufferCount); } +  const_iterator cend() const { +    return const_iterator(Buffers, BufferCount, BufferCount); +  } + +  // Cleans up allocated buffers. +  ~BufferQueue(); +}; + +} // namespace __xray + +#endif // XRAY_BUFFER_QUEUE_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_defs.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_defs.h new file mode 100644 index 000000000000..2da03c3c3451 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_defs.h @@ -0,0 +1,31 @@ +//===-- xray_defs.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Common definitions useful for XRay sources. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_XRAY_DEFS_H +#define XRAY_XRAY_DEFS_H + +#if XRAY_SUPPORTED +#define XRAY_NEVER_INSTRUMENT __attribute__((xray_never_instrument)) +#else +#define XRAY_NEVER_INSTRUMENT +#endif + +#if SANITIZER_NETBSD +// NetBSD: thread_local is not aligned properly, and the code relying +// on it segfaults +#define XRAY_TLS_ALIGNAS(x) +#define XRAY_HAS_TLS_ALIGNAS 0 +#else +#define XRAY_TLS_ALIGNAS(x) alignas(x) +#define XRAY_HAS_TLS_ALIGNAS 1 +#endif + +#endif  // XRAY_XRAY_DEFS_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_controller.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_controller.h new file mode 100644 index 000000000000..28a3546caa7b --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_controller.h @@ -0,0 +1,372 @@ +//===-- xray_fdr_controller.h ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. 
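A quick aside on XRAY_NEVER_INSTRUMENT from xray_defs.h above: it expands to the Clang xray_never_instrument attribute, which keeps the runtime's own functions out of the instrumentation map. A minimal user-level sketch of the two XRay attributes (assuming a Clang build; without -fxray-instrument they are inert):

    // Minimal sketch; compile with: clang++ -fxray-instrument -c example.cpp
    // [[clang::xray_always_instrument]] forces sleds even for tiny functions;
    // [[clang::xray_never_instrument]] is what XRAY_NEVER_INSTRUMENT expands to.
    [[clang::xray_always_instrument]] int traced(int x) { return x + 1; }
    [[clang::xray_never_instrument]] int untraced(int x) { return x * 2; }

    int main() { return traced(1) + untraced(2); }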
+// +//===----------------------------------------------------------------------===// +#ifndef COMPILER_RT_LIB_XRAY_XRAY_FDR_CONTROLLER_H_ +#define COMPILER_RT_LIB_XRAY_XRAY_FDR_CONTROLLER_H_ + +#include <limits> +#include <time.h> + +#include "xray/xray_interface.h" +#include "xray/xray_records.h" +#include "xray_buffer_queue.h" +#include "xray_fdr_log_writer.h" + +namespace __xray { + +template <size_t Version = 5> class FDRController { +  BufferQueue *BQ; +  BufferQueue::Buffer &B; +  FDRLogWriter &W; +  int (*WallClockReader)(clockid_t, struct timespec *) = 0; +  uint64_t CycleThreshold = 0; + +  uint64_t LastFunctionEntryTSC = 0; +  uint64_t LatestTSC = 0; +  uint16_t LatestCPU = 0; +  tid_t TId = 0; +  pid_t PId = 0; +  bool First = true; + +  uint32_t UndoableFunctionEnters = 0; +  uint32_t UndoableTailExits = 0; + +  bool finalized() const XRAY_NEVER_INSTRUMENT { +    return BQ == nullptr || BQ->finalizing(); +  } + +  bool hasSpace(size_t S) XRAY_NEVER_INSTRUMENT { +    return B.Data != nullptr && B.Generation == BQ->generation() && +           W.getNextRecord() + S <= reinterpret_cast<char *>(B.Data) + B.Size; +  } + +  constexpr int32_t mask(int32_t FuncId) const XRAY_NEVER_INSTRUMENT { +    return FuncId & ((1 << 29) - 1); +  } + +  bool getNewBuffer() XRAY_NEVER_INSTRUMENT { +    if (BQ->getBuffer(B) != BufferQueue::ErrorCode::Ok) +      return false; + +    W.resetRecord(); +    DCHECK_EQ(W.getNextRecord(), B.Data); +    LatestTSC = 0; +    LatestCPU = 0; +    First = true; +    UndoableFunctionEnters = 0; +    UndoableTailExits = 0; +    atomic_store(B.Extents, 0, memory_order_release); +    return true; +  } + +  bool setupNewBuffer() XRAY_NEVER_INSTRUMENT { +    if (finalized()) +      return false; + +    DCHECK(hasSpace(sizeof(MetadataRecord) * 3)); +    TId = GetTid(); +    PId = internal_getpid(); +    struct timespec TS { +      0, 0 +    }; +    WallClockReader(CLOCK_MONOTONIC, &TS); + +    MetadataRecord Metadata[] = { +        // Write out a MetadataRecord to signify that this is the start of a new +        // buffer, associated with a particular thread, with a new CPU. For the +        // data, we have 15 bytes to squeeze as much information as we can. At +        // this point we only write down the following bytes: +        //   - Thread ID (tid_t, cast to 4 bytes type due to Darwin being 8 +        //   bytes) +        createMetadataRecord<MetadataRecord::RecordKinds::NewBuffer>( +            static_cast<int32_t>(TId)), + +        // Also write the WalltimeMarker record. We only really need microsecond +        // precision here, and enforce across platforms that we need 64-bit +        // seconds and 32-bit microseconds encoded in the Metadata record. +        createMetadataRecord<MetadataRecord::RecordKinds::WalltimeMarker>( +            static_cast<int64_t>(TS.tv_sec), +            static_cast<int32_t>(TS.tv_nsec / 1000)), + +        // Also write the Pid record. 
+        createMetadataRecord<MetadataRecord::RecordKinds::Pid>( +            static_cast<int32_t>(PId)), +    }; + +    if (finalized()) +      return false; +    return W.writeMetadataRecords(Metadata); +  } + +  bool prepareBuffer(size_t S) XRAY_NEVER_INSTRUMENT { +    if (finalized()) +      return returnBuffer(); + +    if (UNLIKELY(!hasSpace(S))) { +      if (!returnBuffer()) +        return false; +      if (!getNewBuffer()) +        return false; +      if (!setupNewBuffer()) +        return false; +    } + +    if (First) { +      First = false; +      W.resetRecord(); +      atomic_store(B.Extents, 0, memory_order_release); +      return setupNewBuffer(); +    } + +    return true; +  } + +  bool returnBuffer() XRAY_NEVER_INSTRUMENT { +    if (BQ == nullptr) +      return false; + +    First = true; +    if (finalized()) { +      BQ->releaseBuffer(B); // ignore result. +      return false; +    } + +    return BQ->releaseBuffer(B) == BufferQueue::ErrorCode::Ok; +  } + +  enum class PreambleResult { NoChange, WroteMetadata, InvalidBuffer }; +  PreambleResult recordPreamble(uint64_t TSC, +                                uint16_t CPU) XRAY_NEVER_INSTRUMENT { +    if (UNLIKELY(LatestCPU != CPU || LatestTSC == 0)) { +      // We update our internal tracking state for the Latest TSC and CPU we've +      // seen, then write out the appropriate metadata and function records. +      LatestTSC = TSC; +      LatestCPU = CPU; + +      if (B.Generation != BQ->generation()) +        return PreambleResult::InvalidBuffer; + +      W.writeMetadata<MetadataRecord::RecordKinds::NewCPUId>(CPU, TSC); +      return PreambleResult::WroteMetadata; +    } + +    DCHECK_EQ(LatestCPU, CPU); + +    if (UNLIKELY(LatestTSC > TSC || +                 TSC - LatestTSC > +                     uint64_t{std::numeric_limits<int32_t>::max()})) { +      // Either the TSC has wrapped around from the last TSC we've seen or the +      // delta is too large to fit in a 32-bit signed integer, so we write a +      // wrap-around record. +      LatestTSC = TSC; + +      if (B.Generation != BQ->generation()) +        return PreambleResult::InvalidBuffer; + +      W.writeMetadata<MetadataRecord::RecordKinds::TSCWrap>(TSC); +      return PreambleResult::WroteMetadata; +    } + +    return PreambleResult::NoChange; +  } + +  bool rewindRecords(int32_t FuncId, uint64_t TSC, +                     uint16_t CPU) XRAY_NEVER_INSTRUMENT { +    // Undo one enter record, because at this point we are either at the state +    // of: +    // - We are exiting a function that we recently entered. +    // - We are exiting a function that was the result of a sequence of tail +    //   exits, and we can check whether the tail exits can be re-wound. 
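The recordPreamble logic above reduces to a three-way classification of the incoming (TSC, CPU) pair; a standalone sketch of just that decision, with hypothetical names:

    #include <cstdint>
    #include <cstdio>
    #include <limits>

    enum class Preamble { NoChange, NewCPU, TSCWrap };

    // Mirrors the decision in recordPreamble: a NewCPUId record on first use
    // or CPU migration, a TSCWrap record when the delta is negative or wider
    // than 32 bits, and otherwise no preamble at all.
    Preamble classify(uint64_t LatestTSC, uint16_t LatestCPU, uint64_t TSC,
                      uint16_t CPU) {
      if (LatestCPU != CPU || LatestTSC == 0)
        return Preamble::NewCPU;
      if (LatestTSC > TSC ||
          TSC - LatestTSC > uint64_t{std::numeric_limits<int32_t>::max()})
        return Preamble::TSCWrap;
      return Preamble::NoChange;
    }

    int main() {
      std::printf("%d\n", int(classify(100, 0, 150, 0)));                 // 0: NoChange
      std::printf("%d\n", int(classify(100, 0, 150, 1)));                 // 1: NewCPU
      std::printf("%d\n", int(classify(100, 0, 100 + (1ull << 40), 0)));  // 2: TSCWrap
    }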
+    // +    FunctionRecord F; +    W.undoWrites(sizeof(FunctionRecord)); +    if (B.Generation != BQ->generation()) +      return false; +    internal_memcpy(&F, W.getNextRecord(), sizeof(FunctionRecord)); + +    DCHECK(F.RecordKind == +               uint8_t(FunctionRecord::RecordKinds::FunctionEnter) && +           "Expected to find function entry recording when rewinding."); +    DCHECK_EQ(F.FuncId, FuncId & ~(0x0F << 28)); + +    LatestTSC -= F.TSCDelta; +    if (--UndoableFunctionEnters != 0) { +      LastFunctionEntryTSC -= F.TSCDelta; +      return true; +    } + +    LastFunctionEntryTSC = 0; +    auto RewindingTSC = LatestTSC; +    auto RewindingRecordPtr = W.getNextRecord() - sizeof(FunctionRecord); +    while (UndoableTailExits) { +      if (B.Generation != BQ->generation()) +        return false; +      internal_memcpy(&F, RewindingRecordPtr, sizeof(FunctionRecord)); +      DCHECK_EQ(F.RecordKind, +                uint8_t(FunctionRecord::RecordKinds::FunctionTailExit)); +      RewindingTSC -= F.TSCDelta; +      RewindingRecordPtr -= sizeof(FunctionRecord); +      if (B.Generation != BQ->generation()) +        return false; +      internal_memcpy(&F, RewindingRecordPtr, sizeof(FunctionRecord)); + +      // This tail call exceeded the threshold duration. It will not be erased. +      if ((TSC - RewindingTSC) >= CycleThreshold) { +        UndoableTailExits = 0; +        return true; +      } + +      --UndoableTailExits; +      W.undoWrites(sizeof(FunctionRecord) * 2); +      LatestTSC = RewindingTSC; +    } +    return true; +  } + +public: +  template <class WallClockFunc> +  FDRController(BufferQueue *BQ, BufferQueue::Buffer &B, FDRLogWriter &W, +                WallClockFunc R, uint64_t C) XRAY_NEVER_INSTRUMENT +      : BQ(BQ), +        B(B), +        W(W), +        WallClockReader(R), +        CycleThreshold(C) {} + +  bool functionEnter(int32_t FuncId, uint64_t TSC, +                     uint16_t CPU) XRAY_NEVER_INSTRUMENT { +    if (finalized() || +        !prepareBuffer(sizeof(MetadataRecord) + sizeof(FunctionRecord))) +      return returnBuffer(); + +    auto PreambleStatus = recordPreamble(TSC, CPU); +    if (PreambleStatus == PreambleResult::InvalidBuffer) +      return returnBuffer(); + +    if (PreambleStatus == PreambleResult::WroteMetadata) { +      UndoableFunctionEnters = 1; +      UndoableTailExits = 0; +    } else { +      ++UndoableFunctionEnters; +    } + +    auto Delta = TSC - LatestTSC; +    LastFunctionEntryTSC = TSC; +    LatestTSC = TSC; +    return W.writeFunction(FDRLogWriter::FunctionRecordKind::Enter, +                           mask(FuncId), Delta); +  } + +  bool functionTailExit(int32_t FuncId, uint64_t TSC, +                        uint16_t CPU) XRAY_NEVER_INSTRUMENT { +    if (finalized()) +      return returnBuffer(); + +    if (!prepareBuffer(sizeof(MetadataRecord) + sizeof(FunctionRecord))) +      return returnBuffer(); + +    auto PreambleStatus = recordPreamble(TSC, CPU); +    if (PreambleStatus == PreambleResult::InvalidBuffer) +      return returnBuffer(); + +    if (PreambleStatus == PreambleResult::NoChange && +        UndoableFunctionEnters != 0 && +        TSC - LastFunctionEntryTSC < CycleThreshold) +      return rewindRecords(FuncId, TSC, CPU); + +    UndoableTailExits = UndoableFunctionEnters ? 
UndoableTailExits + 1 : 0; +    UndoableFunctionEnters = 0; +    auto Delta = TSC - LatestTSC; +    LatestTSC = TSC; +    return W.writeFunction(FDRLogWriter::FunctionRecordKind::TailExit, +                           mask(FuncId), Delta); +  } + +  bool functionEnterArg(int32_t FuncId, uint64_t TSC, uint16_t CPU, +                        uint64_t Arg) XRAY_NEVER_INSTRUMENT { +    if (finalized() || +        !prepareBuffer((2 * sizeof(MetadataRecord)) + sizeof(FunctionRecord)) || +        recordPreamble(TSC, CPU) == PreambleResult::InvalidBuffer) +      return returnBuffer(); + +    auto Delta = TSC - LatestTSC; +    LatestTSC = TSC; +    LastFunctionEntryTSC = 0; +    UndoableFunctionEnters = 0; +    UndoableTailExits = 0; + +    return W.writeFunctionWithArg(FDRLogWriter::FunctionRecordKind::EnterArg, +                                  mask(FuncId), Delta, Arg); +  } + +  bool functionExit(int32_t FuncId, uint64_t TSC, +                    uint16_t CPU) XRAY_NEVER_INSTRUMENT { +    if (finalized() || +        !prepareBuffer(sizeof(MetadataRecord) + sizeof(FunctionRecord))) +      return returnBuffer(); + +    auto PreambleStatus = recordPreamble(TSC, CPU); +    if (PreambleStatus == PreambleResult::InvalidBuffer) +      return returnBuffer(); + +    if (PreambleStatus == PreambleResult::NoChange && +        UndoableFunctionEnters != 0 && +        TSC - LastFunctionEntryTSC < CycleThreshold) +      return rewindRecords(FuncId, TSC, CPU); + +    auto Delta = TSC - LatestTSC; +    LatestTSC = TSC; +    UndoableFunctionEnters = 0; +    UndoableTailExits = 0; +    return W.writeFunction(FDRLogWriter::FunctionRecordKind::Exit, mask(FuncId), +                           Delta); +  } + +  bool customEvent(uint64_t TSC, uint16_t CPU, const void *Event, +                   int32_t EventSize) XRAY_NEVER_INSTRUMENT { +    if (finalized() || +        !prepareBuffer((2 * sizeof(MetadataRecord)) + EventSize) || +        recordPreamble(TSC, CPU) == PreambleResult::InvalidBuffer) +      return returnBuffer(); + +    auto Delta = TSC - LatestTSC; +    LatestTSC = TSC; +    UndoableFunctionEnters = 0; +    UndoableTailExits = 0; +    return W.writeCustomEvent(Delta, Event, EventSize); +  } + +  bool typedEvent(uint64_t TSC, uint16_t CPU, uint16_t EventType, +                  const void *Event, int32_t EventSize) XRAY_NEVER_INSTRUMENT { +    if (finalized() || +        !prepareBuffer((2 * sizeof(MetadataRecord)) + EventSize) || +        recordPreamble(TSC, CPU) == PreambleResult::InvalidBuffer) +      return returnBuffer(); + +    auto Delta = TSC - LatestTSC; +    LatestTSC = TSC; +    UndoableFunctionEnters = 0; +    UndoableTailExits = 0; +    return W.writeTypedEvent(Delta, EventType, Event, EventSize); +  } + +  bool flush() XRAY_NEVER_INSTRUMENT { +    if (finalized()) { +      returnBuffer(); // ignore result. +      return true; +    } +    return returnBuffer(); +  } +}; + +} // namespace __xray + +#endif // COMPILER-RT_LIB_XRAY_XRAY_FDR_CONTROLLER_H_ diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.cpp new file mode 100644 index 000000000000..272b0b7cb1f7 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.cpp @@ -0,0 +1,47 @@ +//===-- xray_fdr_flags.cpp --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
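Before turning to the FDR flag definitions, note what the controller's rewind counters buy: calls shorter than the configured threshold are erased rather than logged. A toy model of that compaction (a simple record stack, not the real buffer format):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Rec { int32_t FuncId; uint64_t TSC; };

    // Toy model of the controller's rewind: if the matching entry was written
    // recently enough (duration below the threshold), erase it instead of
    // appending an exit record, so short calls leave no trace at all.
    void onExit(std::vector<Rec> &Log, int32_t FuncId, uint64_t TSC,
                uint64_t Threshold) {
      if (!Log.empty() && Log.back().FuncId == FuncId &&
          TSC - Log.back().TSC < Threshold) {
        Log.pop_back();  // rewind: drop the enter record
        return;
      }
      Log.push_back({-FuncId, TSC});  // negative id marks an exit, toy-style
    }

    int main() {
      std::vector<Rec> Log;
      Log.push_back({1, 100});   // enter f1
      onExit(Log, 1, 140, 100);  // 40 cycles < 100: both records vanish
      Log.push_back({2, 200});   // enter f2
      onExit(Log, 2, 900, 100);  // 700 cycles >= 100: exit is recorded
      std::printf("%zu records survive\n", Log.size());  // prints 2
    }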
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// XRay FDR flag parsing logic. +//===----------------------------------------------------------------------===// + +#include "xray_fdr_flags.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "xray_defs.h" + +using namespace __sanitizer; + +namespace __xray { + +FDRFlags xray_fdr_flags_dont_use_directly; // use via fdrFlags(). + +void FDRFlags::setDefaults() XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; +#include "xray_fdr_flags.inc" +#undef XRAY_FLAG +} + +void registerXRayFDRFlags(FlagParser *P, FDRFlags *F) XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description)                       \ +  RegisterFlag(P, #Name, Description, &F->Name); +#include "xray_fdr_flags.inc" +#undef XRAY_FLAG +} + +const char *useCompilerDefinedFDRFlags() XRAY_NEVER_INSTRUMENT { +#ifdef XRAY_FDR_OPTIONS +  return SANITIZER_STRINGIFY(XRAY_FDR_OPTIONS); +#else +  return ""; +#endif +} + +} // namespace __xray diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.h new file mode 100644 index 000000000000..d6f00dc48006 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.h @@ -0,0 +1,37 @@ +//===-- xray_fdr_flags.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This file defines the flags for the flight-data-recorder mode implementation. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_FDR_FLAGS_H +#define XRAY_FDR_FLAGS_H + +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_internal_defs.h" + +namespace __xray { + +struct FDRFlags { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Type Name; +#include "xray_fdr_flags.inc" +#undef XRAY_FLAG + +  void setDefaults(); +}; + +extern FDRFlags xray_fdr_flags_dont_use_directly; +extern void registerXRayFDRFlags(FlagParser *P, FDRFlags *F); +const char *useCompilerDefinedFDRFlags(); +inline FDRFlags *fdrFlags() { return &xray_fdr_flags_dont_use_directly; } + +} // namespace __xray + +#endif // XRAY_FDR_FLAGS_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.inc b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.inc new file mode 100644 index 000000000000..6082b7e78521 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_flags.inc @@ -0,0 +1,28 @@ +//===-- xray_fdr_flags.inc --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
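The FDRFlags struct above and the registerXRayFDRFlags helper are both generated by expanding the same flag list twice with different XRAY_FLAG definitions. A self-contained sketch of that X-macro technique (using a list macro in place of re-including the .inc file):

    #include <cstdio>

    // Stand-in for xray_fdr_flags.inc: each entry describes one flag.
    #define MY_FLAGS(X)                                        \
      X(int, buffer_size, 16384, "Size of each buffer.")       \
      X(bool, no_file_flush, false, "Skip writing log files.")

    // Expand once to declare the members...
    struct Flags {
    #define DECLARE(Type, Name, Default, Desc) Type Name;
      MY_FLAGS(DECLARE)
    #undef DECLARE

      // ...and once more to set the defaults.
      void setDefaults() {
    #define SET(Type, Name, Default, Desc) Name = Default;
        MY_FLAGS(SET)
    #undef SET
      }
    };

    int main() {
      Flags F;
      F.setDefaults();
      std::printf("buffer_size=%d no_file_flush=%d\n", F.buffer_size,
                  F.no_file_flush);
    }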
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// XRay FDR Mode runtime flags.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_FLAG
+#error "Define XRAY_FLAG prior to including this file!"
+#endif
+
+// FDR (Flight Data Recorder) Mode logging options.
+XRAY_FLAG(int, func_duration_threshold_us, 5,
+          "FDR logging will try to skip functions that execute for fewer "
+          "microseconds than this threshold.")
+XRAY_FLAG(int, grace_period_ms, 100,
+          "FDR logging will wait this much time in milliseconds before "
+          "actually flushing the log; this gives a chance for threads to "
+          "notice that the log has been finalized and clean up.")
+XRAY_FLAG(int, buffer_size, 16384,
+          "Size of buffers in the circular buffer queue.")
+XRAY_FLAG(int, buffer_max, 100, "Maximum number of buffers in the queue.")
+XRAY_FLAG(bool, no_file_flush, false,
+          "Set to true to not write log files by default.")
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_records.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_records.h
new file mode 100644
index 000000000000..7a5d438314af
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_records.h
@@ -0,0 +1,75 @@
+//===-- xray_fdr_log_records.h  -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a function call tracing system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_FDR_LOG_RECORDS_H
+#define XRAY_XRAY_FDR_LOG_RECORDS_H
+#include <cstdint>
+
+namespace __xray {
+
+enum class RecordType : uint8_t { Function, Metadata };
+
+// A MetadataRecord encodes the kind of record in its first byte, and has 15
+// additional bytes at the end to hold free-form data.
+struct alignas(16) MetadataRecord {
+  // A MetadataRecord must always have a type of 1.
+  /* RecordType */ uint8_t Type : 1;
+
+  // Each kind of record is represented as a 7-bit value (even though we use an
+  // unsigned 8-bit enum class to do so).
+  enum class RecordKinds : uint8_t {
+    NewBuffer,
+    EndOfBuffer,
+    NewCPUId,
+    TSCWrap,
+    WalltimeMarker,
+    CustomEventMarker,
+    CallArgument,
+    BufferExtents,
+    TypedEventMarker,
+    Pid,
+  };
+
+  // Use 7 bits to identify this record type.
+  /* RecordKinds */ uint8_t RecordKind : 7;
+  char Data[15];
+} __attribute__((packed));
+
+static_assert(sizeof(MetadataRecord) == 16, "Wrong size for MetadataRecord.");
+
+struct alignas(8) FunctionRecord {
+  // A FunctionRecord must always have a type of 0.
+  /* RecordType */ uint8_t Type : 1;
+  enum class RecordKinds {
+    FunctionEnter = 0x00,
+    FunctionExit = 0x01,
+    FunctionTailExit = 0x02,
+  };
+  /* RecordKinds */ uint8_t RecordKind : 3;
+
+  // We only use 28 bits of the function ID, so that we can use as few bytes as
+  // possible. This means we only support 2^28 (268,435,456) unique function ids
+  // in a single binary.
+  int FuncId : 28;
+
+  // We use another 4 bytes to hold the delta from the previous entry's TSC.
+  // In case we've found that the distance is greater than the allowable 32 bits +  // (either because we are running in a different CPU and the TSC might be +  // different then), we should use a MetadataRecord before this FunctionRecord +  // that will contain the full TSC for that CPU, and keep this to 0. +  uint32_t TSCDelta; +} __attribute__((packed)); + +static_assert(sizeof(FunctionRecord) == 8, "Wrong size for FunctionRecord."); + +} // namespace __xray + +#endif // XRAY_XRAY_FDR_LOG_RECORDS_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_writer.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_writer.h new file mode 100644 index 000000000000..0378663c3907 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_log_writer.h @@ -0,0 +1,231 @@ +//===-- xray_fdr_log_writer.h ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. +// +//===----------------------------------------------------------------------===// +#ifndef COMPILER_RT_LIB_XRAY_XRAY_FDR_LOG_WRITER_H_ +#define COMPILER_RT_LIB_XRAY_XRAY_FDR_LOG_WRITER_H_ + +#include "xray_buffer_queue.h" +#include "xray_fdr_log_records.h" +#include <functional> +#include <tuple> +#include <type_traits> +#include <utility> + +namespace __xray { + +template <size_t Index> struct SerializerImpl { +  template <class Tuple, +            typename std::enable_if< +                Index<std::tuple_size< +                          typename std::remove_reference<Tuple>::type>::value, +                      int>::type = 0> static void serializeTo(char *Buffer, +                                                              Tuple &&T) { +    auto P = reinterpret_cast<const char *>(&std::get<Index>(T)); +    constexpr auto Size = sizeof(std::get<Index>(T)); +    internal_memcpy(Buffer, P, Size); +    SerializerImpl<Index + 1>::serializeTo(Buffer + Size, +                                           std::forward<Tuple>(T)); +  } + +  template <class Tuple, +            typename std::enable_if< +                Index >= std::tuple_size<typename std::remove_reference< +                             Tuple>::type>::value, +                int>::type = 0> +  static void serializeTo(char *, Tuple &&) {} +}; + +using Serializer = SerializerImpl<0>; + +template <class Tuple, size_t Index> struct AggregateSizesImpl { +  static constexpr size_t value = +      sizeof(typename std::tuple_element<Index, Tuple>::type) + +      AggregateSizesImpl<Tuple, Index - 1>::value; +}; + +template <class Tuple> struct AggregateSizesImpl<Tuple, 0> { +  static constexpr size_t value = +      sizeof(typename std::tuple_element<0, Tuple>::type); +}; + +template <class Tuple> struct AggregateSizes { +  static constexpr size_t value = +      AggregateSizesImpl<Tuple, std::tuple_size<Tuple>::value - 1>::value; +}; + +template <MetadataRecord::RecordKinds Kind, class... DataTypes> +MetadataRecord createMetadataRecord(DataTypes &&... 
Ds) { +  static_assert(AggregateSizes<std::tuple<DataTypes...>>::value <= +                    sizeof(MetadataRecord) - 1, +                "Metadata payload longer than metadata buffer!"); +  MetadataRecord R; +  R.Type = 1; +  R.RecordKind = static_cast<uint8_t>(Kind); +  Serializer::serializeTo(R.Data, +                          std::make_tuple(std::forward<DataTypes>(Ds)...)); +  return R; +} + +class FDRLogWriter { +  BufferQueue::Buffer &Buffer; +  char *NextRecord = nullptr; + +  template <class T> void writeRecord(const T &R) { +    internal_memcpy(NextRecord, reinterpret_cast<const char *>(&R), sizeof(T)); +    NextRecord += sizeof(T); +    // We need this atomic fence here to ensure that other threads attempting to +    // read the bytes in the buffer will see the writes committed before the +    // extents are updated. +    atomic_thread_fence(memory_order_release); +    atomic_fetch_add(Buffer.Extents, sizeof(T), memory_order_acq_rel); +  } + +public: +  explicit FDRLogWriter(BufferQueue::Buffer &B, char *P) +      : Buffer(B), NextRecord(P) { +    DCHECK_NE(Buffer.Data, nullptr); +    DCHECK_NE(NextRecord, nullptr); +  } + +  explicit FDRLogWriter(BufferQueue::Buffer &B) +      : FDRLogWriter(B, static_cast<char *>(B.Data)) {} + +  template <MetadataRecord::RecordKinds Kind, class... Data> +  bool writeMetadata(Data &&... Ds) { +    // TODO: Check boundary conditions: +    // 1) Buffer is full, and cannot handle one metadata record. +    // 2) Buffer queue is finalising. +    writeRecord(createMetadataRecord<Kind>(std::forward<Data>(Ds)...)); +    return true; +  } + +  template <size_t N> size_t writeMetadataRecords(MetadataRecord (&Recs)[N]) { +    constexpr auto Size = sizeof(MetadataRecord) * N; +    internal_memcpy(NextRecord, reinterpret_cast<const char *>(Recs), Size); +    NextRecord += Size; +    // We need this atomic fence here to ensure that other threads attempting to +    // read the bytes in the buffer will see the writes committed before the +    // extents are updated. +    atomic_thread_fence(memory_order_release); +    atomic_fetch_add(Buffer.Extents, Size, memory_order_acq_rel); +    return Size; +  } + +  enum class FunctionRecordKind : uint8_t { +    Enter = 0x00, +    Exit = 0x01, +    TailExit = 0x02, +    EnterArg = 0x03, +  }; + +  bool writeFunction(FunctionRecordKind Kind, int32_t FuncId, int32_t Delta) { +    FunctionRecord R; +    R.Type = 0; +    R.RecordKind = uint8_t(Kind); +    R.FuncId = FuncId; +    R.TSCDelta = Delta; +    writeRecord(R); +    return true; +  } + +  bool writeFunctionWithArg(FunctionRecordKind Kind, int32_t FuncId, +                            int32_t Delta, uint64_t Arg) { +    // We need to write the function with arg into the buffer, and then +    // atomically update the buffer extents. This ensures that any reads +    // synchronised on the buffer extents record will always see the writes +    // that happen before the atomic update. 
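The comment above describes the buffer's publication protocol: payload bytes first, a release fence, then the atomic extents update, with readers pairing an acquire load. A standalone sketch of that pairing (hypothetical names, one writer and one reader thread):

    #include <atomic>
    #include <cstdio>
    #include <cstring>
    #include <thread>

    char Buffer[64];
    std::atomic<unsigned> Extents{0};

    void writer() {
      const char Msg[] = "record";
      std::memcpy(Buffer, Msg, sizeof(Msg));
      // Publish: the release fence plus the RMW order the payload bytes
      // before the new extents value, mirroring writeRecord() above.
      std::atomic_thread_fence(std::memory_order_release);
      Extents.fetch_add(sizeof(Msg), std::memory_order_acq_rel);
    }

    void reader() {
      unsigned N;
      while ((N = Extents.load(std::memory_order_acquire)) == 0) {
      }
      // Every byte in [0, N) is now guaranteed to be visible.
      std::printf("saw %u bytes: %s\n", N, Buffer);
    }

    int main() {
      std::thread W(writer), R(reader);
      W.join();
      R.join();
    }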
+    FunctionRecord R; +    R.Type = 0; +    R.RecordKind = uint8_t(Kind); +    R.FuncId = FuncId; +    R.TSCDelta = Delta; +    MetadataRecord A = +        createMetadataRecord<MetadataRecord::RecordKinds::CallArgument>(Arg); +    NextRecord = reinterpret_cast<char *>(internal_memcpy( +                     NextRecord, reinterpret_cast<char *>(&R), sizeof(R))) + +                 sizeof(R); +    NextRecord = reinterpret_cast<char *>(internal_memcpy( +                     NextRecord, reinterpret_cast<char *>(&A), sizeof(A))) + +                 sizeof(A); +    // We need this atomic fence here to ensure that other threads attempting to +    // read the bytes in the buffer will see the writes committed before the +    // extents are updated. +    atomic_thread_fence(memory_order_release); +    atomic_fetch_add(Buffer.Extents, sizeof(R) + sizeof(A), +                     memory_order_acq_rel); +    return true; +  } + +  bool writeCustomEvent(int32_t Delta, const void *Event, int32_t EventSize) { +    // We write the metadata record and the custom event data into the buffer +    // first, before we atomically update the extents for the buffer. This +    // allows us to ensure that any threads reading the extents of the buffer +    // will only ever see the full metadata and custom event payload accounted +    // (no partial writes accounted). +    MetadataRecord R = +        createMetadataRecord<MetadataRecord::RecordKinds::CustomEventMarker>( +            EventSize, Delta); +    NextRecord = reinterpret_cast<char *>(internal_memcpy( +                     NextRecord, reinterpret_cast<char *>(&R), sizeof(R))) + +                 sizeof(R); +    NextRecord = reinterpret_cast<char *>( +                     internal_memcpy(NextRecord, Event, EventSize)) + +                 EventSize; + +    // We need this atomic fence here to ensure that other threads attempting to +    // read the bytes in the buffer will see the writes committed before the +    // extents are updated. +    atomic_thread_fence(memory_order_release); +    atomic_fetch_add(Buffer.Extents, sizeof(R) + EventSize, +                     memory_order_acq_rel); +    return true; +  } + +  bool writeTypedEvent(int32_t Delta, uint16_t EventType, const void *Event, +                       int32_t EventSize) { +    // We do something similar when writing out typed events, see +    // writeCustomEvent(...) above for details. +    MetadataRecord R = +        createMetadataRecord<MetadataRecord::RecordKinds::TypedEventMarker>( +            EventSize, Delta, EventType); +    NextRecord = reinterpret_cast<char *>(internal_memcpy( +                     NextRecord, reinterpret_cast<char *>(&R), sizeof(R))) + +                 sizeof(R); +    NextRecord = reinterpret_cast<char *>( +                     internal_memcpy(NextRecord, Event, EventSize)) + +                 EventSize; + +    // We need this atomic fence here to ensure that other threads attempting to +    // read the bytes in the buffer will see the writes committed before the +    // extents are updated. 
+    atomic_thread_fence(memory_order_release);
+    atomic_fetch_add(Buffer.Extents, sizeof(R) + EventSize,
+                     memory_order_acq_rel);
+    return true;
+  }
+
+  char *getNextRecord() const { return NextRecord; }
+
+  void resetRecord() {
+    NextRecord = reinterpret_cast<char *>(Buffer.Data);
+    atomic_store(Buffer.Extents, 0, memory_order_release);
+  }
+
+  void undoWrites(size_t B) {
+    DCHECK_GE(NextRecord - B, reinterpret_cast<char *>(Buffer.Data));
+    NextRecord -= B;
+    atomic_fetch_sub(Buffer.Extents, B, memory_order_acq_rel);
+  }
+
+}; // class FDRLogWriter
+
+} // namespace __xray
+
+#endif // COMPILER_RT_LIB_XRAY_XRAY_FDR_LOG_WRITER_H_
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.cpp
new file mode 100644
index 000000000000..7def3565d56a
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.cpp
@@ -0,0 +1,754 @@
+//===-- xray_fdr_logging.cpp -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Here we implement the Flight Data Recorder mode for XRay, where we use
+// compact structures to store records in memory as well as when writing out the
+// data to files.
+//
+//===----------------------------------------------------------------------===//
+#include "xray_fdr_logging.h"
+#include <cassert>
+#include <cstddef>
+#include <errno.h>
+#include <limits>
+#include <memory>
+#include <pthread.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray/xray_interface.h"
+#include "xray/xray_records.h"
+#include "xray_allocator.h"
+#include "xray_buffer_queue.h"
+#include "xray_defs.h"
+#include "xray_fdr_controller.h"
+#include "xray_fdr_flags.h"
+#include "xray_fdr_log_writer.h"
+#include "xray_flags.h"
+#include "xray_recursion_guard.h"
+#include "xray_tsc.h"
+#include "xray_utils.h"
+
+namespace __xray {
+
+static atomic_sint32_t LoggingStatus = {
+    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
+
+namespace {
+
+// Group together thread-local-data in a struct, then hide it behind a function
+// call so that it can be initialized on first use instead of as a global. We
+// force the alignment to 64-bytes for x86 cache line alignment, as this
+// structure is used in the hot path of implementation.
+struct XRAY_TLS_ALIGNAS(64) ThreadLocalData {
+  BufferQueue::Buffer Buffer{};
+  BufferQueue *BQ = nullptr;
+
+  using LogWriterStorage = std::byte[sizeof(FDRLogWriter)];
+  alignas(FDRLogWriter) LogWriterStorage LWStorage;
+  FDRLogWriter *Writer = nullptr;
+
+  using ControllerStorage = std::byte[sizeof(FDRController<>)];
+  alignas(FDRController<>) ControllerStorage CStorage;
+  FDRController<> *Controller = nullptr;
+};
+
+} // namespace
+
+static_assert(std::is_trivially_destructible<ThreadLocalData>::value,
+              "ThreadLocalData must be trivially destructible");
+
+// Use a global pthread key to identify thread-local data for logging.
+static pthread_key_t Key;
+
+// Global BufferQueue.
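The declaration that follows, like the scratch-pads used later in this file, keeps the object in raw static storage and constructs it lazily with placement new, so no constructor or destructor runs at program start or exit. A minimal sketch of the idiom (single-threaded for brevity; the runtime guards initialization differently):

    #include <cstddef>
    #include <cstdio>
    #include <new>

    struct Service {
      int State;
      explicit Service(int S) : State(S) { std::puts("constructed on demand"); }
    };

    // Raw bytes: unlike a global Service, nothing runs at load time or exit.
    alignas(Service) static std::byte Storage[sizeof(Service)];
    static Service *Instance = nullptr;

    Service &get() {
      if (Instance == nullptr)
        Instance = new (&Storage) Service(42);
      return *Instance;
    }

    int main() { std::printf("state=%d\n", get().State); }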
+static std::byte BufferQueueStorage[sizeof(BufferQueue)]; +static BufferQueue *BQ = nullptr; + +// Global thresholds for function durations. +static atomic_uint64_t ThresholdTicks{0}; + +// Global for ticks per second. +static atomic_uint64_t TicksPerSec{0}; + +static atomic_sint32_t LogFlushStatus = { +    XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; + +// This function will initialize the thread-local data structure used by the FDR +// logging implementation and return a reference to it. The implementation +// details require a bit of care to maintain. +// +// First, some requirements on the implementation in general: +// +//   - XRay handlers should not call any memory allocation routines that may +//     delegate to an instrumented implementation. This means functions like +//     malloc() and free() should not be called while instrumenting. +// +//   - We would like to use some thread-local data initialized on first-use of +//     the XRay instrumentation. These allow us to implement unsynchronized +//     routines that access resources associated with the thread. +// +// The implementation here uses a few mechanisms that allow us to provide both +// the requirements listed above. We do this by: +// +//   1. Using a thread-local aligned storage buffer for representing the +//      ThreadLocalData struct. This data will be uninitialized memory by +//      design. +// +//   2. Not requiring a thread exit handler/implementation, keeping the +//      thread-local as purely a collection of references/data that do not +//      require cleanup. +// +// We're doing this to avoid using a `thread_local` object that has a +// non-trivial destructor, because the C++ runtime might call std::malloc(...) +// to register calls to destructors. Deadlocks may arise when, for example, an +// externally provided malloc implementation is XRay instrumented, and +// initializing the thread-locals involves calling into malloc. A malloc +// implementation that does global synchronization might be holding a lock for a +// critical section, calling a function that might be XRay instrumented (and +// thus in turn calling into malloc by virtue of registration of the +// thread_local's destructor). +#if XRAY_HAS_TLS_ALIGNAS +static_assert(alignof(ThreadLocalData) >= 64, +              "ThreadLocalData must be cache line aligned."); +#endif +static ThreadLocalData &getThreadLocalData() { +  alignas(ThreadLocalData) thread_local std::byte +      TLDStorage[sizeof(ThreadLocalData)]; + +  if (pthread_getspecific(Key) == NULL) { +    new (reinterpret_cast<ThreadLocalData *>(&TLDStorage)) ThreadLocalData{}; +    pthread_setspecific(Key, &TLDStorage); +  } + +  return *reinterpret_cast<ThreadLocalData *>(&TLDStorage); +} + +static XRayFileHeader &fdrCommonHeaderInfo() { +  alignas(XRayFileHeader) static std::byte HStorage[sizeof(XRayFileHeader)]; +  static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; +  static bool TSCSupported = true; +  static uint64_t CycleFrequency = NanosecondsPerSecond; +  pthread_once( +      &OnceInit, +[] { +        XRayFileHeader &H = reinterpret_cast<XRayFileHeader &>(HStorage); +        // Version 2 of the log writes the extents of the buffer, instead of +        // relying on an end-of-buffer record. +        // Version 3 includes PID metadata record. +        // Version 4 includes CPU data in the custom event records. 
+        // Version 5 uses relative deltas for custom and typed event records, +        // and removes the CPU data in custom event records (similar to how +        // function records use deltas instead of full TSCs and rely on other +        // metadata records for TSC wraparound and CPU migration). +        H.Version = 5; +        H.Type = FileTypes::FDR_LOG; + +        // Test for required CPU features and cache the cycle frequency +        TSCSupported = probeRequiredCPUFeatures(); +        if (TSCSupported) +          CycleFrequency = getTSCFrequency(); +        H.CycleFrequency = CycleFrequency; + +        // FIXME: Actually check whether we have 'constant_tsc' and +        // 'nonstop_tsc' before setting the values in the header. +        H.ConstantTSC = 1; +        H.NonstopTSC = 1; +      }); +  return reinterpret_cast<XRayFileHeader &>(HStorage); +} + +// This is the iterator implementation, which knows how to handle FDR-mode +// specific buffers. This is used as an implementation of the iterator function +// needed by __xray_set_buffer_iterator(...). It maintains a global state of the +// buffer iteration for the currently installed FDR mode buffers. In particular: +// +//   - If the argument represents the initial state of XRayBuffer ({nullptr, 0}) +//     then the iterator returns the header information. +//   - If the argument represents the header information ({address of header +//     info, size of the header info}) then it returns the first FDR buffer's +//     address and extents. +//   - It will keep returning the next buffer and extents as there are more +//     buffers to process. When the input represents the last buffer, it will +//     return the initial state to signal completion ({nullptr, 0}). +// +// See xray/xray_log_interface.h for more details on the requirements for the +// implementations of __xray_set_buffer_iterator(...) and +// __xray_log_process_buffers(...). +XRayBuffer fdrIterator(const XRayBuffer B) { +  DCHECK(internal_strcmp(__xray_log_get_current_mode(), "xray-fdr") == 0); +  DCHECK(BQ->finalizing()); + +  if (BQ == nullptr || !BQ->finalizing()) { +    if (Verbosity()) +      Report( +          "XRay FDR: Failed global buffer queue is null or not finalizing!\n"); +    return {nullptr, 0}; +  } + +  // We use a global scratch-pad for the header information, which only gets +  // initialized the first time this function is called. We'll update one part +  // of this information with some relevant data (in particular the number of +  // buffers to expect). +  alignas( +      XRayFileHeader) static std::byte HeaderStorage[sizeof(XRayFileHeader)]; +  static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT; +  pthread_once( +      &HeaderOnce, +[] { +        reinterpret_cast<XRayFileHeader &>(HeaderStorage) = +            fdrCommonHeaderInfo(); +      }); + +  // We use a convenience alias for code referring to Header from here on out. +  auto &Header = reinterpret_cast<XRayFileHeader &>(HeaderStorage); +  if (B.Data == nullptr && B.Size == 0) { +    Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()}; +    return XRayBuffer{static_cast<void *>(&Header), sizeof(Header)}; +  } + +  static BufferQueue::const_iterator It{}; +  static BufferQueue::const_iterator End{}; +  static uint8_t *CurrentBuffer{nullptr}; +  static size_t SerializedBufferSize = 0; +  if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) { +    // From this point on, we provide raw access to the raw buffer we're getting +    // from the BufferQueue. 
We're relying on the iterators from the current +    // Buffer queue. +    It = BQ->cbegin(); +    End = BQ->cend(); +  } + +  if (CurrentBuffer != nullptr) { +    deallocateBuffer(CurrentBuffer, SerializedBufferSize); +    CurrentBuffer = nullptr; +  } + +  if (It == End) +    return {nullptr, 0}; + +  // Set up the current buffer to contain the extents like we would when writing +  // out to disk. The difference here would be that we still write "empty" +  // buffers, or at least go through the iterators faithfully to let the +  // handlers see the empty buffers in the queue. +  // +  // We need this atomic fence here to ensure that writes happening to the +  // buffer have been committed before we load the extents atomically. Because +  // the buffer is not explicitly synchronised across threads, we rely on the +  // fence ordering to ensure that writes we expect to have been completed +  // before the fence are fully committed before we read the extents. +  atomic_thread_fence(memory_order_acquire); +  auto BufferSize = atomic_load(It->Extents, memory_order_acquire); +  SerializedBufferSize = BufferSize + sizeof(MetadataRecord); +  CurrentBuffer = allocateBuffer(SerializedBufferSize); +  if (CurrentBuffer == nullptr) +    return {nullptr, 0}; + +  // Write out the extents as a Metadata Record into the CurrentBuffer. +  MetadataRecord ExtentsRecord; +  ExtentsRecord.Type = uint8_t(RecordType::Metadata); +  ExtentsRecord.RecordKind = +      uint8_t(MetadataRecord::RecordKinds::BufferExtents); +  internal_memcpy(ExtentsRecord.Data, &BufferSize, sizeof(BufferSize)); +  auto AfterExtents = +      static_cast<char *>(internal_memcpy(CurrentBuffer, &ExtentsRecord, +                                          sizeof(MetadataRecord))) + +      sizeof(MetadataRecord); +  internal_memcpy(AfterExtents, It->Data, BufferSize); + +  XRayBuffer Result; +  Result.Data = CurrentBuffer; +  Result.Size = SerializedBufferSize; +  ++It; +  return Result; +} + +// Must finalize before flushing. +XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { +  if (atomic_load(&LoggingStatus, memory_order_acquire) != +      XRayLogInitStatus::XRAY_LOG_FINALIZED) { +    if (Verbosity()) +      Report("Not flushing log, implementation is not finalized.\n"); +    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; +  } + +  if (atomic_exchange(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHING, +                      memory_order_release) == +      XRayLogFlushStatus::XRAY_LOG_FLUSHING) { +    if (Verbosity()) +      Report("Not flushing log, implementation is still flushing.\n"); +    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; +  } + +  if (BQ == nullptr) { +    if (Verbosity()) +      Report("Cannot flush when global buffer queue is null.\n"); +    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; +  } + +  // We wait a number of milliseconds to allow threads to see that we've +  // finalised before attempting to flush the log. +  SleepForMillis(fdrFlags()->grace_period_ms); + +  // At this point, we're going to uninstall the iterator implementation, before +  // we decide to do anything further with the global buffer queue. +  __xray_log_remove_buffer_iterator(); + +  // Once flushed, we should set the global status of the logging implementation +  // to "uninitialized" to allow for FDR-logging multiple runs. 
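at_scope_exit, used immediately below, is the usual RAII scope guard from the XRay utilities; a minimal sketch of such a helper (assuming C++17, so the returned guard is constructed in place and fires exactly once):

    #include <cstdio>
    #include <utility>

    // Minimal scope guard: the callback runs when the guard goes out of
    // scope, on every return path.
    template <class Fn> struct ScopeExit {
      Fn F;
      ~ScopeExit() { F(); }
    };

    template <class Fn> ScopeExit<Fn> atScopeExit(Fn &&F) {
      return ScopeExit<Fn>{std::forward<Fn>(F)};
    }

    int main() {
      auto Guard = atScopeExit([] { std::puts("reset status here"); });
      std::puts("flush work here");
      return 0;  // prints the flush line, then the reset line
    }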
+  auto ResetToUnitialized = at_scope_exit([] { +    atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, +                 memory_order_release); +  }); + +  auto CleanupBuffers = at_scope_exit([] { +    auto &TLD = getThreadLocalData(); +    if (TLD.Controller != nullptr) +      TLD.Controller->flush(); +  }); + +  if (fdrFlags()->no_file_flush) { +    if (Verbosity()) +      Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n"); + +    atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, +                 memory_order_release); +    return XRayLogFlushStatus::XRAY_LOG_FLUSHED; +  } + +  // We write out the file in the following format: +  // +  //   1) We write down the XRay file header with version 1, type FDR_LOG. +  //   2) Then we use the 'apply' member of the BufferQueue that's live, to +  //      ensure that at this point in time we write down the buffers that have +  //      been released (and marked "used") -- we dump the full buffer for now +  //      (fixed-sized) and let the tools reading the buffers deal with the data +  //      afterwards. +  // +  LogWriter *LW = LogWriter::Open(); +  if (LW == nullptr) { +    auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; +    atomic_store(&LogFlushStatus, Result, memory_order_release); +    return Result; +  } + +  XRayFileHeader Header = fdrCommonHeaderInfo(); +  Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()}; +  LW->WriteAll(reinterpret_cast<char *>(&Header), +               reinterpret_cast<char *>(&Header) + sizeof(Header)); + +  // Release the current thread's buffer before we attempt to write out all the +  // buffers. This ensures that in case we had only a single thread going, that +  // we are able to capture the data nonetheless. +  auto &TLD = getThreadLocalData(); +  if (TLD.Controller != nullptr) +    TLD.Controller->flush(); + +  BQ->apply([&](const BufferQueue::Buffer &B) { +    // Starting at version 2 of the FDR logging implementation, we only write +    // the records identified by the extents of the buffer. We use the Extents +    // from the Buffer and write that out as the first record in the buffer.  We +    // still use a Metadata record, but fill in the extents instead for the +    // data. 
+    MetadataRecord ExtentsRecord; +    auto BufferExtents = atomic_load(B.Extents, memory_order_acquire); +    DCHECK(BufferExtents <= B.Size); +    ExtentsRecord.Type = uint8_t(RecordType::Metadata); +    ExtentsRecord.RecordKind = +        uint8_t(MetadataRecord::RecordKinds::BufferExtents); +    internal_memcpy(ExtentsRecord.Data, &BufferExtents, sizeof(BufferExtents)); +    if (BufferExtents > 0) { +      LW->WriteAll(reinterpret_cast<char *>(&ExtentsRecord), +                   reinterpret_cast<char *>(&ExtentsRecord) + +                       sizeof(MetadataRecord)); +      LW->WriteAll(reinterpret_cast<char *>(B.Data), +                   reinterpret_cast<char *>(B.Data) + BufferExtents); +    } +  }); + +  atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, +               memory_order_release); +  return XRayLogFlushStatus::XRAY_LOG_FLUSHED; +} + +XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT { +  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED; +  if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus, +                                      XRayLogInitStatus::XRAY_LOG_FINALIZING, +                                      memory_order_release)) { +    if (Verbosity()) +      Report("Cannot finalize log, implementation not initialized.\n"); +    return static_cast<XRayLogInitStatus>(CurrentStatus); +  } + +  // Do special things to make the log finalize itself, and not allow any more +  // operations to be performed until re-initialized. +  if (BQ == nullptr) { +    if (Verbosity()) +      Report("Attempting to finalize an uninitialized global buffer!\n"); +  } else { +    BQ->finalize(); +  } + +  atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED, +               memory_order_release); +  return XRayLogInitStatus::XRAY_LOG_FINALIZED; +} + +struct TSCAndCPU { +  uint64_t TSC = 0; +  unsigned char CPU = 0; +}; + +static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT { +  // We want to get the TSC as early as possible, so that we can check whether +  // we've seen this CPU before. We also do it before we load anything else, +  // to allow for forward progress with the scheduling. +  TSCAndCPU Result; + +  // Test once for required CPU features +  static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT; +  static bool TSCSupported = true; +  pthread_once( +      &OnceProbe, +[] { TSCSupported = probeRequiredCPUFeatures(); }); + +  if (TSCSupported) { +    Result.TSC = __xray::readTSC(Result.CPU); +  } else { +    // FIXME: This code needs refactoring as it appears in multiple locations +    timespec TS; +    int result = clock_gettime(CLOCK_REALTIME, &TS); +    if (result != 0) { +      Report("clock_gettime(2) return %d, errno=%d", result, int(errno)); +      TS = {0, 0}; +    } +    Result.CPU = 0; +    Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec; +  } +  return Result; +} + +thread_local atomic_uint8_t Running{0}; + +static bool setupTLD(ThreadLocalData &TLD) XRAY_NEVER_INSTRUMENT { +  // Check if we're finalizing, before proceeding. +  { +    auto Status = atomic_load(&LoggingStatus, memory_order_acquire); +    if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING || +        Status == XRayLogInitStatus::XRAY_LOG_FINALIZED) { +      if (TLD.Controller != nullptr) { +        TLD.Controller->flush(); +        TLD.Controller = nullptr; +      } +      return false; +    } +  } + +  if (UNLIKELY(TLD.Controller == nullptr)) { +    // Set up the TLD buffer queue. 
+    if (UNLIKELY(BQ == nullptr)) +      return false; +    TLD.BQ = BQ; + +    // Check that we have a valid buffer. +    if (TLD.Buffer.Generation != BQ->generation() && +        TLD.BQ->releaseBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok) +      return false; + +    // Set up a buffer, before setting up the log writer. Bail out on failure. +    if (TLD.BQ->getBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok) +      return false; + +    // Set up the Log Writer for this thread. +    if (UNLIKELY(TLD.Writer == nullptr)) { +      auto *LWStorage = reinterpret_cast<FDRLogWriter *>(&TLD.LWStorage); +      new (LWStorage) FDRLogWriter(TLD.Buffer); +      TLD.Writer = LWStorage; +    } else { +      TLD.Writer->resetRecord(); +    } + +    auto *CStorage = reinterpret_cast<FDRController<> *>(&TLD.CStorage); +    new (CStorage) +        FDRController<>(TLD.BQ, TLD.Buffer, *TLD.Writer, clock_gettime, +                        atomic_load_relaxed(&ThresholdTicks)); +    TLD.Controller = CStorage; +  } + +  DCHECK_NE(TLD.Controller, nullptr); +  return true; +} + +void fdrLoggingHandleArg0(int32_t FuncId, +                          XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { +  auto TC = getTimestamp(); +  auto &TSC = TC.TSC; +  auto &CPU = TC.CPU; +  RecursionGuard Guard{Running}; +  if (!Guard) +    return; + +  auto &TLD = getThreadLocalData(); +  if (!setupTLD(TLD)) +    return; + +  switch (Entry) { +  case XRayEntryType::ENTRY: +  case XRayEntryType::LOG_ARGS_ENTRY: +    TLD.Controller->functionEnter(FuncId, TSC, CPU); +    return; +  case XRayEntryType::EXIT: +    TLD.Controller->functionExit(FuncId, TSC, CPU); +    return; +  case XRayEntryType::TAIL: +    TLD.Controller->functionTailExit(FuncId, TSC, CPU); +    return; +  case XRayEntryType::CUSTOM_EVENT: +  case XRayEntryType::TYPED_EVENT: +    break; +  } +} + +void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, +                          uint64_t Arg) XRAY_NEVER_INSTRUMENT { +  auto TC = getTimestamp(); +  auto &TSC = TC.TSC; +  auto &CPU = TC.CPU; +  RecursionGuard Guard{Running}; +  if (!Guard) +    return; + +  auto &TLD = getThreadLocalData(); +  if (!setupTLD(TLD)) +    return; + +  switch (Entry) { +  case XRayEntryType::ENTRY: +  case XRayEntryType::LOG_ARGS_ENTRY: +    TLD.Controller->functionEnterArg(FuncId, TSC, CPU, Arg); +    return; +  case XRayEntryType::EXIT: +    TLD.Controller->functionExit(FuncId, TSC, CPU); +    return; +  case XRayEntryType::TAIL: +    TLD.Controller->functionTailExit(FuncId, TSC, CPU); +    return; +  case XRayEntryType::CUSTOM_EVENT: +  case XRayEntryType::TYPED_EVENT: +    break; +  } +} + +void fdrLoggingHandleCustomEvent(void *Event, +                                 std::size_t EventSize) XRAY_NEVER_INSTRUMENT { +  auto TC = getTimestamp(); +  auto &TSC = TC.TSC; +  auto &CPU = TC.CPU; +  RecursionGuard Guard{Running}; +  if (!Guard) +    return; + +  // Complain when we ever get at least one custom event that's larger than what +  // we can possibly support. 
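+  // (The warning below fires at most once per process via pthread_once; the
+  // event size is then narrowed to int32_t by the static_cast further down.)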
+  if (EventSize > +      static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) { +    static pthread_once_t Once = PTHREAD_ONCE_INIT; +    pthread_once( +        &Once, +[] { +          Report("Custom event size too large; truncating to %d.\n", +                 std::numeric_limits<int32_t>::max()); +        }); +  } + +  auto &TLD = getThreadLocalData(); +  if (!setupTLD(TLD)) +    return; + +  int32_t ReducedEventSize = static_cast<int32_t>(EventSize); +  TLD.Controller->customEvent(TSC, CPU, Event, ReducedEventSize); +} + +void fdrLoggingHandleTypedEvent(size_t EventType, const void *Event, +                                size_t EventSize) noexcept +    XRAY_NEVER_INSTRUMENT { +  auto TC = getTimestamp(); +  auto &TSC = TC.TSC; +  auto &CPU = TC.CPU; +  RecursionGuard Guard{Running}; +  if (!Guard) +    return; + +  // Complain when we ever get at least one typed event that's larger than what +  // we can possibly support. +  if (EventSize > +      static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) { +    static pthread_once_t Once = PTHREAD_ONCE_INIT; +    pthread_once( +        &Once, +[] { +          Report("Typed event size too large; truncating to %d.\n", +                 std::numeric_limits<int32_t>::max()); +        }); +  } + +  auto &TLD = getThreadLocalData(); +  if (!setupTLD(TLD)) +    return; + +  int32_t ReducedEventSize = static_cast<int32_t>(EventSize); +  TLD.Controller->typedEvent(TSC, CPU, static_cast<uint16_t>(EventType), Event, +                             ReducedEventSize); +} + +XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options, +                                 size_t OptionsSize) XRAY_NEVER_INSTRUMENT { +  if (Options == nullptr) +    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + +  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; +  if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus, +                                      XRayLogInitStatus::XRAY_LOG_INITIALIZING, +                                      memory_order_release)) { +    if (Verbosity()) +      Report("Cannot initialize already initialized implementation.\n"); +    return static_cast<XRayLogInitStatus>(CurrentStatus); +  } + +  if (Verbosity()) +    Report("Initializing FDR mode with options: %s\n", +           static_cast<const char *>(Options)); + +  // TODO: Factor out the flags specific to the FDR mode implementation. For +  // now, use the global/single definition of the flags, since the FDR mode +  // flags are already defined there. +  FlagParser FDRParser; +  FDRFlags FDRFlags; +  registerXRayFDRFlags(&FDRParser, &FDRFlags); +  FDRFlags.setDefaults(); + +  // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided +  // options until we migrate everyone to use the XRAY_FDR_OPTIONS +  // compiler-provided options. +  FDRParser.ParseString(useCompilerDefinedFlags()); +  FDRParser.ParseString(useCompilerDefinedFDRFlags()); +  auto *EnvOpts = GetEnv("XRAY_FDR_OPTIONS"); +  if (EnvOpts == nullptr) +    EnvOpts = ""; +  FDRParser.ParseString(EnvOpts); + +  // FIXME: Remove this when we fully remove the deprecated flags. +  if (internal_strlen(EnvOpts) == 0) { +    FDRFlags.func_duration_threshold_us = +        flags()->xray_fdr_log_func_duration_threshold_us; +    FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms; +  } + +  // The provided options should always override the compiler-provided and +  // environment-variable defined options. 
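+  //
+  // Illustrative precedence, lowest to highest (the values are made up):
+  //
+  //   XRAY_DEFAULT_OPTIONS        "func_duration_threshold_us=5"
+  //   XRAY_FDR_OPTIONS (env)      "func_duration_threshold_us=10"
+  //   Options (this argument)     "func_duration_threshold_us=20"  // wins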
+  FDRParser.ParseString(static_cast<const char *>(Options));
+  *fdrFlags() = FDRFlags;
+  auto BufferSize = FDRFlags.buffer_size;
+  auto BufferMax = FDRFlags.buffer_max;
+
+  if (BQ == nullptr) {
+    bool Success = false;
+    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
+    new (BQ) BufferQueue(BufferSize, BufferMax, Success);
+    if (!Success) {
+      Report("BufferQueue init failed.\n");
+      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+    }
+  } else {
+    if (BQ->init(BufferSize, BufferMax) != BufferQueue::ErrorCode::Ok) {
+      if (Verbosity())
+        Report("Failed to re-initialize the global buffer queue.\n");
+      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+    }
+  }
+
+  static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
+  pthread_once(
+      &OnceInit, +[] {
+        atomic_store(&TicksPerSec,
+                     probeRequiredCPUFeatures() ? getTSCFrequency()
+                                                : __xray::NanosecondsPerSecond,
+                     memory_order_release);
+        pthread_key_create(
+            &Key, +[](void *TLDPtr) {
+              if (TLDPtr == nullptr)
+                return;
+              auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr);
+              if (TLD.BQ == nullptr)
+                return;
+              if (TLD.Buffer.Data == nullptr)
+                return;
+              auto EC = TLD.BQ->releaseBuffer(TLD.Buffer);
+              if (EC != BufferQueue::ErrorCode::Ok)
+                Report("At thread exit, failed to release buffer at %p; "
+                       "error=%s\n",
+                       TLD.Buffer.Data, BufferQueue::getErrorString(EC));
+            });
+      });
+
+  atomic_store(&ThresholdTicks,
+               atomic_load_relaxed(&TicksPerSec) *
+                   fdrFlags()->func_duration_threshold_us / 1000000,
+               memory_order_release);
+  // The arg1 handler should go in first, to avoid concurrent code accidentally
+  // falling back to arg0 when it should have run the arg1 handler.
+  __xray_set_handler_arg1(fdrLoggingHandleArg1);
+  // Install the actual handleArg0 handler after initialising the buffers.
+  __xray_set_handler(fdrLoggingHandleArg0);
+  __xray_set_customevent_handler(fdrLoggingHandleCustomEvent);
+  __xray_set_typedevent_handler(fdrLoggingHandleTypedEvent);
+
+  // Install the buffer iterator implementation.
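+  // This is what allows in-process consumers to walk the FDR buffers through
+  // __xray_log_process_buffers() without first flushing them to a file.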
+  __xray_log_set_buffer_iterator(fdrIterator); + +  atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED, +               memory_order_release); + +  if (Verbosity()) +    Report("XRay FDR init successful.\n"); +  return XRayLogInitStatus::XRAY_LOG_INITIALIZED; +} + +bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT { +  XRayLogImpl Impl{ +      fdrLoggingInit, +      fdrLoggingFinalize, +      fdrLoggingHandleArg0, +      fdrLoggingFlush, +  }; +  auto RegistrationResult = __xray_log_register_mode("xray-fdr", Impl); +  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && +      Verbosity()) { +    Report("Cannot register XRay FDR mode to 'xray-fdr'; error = %d\n", +           RegistrationResult); +    return false; +  } + +  if (flags()->xray_fdr_log || +      !internal_strcmp(flags()->xray_mode, "xray-fdr")) { +    auto SelectResult = __xray_log_select_mode("xray-fdr"); +    if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && +        Verbosity()) { +      Report("Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n", +             SelectResult); +      return false; +    } +  } +  return true; +} + +} // namespace __xray + +static auto UNUSED Unused = __xray::fdrLogDynamicInitializer(); diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.h new file mode 100644 index 000000000000..6df0057c4965 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.h @@ -0,0 +1,38 @@ +//===-- xray_fdr_logging.h ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_XRAY_FDR_LOGGING_H +#define XRAY_XRAY_FDR_LOGGING_H + +#include "xray/xray_log_interface.h" +#include "xray_fdr_log_records.h" + +// FDR (Flight Data Recorder) Mode +// =============================== +// +// The XRay whitepaper describes a mode of operation for function call trace +// logging that involves writing small records into an in-memory circular +// buffer, that then gets logged to disk on demand. To do this efficiently and +// capture as much data as we can, we use smaller records compared to the +// default mode of always writing fixed-size records. 
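+//
+// A minimal way to exercise this mode from an XRay-instrumented binary is
+// sketched below (environment variables only; not part of this header):
+//
+//   XRAY_OPTIONS="patch_premain=true xray_mode=xray-fdr" \
+//   XRAY_FDR_OPTIONS="func_duration_threshold_us=5" ./a.out
+//
+// The resulting log file can then be processed with tools such as llvm-xray.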
+ +namespace __xray { +XRayLogInitStatus fdrLoggingInit(size_t BufferSize, size_t BufferMax, +                                 void *Options, size_t OptionsSize); +XRayLogInitStatus fdrLoggingFinalize(); +void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry); +void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, uint64_t Arg1); +XRayLogFlushStatus fdrLoggingFlush(); +XRayLogInitStatus fdrLoggingReset(); + +} // namespace __xray + +#endif // XRAY_XRAY_FDR_LOGGING_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.cpp new file mode 100644 index 000000000000..e4c6906dc443 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.cpp @@ -0,0 +1,84 @@ +//===-- xray_flags.cpp ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// XRay flag parsing logic. +//===----------------------------------------------------------------------===// + +#include "xray_flags.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "xray_defs.h" + +using namespace __sanitizer; + +namespace __xray { + +Flags xray_flags_dont_use_directly; // use via flags(). + +void Flags::setDefaults() XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; +#include "xray_flags.inc" +#undef XRAY_FLAG +} + +void registerXRayFlags(FlagParser *P, Flags *F) XRAY_NEVER_INSTRUMENT { +#define XRAY_FLAG(Type, Name, DefaultValue, Description)                       \ +  RegisterFlag(P, #Name, Description, &F->Name); +#include "xray_flags.inc" +#undef XRAY_FLAG +} + +// This function, as defined with the help of a macro meant to be introduced at +// build time of the XRay runtime, passes in a statically defined list of +// options that control XRay. This means users/deployments can tweak the +// defaults that override the hard-coded defaults in the xray_flags.inc at +// compile-time using the XRAY_DEFAULT_OPTIONS macro. +const char *useCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT { +#ifdef XRAY_DEFAULT_OPTIONS +  // Do the double-layered string conversion to prevent badly crafted strings +  // provided through the XRAY_DEFAULT_OPTIONS from causing compilation issues +  // (or changing the semantics of the implementation through the macro). This +  // ensures that we convert whatever XRAY_DEFAULT_OPTIONS is defined as a +  // string literal. +  return SANITIZER_STRINGIFY(XRAY_DEFAULT_OPTIONS); +#else +  return ""; +#endif +} + +void initializeFlags() XRAY_NEVER_INSTRUMENT { +  SetCommonFlagsDefaults(); +  auto *F = flags(); +  F->setDefaults(); + +  FlagParser XRayParser; +  registerXRayFlags(&XRayParser, F); +  RegisterCommonFlags(&XRayParser); + +  // Use options defaulted at compile-time for the runtime. +  const char *XRayCompileFlags = useCompilerDefinedFlags(); +  XRayParser.ParseString(XRayCompileFlags); + +  // Override from environment variables. +  XRayParser.ParseStringFromEnv("XRAY_OPTIONS"); + +  // Override from command line. 
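+  //
+  // At this point the effective value of each flag is, in increasing order of
+  // precedence: the xray_flags.inc default, the compile-time
+  // XRAY_DEFAULT_OPTIONS string, and finally the XRAY_OPTIONS environment
+  // variable.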
+  InitializeCommonFlags(); + +  if (Verbosity()) +    ReportUnrecognizedFlags(); + +  if (common_flags()->help) { +    XRayParser.PrintFlagDescriptions(); +  } +} + +} // namespace __xray diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.h new file mode 100644 index 000000000000..cce6fe9d62f9 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.h @@ -0,0 +1,39 @@ +//===-- xray_flags.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// XRay runtime flags. +//===----------------------------------------------------------------------===// + +#ifndef XRAY_FLAGS_H +#define XRAY_FLAGS_H + +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_internal_defs.h" + +namespace __xray { + +struct Flags { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Type Name; +#include "xray_flags.inc" +#undef XRAY_FLAG + +  void setDefaults(); +}; + +extern Flags xray_flags_dont_use_directly; +extern void registerXRayFlags(FlagParser *P, Flags *F); +const char *useCompilerDefinedFlags(); +inline Flags *flags() { return &xray_flags_dont_use_directly; } + +void initializeFlags(); + +} // namespace __xray + +#endif // XRAY_FLAGS_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.inc b/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.inc new file mode 100644 index 000000000000..b7dc5a08f242 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_flags.inc @@ -0,0 +1,49 @@ +//===-- xray_flags.inc ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// XRay runtime flags. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_FLAG +#error "Define XRAY_FLAG prior to including this file!" +#endif + +XRAY_FLAG(bool, patch_premain, false, +          "Whether to patch instrumentation points before main.") +XRAY_FLAG(const char *, xray_logfile_base, "xray-log.", +          "Filename base for the xray logfile.") +XRAY_FLAG(const char *, xray_mode, "", "Mode to install by default.") +XRAY_FLAG(uptr, xray_page_size_override, 0, +          "Override the default page size for the system, in bytes. The size " +          "should be a power-of-two.") + +// Basic (Naive) Mode logging options. 
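+// (For reference: each XRAY_FLAG(Type, Name, Default, Desc) entry below
+// expands, via xray_flags.h, into a `Type Name;` member of __xray::Flags,
+// and, via xray_flags.cpp, into the default-setting and parser-registration
+// code shown earlier.)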
+XRAY_FLAG(bool, xray_naive_log, false, +          "DEPRECATED: Use xray_mode=xray-basic instead.") +XRAY_FLAG(int, xray_naive_log_func_duration_threshold_us, 5, +          "DEPRECATED: use the environment variable XRAY_BASIC_OPTIONS and set " +          "func_duration_threshold_us instead.") +XRAY_FLAG(int, xray_naive_log_max_stack_depth, 64, +          "DEPRECATED: use the environment variable XRAY_BASIC_OPTIONS and set " +          "max_stack_depth instead.") +XRAY_FLAG(int, xray_naive_log_thread_buffer_size, 1024, +          "DEPRECATED: use the environment variable XRAY_BASIC_OPTIONS and set " +          "thread_buffer_size instead.") + +// FDR (Flight Data Recorder) Mode logging options. +XRAY_FLAG(bool, xray_fdr_log, false, +          "DEPRECATED: Use xray_mode=xray-fdr instead.") +XRAY_FLAG(int, xray_fdr_log_func_duration_threshold_us, 5, +          "DEPRECATED: use the environment variable XRAY_FDR_OPTIONS and set " +          "func_duration_threshold_us instead.") +XRAY_FLAG(int, xray_fdr_log_grace_period_us, 0, +          "DEPRECATED: use the environment variable XRAY_FDR_OPTIONS and set " +          "grace_period_ms instead.") +XRAY_FLAG(int, xray_fdr_log_grace_period_ms, 100, +          "DEPRECATED: use the environment variable XRAY_FDR_OPTIONS and set " +          "grace_period_ms instead.") diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_function_call_trie.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_function_call_trie.h new file mode 100644 index 000000000000..7536f39b8081 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_function_call_trie.h @@ -0,0 +1,599 @@ +//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This file defines the interface for a function call trie. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_FUNCTION_CALL_TRIE_H +#define XRAY_FUNCTION_CALL_TRIE_H + +#include "xray_buffer_queue.h" +#include "xray_defs.h" +#include "xray_profiling_flags.h" +#include "xray_segmented_array.h" +#include <limits> +#include <memory> // For placement new. +#include <utility> + +namespace __xray { + +/// A FunctionCallTrie represents the stack traces of XRay instrumented +/// functions that we've encountered, where a node corresponds to a function and +/// the path from the root to the node its stack trace. Each node in the trie +/// will contain some useful values, including: +/// +///   * The cumulative amount of time spent in this particular node/stack. +///   * The number of times this stack has appeared. +///   * A histogram of latencies for that particular node. +/// +/// Each node in the trie will also contain a list of callees, represented using +/// a Array<NodeIdPair> -- each NodeIdPair instance will contain the function +/// ID of the callee, and a pointer to the node. 
+/// +/// If we visualise this data structure, we'll find the following potential +/// representation: +/// +///   [function id node] -> [callees] [cumulative time] +///                         [call counter] [latency histogram] +/// +/// As an example, when we have a function in this pseudocode: +/// +///   func f(N) { +///     g() +///     h() +///     for i := 1..N { j() } +///   } +/// +/// We may end up with a trie of the following form: +/// +///   f -> [ g, h, j ] [...] [1] [...] +///   g -> [ ... ] [...] [1] [...] +///   h -> [ ... ] [...] [1] [...] +///   j -> [ ... ] [...] [N] [...] +/// +/// If for instance the function g() called j() like so: +/// +///   func g() { +///     for i := 1..10 { j() } +///   } +/// +/// We'll find the following updated trie: +/// +///   f -> [ g, h, j ] [...] [1] [...] +///   g -> [ j' ] [...] [1] [...] +///   h -> [ ... ] [...] [1] [...] +///   j -> [ ... ] [...] [N] [...] +///   j' -> [ ... ] [...] [10] [...] +/// +/// Note that we'll have a new node representing the path `f -> g -> j'` with +/// isolated data. This isolation gives us a means of representing the stack +/// traces as a path, as opposed to a key in a table. The alternative +/// implementation here would be to use a separate table for the path, and use +/// hashes of the path as an identifier to accumulate the information. We've +/// moved away from this approach as it takes a lot of time to compute the hash +/// every time we need to update a function's call information as we're handling +/// the entry and exit events. +/// +/// This approach allows us to maintain a shadow stack, which represents the +/// currently executing path, and on function exits quickly compute the amount +/// of time elapsed from the entry, then update the counters for the node +/// already represented in the trie. This necessitates an efficient +/// representation of the various data structures (the list of callees must be +/// cache-aware and efficient to look up, and the histogram must be compact and +/// quick to update) to enable us to keep the overheads of this implementation +/// to the minimum. +class FunctionCallTrie { +public: +  struct Node; + +  // We use a NodeIdPair type instead of a std::pair<...> to not rely on the +  // standard library types in this header. +  struct NodeIdPair { +    Node *NodePtr; +    int32_t FId; +  }; + +  using NodeIdPairArray = Array<NodeIdPair>; +  using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType; + +  // A Node in the FunctionCallTrie gives us a list of callees, the cumulative +  // number of times this node actually appeared, the cumulative amount of time +  // for this particular node including its children call times, and just the +  // local time spent on this node. Each Node will have the ID of the XRay +  // instrumented function that it is associated to. +  struct Node { +    Node *Parent; +    NodeIdPairArray Callees; +    uint64_t CallCount; +    uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time. +    int32_t FId; + +    // TODO: Include the compact histogram. +  }; + +private: +  struct ShadowStackEntry { +    uint64_t EntryTSC; +    Node *NodePtr; +    uint16_t EntryCPU; +  }; + +  using NodeArray = Array<Node>; +  using RootArray = Array<Node *>; +  using ShadowStackArray = Array<ShadowStackEntry>; + +public: +  // We collate the allocators we need into a single struct, as a convenience to +  // allow us to initialize these as a group. 
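+  //
+  // A typical setup is sketched as:
+  //
+  //   auto A = FunctionCallTrie::InitAllocators(); // flag-derived budget
+  //   FunctionCallTrie Trie(A);                    // A must outlive the trie
+  //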
+  struct Allocators { +    using NodeAllocatorType = NodeArray::AllocatorType; +    using RootAllocatorType = RootArray::AllocatorType; +    using ShadowStackAllocatorType = ShadowStackArray::AllocatorType; + +    // Use hosted aligned storage members to allow for trivial move and init. +    // This also allows us to sidestep the potential-failing allocation issue. +    alignas(NodeAllocatorType) std::byte +        NodeAllocatorStorage[sizeof(NodeAllocatorType)]; +    alignas(RootAllocatorType) std::byte +        RootAllocatorStorage[sizeof(RootAllocatorType)]; +    alignas(ShadowStackAllocatorType) std::byte +        ShadowStackAllocatorStorage[sizeof(ShadowStackAllocatorType)]; +    alignas(NodeIdPairAllocatorType) std::byte +        NodeIdPairAllocatorStorage[sizeof(NodeIdPairAllocatorType)]; + +    NodeAllocatorType *NodeAllocator = nullptr; +    RootAllocatorType *RootAllocator = nullptr; +    ShadowStackAllocatorType *ShadowStackAllocator = nullptr; +    NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr; + +    Allocators() = default; +    Allocators(const Allocators &) = delete; +    Allocators &operator=(const Allocators &) = delete; + +    struct Buffers { +      BufferQueue::Buffer NodeBuffer; +      BufferQueue::Buffer RootsBuffer; +      BufferQueue::Buffer ShadowStackBuffer; +      BufferQueue::Buffer NodeIdPairBuffer; +    }; + +    explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT { +      new (&NodeAllocatorStorage) +          NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size); +      NodeAllocator = +          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); + +      new (&RootAllocatorStorage) +          RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size); +      RootAllocator = +          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); + +      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType( +          B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size); +      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( +          &ShadowStackAllocatorStorage); + +      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType( +          B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size); +      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( +          &NodeIdPairAllocatorStorage); +    } + +    explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT { +      new (&NodeAllocatorStorage) NodeAllocatorType(Max); +      NodeAllocator = +          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); + +      new (&RootAllocatorStorage) RootAllocatorType(Max); +      RootAllocator = +          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); + +      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max); +      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( +          &ShadowStackAllocatorStorage); + +      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max); +      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( +          &NodeIdPairAllocatorStorage); +    } + +    Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT { +      // Here we rely on the safety of memcpy'ing contents of the storage +      // members, and then pointing the source pointers to nullptr. 
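+      // (This is safe because the allocator objects live in the aligned byte
+      // arrays above; the *Allocator pointers merely alias that storage, so a
+      // byte-wise copy plus nulling the source pointers transfers ownership.)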
+      internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage, +                      sizeof(NodeAllocatorType)); +      internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage, +                      sizeof(RootAllocatorType)); +      internal_memcpy(&ShadowStackAllocatorStorage, +                      &O.ShadowStackAllocatorStorage, +                      sizeof(ShadowStackAllocatorType)); +      internal_memcpy(&NodeIdPairAllocatorStorage, +                      &O.NodeIdPairAllocatorStorage, +                      sizeof(NodeIdPairAllocatorType)); + +      NodeAllocator = +          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); +      RootAllocator = +          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); +      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( +          &ShadowStackAllocatorStorage); +      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( +          &NodeIdPairAllocatorStorage); + +      O.NodeAllocator = nullptr; +      O.RootAllocator = nullptr; +      O.ShadowStackAllocator = nullptr; +      O.NodeIdPairAllocator = nullptr; +    } + +    Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT { +      // When moving into an existing instance, we ensure that we clean up the +      // current allocators. +      if (NodeAllocator) +        NodeAllocator->~NodeAllocatorType(); +      if (O.NodeAllocator) { +        new (&NodeAllocatorStorage) +            NodeAllocatorType(std::move(*O.NodeAllocator)); +        NodeAllocator = +            reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); +        O.NodeAllocator = nullptr; +      } else { +        NodeAllocator = nullptr; +      } + +      if (RootAllocator) +        RootAllocator->~RootAllocatorType(); +      if (O.RootAllocator) { +        new (&RootAllocatorStorage) +            RootAllocatorType(std::move(*O.RootAllocator)); +        RootAllocator = +            reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); +        O.RootAllocator = nullptr; +      } else { +        RootAllocator = nullptr; +      } + +      if (ShadowStackAllocator) +        ShadowStackAllocator->~ShadowStackAllocatorType(); +      if (O.ShadowStackAllocator) { +        new (&ShadowStackAllocatorStorage) +            ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator)); +        ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( +            &ShadowStackAllocatorStorage); +        O.ShadowStackAllocator = nullptr; +      } else { +        ShadowStackAllocator = nullptr; +      } + +      if (NodeIdPairAllocator) +        NodeIdPairAllocator->~NodeIdPairAllocatorType(); +      if (O.NodeIdPairAllocator) { +        new (&NodeIdPairAllocatorStorage) +            NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator)); +        NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( +            &NodeIdPairAllocatorStorage); +        O.NodeIdPairAllocator = nullptr; +      } else { +        NodeIdPairAllocator = nullptr; +      } + +      return *this; +    } + +    ~Allocators() XRAY_NEVER_INSTRUMENT { +      if (NodeAllocator != nullptr) +        NodeAllocator->~NodeAllocatorType(); +      if (RootAllocator != nullptr) +        RootAllocator->~RootAllocatorType(); +      if (ShadowStackAllocator != nullptr) +        ShadowStackAllocator->~ShadowStackAllocatorType(); +      if (NodeIdPairAllocator != nullptr) +        NodeIdPairAllocator->~NodeIdPairAllocatorType(); +    } +  }; + +  static 
Allocators InitAllocators() XRAY_NEVER_INSTRUMENT { +    return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max); +  } + +  static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT { +    Allocators A(Max); +    return A; +  } + +  static Allocators +  InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT { +    Allocators A(Bufs); +    return A; +  } + +private: +  NodeArray Nodes; +  RootArray Roots; +  ShadowStackArray ShadowStack; +  NodeIdPairAllocatorType *NodeIdPairAllocator; +  uint32_t OverflowedFunctions; + +public: +  explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT +      : Nodes(*A.NodeAllocator), +        Roots(*A.RootAllocator), +        ShadowStack(*A.ShadowStackAllocator), +        NodeIdPairAllocator(A.NodeIdPairAllocator), +        OverflowedFunctions(0) {} + +  FunctionCallTrie() = delete; +  FunctionCallTrie(const FunctionCallTrie &) = delete; +  FunctionCallTrie &operator=(const FunctionCallTrie &) = delete; + +  FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT +      : Nodes(std::move(O.Nodes)), +        Roots(std::move(O.Roots)), +        ShadowStack(std::move(O.ShadowStack)), +        NodeIdPairAllocator(O.NodeIdPairAllocator), +        OverflowedFunctions(O.OverflowedFunctions) {} + +  FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT { +    Nodes = std::move(O.Nodes); +    Roots = std::move(O.Roots); +    ShadowStack = std::move(O.ShadowStack); +    NodeIdPairAllocator = O.NodeIdPairAllocator; +    OverflowedFunctions = O.OverflowedFunctions; +    return *this; +  } + +  ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {} + +  void enterFunction(const int32_t FId, uint64_t TSC, +                     uint16_t CPU) XRAY_NEVER_INSTRUMENT { +    DCHECK_NE(FId, 0); + +    // If we're already overflowed the function call stack, do not bother +    // attempting to record any more function entries. +    if (UNLIKELY(OverflowedFunctions)) { +      ++OverflowedFunctions; +      return; +    } + +    // If this is the first function we've encountered, we want to set up the +    // node(s) and treat it as a root. +    if (UNLIKELY(ShadowStack.empty())) { +      auto *NewRoot = Nodes.AppendEmplace( +          nullptr, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId); +      if (UNLIKELY(NewRoot == nullptr)) +        return; +      if (Roots.AppendEmplace(NewRoot) == nullptr) { +        Nodes.trim(1); +        return; +      } +      if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) { +        Nodes.trim(1); +        Roots.trim(1); +        ++OverflowedFunctions; +        return; +      } +      return; +    } + +    // From this point on, we require that the stack is not empty. +    DCHECK(!ShadowStack.empty()); +    auto TopNode = ShadowStack.back().NodePtr; +    DCHECK_NE(TopNode, nullptr); + +    // If we've seen this callee before, then we access that node and place that +    // on the top of the stack. +    auto* Callee = TopNode->Callees.find_element( +        [FId](const NodeIdPair &NR) { return NR.FId == FId; }); +    if (Callee != nullptr) { +      CHECK_NE(Callee->NodePtr, nullptr); +      if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr) +        ++OverflowedFunctions; +      return; +    } + +    // This means we've never seen this stack before, create a new node here. 
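+    // (For example: entering f() and then g() leaves the shadow stack as
+    // [f, g]; the matching exitFunction(g, ...) pops one entry and charges g
+    // the TSC delta since its entry.)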
+    auto* NewNode = Nodes.AppendEmplace( +        TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId); +    if (UNLIKELY(NewNode == nullptr)) +      return; +    DCHECK_NE(NewNode, nullptr); +    TopNode->Callees.AppendEmplace(NewNode, FId); +    if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr) +      ++OverflowedFunctions; +    return; +  } + +  void exitFunction(int32_t FId, uint64_t TSC, +                    uint16_t CPU) XRAY_NEVER_INSTRUMENT { +    // If we're exiting functions that have "overflowed" or don't fit into the +    // stack due to allocator constraints, we then decrement that count first. +    if (OverflowedFunctions) { +      --OverflowedFunctions; +      return; +    } + +    // When we exit a function, we look up the ShadowStack to see whether we've +    // entered this function before. We do as little processing here as we can, +    // since most of the hard work would have already been done at function +    // entry. +    uint64_t CumulativeTreeTime = 0; + +    while (!ShadowStack.empty()) { +      const auto &Top = ShadowStack.back(); +      auto TopNode = Top.NodePtr; +      DCHECK_NE(TopNode, nullptr); + +      // We may encounter overflow on the TSC we're provided, which may end up +      // being less than the TSC when we first entered the function. +      // +      // To get the accurate measurement of cycles, we need to check whether +      // we've overflowed (TSC < Top.EntryTSC) and then account the difference +      // between the entry TSC and the max for the TSC counter (max of uint64_t) +      // then add the value of TSC. We can prove that the maximum delta we will +      // get is at most the 64-bit unsigned value, since the difference between +      // a TSC of 0 and a Top.EntryTSC of 1 is (numeric_limits<uint64_t>::max() +      // - 1) + 1. +      // +      // NOTE: This assumes that TSCs are synchronised across CPUs. +      // TODO: Count the number of times we've seen CPU migrations. +      uint64_t LocalTime = +          Top.EntryTSC > TSC +              ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC +              : TSC - Top.EntryTSC; +      TopNode->CallCount++; +      TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime; +      CumulativeTreeTime += LocalTime; +      ShadowStack.trim(1); + +      // TODO: Update the histogram for the node. +      if (TopNode->FId == FId) +        break; +    } +  } + +  const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; } + +  // The deepCopyInto operation will update the provided FunctionCallTrie by +  // re-creating the contents of this particular FunctionCallTrie in the other +  // FunctionCallTrie. It will do this using a Depth First Traversal from the +  // roots, and while doing so recreating the traversal in the provided +  // FunctionCallTrie. +  // +  // This operation will *not* destroy the state in `O`, and thus may cause some +  // duplicate entries in `O` if it is not empty. +  // +  // This function is *not* thread-safe, and may require external +  // synchronisation of both "this" and |O|. +  // +  // This function must *not* be called with a non-empty FunctionCallTrie |O|. +  void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT { +    DCHECK(O.getRoots().empty()); + +    // We then push the root into a stack, to use as the parent marker for new +    // nodes we push in as we're traversing depth-first down the call tree. 
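+    // (An explicit worklist keeps the copy iterative; recursing per callee
+    // could overflow the real call stack on very deep tries.)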
+    struct NodeAndParent { +      FunctionCallTrie::Node *Node; +      FunctionCallTrie::Node *NewNode; +    }; +    using Stack = Array<NodeAndParent>; + +    typename Stack::AllocatorType StackAllocator( +        profilingFlags()->stack_allocator_max); +    Stack DFSStack(StackAllocator); + +    for (const auto Root : getRoots()) { +      // Add a node in O for this root. +      auto NewRoot = O.Nodes.AppendEmplace( +          nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount, +          Root->CumulativeLocalTime, Root->FId); + +      // Because we cannot allocate more memory we should bail out right away. +      if (UNLIKELY(NewRoot == nullptr)) +        return; + +      if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr)) +        return; + +      // TODO: Figure out what to do if we fail to allocate any more stack +      // space. Maybe warn or report once? +      if (DFSStack.AppendEmplace(Root, NewRoot) == nullptr) +        return; +      while (!DFSStack.empty()) { +        NodeAndParent NP = DFSStack.back(); +        DCHECK_NE(NP.Node, nullptr); +        DCHECK_NE(NP.NewNode, nullptr); +        DFSStack.trim(1); +        for (const auto Callee : NP.Node->Callees) { +          auto NewNode = O.Nodes.AppendEmplace( +              NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator), +              Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime, +              Callee.FId); +          if (UNLIKELY(NewNode == nullptr)) +            return; +          if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) == +                       nullptr)) +            return; +          if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) == +                       nullptr)) +            return; +        } +      } +    } +  } + +  // The mergeInto operation will update the provided FunctionCallTrie by +  // traversing the current trie's roots and updating (i.e. merging) the data in +  // the nodes with the data in the target's nodes. If the node doesn't exist in +  // the provided trie, we add a new one in the right position, and inherit the +  // data from the original (current) trie, along with all its callees. +  // +  // This function is *not* thread-safe, and may require external +  // synchronisation of both "this" and |O|. +  void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT { +    struct NodeAndTarget { +      FunctionCallTrie::Node *OrigNode; +      FunctionCallTrie::Node *TargetNode; +    }; +    using Stack = Array<NodeAndTarget>; +    typename Stack::AllocatorType StackAllocator( +        profilingFlags()->stack_allocator_max); +    Stack DFSStack(StackAllocator); + +    for (const auto Root : getRoots()) { +      Node *TargetRoot = nullptr; +      auto R = O.Roots.find_element( +          [&](const Node *Node) { return Node->FId == Root->FId; }); +      if (R == nullptr) { +        TargetRoot = O.Nodes.AppendEmplace( +            nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u, +            Root->FId); +        if (UNLIKELY(TargetRoot == nullptr)) +          return; + +        O.Roots.Append(TargetRoot); +      } else { +        TargetRoot = *R; +      } + +      DFSStack.AppendEmplace(Root, TargetRoot); +      while (!DFSStack.empty()) { +        NodeAndTarget NT = DFSStack.back(); +        DCHECK_NE(NT.OrigNode, nullptr); +        DCHECK_NE(NT.TargetNode, nullptr); +        DFSStack.trim(1); +        // TODO: Update the histogram as well when we have it ready. 
+        NT.TargetNode->CallCount += NT.OrigNode->CallCount; +        NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime; +        for (const auto Callee : NT.OrigNode->Callees) { +          auto TargetCallee = NT.TargetNode->Callees.find_element( +              [&](const FunctionCallTrie::NodeIdPair &C) { +                return C.FId == Callee.FId; +              }); +          if (TargetCallee == nullptr) { +            auto NewTargetNode = O.Nodes.AppendEmplace( +                NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u, +                Callee.FId); + +            if (UNLIKELY(NewTargetNode == nullptr)) +              return; + +            TargetCallee = +                NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId); +          } +          DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr); +        } +      } +    } +  } +}; + +} // namespace __xray + +#endif // XRAY_FUNCTION_CALL_TRIE_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_hexagon.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_hexagon.cpp new file mode 100644 index 000000000000..7f127b2b499c --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_hexagon.cpp @@ -0,0 +1,168 @@ +//===-- xray_hexagon.cpp --------------------------------------*- C++ ---*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of hexagon-specific routines (32-bit). +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include <assert.h> +#include <atomic> + +namespace __xray { + +// The machine codes for some instructions used in runtime patching. +enum PatchOpcodes : uint32_t { +  PO_JUMPI_14 = 0x5800c00a, // jump #0x014 (PC + 0x014) +  PO_CALLR_R6 = 0x50a6c000, // indirect call: callr r6 +  PO_TFR_IMM = 0x78000000,  // transfer immed +                            // ICLASS 0x7 - S2-type A-type +  PO_IMMEXT = 0x00000000, // constant extender +}; + +enum PacketWordParseBits : uint32_t { +  PP_DUPLEX = 0x00 << 14, +  PP_NOT_END = 0x01 << 14, +  PP_PACKET_END = 0x03 << 14, +}; + +enum RegNum : uint32_t { +  RN_R6 = 0x6, +  RN_R7 = 0x7, +}; + +inline static uint32_t +encodeExtendedTransferImmediate(uint32_t Imm, RegNum DestReg, +                                bool PacketEnd = false) XRAY_NEVER_INSTRUMENT { +  static const uint32_t REG_MASK = 0x1f; +  assert((DestReg & (~REG_MASK)) == 0); +  // The constant-extended register transfer encodes the 6 least +  // significant bits of the effective constant: +  Imm = Imm & 0x03f; +  const PacketWordParseBits ParseBits = PacketEnd ? 
PP_PACKET_END : PP_NOT_END; + +  return PO_TFR_IMM | ParseBits | (Imm << 5) | (DestReg & REG_MASK); +} + +inline static uint32_t +encodeConstantExtender(uint32_t Imm) XRAY_NEVER_INSTRUMENT { +  // Bits   Name      Description +  // -----  -------   ------------------------------------------ +  // 31:28  ICLASS    Instruction class = 0000 +  // 27:16  high      High 12 bits of 26-bit constant extension +  // 15:14  Parse     Parse bits +  // 13:0   low       Low 14 bits of 26-bit constant extension +  static const uint32_t IMM_MASK_LOW = 0x03fff; +  static const uint32_t IMM_MASK_HIGH = 0x00fff << 14; + +  // The extender encodes the 26 most significant bits of the effective +  // constant: +  Imm = Imm >> 6; + +  const uint32_t high = (Imm & IMM_MASK_HIGH) << 16; +  const uint32_t low = Imm & IMM_MASK_LOW; + +  return PO_IMMEXT | high | PP_NOT_END | low; +} + +static void WriteInstFlushCache(void *Addr, uint32_t NewInstruction) { +  asm volatile("icinva(%[inst_addr])\n\t" +               "isync\n\t" +               "memw(%[inst_addr]) = %[new_inst]\n\t" +               "dccleaninva(%[inst_addr])\n\t" +               "syncht\n\t" +               : +               : [ inst_addr ] "r"(Addr), [ new_inst ] "r"(NewInstruction) +               : "memory"); +} + +inline static bool patchSled(const bool Enable, const uint32_t FuncId, +                             const XRaySledEntry &Sled, +                             void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { +  // When |Enable| == true, +  // We replace the following compile-time stub (sled): +  // +  // .L_xray_sled_N: +  // <xray_sled_base>: +  // {  jump .Ltmp0 } +  // {  nop +  //    nop +  //    nop +  //    nop } +  // .Ltmp0: + +  // With the following runtime patch: +  // +  // xray_sled_n (32-bit): +  // +  // <xray_sled_n>: +  // {  immext(#...) // upper 26-bits of func id +  //    r7 = ##...   // lower  6-bits of func id +  //    immext(#...) // upper 26-bits of trampoline +  //    r6 = ##... 
}  // lower 6 bits of trampoline +  // {  callr r6 } +  // +  // When |Enable|==false, we set back the first instruction in the sled to be +  // {  jump .Ltmp0 } + +  uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.address()); +  if (Enable) { +    uint32_t *CurAddress = FirstAddress + 1; +    *CurAddress = encodeExtendedTransferImmediate(FuncId, RN_R7); +    CurAddress++; +    *CurAddress = encodeConstantExtender(reinterpret_cast<uint32_t>(TracingHook)); +    CurAddress++; +    *CurAddress = +        encodeExtendedTransferImmediate(reinterpret_cast<uint32_t>(TracingHook), RN_R6, true); +    CurAddress++; + +    *CurAddress = uint32_t(PO_CALLR_R6); + +    WriteInstFlushCache(FirstAddress, uint32_t(encodeConstantExtender(FuncId))); +  } else { +    WriteInstFlushCache(FirstAddress, uint32_t(PatchOpcodes::PO_JUMPI_14)); +  } +  return true; +} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, +                        const XRaySledEntry &Sled, +                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { +  return patchSled(Enable, FuncId, Sled, Trampoline); +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, +                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, +                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, +                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  // FIXME: Implement in hexagon? +  return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, +                     const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  // FIXME: Implement in hexagon? +  return false; +} + +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { +  // FIXME: this will have to be implemented in the trampoline assembly file +} diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_init.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_init.cpp new file mode 100644 index 000000000000..f22a31b95686 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_init.cpp @@ -0,0 +1,131 @@ +//===-- xray_init.cpp -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// XRay initialisation logic. 
+//===----------------------------------------------------------------------===//
+
+#include <fcntl.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_flags.h"
+#include "xray_interface_internal.h"
+
+extern "C" {
+void __xray_init();
+extern const XRaySledEntry __start_xray_instr_map[] __attribute__((weak));
+extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak));
+extern const XRayFunctionSledIndex __start_xray_fn_idx[] __attribute__((weak));
+extern const XRayFunctionSledIndex __stop_xray_fn_idx[] __attribute__((weak));
+
+#if SANITIZER_APPLE
+// HACK: This is a temporary workaround to make XRay build on Darwin, but it
+// will probably not work at runtime.
+const XRaySledEntry __start_xray_instr_map[] = {};
+extern const XRaySledEntry __stop_xray_instr_map[] = {};
+extern const XRayFunctionSledIndex __start_xray_fn_idx[] = {};
+extern const XRayFunctionSledIndex __stop_xray_fn_idx[] = {};
+#endif
+}
+
+using namespace __xray;
+
+// When set to 'true', this means the XRay runtime has been initialised. We use
+// the weak symbols defined above (__start_xray_instr_map and
+// __stop_xray_instr_map) to initialise the instrumentation map that XRay uses
+// for runtime patching/unpatching of instrumentation points.
+//
+// FIXME: Support DSO instrumentation maps too. The current solution only works
+// for statically linked executables.
+atomic_uint8_t XRayInitialized{0};
+
+// This should always be updated before XRayInitialized is updated.
+SpinMutex XRayInstrMapMutex;
+XRaySledMap XRayInstrMap;
+
+// Global flag to determine whether the flags have been initialized.
+atomic_uint8_t XRayFlagsInitialized{0};
+
+// A mutex to allow only one thread to initialize the XRay data structures.
+SpinMutex XRayInitMutex;
+
+// __xray_init() will do the actual loading of the current process' memory map
+// and then proceed to look for the .xray_instr_map section/segment.
+void __xray_init() XRAY_NEVER_INSTRUMENT {
+  SpinMutexLock Guard(&XRayInitMutex);
+  // Short-circuit if we've already initialized XRay before.
+  if (atomic_load(&XRayInitialized, memory_order_acquire))
+    return;
+
+  // XRay is not compatible with PaX MPROTECT.
+  CheckMPROTECT();
+
+  if (!atomic_load(&XRayFlagsInitialized, memory_order_acquire)) {
+    initializeFlags();
+    atomic_store(&XRayFlagsInitialized, true, memory_order_release);
+  }
+
+  if (__start_xray_instr_map == nullptr) {
+    if (Verbosity())
+      Report("XRay instrumentation map missing. 
Not initializing XRay.\n"); +    return; +  } + +  { +    SpinMutexLock Guard(&XRayInstrMapMutex); +    XRayInstrMap.Sleds = __start_xray_instr_map; +    XRayInstrMap.Entries = __stop_xray_instr_map - __start_xray_instr_map; +    if (__start_xray_fn_idx != nullptr) { +      XRayInstrMap.SledsIndex = __start_xray_fn_idx; +      XRayInstrMap.Functions = __stop_xray_fn_idx - __start_xray_fn_idx; +    } else { +      size_t CountFunctions = 0; +      uint64_t LastFnAddr = 0; + +      for (std::size_t I = 0; I < XRayInstrMap.Entries; I++) { +        const auto &Sled = XRayInstrMap.Sleds[I]; +        const auto Function = Sled.function(); +        if (Function != LastFnAddr) { +          CountFunctions++; +          LastFnAddr = Function; +        } +      } + +      XRayInstrMap.Functions = CountFunctions; +    } +  } +  atomic_store(&XRayInitialized, true, memory_order_release); + +#ifndef XRAY_NO_PREINIT +  if (flags()->patch_premain) +    __xray_patch(); +#endif +} + +// FIXME: Make check-xray tests work on FreeBSD without +// SANITIZER_CAN_USE_PREINIT_ARRAY. +// See sanitizer_internal_defs.h where the macro is defined. +// Calling unresolved PLT functions in .preinit_array can lead to deadlock on +// FreeBSD but here it seems benign. +#if !defined(XRAY_NO_PREINIT) &&                                               \ +    (SANITIZER_CAN_USE_PREINIT_ARRAY || SANITIZER_FREEBSD) +// Only add the preinit array initialization if the sanitizers can. +__attribute__((section(".preinit_array"), +               used)) void (*__local_xray_preinit)(void) = __xray_init; +#else +// If we cannot use the .preinit_array section, we should instead use dynamic +// initialisation. +__attribute__ ((constructor (0))) +static void __local_xray_dyninit() { +  __xray_init(); +} +#endif diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_interface.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_interface.cpp new file mode 100644 index 000000000000..5839043fcb93 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_interface.cpp @@ -0,0 +1,530 @@ +//===-- xray_interface.cpp --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of the API functions. 
+// +//===----------------------------------------------------------------------===// + +#include "xray_interface_internal.h" + +#include <cinttypes> +#include <cstdio> +#include <errno.h> +#include <limits> +#include <string.h> +#include <sys/mman.h> + +#if SANITIZER_FUCHSIA +#include <zircon/process.h> +#include <zircon/sanitizer.h> +#include <zircon/status.h> +#include <zircon/syscalls.h> +#endif + +#include "sanitizer_common/sanitizer_addrhashmap.h" +#include "sanitizer_common/sanitizer_common.h" + +#include "xray_defs.h" +#include "xray_flags.h" + +extern __sanitizer::SpinMutex XRayInstrMapMutex; +extern __sanitizer::atomic_uint8_t XRayInitialized; +extern __xray::XRaySledMap XRayInstrMap; + +namespace __xray { + +#if defined(__x86_64__) +static const int16_t cSledLength = 12; +#elif defined(__aarch64__) +static const int16_t cSledLength = 32; +#elif defined(__arm__) +static const int16_t cSledLength = 28; +#elif SANITIZER_LOONGARCH64 +static const int16_t cSledLength = 48; +#elif SANITIZER_MIPS32 +static const int16_t cSledLength = 48; +#elif SANITIZER_MIPS64 +static const int16_t cSledLength = 64; +#elif defined(__powerpc64__) +static const int16_t cSledLength = 8; +#elif defined(__hexagon__) +static const int16_t cSledLength = 20; +#else +#error "Unsupported CPU Architecture" +#endif /* CPU architecture */ + +// This is the function to call when we encounter the entry or exit sleds. +atomic_uintptr_t XRayPatchedFunction{0}; + +// This is the function to call from the arg1-enabled sleds/trampolines. +atomic_uintptr_t XRayArgLogger{0}; + +// This is the function to call when we encounter a custom event log call. +atomic_uintptr_t XRayPatchedCustomEvent{0}; + +// This is the function to call when we encounter a typed event log call. +atomic_uintptr_t XRayPatchedTypedEvent{0}; + +// This is the global status to determine whether we are currently +// patching/unpatching. +atomic_uint8_t XRayPatching{0}; + +struct TypeDescription { +  uint32_t type_id; +  std::size_t description_string_length; +}; + +using TypeDescriptorMapType = AddrHashMap<TypeDescription, 11>; +// An address map from immutable descriptors to type ids. +TypeDescriptorMapType TypeDescriptorAddressMap{}; + +atomic_uint32_t TypeEventDescriptorCounter{0}; + +// MProtectHelper is an RAII wrapper for calls to mprotect(...) that will +// undo any successful mprotect(...) changes. This is used to make a page +// writeable and executable, and upon destruction if it was successful in +// doing so returns the page into a read-only and executable page. +// +// This is only used specifically for runtime-patching of the XRay +// instrumentation points. This assumes that the executable pages are +// originally read-and-execute only. 
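+//
+// Intended use is scoped, along these lines (a sketch, with hypothetical
+// values):
+//
+//   MProtectHelper Protector(PageAlignedAddr, MProtectLen, PageSize);
+//   if (Protector.MakeWriteable() == -1)
+//     return false;            // protections unchanged on failure
+//   ...patch the sleds...      // restored to read+execute by the destructor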
+class MProtectHelper {
+  void *PageAlignedAddr;
+  std::size_t MProtectLen;
+  bool MustCleanup;
+
+public:
+  explicit MProtectHelper(void *PageAlignedAddr,
+                          std::size_t MProtectLen,
+                          std::size_t PageSize) XRAY_NEVER_INSTRUMENT
+      : PageAlignedAddr(PageAlignedAddr),
+        MProtectLen(MProtectLen),
+        MustCleanup(false) {
+#if SANITIZER_FUCHSIA
+    // Round up the stored length to a page multiple; the explicit this-> is
+    // needed so the member, not the shadowing parameter, is updated.
+    this->MProtectLen = RoundUpTo(MProtectLen, PageSize);
+#endif
+  }
+
+  int MakeWriteable() XRAY_NEVER_INSTRUMENT {
+#if SANITIZER_FUCHSIA
+    auto R = __sanitizer_change_code_protection(
+        reinterpret_cast<uintptr_t>(PageAlignedAddr), MProtectLen, true);
+    if (R != ZX_OK) {
+      Report("XRay: cannot change code protection: %s\n",
+             _zx_status_get_string(R));
+      return -1;
+    }
+    MustCleanup = true;
+    return 0;
+#else
+    auto R = mprotect(PageAlignedAddr, MProtectLen,
+                      PROT_READ | PROT_WRITE | PROT_EXEC);
+    if (R != -1)
+      MustCleanup = true;
+    return R;
+#endif
+  }
+
+  ~MProtectHelper() XRAY_NEVER_INSTRUMENT {
+    if (MustCleanup) {
+#if SANITIZER_FUCHSIA
+      auto R = __sanitizer_change_code_protection(
+          reinterpret_cast<uintptr_t>(PageAlignedAddr), MProtectLen, false);
+      if (R != ZX_OK) {
+        Report("XRay: cannot change code protection: %s\n",
+               _zx_status_get_string(R));
+      }
+#else
+      mprotect(PageAlignedAddr, MProtectLen, PROT_READ | PROT_EXEC);
+#endif
+    }
+  }
+};
+
+namespace {
+
+bool patchSled(const XRaySledEntry &Sled, bool Enable,
+               int32_t FuncId) XRAY_NEVER_INSTRUMENT {
+  bool Success = false;
+  switch (Sled.Kind) {
+  case XRayEntryType::ENTRY:
+    Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_FunctionEntry);
+    break;
+  case XRayEntryType::EXIT:
+    Success = patchFunctionExit(Enable, FuncId, Sled);
+    break;
+  case XRayEntryType::TAIL:
+    Success = patchFunctionTailExit(Enable, FuncId, Sled);
+    break;
+  case XRayEntryType::LOG_ARGS_ENTRY:
+    Success = patchFunctionEntry(Enable, FuncId, Sled, __xray_ArgLoggerEntry);
+    break;
+  case XRayEntryType::CUSTOM_EVENT:
+    Success = patchCustomEvent(Enable, FuncId, Sled);
+    break;
+  case XRayEntryType::TYPED_EVENT:
+    Success = patchTypedEvent(Enable, FuncId, Sled);
+    break;
+  default:
+    // The sled kind is a small integer; the address is a 64-bit value.
+    Report("Unsupported sled kind '%d' at address %" PRIu64 "\n",
+           int(Sled.Kind), uint64_t(Sled.Address));
+    return false;
+  }
+  return Success;
+}
+
+const XRayFunctionSledIndex
+findFunctionSleds(int32_t FuncId,
+                  const XRaySledMap &InstrMap) XRAY_NEVER_INSTRUMENT {
+  int32_t CurFn = 0;
+  uint64_t LastFnAddr = 0;
+  XRayFunctionSledIndex Index = {nullptr, 0};
+
+  for (std::size_t I = 0; I < InstrMap.Entries && CurFn <= FuncId; I++) {
+    const auto &Sled = InstrMap.Sleds[I];
+    const auto Function = Sled.function();
+    if (Function != LastFnAddr) {
+      CurFn++;
+      LastFnAddr = Function;
+    }
+
+    if (CurFn == FuncId) {
+      if (Index.Begin == nullptr)
+        Index.Begin = &Sled;
+      Index.Size = &Sled - Index.Begin + 1;
+    }
+  }
+
+  return Index;
+}
+
+XRayPatchingStatus patchFunction(int32_t FuncId,
+                                 bool Enable) XRAY_NEVER_INSTRUMENT {
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
+    return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
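+
+  // XRayPatching is a simple test-and-set guard: exactly one thread may be
+  // patching or unpatching at any time, and concurrent callers observe
+  // ONGOING instead of blocking.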
+  uint8_t NotPatching = false;
+  if (!atomic_compare_exchange_strong(
+          &XRayPatching, &NotPatching, true, memory_order_acq_rel))
+    return XRayPatchingStatus::ONGOING; // Already patching.
+
+  // Next, we look for the function index.
+  XRaySledMap InstrMap;
+  {
+    SpinMutexLock Guard(&XRayInstrMapMutex);
+    InstrMap = XRayInstrMap;
+  }
+
+  // If we don't have an index, we can't patch individual functions. Release
+  // the patching guard before bailing out.
+  if (InstrMap.Functions == 0) {
+    atomic_store(&XRayPatching, false, memory_order_release);
+    return XRayPatchingStatus::NOT_INITIALIZED;
+  }
+
+  // FuncId must be a positive number, no greater than the number of functions
+  // instrumented.
+  if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions) {
+    Report("Invalid function id provided: %d\n", FuncId);
+    atomic_store(&XRayPatching, false, memory_order_release);
+    return XRayPatchingStatus::FAILED;
+  }
+
+  // Now we patch the sleds for this specific function.
+  XRayFunctionSledIndex SledRange;
+  if (InstrMap.SledsIndex) {
+    SledRange = {InstrMap.SledsIndex[FuncId - 1].fromPCRelative(),
+                 InstrMap.SledsIndex[FuncId - 1].Size};
+  } else {
+    SledRange = findFunctionSleds(FuncId, InstrMap);
+  }
+  auto *f = SledRange.Begin;
+  bool SucceedOnce = false;
+  for (size_t i = 0; i != SledRange.Size; ++i)
+    SucceedOnce |= patchSled(f[i], Enable, FuncId);
+
+  atomic_store(&XRayPatching, false, memory_order_release);
+
+  if (!SucceedOnce) {
+    Report("Failed patching any sled for function '%d'.\n", FuncId);
+    return XRayPatchingStatus::FAILED;
+  }
+
+  return XRayPatchingStatus::SUCCESS;
+}
+
+// controlPatching implements the common internals of the patching/unpatching
+// implementation. |Enable| defines whether we're enabling or disabling the
+// runtime XRay instrumentation.
+XRayPatchingStatus controlPatching(bool Enable) XRAY_NEVER_INSTRUMENT {
+  if (!atomic_load(&XRayInitialized, memory_order_acquire))
+    return XRayPatchingStatus::NOT_INITIALIZED; // Not initialized.
+
+  uint8_t NotPatching = false;
+  if (!atomic_compare_exchange_strong(
+          &XRayPatching, &NotPatching, true, memory_order_acq_rel))
+    return XRayPatchingStatus::ONGOING; // Already patching.
+
+  uint8_t PatchingSuccess = false;
+  auto XRayPatchingStatusResetter = at_scope_exit([&PatchingSuccess] {
+    if (!PatchingSuccess)
+      atomic_store(&XRayPatching, false, memory_order_release);
+  });
+
+  XRaySledMap InstrMap;
+  {
+    SpinMutexLock Guard(&XRayInstrMapMutex);
+    InstrMap = XRayInstrMap;
+  }
+  if (InstrMap.Entries == 0)
+    return XRayPatchingStatus::NOT_INITIALIZED;
+
+  uint32_t FuncId = 1;
+  uint64_t CurFun = 0;
+
+  // First we want to find the bounds for which we have instrumentation points,
+  // and try to get as few calls to mprotect(...) as possible. We're assuming
+  // that all the sleds for the instrumentation map are contiguous as a single
+  // set of pages. When we do support dynamic shared object instrumentation,
+  // we'll need to do this for each set of page load offsets per DSO loaded. For
+  // now we're assuming we can mprotect the whole section of text between the
+  // minimum sled address and the maximum sled address (+ the largest sled
+  // size).
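+  //
+  // For example, with a 4096-byte page size and sleds spanning 0x40123450
+  // through 0x40125678, the range is aligned down to 0x40123000 and covers
+  // (0x40125678 - 0x40123000) + cSledLength bytes, so a single mprotect(...)
+  // call suffices. (Addresses are illustrative.)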
+  auto *MinSled = &InstrMap.Sleds[0]; +  auto *MaxSled = &InstrMap.Sleds[InstrMap.Entries - 1]; +  for (std::size_t I = 0; I < InstrMap.Entries; I++) { +    const auto &Sled = InstrMap.Sleds[I]; +    if (Sled.address() < MinSled->address()) +      MinSled = &Sled; +    if (Sled.address() > MaxSled->address()) +      MaxSled = &Sled; +  } + +  const size_t PageSize = flags()->xray_page_size_override > 0 +                              ? flags()->xray_page_size_override +                              : GetPageSizeCached(); +  if ((PageSize == 0) || ((PageSize & (PageSize - 1)) != 0)) { +    Report("System page size is not a power of two: %zu\n", PageSize); +    return XRayPatchingStatus::FAILED; +  } + +  void *PageAlignedAddr = +      reinterpret_cast<void *>(MinSled->address() & ~(PageSize - 1)); +  size_t MProtectLen = +      (MaxSled->address() - reinterpret_cast<uptr>(PageAlignedAddr)) + +      cSledLength; +  MProtectHelper Protector(PageAlignedAddr, MProtectLen, PageSize); +  if (Protector.MakeWriteable() == -1) { +    Report("Failed mprotect: %d\n", errno); +    return XRayPatchingStatus::FAILED; +  } + +  for (std::size_t I = 0; I < InstrMap.Entries; ++I) { +    auto &Sled = InstrMap.Sleds[I]; +    auto F = Sled.function(); +    if (CurFun == 0) +      CurFun = F; +    if (F != CurFun) { +      ++FuncId; +      CurFun = F; +    } +    patchSled(Sled, Enable, FuncId); +  } +  atomic_store(&XRayPatching, false, +                            memory_order_release); +  PatchingSuccess = true; +  return XRayPatchingStatus::SUCCESS; +} + +XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId, +                                            bool Enable) XRAY_NEVER_INSTRUMENT { +  XRaySledMap InstrMap; +  { +    SpinMutexLock Guard(&XRayInstrMapMutex); +    InstrMap = XRayInstrMap; +  } + +  // FuncId must be a positive number, less than the number of functions +  // instrumented. +  if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions) { +    Report("Invalid function id provided: %d\n", FuncId); +    return XRayPatchingStatus::FAILED; +  } + +  const size_t PageSize = flags()->xray_page_size_override > 0 +                              ? flags()->xray_page_size_override +                              : GetPageSizeCached(); +  if ((PageSize == 0) || ((PageSize & (PageSize - 1)) != 0)) { +    Report("Provided page size is not a power of two: %zu\n", PageSize); +    return XRayPatchingStatus::FAILED; +  } + +  // Here we compute the minimum sled and maximum sled associated with a +  // particular function ID. 
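+  // Unlike controlPatching() above, which spans every sled in the map, the
+  // range computed here covers only this one function's sleds, so the
+  // mprotect(...) window is typically just a page or two.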
+  XRayFunctionSledIndex SledRange; +  if (InstrMap.SledsIndex) { +    SledRange = {InstrMap.SledsIndex[FuncId - 1].fromPCRelative(), +                 InstrMap.SledsIndex[FuncId - 1].Size}; +  } else { +    SledRange = findFunctionSleds(FuncId, InstrMap); +  } +  auto *f = SledRange.Begin; +  auto *e = SledRange.Begin + SledRange.Size; +  auto *MinSled = f; +  auto *MaxSled = e - 1; +  while (f != e) { +    if (f->address() < MinSled->address()) +      MinSled = f; +    if (f->address() > MaxSled->address()) +      MaxSled = f; +    ++f; +  } + +  void *PageAlignedAddr = +      reinterpret_cast<void *>(MinSled->address() & ~(PageSize - 1)); +  size_t MProtectLen = +      (MaxSled->address() - reinterpret_cast<uptr>(PageAlignedAddr)) + +      cSledLength; +  MProtectHelper Protector(PageAlignedAddr, MProtectLen, PageSize); +  if (Protector.MakeWriteable() == -1) { +    Report("Failed mprotect: %d\n", errno); +    return XRayPatchingStatus::FAILED; +  } +  return patchFunction(FuncId, Enable); +} + +} // namespace + +} // namespace __xray + +using namespace __xray; + +// The following functions are declared `extern "C" {...}` in the header, hence +// they're defined in the global namespace. + +int __xray_set_handler(void (*entry)(int32_t, +                                     XRayEntryType)) XRAY_NEVER_INSTRUMENT { +  if (atomic_load(&XRayInitialized, +                               memory_order_acquire)) { + +    atomic_store(&__xray::XRayPatchedFunction, +                              reinterpret_cast<uintptr_t>(entry), +                              memory_order_release); +    return 1; +  } +  return 0; +} + +int __xray_set_customevent_handler(void (*entry)(void *, size_t)) +    XRAY_NEVER_INSTRUMENT { +  if (atomic_load(&XRayInitialized, +                               memory_order_acquire)) { +    atomic_store(&__xray::XRayPatchedCustomEvent, +                              reinterpret_cast<uintptr_t>(entry), +                              memory_order_release); +    return 1; +  } +  return 0; +} + +int __xray_set_typedevent_handler(void (*entry)(size_t, const void *, +                                                size_t)) XRAY_NEVER_INSTRUMENT { +  if (atomic_load(&XRayInitialized, +                               memory_order_acquire)) { +    atomic_store(&__xray::XRayPatchedTypedEvent, +                              reinterpret_cast<uintptr_t>(entry), +                              memory_order_release); +    return 1; +  } +  return 0; +} + +int __xray_remove_handler() XRAY_NEVER_INSTRUMENT { +  return __xray_set_handler(nullptr); +} + +int __xray_remove_customevent_handler() XRAY_NEVER_INSTRUMENT { +  return __xray_set_customevent_handler(nullptr); +} + +int __xray_remove_typedevent_handler() XRAY_NEVER_INSTRUMENT { +  return __xray_set_typedevent_handler(nullptr); +} + +uint16_t __xray_register_event_type( +    const char *const event_type) XRAY_NEVER_INSTRUMENT { +  TypeDescriptorMapType::Handle h(&TypeDescriptorAddressMap, (uptr)event_type); +  if (h.created()) { +    h->type_id = atomic_fetch_add( +        &TypeEventDescriptorCounter, 1, memory_order_acq_rel); +    h->description_string_length = strnlen(event_type, 1024); +  } +  return h->type_id; +} + +XRayPatchingStatus __xray_patch() XRAY_NEVER_INSTRUMENT { +  return controlPatching(true); +} + +XRayPatchingStatus __xray_unpatch() XRAY_NEVER_INSTRUMENT { +  return controlPatching(false); +} + +XRayPatchingStatus __xray_patch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT { +  return mprotectAndPatchFunction(FuncId, 
true); +} + +XRayPatchingStatus +__xray_unpatch_function(int32_t FuncId) XRAY_NEVER_INSTRUMENT { +  return mprotectAndPatchFunction(FuncId, false); +} + +int __xray_set_handler_arg1(void (*entry)(int32_t, XRayEntryType, uint64_t)) { +  if (!atomic_load(&XRayInitialized, +                                memory_order_acquire)) +    return 0; + +  // A relaxed write might not be visible even if the current thread gets +  // scheduled on a different CPU/NUMA node.  We need to wait for everyone to +  // have this handler installed for consistency of collected data across CPUs. +  atomic_store(&XRayArgLogger, reinterpret_cast<uint64_t>(entry), +                            memory_order_release); +  return 1; +} + +int __xray_remove_handler_arg1() { return __xray_set_handler_arg1(nullptr); } + +uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT { +  XRaySledMap InstrMap; +  { +    SpinMutexLock Guard(&XRayInstrMapMutex); +    InstrMap = XRayInstrMap; +  } + +  if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions) +    return 0; +  const XRaySledEntry *Sled = +      InstrMap.SledsIndex ? InstrMap.SledsIndex[FuncId - 1].fromPCRelative() +                          : findFunctionSleds(FuncId, InstrMap).Begin; +  return Sled->function() +// On PPC, function entries are always aligned to 16 bytes. The beginning of a +// sled might be a local entry, which is always +8 based on the global entry. +// Always return the global entry. +#ifdef __PPC__ +         & ~0xf +#endif +      ; +} + +size_t __xray_max_function_id() XRAY_NEVER_INSTRUMENT { +  SpinMutexLock Guard(&XRayInstrMapMutex); +  return XRayInstrMap.Functions; +} diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_interface_internal.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_interface_internal.h new file mode 100644 index 000000000000..80c07c167f64 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_interface_internal.h @@ -0,0 +1,102 @@ +//===-- xray_interface_internal.h -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of the API functions. See also include/xray/xray_interface.h. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_INTERFACE_INTERNAL_H +#define XRAY_INTERFACE_INTERNAL_H + +#include "sanitizer_common/sanitizer_platform.h" +#include "xray/xray_interface.h" +#include <cstddef> +#include <cstdint> + +extern "C" { + +struct XRaySledEntry { +#if SANITIZER_WORDSIZE == 64 +  uint64_t Address; +  uint64_t Function; +  unsigned char Kind; +  unsigned char AlwaysInstrument; +  unsigned char Version; +  unsigned char Padding[13]; // Need 32 bytes +  uint64_t function() const { +    // The target address is relative to the location of the Function variable. +    return reinterpret_cast<uint64_t>(&Function) + Function; +  } +  uint64_t address() const { +    // The target address is relative to the location of the Address variable. 
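+    // For example, if &Address is 0x1000 and the stored value is 0x80, the
+    // sled lives at 0x1080; keeping offsets rather than absolute addresses
+    // makes the xray_instr_map section position-independent.
+    // (Addresses are illustrative.)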
+    return reinterpret_cast<uint64_t>(&Address) + Address; +  } +#elif SANITIZER_WORDSIZE == 32 +  uint32_t Address; +  uint32_t Function; +  unsigned char Kind; +  unsigned char AlwaysInstrument; +  unsigned char Version; +  unsigned char Padding[5]; // Need 16 bytes +  uint32_t function() const { +    // The target address is relative to the location of the Function variable. +    return reinterpret_cast<uint32_t>(&Function) + Function; +  } +  uint32_t address() const { +    // The target address is relative to the location of the Address variable. +    return reinterpret_cast<uint32_t>(&Address) + Address; +  } +#else +#error "Unsupported word size." +#endif +}; + +struct XRayFunctionSledIndex { +  const XRaySledEntry *Begin; +  size_t Size; +  // For an entry in the xray_fn_idx section, the address is relative to the +  // location of the Begin variable. +  const XRaySledEntry *fromPCRelative() const { +    return reinterpret_cast<const XRaySledEntry *>(uintptr_t(&Begin) + +                                                   uintptr_t(Begin)); +  } +}; +} + +namespace __xray { + +struct XRaySledMap { +  const XRaySledEntry *Sleds; +  size_t Entries; +  const XRayFunctionSledIndex *SledsIndex; +  size_t Functions; +}; + +bool patchFunctionEntry(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled, +                        void (*Trampoline)()); +bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled); +bool patchFunctionTailExit(bool Enable, uint32_t FuncId, +                           const XRaySledEntry &Sled); +bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled); +bool patchTypedEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled); + +} // namespace __xray + +extern "C" { +// The following functions have to be defined in assembler, on a per-platform +// basis. See xray_trampoline_*.S files for implementations. +extern void __xray_FunctionEntry(); +extern void __xray_FunctionExit(); +extern void __xray_FunctionTailExit(); +extern void __xray_ArgLoggerEntry(); +extern void __xray_CustomEvent(); +extern void __xray_TypedEvent(); +} + +#endif diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_log_interface.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_log_interface.cpp new file mode 100644 index 000000000000..fc70373f9dac --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_log_interface.cpp @@ -0,0 +1,209 @@ +//===-- xray_log_interface.cpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. +// +//===----------------------------------------------------------------------===// +#include "xray/xray_log_interface.h" + +#include "sanitizer_common/sanitizer_allocator_internal.h" +#include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_mutex.h" +#include "xray/xray_interface.h" +#include "xray_defs.h" + +namespace __xray { +static SpinMutex XRayImplMutex; +static XRayLogImpl CurrentXRayImpl{nullptr, nullptr, nullptr, nullptr}; +static XRayLogImpl *GlobalXRayImpl = nullptr; + +// This is the default implementation of a buffer iterator, which always yields +// a null buffer. 
+XRayBuffer NullBufferIterator(XRayBuffer) XRAY_NEVER_INSTRUMENT { +  return {nullptr, 0}; +} + +// This is the global function responsible for iterating through given buffers. +atomic_uintptr_t XRayBufferIterator{ +    reinterpret_cast<uintptr_t>(&NullBufferIterator)}; + +// We use a linked list of Mode to XRayLogImpl mappings. This is a linked list +// when it should be a map because we're avoiding having to depend on C++ +// standard library data structures at this level of the implementation. +struct ModeImpl { +  ModeImpl *Next; +  const char *Mode; +  XRayLogImpl Impl; +}; + +static ModeImpl SentinelModeImpl{ +    nullptr, nullptr, {nullptr, nullptr, nullptr, nullptr}}; +static ModeImpl *ModeImpls = &SentinelModeImpl; +static const ModeImpl *CurrentMode = nullptr; + +} // namespace __xray + +using namespace __xray; + +void __xray_log_set_buffer_iterator(XRayBuffer (*Iterator)(XRayBuffer)) +    XRAY_NEVER_INSTRUMENT { +  atomic_store(&__xray::XRayBufferIterator, +               reinterpret_cast<uintptr_t>(Iterator), memory_order_release); +} + +void __xray_log_remove_buffer_iterator() XRAY_NEVER_INSTRUMENT { +  __xray_log_set_buffer_iterator(&NullBufferIterator); +} + +XRayLogRegisterStatus +__xray_log_register_mode(const char *Mode, +                         XRayLogImpl Impl) XRAY_NEVER_INSTRUMENT { +  if (Impl.flush_log == nullptr || Impl.handle_arg0 == nullptr || +      Impl.log_finalize == nullptr || Impl.log_init == nullptr) +    return XRayLogRegisterStatus::XRAY_INCOMPLETE_IMPL; + +  SpinMutexLock Guard(&XRayImplMutex); +  // First, look for whether the mode already has a registered implementation. +  for (ModeImpl *it = ModeImpls; it != &SentinelModeImpl; it = it->Next) { +    if (!internal_strcmp(Mode, it->Mode)) +      return XRayLogRegisterStatus::XRAY_DUPLICATE_MODE; +  } +  auto *NewModeImpl = static_cast<ModeImpl *>(InternalAlloc(sizeof(ModeImpl))); +  NewModeImpl->Next = ModeImpls; +  NewModeImpl->Mode = internal_strdup(Mode); +  NewModeImpl->Impl = Impl; +  ModeImpls = NewModeImpl; +  return XRayLogRegisterStatus::XRAY_REGISTRATION_OK; +} + +XRayLogRegisterStatus +__xray_log_select_mode(const char *Mode) XRAY_NEVER_INSTRUMENT { +  SpinMutexLock Guard(&XRayImplMutex); +  for (ModeImpl *it = ModeImpls; it != &SentinelModeImpl; it = it->Next) { +    if (!internal_strcmp(Mode, it->Mode)) { +      CurrentMode = it; +      CurrentXRayImpl = it->Impl; +      GlobalXRayImpl = &CurrentXRayImpl; +      __xray_set_handler(it->Impl.handle_arg0); +      return XRayLogRegisterStatus::XRAY_REGISTRATION_OK; +    } +  } +  return XRayLogRegisterStatus::XRAY_MODE_NOT_FOUND; +} + +const char *__xray_log_get_current_mode() XRAY_NEVER_INSTRUMENT { +  SpinMutexLock Guard(&XRayImplMutex); +  if (CurrentMode != nullptr) +    return CurrentMode->Mode; +  return nullptr; +} + +void __xray_set_log_impl(XRayLogImpl Impl) XRAY_NEVER_INSTRUMENT { +  if (Impl.log_init == nullptr || Impl.log_finalize == nullptr || +      Impl.handle_arg0 == nullptr || Impl.flush_log == nullptr) { +    SpinMutexLock Guard(&XRayImplMutex); +    GlobalXRayImpl = nullptr; +    CurrentMode = nullptr; +    __xray_remove_handler(); +    __xray_remove_handler_arg1(); +    return; +  } + +  SpinMutexLock Guard(&XRayImplMutex); +  CurrentXRayImpl = Impl; +  GlobalXRayImpl = &CurrentXRayImpl; +  __xray_set_handler(Impl.handle_arg0); +} + +void __xray_remove_log_impl() XRAY_NEVER_INSTRUMENT { +  SpinMutexLock Guard(&XRayImplMutex); +  GlobalXRayImpl = nullptr; +  __xray_remove_handler(); +  __xray_remove_handler_arg1(); +} + 
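+// Typical usage of the mode registry (hypothetical consumer code; the
+// myLogInit/myLogFinalize/myHandleArg0/myFlushLog names are illustrative):
+//
+//   XRayLogImpl Impl{myLogInit, myLogFinalize, myHandleArg0, myFlushLog};
+//   __xray_log_register_mode("example-mode", Impl);
+//   __xray_log_select_mode("example-mode");
+//   __xray_log_init_mode("example-mode", "some_flag=value");
+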
+XRayLogInitStatus __xray_log_init(size_t BufferSize, size_t MaxBuffers,
+                                  void *Args,
+                                  size_t ArgsSize) XRAY_NEVER_INSTRUMENT {
+  SpinMutexLock Guard(&XRayImplMutex);
+  if (!GlobalXRayImpl)
+    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+  return GlobalXRayImpl->log_init(BufferSize, MaxBuffers, Args, ArgsSize);
+}
+
+XRayLogInitStatus __xray_log_init_mode(const char *Mode, const char *Config)
+    XRAY_NEVER_INSTRUMENT {
+  SpinMutexLock Guard(&XRayImplMutex);
+  if (!GlobalXRayImpl)
+    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+
+  if (Config == nullptr)
+    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+
+  // Check first whether the current mode is the same as what we expect.
+  if (CurrentMode == nullptr || internal_strcmp(CurrentMode->Mode, Mode) != 0)
+    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+
+  // Here we do some work to coerce the pointer we're provided, so that
+  // the implementations that still take void* pointers can handle the
+  // data provided in the Config argument.
+  return GlobalXRayImpl->log_init(
+      0, 0, const_cast<void *>(static_cast<const void *>(Config)), 0);
+}
+
+XRayLogInitStatus
+__xray_log_init_mode_bin(const char *Mode, const char *Config,
+                         size_t ConfigSize) XRAY_NEVER_INSTRUMENT {
+  SpinMutexLock Guard(&XRayImplMutex);
+  if (!GlobalXRayImpl)
+    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+
+  if (Config == nullptr)
+    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+
+  // Check first whether the current mode is the same as what we expect.
+  if (CurrentMode == nullptr || internal_strcmp(CurrentMode->Mode, Mode) != 0)
+    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+
+  // Here we do some work to coerce the pointer we're provided, so that
+  // the implementations that still take void* pointers can handle the
+  // data provided in the Config argument.
+  return GlobalXRayImpl->log_init(
+      0, 0, const_cast<void *>(static_cast<const void *>(Config)), ConfigSize);
+}
+
+XRayLogInitStatus __xray_log_finalize() XRAY_NEVER_INSTRUMENT {
+  SpinMutexLock Guard(&XRayImplMutex);
+  if (!GlobalXRayImpl)
+    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
+  return GlobalXRayImpl->log_finalize();
+}
+
+XRayLogFlushStatus __xray_log_flushLog() XRAY_NEVER_INSTRUMENT {
+  SpinMutexLock Guard(&XRayImplMutex);
+  if (!GlobalXRayImpl)
+    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+  return GlobalXRayImpl->flush_log();
+}
+
+XRayLogFlushStatus __xray_log_process_buffers(
+    void (*Processor)(const char *, XRayBuffer)) XRAY_NEVER_INSTRUMENT {
+  // Load the buffer iterator with acquire semantics (rather than taking a
+  // lock) so that we observe the most recently installed iterator before
+  // walking the buffers it yields.
+  if (!GlobalXRayImpl)
+    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+  auto Iterator = reinterpret_cast<XRayBuffer (*)(XRayBuffer)>(
+      atomic_load(&XRayBufferIterator, memory_order_acquire));
+  auto Buffer = (*Iterator)(XRayBuffer{nullptr, 0});
+  auto Mode = CurrentMode ?
CurrentMode->Mode : nullptr; +  while (Buffer.Data != nullptr) { +    (*Processor)(Mode, Buffer); +    Buffer = (*Iterator)(Buffer); +  } +  return XRayLogFlushStatus::XRAY_LOG_FLUSHED; +} diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_loongarch64.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_loongarch64.cpp new file mode 100644 index 000000000000..b839adba00d2 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_loongarch64.cpp @@ -0,0 +1,160 @@ +//===-------- xray_loongarch64.cpp ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of loongarch-specific routines. +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include <atomic> + +namespace __xray { + +enum RegNum : uint32_t { +  RN_RA = 1, +  RN_SP = 3, +  RN_T0 = 12, +  RN_T1 = 13, +}; + +// Encode instructions in the 2RIx format, where the primary formats here +// are 2RI12-type and 2RI16-type. +static inline uint32_t +encodeInstruction2RIx(uint32_t Opcode, uint32_t Rd, uint32_t Rj, +                      uint32_t Imm) XRAY_NEVER_INSTRUMENT { +  return Opcode | (Imm << 10) | (Rj << 5) | Rd; +} + +// Encode instructions in 1RI20 format, e.g. lu12i.w/lu32i.d. +static inline uint32_t +encodeInstruction1RI20(uint32_t Opcode, uint32_t Rd, +                       uint32_t Imm) XRAY_NEVER_INSTRUMENT { +  return Opcode | (Imm << 5) | Rd; +} + +static inline bool patchSled(const bool Enable, const uint32_t FuncId, +                             const XRaySledEntry &Sled, +                             void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { +  // When |Enable| == true, +  // We replace the following compile-time stub (sled): +  // +  // .Lxray_sled_beginN: +  //	B .Lxray_sled_endN +  //	11 NOPs (44 bytes) +  // .Lxray_sled_endN: +  // +  // With the following runtime patch: +  // +  // xray_sled_n: +  //   addi.d  sp, sp, -16                       ; create the stack frame +  //   st.d    ra, sp, 8                         ; save the return address +  //   lu12i.w t0, %abs_hi20(__xray_FunctionEntry/Exit) +  //   ori     t0, t0, %abs_lo12(__xray_FunctionEntry/Exit) +  //   lu32i.d t0, %abs64_lo20(__xray_FunctionEntry/Exit) +  //   lu52i.d t0, t0, %abs64_hi12(__xray_FunctionEntry/Exit) +  //   lu12i.w t1, %abs_hi20(function_id) +  //   ori     t1, t1, %abs_lo12(function_id)    ; pass the function id +  //   jirl    ra, t0, 0                         ; call the tracing hook +  //   ld.d    ra, sp, 8                         ; restore the return address +  //   addi.d  sp, sp, 16                        ; de-allocate the stack frame +  // +  // Replacement of the first 4-byte instruction should be the last and atomic +  // operation, so that the user code which reaches the sled concurrently +  // either jumps over the whole sled, or executes the whole sled when the +  // latter is ready. 
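+  //
+  // For example, with the tracing hook at 0x0000'1234'5678'9abc, the pieces
+  // are lo12 = 0xabc, hi20 = 0x56789, higher20 = 0x01234 and highest12 =
+  // 0x000, reassembled into t0 by the lu12i.w/ori/lu32i.d/lu52i.d sequence
+  // above. (The address is illustrative.)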
+  // +  // When |Enable|==false, we set the first instruction in the sled back to +  //   B #48 + +  uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address()); +  if (Enable) { +    uint32_t LoTracingHookAddr = reinterpret_cast<int64_t>(TracingHook) & 0xfff; +    uint32_t HiTracingHookAddr = +        (reinterpret_cast<int64_t>(TracingHook) >> 12) & 0xfffff; +    uint32_t HigherTracingHookAddr = +        (reinterpret_cast<int64_t>(TracingHook) >> 32) & 0xfffff; +    uint32_t HighestTracingHookAddr = +        (reinterpret_cast<int64_t>(TracingHook) >> 52) & 0xfff; +    uint32_t LoFunctionID = FuncId & 0xfff; +    uint32_t HiFunctionID = (FuncId >> 12) & 0xfffff; +    Address[1] = encodeInstruction2RIx(0x29c00000, RegNum::RN_RA, RegNum::RN_SP, +                                       0x8); // st.d ra, sp, 8 +    Address[2] = encodeInstruction1RI20( +        0x14000000, RegNum::RN_T0, +        HiTracingHookAddr); // lu12i.w t0, HiTracingHookAddr +    Address[3] = encodeInstruction2RIx( +        0x03800000, RegNum::RN_T0, RegNum::RN_T0, +        LoTracingHookAddr); // ori t0, t0, LoTracingHookAddr +    Address[4] = encodeInstruction1RI20( +        0x16000000, RegNum::RN_T0, +        HigherTracingHookAddr); // lu32i.d t0, HigherTracingHookAddr +    Address[5] = encodeInstruction2RIx( +        0x03000000, RegNum::RN_T0, RegNum::RN_T0, +        HighestTracingHookAddr); // lu52i.d t0, t0, HighestTracingHookAddr +    Address[6] = +        encodeInstruction1RI20(0x14000000, RegNum::RN_T1, +                               HiFunctionID); // lu12i.w t1, HiFunctionID +    Address[7] = +        encodeInstruction2RIx(0x03800000, RegNum::RN_T1, RegNum::RN_T1, +                              LoFunctionID); // ori t1, t1, LoFunctionID +    Address[8] = encodeInstruction2RIx(0x4c000000, RegNum::RN_RA, RegNum::RN_T0, +                                       0); // jirl ra, t0, 0 +    Address[9] = encodeInstruction2RIx(0x28c00000, RegNum::RN_RA, RegNum::RN_SP, +                                       0x8); // ld.d ra, sp, 8 +    Address[10] = encodeInstruction2RIx( +        0x02c00000, RegNum::RN_SP, RegNum::RN_SP, 0x10); // addi.d sp, sp, 16 +    uint32_t CreateStackSpace = encodeInstruction2RIx( +        0x02c00000, RegNum::RN_SP, RegNum::RN_SP, 0xff0); // addi.d sp, sp, -16 +    std::atomic_store_explicit( +        reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateStackSpace, +        std::memory_order_release); +  } else { +    std::atomic_store_explicit( +        reinterpret_cast<std::atomic<uint32_t> *>(Address), +        uint32_t(0x50003000), std::memory_order_release); // b #48 +  } +  return true; +} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, +                        const XRaySledEntry &Sled, +                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { +  return patchSled(Enable, FuncId, Sled, Trampoline); +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, +                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, +                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  // TODO: In the future we'd need to distinguish between non-tail exits and +  // tail exits for better information preservation. 
+  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, +                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  // FIXME: Implement in loongarch? +  return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, +                     const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  // FIXME: Implement in loongarch? +  return false; +} +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { +  // TODO: This will have to be implemented in the trampoline assembly file. +} diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_mips.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_mips.cpp new file mode 100644 index 000000000000..dc9e837a555d --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_mips.cpp @@ -0,0 +1,171 @@ +//===-- xray_mips.cpp -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of MIPS-specific routines (32-bit). +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include <atomic> + +namespace __xray { + +// The machine codes for some instructions used in runtime patching. +enum PatchOpcodes : uint32_t { +  PO_ADDIU = 0x24000000, // addiu rt, rs, imm +  PO_SW = 0xAC000000,    // sw rt, offset(sp) +  PO_LUI = 0x3C000000,   // lui rs, %hi(address) +  PO_ORI = 0x34000000,   // ori rt, rs, %lo(address) +  PO_JALR = 0x0000F809,  // jalr rs +  PO_LW = 0x8C000000,    // lw rt, offset(address) +  PO_B44 = 0x1000000b,   // b #44 +  PO_NOP = 0x0,          // nop +}; + +enum RegNum : uint32_t { +  RN_T0 = 0x8, +  RN_T9 = 0x19, +  RN_RA = 0x1F, +  RN_SP = 0x1D, +}; + +inline static uint32_t encodeInstruction(uint32_t Opcode, uint32_t Rs, +                                         uint32_t Rt, +                                         uint32_t Imm) XRAY_NEVER_INSTRUMENT { +  return (Opcode | Rs << 21 | Rt << 16 | Imm); +} + +inline static uint32_t +encodeSpecialInstruction(uint32_t Opcode, uint32_t Rs, uint32_t Rt, uint32_t Rd, +                         uint32_t Imm) XRAY_NEVER_INSTRUMENT { +  return (Rs << 21 | Rt << 16 | Rd << 11 | Imm << 6 | Opcode); +} + +inline static bool patchSled(const bool Enable, const uint32_t FuncId, +                             const XRaySledEntry &Sled, +                             void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { +  // When |Enable| == true, +  // We replace the following compile-time stub (sled): +  // +  // xray_sled_n: +  //	B .tmpN +  //	11 NOPs (44 bytes) +  //	.tmpN +  //	ADDIU T9, T9, 44 +  // +  // With the following runtime patch: +  // +  // xray_sled_n (32-bit): +  //    addiu sp, sp, -8                        ;create stack frame +  //    nop +  //    sw ra, 4(sp)                            ;save return address +  //    sw t9, 0(sp)                            ;save register t9 +  //    lui t9, %hi(__xray_FunctionEntry/Exit) +  //    ori t9, t9, %lo(__xray_FunctionEntry/Exit) +  //    lui t0, %hi(function_id) +  //    
jalr t9                                 ;call Tracing hook
+  //    ori t0, t0, %lo(function_id)            ;pass function id (delay slot)
+  //    lw t9, 0(sp)                            ;restore register t9
+  //    lw ra, 4(sp)                            ;restore return address
+  //    addiu sp, sp, 8                         ;delete stack frame
+  //
+  // We add 44 bytes to t9 because we want to adjust the function pointer to
+  // the actual start of the function, i.e. the address just after the noop
+  // sled. We do this because the gp displacement relocation is emitted at the
+  // start of the function, i.e. after the nop sled, and to correctly calculate
+  // the global offset table address, t9 must hold the address of the
+  // instruction containing the gp displacement relocation.
+  // FIXME: Is this correct for the static relocation model?
+  //
+  // Replacement of the first 4-byte instruction should be the last and atomic
+  // operation, so that the user code which reaches the sled concurrently
+  // either jumps over the whole sled, or executes the whole sled when the
+  // latter is ready.
+  //
+  // When |Enable|==false, we set back the first instruction in the sled to be
+  //   B #44
+
+  uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address());
+  if (Enable) {
+    uint32_t LoTracingHookAddr =
+        reinterpret_cast<int32_t>(TracingHook) & 0xffff;
+    uint32_t HiTracingHookAddr =
+        (reinterpret_cast<int32_t>(TracingHook) >> 16) & 0xffff;
+    uint32_t LoFunctionID = FuncId & 0xffff;
+    uint32_t HiFunctionID = (FuncId >> 16) & 0xffff;
+    Address[2] = encodeInstruction(PatchOpcodes::PO_SW, RegNum::RN_SP,
+                                   RegNum::RN_RA, 0x4);
+    Address[3] = encodeInstruction(PatchOpcodes::PO_SW, RegNum::RN_SP,
+                                   RegNum::RN_T9, 0x0);
+    Address[4] = encodeInstruction(PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T9,
+                                   HiTracingHookAddr);
+    Address[5] = encodeInstruction(PatchOpcodes::PO_ORI, RegNum::RN_T9,
+                                   RegNum::RN_T9, LoTracingHookAddr);
+    Address[6] = encodeInstruction(PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T0,
+                                   HiFunctionID);
+    Address[7] = encodeSpecialInstruction(PatchOpcodes::PO_JALR, RegNum::RN_T9,
+                                          0x0, RegNum::RN_RA, 0x0);
+    Address[8] = encodeInstruction(PatchOpcodes::PO_ORI, RegNum::RN_T0,
+                                   RegNum::RN_T0, LoFunctionID);
+    Address[9] = encodeInstruction(PatchOpcodes::PO_LW, RegNum::RN_SP,
+                                   RegNum::RN_T9, 0x0);
+    Address[10] = encodeInstruction(PatchOpcodes::PO_LW, RegNum::RN_SP,
+                                    RegNum::RN_RA, 0x4);
+    Address[11] = encodeInstruction(PatchOpcodes::PO_ADDIU, RegNum::RN_SP,
+                                    RegNum::RN_SP, 0x8);
+    uint32_t CreateStackSpaceInstr = encodeInstruction(
+        PatchOpcodes::PO_ADDIU, RegNum::RN_SP, RegNum::RN_SP, 0xFFF8);
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint32_t> *>(Address),
+        uint32_t(CreateStackSpaceInstr), std::memory_order_release);
+  } else {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint32_t> *>(Address),
+        uint32_t(PatchOpcodes::PO_B44), std::memory_order_release);
+  }
+  return true;
+}
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+                        const XRaySledEntry
&Sled, +                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { +  return patchSled(Enable, FuncId, Sled, Trampoline); +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, +                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, +                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  // FIXME: In the future we'd need to distinguish between non-tail exits and +  // tail exits for better information preservation. +  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, +                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  // FIXME: Implement in mips? +  return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, +                     const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  // FIXME: Implement in mips? +  return false; +} + +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { +  // FIXME: this will have to be implemented in the trampoline assembly file +} diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_mips64.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_mips64.cpp new file mode 100644 index 000000000000..5b221bb6ddc0 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_mips64.cpp @@ -0,0 +1,178 @@ +//===-- xray_mips64.cpp -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of MIPS64-specific routines. +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include <atomic> + +namespace __xray { + +// The machine codes for some instructions used in runtime patching. 
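+// For example, encodeInstruction(PO_SD, RN_SP, RN_RA, 0x8) below evaluates to
+// 0xFFBF0008, i.e. "sd ra, 8(sp)": opcode 0xFC000000 | base 29 << 21 |
+// rt 31 << 16 | offset 8.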
+enum PatchOpcodes : uint32_t {
+  PO_DADDIU = 0x64000000, // daddiu rt, rs, imm
+  PO_SD = 0xFC000000,     // sd rt, offset(base)
+  PO_LUI = 0x3C000000,    // lui rt, imm
+  PO_ORI = 0x34000000,    // ori rt, rs, imm
+  PO_DSLL = 0x00000038,   // dsll rd, rt, sa
+  PO_JALR = 0x00000009,   // jalr rs
+  PO_LD = 0xDC000000,     // ld rt, offset(base)
+  PO_B60 = 0x1000000f,    // b #60
+  PO_NOP = 0x0,           // nop
+};
+
+enum RegNum : uint32_t {
+  RN_T0 = 0xC,
+  RN_T9 = 0x19,
+  RN_RA = 0x1F,
+  RN_SP = 0x1D,
+};
+
+inline static uint32_t encodeInstruction(uint32_t Opcode, uint32_t Rs,
+                                         uint32_t Rt,
+                                         uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+  return (Opcode | Rs << 21 | Rt << 16 | Imm);
+}
+
+inline static uint32_t
+encodeSpecialInstruction(uint32_t Opcode, uint32_t Rs, uint32_t Rt, uint32_t Rd,
+                         uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+  return (Rs << 21 | Rt << 16 | Rd << 11 | Imm << 6 | Opcode);
+}
+
+inline static bool patchSled(const bool Enable, const uint32_t FuncId,
+                             const XRaySledEntry &Sled,
+                             void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
+  // When |Enable| == true,
+  // We replace the following compile-time stub (sled):
+  //
+  // xray_sled_n:
+  //	B .tmpN
+  //	15 NOPs (60 bytes)
+  //	.tmpN
+  //
+  // With the following runtime patch:
+  //
+  // xray_sled_n (64-bit):
+  //    daddiu sp, sp, -16                      ;create stack frame
+  //    nop
+  //    sd ra, 8(sp)                            ;save return address
+  //    sd t9, 0(sp)                            ;save register t9
+  //    lui t9, %highest(__xray_FunctionEntry/Exit)
+  //    ori t9, t9, %higher(__xray_FunctionEntry/Exit)
+  //    dsll t9, t9, 16
+  //    ori t9, t9, %hi(__xray_FunctionEntry/Exit)
+  //    dsll t9, t9, 16
+  //    ori t9, t9, %lo(__xray_FunctionEntry/Exit)
+  //    lui t0, %hi(function_id)
+  //    jalr t9                                 ;call Tracing hook
+  //    ori t0, t0, %lo(function_id)            ;pass function id (delay slot)
+  //    ld t9, 0(sp)                            ;restore register t9
+  //    ld ra, 8(sp)                            ;restore return address
+  //    daddiu sp, sp, 16                       ;delete stack frame
+  //
+  // Replacement of the first 4-byte instruction should be the last and atomic
+  // operation, so that the user code which reaches the sled concurrently
+  // either jumps over the whole sled, or executes the whole sled when the
+  // latter is ready.
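+  //
+  // For example, with the tracing hook at 0x0000'1234'5678'9abc, the four
+  // 16-bit pieces are highest = 0x0000, higher = 0x1234, hi = 0x5678 and
+  // lo = 0x9abc; the lui/ori/dsll chain above reassembles them in t9.
+  // (The address is illustrative.)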
+  // +  // When |Enable|==false, we set back the first instruction in the sled to be +  //   B #60 + +  uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address()); +  if (Enable) { +    uint32_t LoTracingHookAddr = +        reinterpret_cast<int64_t>(TracingHook) & 0xffff; +    uint32_t HiTracingHookAddr = +        (reinterpret_cast<int64_t>(TracingHook) >> 16) & 0xffff; +    uint32_t HigherTracingHookAddr = +        (reinterpret_cast<int64_t>(TracingHook) >> 32) & 0xffff; +    uint32_t HighestTracingHookAddr = +        (reinterpret_cast<int64_t>(TracingHook) >> 48) & 0xffff; +    uint32_t LoFunctionID = FuncId & 0xffff; +    uint32_t HiFunctionID = (FuncId >> 16) & 0xffff; +    Address[2] = encodeInstruction(PatchOpcodes::PO_SD, RegNum::RN_SP, +                                   RegNum::RN_RA, 0x8); +    Address[3] = encodeInstruction(PatchOpcodes::PO_SD, RegNum::RN_SP, +                                   RegNum::RN_T9, 0x0); +    Address[4] = encodeInstruction(PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T9, +                                   HighestTracingHookAddr); +    Address[5] = encodeInstruction(PatchOpcodes::PO_ORI, RegNum::RN_T9, +                                   RegNum::RN_T9, HigherTracingHookAddr); +    Address[6] = encodeSpecialInstruction(PatchOpcodes::PO_DSLL, 0x0, +                                          RegNum::RN_T9, RegNum::RN_T9, 0x10); +    Address[7] = encodeInstruction(PatchOpcodes::PO_ORI, RegNum::RN_T9, +                                   RegNum::RN_T9, HiTracingHookAddr); +    Address[8] = encodeSpecialInstruction(PatchOpcodes::PO_DSLL, 0x0, +                                          RegNum::RN_T9, RegNum::RN_T9, 0x10); +    Address[9] = encodeInstruction(PatchOpcodes::PO_ORI, RegNum::RN_T9, +                                   RegNum::RN_T9, LoTracingHookAddr); +    Address[10] = encodeInstruction(PatchOpcodes::PO_LUI, 0x0, RegNum::RN_T0, +                                    HiFunctionID); +    Address[11] = encodeSpecialInstruction(PatchOpcodes::PO_JALR, RegNum::RN_T9, +                                           0x0, RegNum::RN_RA, 0X0); +    Address[12] = encodeInstruction(PatchOpcodes::PO_ORI, RegNum::RN_T0, +                                    RegNum::RN_T0, LoFunctionID); +    Address[13] = encodeInstruction(PatchOpcodes::PO_LD, RegNum::RN_SP, +                                    RegNum::RN_T9, 0x0); +    Address[14] = encodeInstruction(PatchOpcodes::PO_LD, RegNum::RN_SP, +                                    RegNum::RN_RA, 0x8); +    Address[15] = encodeInstruction(PatchOpcodes::PO_DADDIU, RegNum::RN_SP, +                                    RegNum::RN_SP, 0x10); +    uint32_t CreateStackSpace = encodeInstruction( +        PatchOpcodes::PO_DADDIU, RegNum::RN_SP, RegNum::RN_SP, 0xfff0); +    std::atomic_store_explicit( +        reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateStackSpace, +        std::memory_order_release); +  } else { +    std::atomic_store_explicit( +        reinterpret_cast<std::atomic<uint32_t> *>(Address), +        uint32_t(PatchOpcodes::PO_B60), std::memory_order_release); +  } +  return true; +} + +bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, +                        const XRaySledEntry &Sled, +                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { +  return patchSled(Enable, FuncId, Sled, Trampoline); +} + +bool patchFunctionExit(const bool Enable, const uint32_t FuncId, +                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  return patchSled(Enable, FuncId, Sled, 
__xray_FunctionExit); +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, +                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  // FIXME: In the future we'd need to distinguish between non-tail exits and +  // tail exits for better information preservation. +  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); +} + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, +                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  // FIXME: Implement in mips64? +  return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, +                     const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  // FIXME: Implement in mips64? +  return false; +} +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { +  // FIXME: this will have to be implemented in the trampoline assembly file +} diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_never_instrument.txt b/contrib/llvm-project/compiler-rt/lib/xray/xray_never_instrument.txt new file mode 100644 index 000000000000..7fa48dda7e16 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_never_instrument.txt @@ -0,0 +1,6 @@ +# List of function matchers common to C/C++ applications that make sense to +# never instrument. You can use this as an argument to +# -fxray-never-instrument=<path> along with your project-specific lists. + +# Never instrument any function whose symbol starts with __xray. +fun:__xray* diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.cpp new file mode 100644 index 000000000000..c3553d848313 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.cpp @@ -0,0 +1,113 @@ +//===-- xray_powerpc64.cpp --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of powerpc64 and powerpc64le routines. +// +//===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_interface_internal.h" +#include "xray_utils.h" +#include <atomic> +#include <cassert> +#include <cstring> + +#ifndef __LITTLE_ENDIAN__ +#error powerpc64 big endian is not supported for now. 
+#endif + +namespace { + +constexpr unsigned long long JumpOverInstNum = 7; + +void clearCache(void *Addr, size_t Len) { +  const size_t LineSize = 32; + +  const intptr_t Mask = ~(LineSize - 1); +  const intptr_t StartLine = ((intptr_t)Addr) & Mask; +  const intptr_t EndLine = ((intptr_t)Addr + Len + LineSize - 1) & Mask; + +  for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) +    asm volatile("dcbf 0, %0" : : "r"(Line)); +  asm volatile("sync"); + +  for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) +    asm volatile("icbi 0, %0" : : "r"(Line)); +  asm volatile("isync"); +} + +} // namespace + +extern "C" void __clear_cache(void *start, void *end); + +namespace __xray { + +bool patchFunctionEntry(const bool Enable, uint32_t FuncId, +                        const XRaySledEntry &Sled, +                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { +  const uint64_t Address = Sled.address(); +  if (Enable) { +    // lis 0, FuncId[16..32] +    // li 0, FuncId[0..15] +    *reinterpret_cast<uint64_t *>(Address) = +        (0x3c000000ull + (FuncId >> 16)) + +        ((0x60000000ull + (FuncId & 0xffff)) << 32); +  } else { +    // b +JumpOverInstNum instructions. +    *reinterpret_cast<uint32_t *>(Address) = +        0x48000000ull + (JumpOverInstNum << 2); +  } +  clearCache(reinterpret_cast<void *>(Address), 8); +  return true; +} + +bool patchFunctionExit(const bool Enable, uint32_t FuncId, +                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  const uint64_t Address = Sled.address(); +  if (Enable) { +    // lis 0, FuncId[16..32] +    // li 0, FuncId[0..15] +    *reinterpret_cast<uint64_t *>(Address) = +        (0x3c000000ull + (FuncId >> 16)) + +        ((0x60000000ull + (FuncId & 0xffff)) << 32); +  } else { +    // Copy the blr/b instruction after JumpOverInstNum instructions. +    *reinterpret_cast<uint32_t *>(Address) = +        *(reinterpret_cast<uint32_t *>(Address) + JumpOverInstNum); +  } +  clearCache(reinterpret_cast<void *>(Address), 8); +  return true; +} + +bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, +                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  return patchFunctionExit(Enable, FuncId, Sled); +} + +// FIXME: Maybe implement this better? +bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } + +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, +                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  // FIXME: Implement in powerpc64? +  return false; +} + +bool patchTypedEvent(const bool Enable, const uint32_t FuncId, +                     const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { +  // FIXME: Implement in powerpc64? +  return false; +} + +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { +  // FIXME: this will have to be implemented in the trampoline assembly file +} diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.inc b/contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.inc new file mode 100644 index 000000000000..7e872b5b42e6 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_powerpc64.inc @@ -0,0 +1,51 @@ +//===-- xray_powerpc64.inc --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +//===----------------------------------------------------------------------===// + +#include <cstdint> +#include <mutex> +#ifdef __linux__ +#include <sys/platform/ppc.h> +#elif defined(__FreeBSD__) +#include <sys/types.h> +#include <sys/sysctl.h> + +#define __ppc_get_timebase __builtin_ppc_get_timebase + +uint64_t __ppc_get_timebase_freq (void) +{ +  uint64_t tb_freq = 0; +  size_t length = sizeof(tb_freq); +  sysctlbyname("kern.timecounter.tc.timebase.frequency", &tb_freq, &length, nullptr, 0); +  return tb_freq; +} +#endif + +#include "xray_defs.h" + +namespace __xray { + +ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT { +  CPU = 0; +  return __ppc_get_timebase(); +} + +inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { +  static std::mutex M; +  std::lock_guard<std::mutex> Guard(M); +  return __ppc_get_timebase_freq(); +} + +inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { +  return true; +} + +} // namespace __xray diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.cpp new file mode 100644 index 000000000000..3a28240e603c --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.cpp @@ -0,0 +1,411 @@ +//===-- xray_profile_collector.cpp -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the interface for the profileCollectorService. +// +//===----------------------------------------------------------------------===// +#include "xray_profile_collector.h" +#include "sanitizer_common/sanitizer_common.h" +#include "xray_allocator.h" +#include "xray_defs.h" +#include "xray_profiling_flags.h" +#include "xray_segmented_array.h" +#include <memory> +#include <pthread.h> +#include <utility> + +namespace __xray { +namespace profileCollectorService { + +namespace { + +SpinMutex GlobalMutex; +struct ThreadTrie { +  tid_t TId; +  alignas(FunctionCallTrie) std::byte TrieStorage[sizeof(FunctionCallTrie)]; +}; + +struct ProfileBuffer { +  void *Data; +  size_t Size; +}; + +// Current version of the profile format. +constexpr u64 XRayProfilingVersion = 0x20180424; + +// Identifier for XRay profiling files 'xrayprof' in hex. +constexpr u64 XRayMagicBytes = 0x7872617970726f66; + +struct XRayProfilingFileHeader { +  const u64 MagicBytes = XRayMagicBytes; +  const u64 Version = XRayProfilingVersion; +  u64 Timestamp = 0; // System time in nanoseconds. +  u64 PID = 0;       // Process ID. 
+}; + +struct BlockHeader { +  u32 BlockSize; +  u32 BlockNum; +  u64 ThreadId; +}; + +struct ThreadData { +  BufferQueue *BQ; +  FunctionCallTrie::Allocators::Buffers Buffers; +  FunctionCallTrie::Allocators Allocators; +  FunctionCallTrie FCT; +  tid_t TId; +}; + +using ThreadDataArray = Array<ThreadData>; +using ThreadDataAllocator = ThreadDataArray::AllocatorType; + +// We use a separate buffer queue for the backing store for the allocator used +// by the ThreadData array. This lets us host the buffers, allocators, and tries +// associated with a thread by moving the data into the array instead of +// attempting to copy the data to a separately backed set of tries. +alignas(BufferQueue) static std::byte BufferQueueStorage[sizeof(BufferQueue)]; +static BufferQueue *BQ = nullptr; +static BufferQueue::Buffer Buffer; +alignas(ThreadDataAllocator) static std::byte +    ThreadDataAllocatorStorage[sizeof(ThreadDataAllocator)]; +alignas(ThreadDataArray) static std::byte +    ThreadDataArrayStorage[sizeof(ThreadDataArray)]; + +static ThreadDataAllocator *TDAllocator = nullptr; +static ThreadDataArray *TDArray = nullptr; + +using ProfileBufferArray = Array<ProfileBuffer>; +using ProfileBufferArrayAllocator = typename ProfileBufferArray::AllocatorType; + +// These need to be global aligned storage to avoid dynamic initialization. We +// need these to be aligned to allow us to placement new objects into the +// storage, and have pointers to those objects be appropriately aligned. +alignas(ProfileBufferArray) static std::byte +    ProfileBuffersStorage[sizeof(ProfileBufferArray)]; +alignas(ProfileBufferArrayAllocator) static std::byte +    ProfileBufferArrayAllocatorStorage[sizeof(ProfileBufferArrayAllocator)]; + +static ProfileBufferArrayAllocator *ProfileBuffersAllocator = nullptr; +static ProfileBufferArray *ProfileBuffers = nullptr; + +// Use a global flag to determine whether the collector implementation has been +// initialized. +static atomic_uint8_t CollectorInitialized{0}; + +} // namespace + +void post(BufferQueue *Q, FunctionCallTrie &&T, +          FunctionCallTrie::Allocators &&A, +          FunctionCallTrie::Allocators::Buffers &&B, +          tid_t TId) XRAY_NEVER_INSTRUMENT { +  DCHECK_NE(Q, nullptr); + +  // Bail out early if the collector has not been initialized. +  if (!atomic_load(&CollectorInitialized, memory_order_acquire)) { +    T.~FunctionCallTrie(); +    A.~Allocators(); +    Q->releaseBuffer(B.NodeBuffer); +    Q->releaseBuffer(B.RootsBuffer); +    Q->releaseBuffer(B.ShadowStackBuffer); +    Q->releaseBuffer(B.NodeIdPairBuffer); +    B.~Buffers(); +    return; +  } + +  { +    SpinMutexLock Lock(&GlobalMutex); +    DCHECK_NE(TDAllocator, nullptr); +    DCHECK_NE(TDArray, nullptr); + +    if (TDArray->AppendEmplace(Q, std::move(B), std::move(A), std::move(T), +                               TId) == nullptr) { +      // If we fail to add the data to the array, we should destroy the objects +      // handed us. +      T.~FunctionCallTrie(); +      A.~Allocators(); +      Q->releaseBuffer(B.NodeBuffer); +      Q->releaseBuffer(B.RootsBuffer); +      Q->releaseBuffer(B.ShadowStackBuffer); +      Q->releaseBuffer(B.NodeIdPairBuffer); +      B.~Buffers(); +    } +  } +} + +// A PathArray represents the function id's representing a stack trace. In this +// context a path is almost always represented from the leaf function in a call +// stack to a root of the call trie. 
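+//
+// For example (illustrative ids only): a call chain f1() -> f2() -> f3()
+// with function ids 1, 2 and 3 yields, for the node of f3, the path
+// [3, 2, 1], since populateRecords() walks Parent links from the leaf up.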
+using PathArray = Array<int32_t>; + +struct ProfileRecord { +  using PathAllocator = typename PathArray::AllocatorType; + +  // The Path in this record is the function id's from the leaf to the root of +  // the function call stack as represented from a FunctionCallTrie. +  PathArray Path; +  const FunctionCallTrie::Node *Node; +}; + +namespace { + +using ProfileRecordArray = Array<ProfileRecord>; + +// Walk a depth-first traversal of each root of the FunctionCallTrie to generate +// the path(s) and the data associated with the path. +static void +populateRecords(ProfileRecordArray &PRs, ProfileRecord::PathAllocator &PA, +                const FunctionCallTrie &Trie) XRAY_NEVER_INSTRUMENT { +  using StackArray = Array<const FunctionCallTrie::Node *>; +  using StackAllocator = typename StackArray::AllocatorType; +  StackAllocator StackAlloc(profilingFlags()->stack_allocator_max); +  StackArray DFSStack(StackAlloc); +  for (const auto *R : Trie.getRoots()) { +    DFSStack.Append(R); +    while (!DFSStack.empty()) { +      auto *Node = DFSStack.back(); +      DFSStack.trim(1); +      if (Node == nullptr) +        continue; +      auto Record = PRs.AppendEmplace(PathArray{PA}, Node); +      if (Record == nullptr) +        return; +      DCHECK_NE(Record, nullptr); + +      // Traverse the Node's parents and as we're doing so, get the FIds in +      // the order they appear. +      for (auto N = Node; N != nullptr; N = N->Parent) +        Record->Path.Append(N->FId); +      DCHECK(!Record->Path.empty()); + +      for (const auto C : Node->Callees) +        DFSStack.Append(C.NodePtr); +    } +  } +} + +static void serializeRecords(ProfileBuffer *Buffer, const BlockHeader &Header, +                             const ProfileRecordArray &ProfileRecords) +    XRAY_NEVER_INSTRUMENT { +  auto NextPtr = static_cast<uint8_t *>( +                     internal_memcpy(Buffer->Data, &Header, sizeof(Header))) + +                 sizeof(Header); +  for (const auto &Record : ProfileRecords) { +    // List of IDs follow: +    for (const auto FId : Record.Path) +      NextPtr = +          static_cast<uint8_t *>(internal_memcpy(NextPtr, &FId, sizeof(FId))) + +          sizeof(FId); + +    // Add the sentinel here. +    constexpr int32_t SentinelFId = 0; +    NextPtr = static_cast<uint8_t *>( +                  internal_memset(NextPtr, SentinelFId, sizeof(SentinelFId))) + +              sizeof(SentinelFId); + +    // Add the node data here. +    NextPtr = +        static_cast<uint8_t *>(internal_memcpy( +            NextPtr, &Record.Node->CallCount, sizeof(Record.Node->CallCount))) + +        sizeof(Record.Node->CallCount); +    NextPtr = static_cast<uint8_t *>( +                  internal_memcpy(NextPtr, &Record.Node->CumulativeLocalTime, +                                  sizeof(Record.Node->CumulativeLocalTime))) + +              sizeof(Record.Node->CumulativeLocalTime); +  } + +  DCHECK_EQ(NextPtr - static_cast<uint8_t *>(Buffer->Data), Buffer->Size); +} + +} // namespace + +void serialize() XRAY_NEVER_INSTRUMENT { +  if (!atomic_load(&CollectorInitialized, memory_order_acquire)) +    return; + +  SpinMutexLock Lock(&GlobalMutex); + +  // Clear out the global ProfileBuffers, if it's not empty. +  for (auto &B : *ProfileBuffers) +    deallocateBuffer(reinterpret_cast<unsigned char *>(B.Data), B.Size); +  ProfileBuffers->trim(ProfileBuffers->size()); + +  DCHECK_NE(TDArray, nullptr); +  if (TDArray->empty()) +    return; + +  // Then repopulate the global ProfileBuffers. 
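+  // Per the size accounting below, a record whose path holds three function
+  // ids occupies 3 * 4 + 4 (sentinel) + 8 (call count) + 8 (local time) = 32
+  // bytes, so a thread with one such record serializes into a block of
+  // sizeof(BlockHeader) + 32 = 48 bytes.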
+  u32 I = 0;
+  auto MaxSize = profilingFlags()->global_allocator_max;
+  auto ProfileArena = allocateBuffer(MaxSize);
+  if (ProfileArena == nullptr)
+    return;
+
+  auto ProfileArenaCleanup = at_scope_exit(
+      [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(ProfileArena, MaxSize); });
+
+  auto PathArena = allocateBuffer(profilingFlags()->global_allocator_max);
+  if (PathArena == nullptr)
+    return;
+
+  auto PathArenaCleanup = at_scope_exit(
+      [&]() XRAY_NEVER_INSTRUMENT { deallocateBuffer(PathArena, MaxSize); });
+
+  for (const auto &ThreadTrie : *TDArray) {
+    using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType;
+    ProfileRecordAllocator PRAlloc(ProfileArena,
+                                   profilingFlags()->global_allocator_max);
+    ProfileRecord::PathAllocator PathAlloc(
+        PathArena, profilingFlags()->global_allocator_max);
+    ProfileRecordArray ProfileRecords(PRAlloc);
+
+    // First, we want to compute the amount of space we're going to need. We'll
+    // use a local allocator and an __xray::Array<...> to store the intermediary
+    // data, then compute the size as we're going along. Then we'll allocate the
+    // contiguous space to contain the thread buffer data.
+    if (ThreadTrie.FCT.getRoots().empty())
+      continue;
+
+    populateRecords(ProfileRecords, PathAlloc, ThreadTrie.FCT);
+    DCHECK(!ThreadTrie.FCT.getRoots().empty());
+    DCHECK(!ProfileRecords.empty());
+
+    // Go through each record, to compute the sizes.
+    //
+    // header size = block size (4 bytes)
+    //   + block number (4 bytes)
+    //   + thread id (8 bytes)
+    // record size = path ids (4 bytes * number of ids + sentinel 4 bytes)
+    //   + call count (8 bytes)
+    //   + local time (8 bytes)
+    //   (= 20 bytes + 4 bytes per path id; no end-of-record marker is written)
+    u32 CumulativeSizes = 0;
+    for (const auto &Record : ProfileRecords)
+      CumulativeSizes += 20 + (4 * Record.Path.size());
+
+    BlockHeader Header{16 + CumulativeSizes, I++, ThreadTrie.TId};
+    auto B = ProfileBuffers->Append({});
+    B->Size = sizeof(Header) + CumulativeSizes;
+    B->Data = allocateBuffer(B->Size);
+    DCHECK_NE(B->Data, nullptr);
+    serializeRecords(B, Header, ProfileRecords);
+  }
+}
+
+void reset() XRAY_NEVER_INSTRUMENT {
+  atomic_store(&CollectorInitialized, 0, memory_order_release);
+  SpinMutexLock Lock(&GlobalMutex);
+
+  if (ProfileBuffers != nullptr) {
+    // Clear out the profile buffers that have been serialized.
+    for (auto &B : *ProfileBuffers)
+      deallocateBuffer(reinterpret_cast<uint8_t *>(B.Data), B.Size);
+    ProfileBuffers->trim(ProfileBuffers->size());
+    ProfileBuffers = nullptr;
+  }
+
+  if (TDArray != nullptr) {
+    // Release the resources as required.
+    for (auto &TD : *TDArray) {
+      TD.BQ->releaseBuffer(TD.Buffers.NodeBuffer);
+      TD.BQ->releaseBuffer(TD.Buffers.RootsBuffer);
+      TD.BQ->releaseBuffer(TD.Buffers.ShadowStackBuffer);
+      TD.BQ->releaseBuffer(TD.Buffers.NodeIdPairBuffer);
+    }
+    // We don't bother destroying the array here because we've already
+    // potentially freed the backing store for the array. Instead we're going to
+    // reset the pointer to nullptr, and re-use the storage later instead
+    // (placement-new'ing into the storage as-is).
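+    // The storage idiom used throughout this file is a static aligned byte
+    // array that objects are placement-new'ed into on demand, roughly:
+    //
+    //   alignas(T) static std::byte Storage[sizeof(T)];
+    //   T *Ptr = new (&Storage) T(...);
+    //
+    // so re-initialization needs no dynamic allocation at all.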
+    TDArray = nullptr; +  } + +  if (TDAllocator != nullptr) { +    TDAllocator->~Allocator(); +    TDAllocator = nullptr; +  } + +  if (Buffer.Data != nullptr) { +    BQ->releaseBuffer(Buffer); +  } + +  if (BQ == nullptr) { +    bool Success = false; +    new (&BufferQueueStorage) +        BufferQueue(profilingFlags()->global_allocator_max, 1, Success); +    if (!Success) +      return; +    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage); +  } else { +    BQ->finalize(); + +    if (BQ->init(profilingFlags()->global_allocator_max, 1) != +        BufferQueue::ErrorCode::Ok) +      return; +  } + +  if (BQ->getBuffer(Buffer) != BufferQueue::ErrorCode::Ok) +    return; + +  new (&ProfileBufferArrayAllocatorStorage) +      ProfileBufferArrayAllocator(profilingFlags()->global_allocator_max); +  ProfileBuffersAllocator = reinterpret_cast<ProfileBufferArrayAllocator *>( +      &ProfileBufferArrayAllocatorStorage); + +  new (&ProfileBuffersStorage) ProfileBufferArray(*ProfileBuffersAllocator); +  ProfileBuffers = +      reinterpret_cast<ProfileBufferArray *>(&ProfileBuffersStorage); + +  new (&ThreadDataAllocatorStorage) +      ThreadDataAllocator(Buffer.Data, Buffer.Size); +  TDAllocator = +      reinterpret_cast<ThreadDataAllocator *>(&ThreadDataAllocatorStorage); +  new (&ThreadDataArrayStorage) ThreadDataArray(*TDAllocator); +  TDArray = reinterpret_cast<ThreadDataArray *>(&ThreadDataArrayStorage); + +  atomic_store(&CollectorInitialized, 1, memory_order_release); +} + +XRayBuffer nextBuffer(XRayBuffer B) XRAY_NEVER_INSTRUMENT { +  SpinMutexLock Lock(&GlobalMutex); + +  if (ProfileBuffers == nullptr || ProfileBuffers->size() == 0) +    return {nullptr, 0}; + +  static pthread_once_t Once = PTHREAD_ONCE_INIT; +  alignas(XRayProfilingFileHeader) static std::byte +      FileHeaderStorage[sizeof(XRayProfilingFileHeader)]; +  pthread_once( +      &Once, +[]() XRAY_NEVER_INSTRUMENT { +        new (&FileHeaderStorage) XRayProfilingFileHeader{}; +      }); + +  if (UNLIKELY(B.Data == nullptr)) { +    // The first buffer should always contain the file header information. +    auto &FileHeader = +        *reinterpret_cast<XRayProfilingFileHeader *>(&FileHeaderStorage); +    FileHeader.Timestamp = NanoTime(); +    FileHeader.PID = internal_getpid(); +    return {&FileHeaderStorage, sizeof(XRayProfilingFileHeader)}; +  } + +  if (UNLIKELY(B.Data == &FileHeaderStorage)) +    return {(*ProfileBuffers)[0].Data, (*ProfileBuffers)[0].Size}; + +  BlockHeader Header; +  internal_memcpy(&Header, B.Data, sizeof(BlockHeader)); +  auto NextBlock = Header.BlockNum + 1; +  if (NextBlock < ProfileBuffers->size()) +    return {(*ProfileBuffers)[NextBlock].Data, +            (*ProfileBuffers)[NextBlock].Size}; +  return {nullptr, 0}; +} + +} // namespace profileCollectorService +} // namespace __xray diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.h new file mode 100644 index 000000000000..6e0f252714ba --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_profile_collector.h @@ -0,0 +1,73 @@ +//===-- xray_profile_collector.h -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This file defines the interface for a data collection service, for XRay
+// profiling. What we implement here is an in-process service where
+// FunctionCallTrie instances can be handed off by threads, to be
+// consolidated/collected.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_PROFILE_COLLECTOR_H
+#define XRAY_XRAY_PROFILE_COLLECTOR_H
+
+#include "xray_function_call_trie.h"
+
+#include "xray/xray_log_interface.h"
+
+namespace __xray {
+
+/// The ProfileCollectorService implements a centralised mechanism for
+/// collecting FunctionCallTrie instances, indexed by thread ID. On demand, the
+/// ProfileCollectorService can be queried for the most recent state of the
+/// data, in a form that allows traversal.
+namespace profileCollectorService {
+
+/// Posts the FunctionCallTrie associated with a specific Thread ID. This
+/// will:
+///
+/// - Move the collection of FunctionCallTrie, Allocators, and Buffers
+///   associated with a thread's data into the queue, taking ownership of
+///   the memory associated with that thread and managing it exclusively.
+///
+void post(BufferQueue *Q, FunctionCallTrie &&T,
+          FunctionCallTrie::Allocators &&A,
+          FunctionCallTrie::Allocators::Buffers &&B, tid_t TId);
+
+/// The serialize function will process all FunctionCallTrie instances in
+/// memory, and turn those into specifically formatted blocks, each describing
+/// the function call trie's contents in a compact form. In memory, this looks
+/// like the following layout:
+///
+///   - block size (32 bits)
+///   - block number (32 bits)
+///   - thread id (64 bits)
+///   - list of records:
+///     - function ids in leaf to root order, terminated by
+///       0 (32 bits per function id)
+///     - call count (64 bit)
+///     - cumulative local time (64 bit)
+///     - (records are packed back to back, with no extra delimiter)
+///
+void serialize();
+
+/// The reset function will clear out any internal memory held by the
+/// service. The intent is for the resetting to be done in calls to the
+/// initialization routine, or explicitly through the flush log API.
+void reset();
+
+/// The nextBuffer function implements the iterator functionality provided
+/// in the XRay API.
+XRayBuffer nextBuffer(XRayBuffer B);
+
+} // namespace profileCollectorService
+
+} // namespace __xray
+
+#endif // XRAY_XRAY_PROFILE_COLLECTOR_H
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling.cpp
new file mode 100644
index 000000000000..e9ac2fdd8aad
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling.cpp
@@ -0,0 +1,516 @@
+//===-- xray_profiling.cpp --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This is the implementation of a profiling handler.
+// +//===----------------------------------------------------------------------===// +#include <memory> +#include <time.h> + +#include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_flags.h" +#include "xray/xray_interface.h" +#include "xray/xray_log_interface.h" +#include "xray_buffer_queue.h" +#include "xray_flags.h" +#include "xray_profile_collector.h" +#include "xray_profiling_flags.h" +#include "xray_recursion_guard.h" +#include "xray_tsc.h" +#include "xray_utils.h" +#include <pthread.h> + +namespace __xray { + +namespace { + +static atomic_sint32_t ProfilerLogFlushStatus = { +    XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; + +static atomic_sint32_t ProfilerLogStatus = { +    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED}; + +static SpinMutex ProfilerOptionsMutex; + +struct ProfilingData { +  atomic_uintptr_t Allocators; +  atomic_uintptr_t FCT; +}; + +static pthread_key_t ProfilingKey; + +// We use a global buffer queue, which gets initialized once at initialisation +// time, and gets reset when profiling is "done". +alignas(BufferQueue) static std::byte BufferQueueStorage[sizeof(BufferQueue)]; +static BufferQueue *BQ = nullptr; + +thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers; +alignas(FunctionCallTrie::Allocators) thread_local std::byte +    AllocatorsStorage[sizeof(FunctionCallTrie::Allocators)]; +alignas(FunctionCallTrie) thread_local std::byte +    FunctionCallTrieStorage[sizeof(FunctionCallTrie)]; +thread_local ProfilingData TLD{{0}, {0}}; +thread_local atomic_uint8_t ReentranceGuard{0}; + +// We use a separate guard for ensuring that for this thread, if we're already +// cleaning up, that any signal handlers don't attempt to cleanup nor +// initialise. +thread_local atomic_uint8_t TLDInitGuard{0}; + +// We also use a separate latch to signal that the thread is exiting, and +// non-essential work should be ignored (things like recording events, etc.). +thread_local atomic_uint8_t ThreadExitingLatch{0}; + +static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT { +  thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT { +    pthread_setspecific(ProfilingKey, &TLD); +    return false; +  }(); +  (void)ThreadOnce; + +  RecursionGuard TLDInit(TLDInitGuard); +  if (!TLDInit) +    return nullptr; + +  if (atomic_load_relaxed(&ThreadExitingLatch)) +    return nullptr; + +  uptr Allocators = 0; +  if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1, +                                     memory_order_acq_rel)) { +    bool Success = false; +    auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { +      if (!Success) +        atomic_store(&TLD.Allocators, 0, memory_order_release); +    }); + +    // Acquire a set of buffers for this thread. 
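+    // Each acquisition below is paired with an at_scope_exit undo that only
+    // fires while Success is still false, so any failed getBuffer() call
+    // releases whatever was acquired before it. The recurring shape is
+    // roughly:
+    //
+    //   if (BQ->getBuffer(Buf) != BufferQueue::ErrorCode::Ok)
+    //     return nullptr;
+    //   auto Undo = at_scope_exit([&] { if (!Success) BQ->releaseBuffer(Buf); });
+    //
+    // with Success flipped to true only once all four buffers are held.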
+    if (BQ == nullptr) +      return nullptr; + +    if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok) +      return nullptr; +    auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { +      if (!Success) +        BQ->releaseBuffer(ThreadBuffers.NodeBuffer); +    }); + +    if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok) +      return nullptr; +    auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { +      if (!Success) +        BQ->releaseBuffer(ThreadBuffers.RootsBuffer); +    }); + +    if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) != +        BufferQueue::ErrorCode::Ok) +      return nullptr; +    auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT { +      if (!Success) +        BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer); +    }); + +    if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) != +        BufferQueue::ErrorCode::Ok) +      return nullptr; + +    Success = true; +    new (&AllocatorsStorage) FunctionCallTrie::Allocators( +        FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers)); +    Allocators = reinterpret_cast<uptr>( +        reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)); +    atomic_store(&TLD.Allocators, Allocators, memory_order_release); +  } + +  if (Allocators == 1) +    return nullptr; + +  uptr FCT = 0; +  if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) { +    new (&FunctionCallTrieStorage) +        FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>( +            atomic_load_relaxed(&TLD.Allocators))); +    FCT = reinterpret_cast<uptr>( +        reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)); +    atomic_store(&TLD.FCT, FCT, memory_order_release); +  } + +  if (FCT == 1) +    return nullptr; + +  return &TLD; +} + +static void cleanupTLD() XRAY_NEVER_INSTRUMENT { +  auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel); +  if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>( +                 &FunctionCallTrieStorage))) +    reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie(); + +  auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel); +  if (Allocators == +      reinterpret_cast<uptr>( +          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage))) +    reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators(); +} + +static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT { +  RecursionGuard TLDInit(TLDInitGuard); +  if (!TLDInit) +    return; + +  uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel); +  if (P != reinterpret_cast<uptr>( +               reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage))) +    return; + +  auto FCT = reinterpret_cast<FunctionCallTrie *>(P); +  DCHECK_NE(FCT, nullptr); + +  uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel); +  if (A != +      reinterpret_cast<uptr>( +          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage))) +    return; + +  auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A); +  DCHECK_NE(Allocators, nullptr); + +  // Always move the data into the profile collector. +  profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators), +                                std::move(ThreadBuffers), GetTid()); + +  // Re-initialize the ThreadBuffers object to a known "default" state. 
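+  // (The buffers were moved into the collector through post() above, so the
+  // reassignment leaves no stale handles behind for later exit paths to
+  // observe.)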
+  ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};
+}
+
+} // namespace
+
+const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
+#ifdef XRAY_PROFILER_DEFAULT_OPTIONS
+  return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS);
+#else
+  return "";
+#endif
+}
+
+XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
+  if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
+      XRayLogInitStatus::XRAY_LOG_FINALIZED) {
+    if (Verbosity())
+      Report("Not flushing profiles, profiling has not been finalized.\n");
+    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+  }
+
+  RecursionGuard SignalGuard(ReentranceGuard);
+  if (!SignalGuard) {
+    if (Verbosity())
+      Report("Cannot finalize properly inside a signal handler!\n");
+    atomic_store(&ProfilerLogFlushStatus,
+                 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
+                 memory_order_release);
+    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
+  }
+
+  s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
+                                 XRayLogFlushStatus::XRAY_LOG_FLUSHING,
+                                 memory_order_acq_rel);
+  if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
+    if (Verbosity())
+      Report("Not flushing profiles, implementation still flushing.\n");
+    return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
+  }
+
+  // At this point, we'll create the file that will contain the profile, but
+  // only if the options say so.
+  if (!profilingFlags()->no_flush) {
+    // First check whether we have data in the profile collector service
+    // before we try and write anything down.
+    XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0});
+    if (B.Data == nullptr) {
+      if (Verbosity())
+        Report("profiling: No data to flush.\n");
+    } else {
+      LogWriter *LW = LogWriter::Open();
+      if (LW == nullptr) {
+        if (Verbosity())
+          Report("profiling: Failed to flush to file, dropping data.\n");
+      } else {
+        // Now for each of the buffers, write out the profile data as we would
+        // see it in memory, verbatim.
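+        // The iterator protocol here: seeding nextBuffer() with {nullptr, 0}
+        // yielded the file header above; each subsequent call yields the next
+        // per-thread block, and a {nullptr, 0} result marks the end. A
+        // minimal consumer is just the loop below.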
+        while (B.Data != nullptr && B.Size != 0) { +          LW->WriteAll(reinterpret_cast<const char *>(B.Data), +                       reinterpret_cast<const char *>(B.Data) + B.Size); +          B = profileCollectorService::nextBuffer(B); +        } +        LogWriter::Close(LW); +      } +    } +  } + +  profileCollectorService::reset(); + +  atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, +               memory_order_release); +  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, +               memory_order_release); + +  return XRayLogFlushStatus::XRAY_LOG_FLUSHED; +} + +void profilingHandleArg0(int32_t FuncId, +                         XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { +  unsigned char CPU; +  auto TSC = readTSC(CPU); +  RecursionGuard G(ReentranceGuard); +  if (!G) +    return; + +  auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire); +  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED || +               Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING)) +    return; + +  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED || +               Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) { +    postCurrentThreadFCT(TLD); +    return; +  } + +  auto T = getThreadLocalData(); +  if (T == nullptr) +    return; + +  auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT)); +  switch (Entry) { +  case XRayEntryType::ENTRY: +  case XRayEntryType::LOG_ARGS_ENTRY: +    FCT->enterFunction(FuncId, TSC, CPU); +    break; +  case XRayEntryType::EXIT: +  case XRayEntryType::TAIL: +    FCT->exitFunction(FuncId, TSC, CPU); +    break; +  default: +    // FIXME: Handle bugs. +    break; +  } +} + +void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry, +                         uint64_t) XRAY_NEVER_INSTRUMENT { +  return profilingHandleArg0(FuncId, Entry); +} + +XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT { +  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED; +  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus, +                                      XRayLogInitStatus::XRAY_LOG_FINALIZING, +                                      memory_order_release)) { +    if (Verbosity()) +      Report("Cannot finalize profile, the profiling is not initialized.\n"); +    return static_cast<XRayLogInitStatus>(CurrentStatus); +  } + +  // Mark then finalize the current generation of buffers. This allows us to let +  // the threads currently holding onto new buffers still use them, but let the +  // last reference do the memory cleanup. +  DCHECK_NE(BQ, nullptr); +  BQ->finalize(); + +  // Wait a grace period to allow threads to see that we're finalizing. +  SleepForMillis(profilingFlags()->grace_period_ms); + +  // If we for some reason are entering this function from an instrumented +  // handler, we bail out. +  RecursionGuard G(ReentranceGuard); +  if (!G) +    return static_cast<XRayLogInitStatus>(CurrentStatus); + +  // Post the current thread's data if we have any. +  postCurrentThreadFCT(TLD); + +  // Then we force serialize the log data. 
+  profileCollectorService::serialize(); + +  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED, +               memory_order_release); +  return XRayLogInitStatus::XRAY_LOG_FINALIZED; +} + +XRayLogInitStatus +profilingLoggingInit(size_t, size_t, void *Options, +                     size_t OptionsSize) XRAY_NEVER_INSTRUMENT { +  RecursionGuard G(ReentranceGuard); +  if (!G) +    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + +  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; +  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus, +                                      XRayLogInitStatus::XRAY_LOG_INITIALIZING, +                                      memory_order_acq_rel)) { +    if (Verbosity()) +      Report("Cannot initialize already initialised profiling " +             "implementation.\n"); +    return static_cast<XRayLogInitStatus>(CurrentStatus); +  } + +  { +    SpinMutexLock Lock(&ProfilerOptionsMutex); +    FlagParser ConfigParser; +    ProfilerFlags Flags; +    Flags.setDefaults(); +    registerProfilerFlags(&ConfigParser, &Flags); +    ConfigParser.ParseString(profilingCompilerDefinedFlags()); +    const char *Env = GetEnv("XRAY_PROFILING_OPTIONS"); +    if (Env == nullptr) +      Env = ""; +    ConfigParser.ParseString(Env); + +    // Then parse the configuration string provided. +    ConfigParser.ParseString(static_cast<const char *>(Options)); +    if (Verbosity()) +      ReportUnrecognizedFlags(); +    *profilingFlags() = Flags; +  } + +  // We need to reset the profile data collection implementation now. +  profileCollectorService::reset(); + +  // Then also reset the buffer queue implementation. +  if (BQ == nullptr) { +    bool Success = false; +    new (&BufferQueueStorage) +        BufferQueue(profilingFlags()->per_thread_allocator_max, +                    profilingFlags()->buffers_max, Success); +    if (!Success) { +      if (Verbosity()) +        Report("Failed to initialize preallocated memory buffers!"); +      atomic_store(&ProfilerLogStatus, +                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, +                   memory_order_release); +      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; +    } + +    // If we've succeeded, set the global pointer to the initialised storage. +    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage); +  } else { +    BQ->finalize(); +    auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max, +                               profilingFlags()->buffers_max); + +    if (InitStatus != BufferQueue::ErrorCode::Ok) { +      if (Verbosity()) +        Report("Failed to initialize preallocated memory buffers; error: %s", +               BufferQueue::getErrorString(InitStatus)); +      atomic_store(&ProfilerLogStatus, +                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, +                   memory_order_release); +      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; +    } + +    DCHECK(!BQ->finalizing()); +  } + +  // We need to set up the exit handlers. 
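+  // Two handlers are installed exactly once below: a pthread_key destructor
+  // that posts the exiting thread's trie to the collector, and an Atexit
+  // hook that finalizes and flushes the profile when the process exits.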
+  static pthread_once_t Once = PTHREAD_ONCE_INIT; +  pthread_once( +      &Once, +[] { +        pthread_key_create( +            &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT { +              if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel)) +                return; + +              if (P == nullptr) +                return; + +              auto T = reinterpret_cast<ProfilingData *>(P); +              if (atomic_load_relaxed(&T->Allocators) == 0) +                return; + +              { +                // If we're somehow executing this while inside a +                // non-reentrant-friendly context, we skip attempting to post +                // the current thread's data. +                RecursionGuard G(ReentranceGuard); +                if (!G) +                  return; + +                postCurrentThreadFCT(*T); +              } +            }); + +        // We also need to set up an exit handler, so that we can get the +        // profile information at exit time. We use the C API to do this, to not +        // rely on C++ ABI functions for registering exit handlers. +        Atexit(+[]() XRAY_NEVER_INSTRUMENT { +          if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel)) +            return; + +          auto Cleanup = +              at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); }); + +          // Finalize and flush. +          if (profilingFinalize() != XRAY_LOG_FINALIZED || +              profilingFlush() != XRAY_LOG_FLUSHED) +            return; + +          if (Verbosity()) +            Report("XRay Profile flushed at exit."); +        }); +      }); + +  __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer); +  __xray_set_handler(profilingHandleArg0); +  __xray_set_handler_arg1(profilingHandleArg1); + +  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED, +               memory_order_release); +  if (Verbosity()) +    Report("XRay Profiling init successful.\n"); + +  return XRayLogInitStatus::XRAY_LOG_INITIALIZED; +} + +bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT { +  // Set up the flag defaults from the static defaults and the +  // compiler-provided defaults. 
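+  // Note that profilingLoggingInit() later layers configuration in order:
+  // these same defaults, the compiler-provided XRAY_PROFILER_DEFAULT_OPTIONS,
+  // the XRAY_PROFILING_OPTIONS environment variable, and finally the Options
+  // string passed by the caller. Assuming the usual sanitizer flag syntax,
+  // an invocation would look something like:
+  //
+  //   XRAY_PROFILING_OPTIONS="no_flush=true:buffers_max=64" ./a.out
+  //
+  // which pre-allocates 64 buffers and skips writing profile files out.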
+  {
+    SpinMutexLock Lock(&ProfilerOptionsMutex);
+    auto *F = profilingFlags();
+    F->setDefaults();
+    FlagParser ProfilingParser;
+    registerProfilerFlags(&ProfilingParser, F);
+    ProfilingParser.ParseString(profilingCompilerDefinedFlags());
+  }
+
+  XRayLogImpl Impl{
+      profilingLoggingInit,
+      profilingFinalize,
+      profilingHandleArg0,
+      profilingFlush,
+  };
+  auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
+  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
+    if (Verbosity())
+      Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
+             "%d\n",
+             RegistrationResult);
+    return false;
+  }
+
+  if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
+    __xray_log_select_mode("xray-profiling");
+  return true;
+}
+
+} // namespace __xray
+
+static auto UNUSED Unused = __xray::profilingDynamicInitializer();
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.cpp
new file mode 100644
index 000000000000..0e89b7420f8c
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.cpp
@@ -0,0 +1,39 @@
+//===-- xray_profiling_flags.cpp -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay profiling runtime flags.
+//===----------------------------------------------------------------------===//
+
+#include "xray_profiling_flags.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "xray_defs.h"
+
+namespace __xray {
+
+// Storage for the profiling flags.
+ProfilerFlags xray_profiling_flags_dont_use_directly;
+
+void ProfilerFlags::setDefaults() XRAY_NEVER_INSTRUMENT {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
+#include "xray_profiling_flags.inc"
+#undef XRAY_FLAG
+}
+
+void registerProfilerFlags(FlagParser *P,
+                           ProfilerFlags *F) XRAY_NEVER_INSTRUMENT {
+#define XRAY_FLAG(Type, Name, DefaultValue, Description)                       \
+  RegisterFlag(P, #Name, Description, &F->Name);
+#include "xray_profiling_flags.inc"
+#undef XRAY_FLAG
+}
+
+} // namespace __xray
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.h
new file mode 100644
index 000000000000..d67f240adc88
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.h
@@ -0,0 +1,38 @@
+//===-- xray_profiling_flags.h ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay profiling runtime flags.
+//===----------------------------------------------------------------------===// + +#ifndef XRAY_PROFILER_FLAGS_H +#define XRAY_PROFILER_FLAGS_H + +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_internal_defs.h" + +namespace __xray { + +struct ProfilerFlags { +#define XRAY_FLAG(Type, Name, DefaultValue, Description) Type Name; +#include "xray_profiling_flags.inc" +#undef XRAY_FLAG + +  void setDefaults(); +}; + +extern ProfilerFlags xray_profiling_flags_dont_use_directly; +inline ProfilerFlags *profilingFlags() { +  return &xray_profiling_flags_dont_use_directly; +} +void registerProfilerFlags(FlagParser *P, ProfilerFlags *F); + +} // namespace __xray + +#endif // XRAY_PROFILER_FLAGS_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.inc b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.inc new file mode 100644 index 000000000000..4f6138872af7 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling_flags.inc @@ -0,0 +1,31 @@ +//===-- xray_profiling_flags.inc --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// XRay profiling runtime flags. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_FLAG +#error "Define XRAY_FLAG prior to including this file!" +#endif + +XRAY_FLAG(uptr, per_thread_allocator_max, 16384, +          "Maximum size of any single per-thread allocator.") +XRAY_FLAG(uptr, global_allocator_max, 2 << 24, +          "Maximum size of the global allocator for profile storage.") +XRAY_FLAG(uptr, stack_allocator_max, 2 << 20, +          "Maximum size of the traversal stack allocator.") +XRAY_FLAG(int, grace_period_ms, 1, +          "Profile collection will wait this much time in milliseconds before " +          "resetting the global state. This gives a chance to threads to " +          "notice that the profiler has been finalized and clean up.") +XRAY_FLAG(bool, no_flush, false, +          "Set to true if we want the profiling implementation to not write " +          "out files.") +XRAY_FLAG(int, buffers_max, 128, +          "The number of buffers to pre-allocate used by the profiling " +          "implementation.") diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_recursion_guard.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_recursion_guard.h new file mode 100644 index 000000000000..3b6158a2d36c --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_recursion_guard.h @@ -0,0 +1,56 @@ +//===-- xray_recursion_guard.h ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. 
+// +//===----------------------------------------------------------------------===// +#ifndef XRAY_XRAY_RECURSION_GUARD_H +#define XRAY_XRAY_RECURSION_GUARD_H + +#include "sanitizer_common/sanitizer_atomic.h" + +namespace __xray { + +/// The RecursionGuard is useful for guarding against signal handlers which are +/// also potentially calling XRay-instrumented functions. To use the +/// RecursionGuard, you'll typically need a thread_local atomic_uint8_t: +/// +///   thread_local atomic_uint8_t Guard{0}; +/// +///   // In a handler function: +///   void handleArg0(int32_t F, XRayEntryType T) { +///     RecursionGuard G(Guard); +///     if (!G) +///       return;  // Failed to acquire the guard. +///     ... +///   } +/// +class RecursionGuard { +  atomic_uint8_t &Running; +  const bool Valid; + +public: +  explicit inline RecursionGuard(atomic_uint8_t &R) +      : Running(R), Valid(!atomic_exchange(&R, 1, memory_order_acq_rel)) {} + +  inline RecursionGuard(const RecursionGuard &) = delete; +  inline RecursionGuard(RecursionGuard &&) = delete; +  inline RecursionGuard &operator=(const RecursionGuard &) = delete; +  inline RecursionGuard &operator=(RecursionGuard &&) = delete; + +  explicit inline operator bool() const { return Valid; } + +  inline ~RecursionGuard() noexcept { +    if (Valid) +      atomic_store(&Running, 0, memory_order_release); +  } +}; + +} // namespace __xray + +#endif // XRAY_XRAY_RECURSION_GUARD_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_segmented_array.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_segmented_array.h new file mode 100644 index 000000000000..3ab174bcbe18 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_segmented_array.h @@ -0,0 +1,649 @@ +//===-- xray_segmented_array.h ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Defines the implementation of a segmented array, with fixed-size segments +// backing the segments. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_SEGMENTED_ARRAY_H +#define XRAY_SEGMENTED_ARRAY_H + +#include "sanitizer_common/sanitizer_allocator.h" +#include "xray_allocator.h" +#include "xray_utils.h" +#include <cassert> +#include <type_traits> +#include <utility> + +namespace __xray { + +/// The Array type provides an interface similar to std::vector<...> but does +/// not shrink in size. Once constructed, elements can be appended but cannot be +/// removed. The implementation is heavily dependent on the contract provided by +/// the Allocator type, in that all memory will be released when the Allocator +/// is destroyed. When an Array is destroyed, it will destroy elements in the +/// backing store but will not free the memory. +template <class T> class Array { +  struct Segment { +    Segment *Prev; +    Segment *Next; +    char Data[1]; +  }; + +public: +  // Each segment of the array will be laid out with the following assumptions: +  // +  //   - Each segment will be on a cache-line address boundary (kCacheLineSize +  //     aligned). +  // +  //   - The elements will be accessed through an aligned pointer, dependent on +  //     the alignment of T. 
+  // +  //   - Each element is at least two-pointers worth from the beginning of the +  //     Segment, aligned properly, and the rest of the elements are accessed +  //     through appropriate alignment. +  // +  // We then compute the size of the segment to follow this logic: +  // +  //   - Compute the number of elements that can fit within +  //     kCacheLineSize-multiple segments, minus the size of two pointers. +  // +  //   - Request cacheline-multiple sized elements from the allocator. +  static constexpr uint64_t AlignedElementStorageSize = sizeof(T); + +  static constexpr uint64_t SegmentControlBlockSize = sizeof(Segment *) * 2; + +  static constexpr uint64_t SegmentSize = nearest_boundary( +      SegmentControlBlockSize + next_pow2(sizeof(T)), kCacheLineSize); + +  using AllocatorType = Allocator<SegmentSize>; + +  static constexpr uint64_t ElementsPerSegment = +      (SegmentSize - SegmentControlBlockSize) / next_pow2(sizeof(T)); + +  static_assert(ElementsPerSegment > 0, +                "Must have at least 1 element per segment."); + +  static Segment SentinelSegment; + +  using size_type = uint64_t; + +private: +  // This Iterator models a BidirectionalIterator. +  template <class U> class Iterator { +    Segment *S = &SentinelSegment; +    uint64_t Offset = 0; +    uint64_t Size = 0; + +  public: +    Iterator(Segment *IS, uint64_t Off, uint64_t S) XRAY_NEVER_INSTRUMENT +        : S(IS), +          Offset(Off), +          Size(S) {} +    Iterator(const Iterator &) NOEXCEPT XRAY_NEVER_INSTRUMENT = default; +    Iterator() NOEXCEPT XRAY_NEVER_INSTRUMENT = default; +    Iterator(Iterator &&) NOEXCEPT XRAY_NEVER_INSTRUMENT = default; +    Iterator &operator=(const Iterator &) XRAY_NEVER_INSTRUMENT = default; +    Iterator &operator=(Iterator &&) XRAY_NEVER_INSTRUMENT = default; +    ~Iterator() XRAY_NEVER_INSTRUMENT = default; + +    Iterator &operator++() XRAY_NEVER_INSTRUMENT { +      if (++Offset % ElementsPerSegment || Offset == Size) +        return *this; + +      // At this point, we know that Offset % N == 0, so we must advance the +      // segment pointer. 
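+      // (For instance, with ElementsPerSegment == 4 and Size == 8, stepping
+      // from Offset 3 to 4 moves to the next segment, while stepping from 7
+      // to 8 hits Offset == Size and stays put, keeping the end iterator
+      // anchored on the tail segment.)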
+      DCHECK_EQ(Offset % ElementsPerSegment, 0); +      DCHECK_NE(Offset, Size); +      DCHECK_NE(S, &SentinelSegment); +      DCHECK_NE(S->Next, &SentinelSegment); +      S = S->Next; +      DCHECK_NE(S, &SentinelSegment); +      return *this; +    } + +    Iterator &operator--() XRAY_NEVER_INSTRUMENT { +      DCHECK_NE(S, &SentinelSegment); +      DCHECK_GT(Offset, 0); + +      auto PreviousOffset = Offset--; +      if (PreviousOffset != Size && PreviousOffset % ElementsPerSegment == 0) { +        DCHECK_NE(S->Prev, &SentinelSegment); +        S = S->Prev; +      } + +      return *this; +    } + +    Iterator operator++(int) XRAY_NEVER_INSTRUMENT { +      Iterator Copy(*this); +      ++(*this); +      return Copy; +    } + +    Iterator operator--(int) XRAY_NEVER_INSTRUMENT { +      Iterator Copy(*this); +      --(*this); +      return Copy; +    } + +    template <class V, class W> +    friend bool operator==(const Iterator<V> &L, +                           const Iterator<W> &R) XRAY_NEVER_INSTRUMENT { +      return L.S == R.S && L.Offset == R.Offset; +    } + +    template <class V, class W> +    friend bool operator!=(const Iterator<V> &L, +                           const Iterator<W> &R) XRAY_NEVER_INSTRUMENT { +      return !(L == R); +    } + +    U &operator*() const XRAY_NEVER_INSTRUMENT { +      DCHECK_NE(S, &SentinelSegment); +      auto RelOff = Offset % ElementsPerSegment; + +      // We need to compute the character-aligned pointer, offset from the +      // segment's Data location to get the element in the position of Offset. +      auto Base = &S->Data; +      auto AlignedOffset = Base + (RelOff * AlignedElementStorageSize); +      return *reinterpret_cast<U *>(AlignedOffset); +    } + +    U *operator->() const XRAY_NEVER_INSTRUMENT { return &(**this); } +  }; + +  AllocatorType *Alloc; +  Segment *Head; +  Segment *Tail; + +  // Here we keep track of segments in the freelist, to allow us to re-use +  // segments when elements are trimmed off the end. +  Segment *Freelist; +  uint64_t Size; + +  // =============================== +  // In the following implementation, we work through the algorithms and the +  // list operations using the following notation: +  // +  //   - pred(s) is the predecessor (previous node accessor) and succ(s) is +  //     the successor (next node accessor). +  // +  //   - S is a sentinel segment, which has the following property: +  // +  //         pred(S) == succ(S) == S +  // +  //   - @ is a loop operator, which can imply pred(s) == s if it appears on +  //     the left of s, or succ(s) == S if it appears on the right of s. +  // +  //   - sL <-> sR : means a bidirectional relation between sL and sR, which +  //     means: +  // +  //         succ(sL) == sR && pred(SR) == sL +  // +  //   - sL -> sR : implies a unidirectional relation between sL and SR, +  //     with the following properties: +  // +  //         succ(sL) == sR +  // +  //     sL <- sR : implies a unidirectional relation between sR and sL, +  //     with the following properties: +  // +  //         pred(sR) == sL +  // +  // =============================== + +  Segment *NewSegment() XRAY_NEVER_INSTRUMENT { +    // We need to handle the case in which enough elements have been trimmed to +    // allow us to re-use segments we've allocated before. For this we look into +    // the Freelist, to see whether we need to actually allocate new blocks or +    // just re-use blocks we've already seen before. 
+    if (Freelist != &SentinelSegment) { +      // The current state of lists resemble something like this at this point: +      // +      //   Freelist: @S@<-f0->...<->fN->@S@ +      //                  ^ Freelist +      // +      // We want to perform a splice of `f0` from Freelist to a temporary list, +      // which looks like: +      // +      //   Templist: @S@<-f0->@S@ +      //                  ^ FreeSegment +      // +      // Our algorithm preconditions are: +      DCHECK_EQ(Freelist->Prev, &SentinelSegment); + +      // Then the algorithm we implement is: +      // +      //   SFS = Freelist +      //   Freelist = succ(Freelist) +      //   if (Freelist != S) +      //     pred(Freelist) = S +      //   succ(SFS) = S +      //   pred(SFS) = S +      // +      auto *FreeSegment = Freelist; +      Freelist = Freelist->Next; + +      // Note that we need to handle the case where Freelist is now pointing to +      // S, which we don't want to be overwriting. +      // TODO: Determine whether the cost of the branch is higher than the cost +      // of the blind assignment. +      if (Freelist != &SentinelSegment) +        Freelist->Prev = &SentinelSegment; + +      FreeSegment->Next = &SentinelSegment; +      FreeSegment->Prev = &SentinelSegment; + +      // Our postconditions are: +      DCHECK_EQ(Freelist->Prev, &SentinelSegment); +      DCHECK_NE(FreeSegment, &SentinelSegment); +      return FreeSegment; +    } + +    auto SegmentBlock = Alloc->Allocate(); +    if (SegmentBlock.Data == nullptr) +      return nullptr; + +    // Placement-new the Segment element at the beginning of the SegmentBlock. +    new (SegmentBlock.Data) Segment{&SentinelSegment, &SentinelSegment, {0}}; +    auto SB = reinterpret_cast<Segment *>(SegmentBlock.Data); +    return SB; +  } + +  Segment *InitHeadAndTail() XRAY_NEVER_INSTRUMENT { +    DCHECK_EQ(Head, &SentinelSegment); +    DCHECK_EQ(Tail, &SentinelSegment); +    auto S = NewSegment(); +    if (S == nullptr) +      return nullptr; +    DCHECK_EQ(S->Next, &SentinelSegment); +    DCHECK_EQ(S->Prev, &SentinelSegment); +    DCHECK_NE(S, &SentinelSegment); +    Head = S; +    Tail = S; +    DCHECK_EQ(Head, Tail); +    DCHECK_EQ(Tail->Next, &SentinelSegment); +    DCHECK_EQ(Tail->Prev, &SentinelSegment); +    return S; +  } + +  Segment *AppendNewSegment() XRAY_NEVER_INSTRUMENT { +    auto S = NewSegment(); +    if (S == nullptr) +      return nullptr; +    DCHECK_NE(Tail, &SentinelSegment); +    DCHECK_EQ(Tail->Next, &SentinelSegment); +    DCHECK_EQ(S->Prev, &SentinelSegment); +    DCHECK_EQ(S->Next, &SentinelSegment); +    S->Prev = Tail; +    Tail->Next = S; +    Tail = S; +    DCHECK_EQ(S, S->Prev->Next); +    DCHECK_EQ(Tail->Next, &SentinelSegment); +    return S; +  } + +public: +  explicit Array(AllocatorType &A) XRAY_NEVER_INSTRUMENT +      : Alloc(&A), +        Head(&SentinelSegment), +        Tail(&SentinelSegment), +        Freelist(&SentinelSegment), +        Size(0) {} + +  Array() XRAY_NEVER_INSTRUMENT : Alloc(nullptr), +                                  Head(&SentinelSegment), +                                  Tail(&SentinelSegment), +                                  Freelist(&SentinelSegment), +                                  Size(0) {} + +  Array(const Array &) = delete; +  Array &operator=(const Array &) = delete; + +  Array(Array &&O) XRAY_NEVER_INSTRUMENT : Alloc(O.Alloc), +                                           Head(O.Head), +                                           Tail(O.Tail), +                                           
Freelist(O.Freelist),
+                                           Size(O.Size) {
+    O.Alloc = nullptr;
+    O.Head = &SentinelSegment;
+    O.Tail = &SentinelSegment;
+    O.Size = 0;
+    O.Freelist = &SentinelSegment;
+  }
+
+  Array &operator=(Array &&O) XRAY_NEVER_INSTRUMENT {
+    Alloc = O.Alloc;
+    O.Alloc = nullptr;
+    Head = O.Head;
+    O.Head = &SentinelSegment;
+    Tail = O.Tail;
+    O.Tail = &SentinelSegment;
+    Freelist = O.Freelist;
+    O.Freelist = &SentinelSegment;
+    Size = O.Size;
+    O.Size = 0;
+    return *this;
+  }
+
+  ~Array() XRAY_NEVER_INSTRUMENT {
+    for (auto &E : *this)
+      (&E)->~T();
+  }
+
+  bool empty() const XRAY_NEVER_INSTRUMENT { return Size == 0; }
+
+  AllocatorType &allocator() const XRAY_NEVER_INSTRUMENT {
+    DCHECK_NE(Alloc, nullptr);
+    return *Alloc;
+  }
+
+  uint64_t size() const XRAY_NEVER_INSTRUMENT { return Size; }
+
+  template <class... Args>
+  T *AppendEmplace(Args &&... args) XRAY_NEVER_INSTRUMENT {
+    DCHECK((Size == 0 && Head == &SentinelSegment && Head == Tail) ||
+           (Size != 0 && Head != &SentinelSegment && Tail != &SentinelSegment));
+    if (UNLIKELY(Head == &SentinelSegment)) {
+      auto R = InitHeadAndTail();
+      if (R == nullptr)
+        return nullptr;
+    }
+
+    DCHECK_NE(Head, &SentinelSegment);
+    DCHECK_NE(Tail, &SentinelSegment);
+
+    auto Offset = Size % ElementsPerSegment;
+    if (UNLIKELY(Size != 0 && Offset == 0))
+      if (AppendNewSegment() == nullptr)
+        return nullptr;
+
+    DCHECK_NE(Tail, &SentinelSegment);
+    auto Base = &Tail->Data;
+    auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
+    DCHECK_LE(AlignedOffset + sizeof(T),
+              reinterpret_cast<unsigned char *>(Tail) + SegmentSize);
+
+    // In-place construct at Position.
+    new (AlignedOffset) T{std::forward<Args>(args)...};
+    ++Size;
+    return reinterpret_cast<T *>(AlignedOffset);
+  }
+
+  T *Append(const T &E) XRAY_NEVER_INSTRUMENT {
+    // FIXME: This is a duplication of AppendEmplace with the copy semantics
+    // explicitly used, as a work-around to GCC 4.8 not invoking the copy
+    // constructor with the placement new with braced-init syntax.
+    DCHECK((Size == 0 && Head == &SentinelSegment && Head == Tail) ||
+           (Size != 0 && Head != &SentinelSegment && Tail != &SentinelSegment));
+    if (UNLIKELY(Head == &SentinelSegment)) {
+      auto R = InitHeadAndTail();
+      if (R == nullptr)
+        return nullptr;
+    }
+
+    DCHECK_NE(Head, &SentinelSegment);
+    DCHECK_NE(Tail, &SentinelSegment);
+
+    auto Offset = Size % ElementsPerSegment;
+    if (UNLIKELY(Size != 0 && Offset == 0))
+      if (AppendNewSegment() == nullptr)
+        return nullptr;
+
+    DCHECK_NE(Tail, &SentinelSegment);
+    auto Base = &Tail->Data;
+    auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
+    DCHECK_LE(AlignedOffset + sizeof(T),
+              reinterpret_cast<unsigned char *>(Tail) + SegmentSize);
+
+    // In-place construct at Position.
+    new (AlignedOffset) T(E);
+    ++Size;
+    return reinterpret_cast<T *>(AlignedOffset);
+  }
+
+  T &operator[](uint64_t Offset) const XRAY_NEVER_INSTRUMENT {
+    DCHECK_LE(Offset, Size);
+    // We need to traverse the array enough times to find the element at Offset.
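+    // (Indexing therefore costs O(Offset / ElementsPerSegment) pointer hops;
+    // e.g. with 4 elements per segment, Offset 10 walks past two segments and
+    // reads slot 2 of the third.)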
+    auto S = Head;
+    while (Offset >= ElementsPerSegment) {
+      S = S->Next;
+      Offset -= ElementsPerSegment;
+      DCHECK_NE(S, &SentinelSegment);
+    }
+    auto Base = &S->Data;
+    auto AlignedOffset = Base + (Offset * AlignedElementStorageSize);
+    auto Position = reinterpret_cast<T *>(AlignedOffset);
+    return *Position;
+  }
+
+  T &front() const XRAY_NEVER_INSTRUMENT {
+    DCHECK_NE(Head, &SentinelSegment);
+    DCHECK_NE(Size, 0u);
+    return *begin();
+  }
+
+  T &back() const XRAY_NEVER_INSTRUMENT {
+    DCHECK_NE(Tail, &SentinelSegment);
+    DCHECK_NE(Size, 0u);
+    auto It = end();
+    --It;
+    return *It;
+  }
+
+  template <class Predicate>
+  T *find_element(Predicate P) const XRAY_NEVER_INSTRUMENT {
+    if (empty())
+      return nullptr;
+
+    auto E = end();
+    for (auto I = begin(); I != E; ++I)
+      if (P(*I))
+        return &(*I);
+
+    return nullptr;
+  }
+
+  /// Remove N elements from the end. This leaves the blocks behind, and does
+  /// not require allocation of new blocks for elements added after trimming.
+  void trim(uint64_t Elements) XRAY_NEVER_INSTRUMENT {
+    auto OldSize = Size;
+    Elements = Elements > Size ? Size : Elements;
+    Size -= Elements;
+
+    // We compute the number of segments we're going to return from the tail by
+    // counting how many elements have been trimmed. Given the following:
+    //
+    // - Each segment has N valid positions, where N > 0
+    // - The previous size > current size
+    //
+    // To compute the number of segments to return, we need to perform the
+    // following calculations for the number of segments required given 'x'
+    // elements:
+    //
+    //   f(x) = {
+    //            x == 0          : 0
+    //          , 0 < x <= N      : 1
+    //          , N < x <= max    : x / N + (x % N ? 1 : 0)
+    //          }
+    //
+    // We can simplify this down to:
+    //
+    //   f(x) = {
+    //            x == 0          : 0
+    //          , 0 < x <= max    : x / N + (x < N || x % N ? 1 : 0)
+    //          }
+    //
+    // And further down to:
+    //
+    //   f(x) = x ? x / N + (x < N || x % N ? 1 : 0) : 0
+    //
+    // We can then perform the following calculation `s` which counts the number
+    // of segments we need to remove from the end of the data structure:
+    //
+    //   s(p, c) = f(p) - f(c)
+    //
+    // If we treat p = previous size, and c = current size, and given the
+    // properties above, the possible range for s(...) is [0..max(typeof(p))/N]
+    // given that typeof(p) == typeof(c).
+    auto F = [](uint64_t X) {
+      return X ? (X / ElementsPerSegment) +
+                     (X < ElementsPerSegment || X % ElementsPerSegment ? 1 : 0)
+               : 0;
+    };
+    auto PS = F(OldSize);
+    auto CS = F(Size);
+    DCHECK_GE(PS, CS);
+    auto SegmentsToTrim = PS - CS;
+    for (auto I = 0uL; I < SegmentsToTrim; ++I) {
+      // Here we place the current tail segment on the freelist. To do this
+      // appropriately, we need to perform a splice operation on two
+      // bidirectional linked-lists. In particular, we have the current state of
+      // the doubly-linked list of segments:
+      //
+      //   @S@ <- s0 <-> s1 <-> ...
<-> sT -> @S@ +      // +      DCHECK_NE(Head, &SentinelSegment); +      DCHECK_NE(Tail, &SentinelSegment); +      DCHECK_EQ(Tail->Next, &SentinelSegment); + +      if (Freelist == &SentinelSegment) { +        // Our two lists at this point are in this configuration: +        // +        //   Freelist: (potentially) @S@ +        //   Mainlist: @S@<-s0<->s1<->...<->sPT<->sT->@S@ +        //                  ^ Head                ^ Tail +        // +        // The end state for us will be this configuration: +        // +        //   Freelist: @S@<-sT->@S@ +        //   Mainlist: @S@<-s0<->s1<->...<->sPT->@S@ +        //                  ^ Head          ^ Tail +        // +        // The first step for us is to hold a reference to the tail of Mainlist, +        // which in our notation is represented by sT. We call this our "free +        // segment" which is the segment we are placing on the Freelist. +        // +        //   sF = sT +        // +        // Then, we also hold a reference to the "pre-tail" element, which we +        // call sPT: +        // +        //   sPT = pred(sT) +        // +        // We want to splice sT into the beginning of the Freelist, which in +        // an empty Freelist means placing a segment whose predecessor and +        // successor is the sentinel segment. +        // +        // The splice operation then can be performed in the following +        // algorithm: +        // +        //   succ(sPT) = S +        //   pred(sT) = S +        //   succ(sT) = Freelist +        //   Freelist = sT +        //   Tail = sPT +        // +        auto SPT = Tail->Prev; +        SPT->Next = &SentinelSegment; +        Tail->Prev = &SentinelSegment; +        Tail->Next = Freelist; +        Freelist = Tail; +        Tail = SPT; + +        // Our post-conditions here are: +        DCHECK_EQ(Tail->Next, &SentinelSegment); +        DCHECK_EQ(Freelist->Prev, &SentinelSegment); +      } else { +        // In the other case, where the Freelist is not empty, we perform the +        // following transformation instead: +        // +        // This transforms the current state: +        // +        //   Freelist: @S@<-f0->@S@ +        //                  ^ Freelist +        //   Mainlist: @S@<-s0<->s1<->...<->sPT<->sT->@S@ +        //                  ^ Head                ^ Tail +        // +        // Into the following: +        // +        //   Freelist: @S@<-sT<->f0->@S@ +        //                  ^ Freelist +        //   Mainlist: @S@<-s0<->s1<->...<->sPT->@S@ +        //                  ^ Head          ^ Tail +        // +        // The algorithm is: +        // +        //   sFH = Freelist +        //   sPT = pred(sT) +        //   pred(SFH) = sT +        //   succ(sT) = Freelist +        //   pred(sT) = S +        //   succ(sPT) = S +        //   Tail = sPT +        //   Freelist = sT +        // +        auto SFH = Freelist; +        auto SPT = Tail->Prev; +        auto ST = Tail; +        SFH->Prev = ST; +        ST->Next = Freelist; +        ST->Prev = &SentinelSegment; +        SPT->Next = &SentinelSegment; +        Tail = SPT; +        Freelist = ST; + +        // Our post-conditions here are: +        DCHECK_EQ(Tail->Next, &SentinelSegment); +        DCHECK_EQ(Freelist->Prev, &SentinelSegment); +        DCHECK_EQ(Freelist->Next->Prev, Freelist); +      } +    } + +    // Now in case we've spliced all the segments in the end, we ensure that the +    // main list is "empty", or both the head and tail pointing to the sentinel +    // segment. 
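+    // (The DCHECKs below re-assert the container invariants: a non-empty
+    // freelist always has the sentinel as its first segment's predecessor,
+    // while an empty freelist implies the tail's successor is the sentinel.)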
+    if (Tail == &SentinelSegment) +      Head = Tail; + +    DCHECK( +        (Size == 0 && Head == &SentinelSegment && Tail == &SentinelSegment) || +        (Size != 0 && Head != &SentinelSegment && Tail != &SentinelSegment)); +    DCHECK( +        (Freelist != &SentinelSegment && Freelist->Prev == &SentinelSegment) || +        (Freelist == &SentinelSegment && Tail->Next == &SentinelSegment)); +  } + +  // Provide iterators. +  Iterator<T> begin() const XRAY_NEVER_INSTRUMENT { +    return Iterator<T>(Head, 0, Size); +  } +  Iterator<T> end() const XRAY_NEVER_INSTRUMENT { +    return Iterator<T>(Tail, Size, Size); +  } +  Iterator<const T> cbegin() const XRAY_NEVER_INSTRUMENT { +    return Iterator<const T>(Head, 0, Size); +  } +  Iterator<const T> cend() const XRAY_NEVER_INSTRUMENT { +    return Iterator<const T>(Tail, Size, Size); +  } +}; + +// We need to have this storage definition out-of-line so that the compiler can +// ensure that storage for the SentinelSegment is defined and has a single +// address. +template <class T> +typename Array<T>::Segment Array<T>::SentinelSegment{ +    &Array<T>::SentinelSegment, &Array<T>::SentinelSegment, {'\0'}}; + +} // namespace __xray + +#endif // XRAY_SEGMENTED_ARRAY_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_AArch64.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_AArch64.S new file mode 100644 index 000000000000..536a79e0d150 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_AArch64.S @@ -0,0 +1,169 @@ +#include "../builtins/assembly.h" +#include "../sanitizer_common/sanitizer_asm.h" + +.macro SAVE_REGISTERS +  stp x1, x2, [sp, #-16]! +  stp x3, x4, [sp, #-16]! +  stp x5, x6, [sp, #-16]! +  stp x7, x30, [sp, #-16]! +  stp q0, q1, [sp, #-32]! +  stp q2, q3, [sp, #-32]! +  stp q4, q5, [sp, #-32]! +  stp q6, q7, [sp, #-32]! +  // x8 is the indirect result register and needs to be preserved for the body of the function to use. +  stp x8, x0, [sp, #-16]! +.endm + +.macro RESTORE_REGISTERS +  ldp x8, x0, [sp], #16 +  ldp q6, q7, [sp], #32 +  ldp q4, q5, [sp], #32 +  ldp q2, q3, [sp], #32 +  ldp q0, q1, [sp], #32 +  ldp x7, x30, [sp], #16 +  ldp x5, x6, [sp], #16 +  ldp x3, x4, [sp], #16 +  ldp x1, x2, [sp], #16 +.endm + +.text +.p2align 2 +.global ASM_SYMBOL(__xray_FunctionEntry) +ASM_HIDDEN(__xray_FunctionEntry) +ASM_TYPE_FUNCTION(__xray_FunctionEntry) +ASM_SYMBOL(__xray_FunctionEntry): +    /* Move the return address beyond the end of sled data. The 12 bytes of +         data are inserted in the code of the runtime patch, between the call +         instruction and the instruction returned into. The data contains 32 +         bits of instrumented function ID and 64 bits of the address of +         the current trampoline. */ +  add x30, x30, #12 +  // Push the registers which may be modified by the handler function. +  SAVE_REGISTERS + +  // Load the handler function pointer. +  adrp x2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE) +  ldr x2, [x2, #:lo12:ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)] +  cbz x2, 1f +  // Set w0 to the function ID (w17). Set x1 to XRayEntryType::ENTRY = 0. +  mov w0, w17 +  mov x1, #0 +  // Call the handler with 2 parameters. +  blr x2 +1: +  RESTORE_REGISTERS +  ret +ASM_SIZE(__xray_FunctionEntry) + +.p2align 2 +.global ASM_SYMBOL(__xray_FunctionExit) +ASM_HIDDEN(__xray_FunctionExit) +ASM_TYPE_FUNCTION(__xray_FunctionExit) +ASM_SYMBOL(__xray_FunctionExit): +    /* Move the return address beyond the end of sled data. 
The 12 bytes of +         data are inserted in the code of the runtime patch, between the call +         instruction and the instruction returned into. The data contains 32 +         bits of instrumented function ID and 64 bits of the address of +         the current trampoline. */ +  add x30, x30, #12 +  SAVE_REGISTERS + +  // Load the handler function pointer into x2. +  adrp x2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE) +  ldr x2, [x2, #:lo12:ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)] +  cbz x2, 1f +  // Set w0 to the function ID (w17). Set x1 to XRayEntryType::EXIT = 1. +  mov w0, w17 +  mov x1, #1 +  // Call the handler with 2 parameters. +  blr x2 +1: +  RESTORE_REGISTERS +  ret +ASM_SIZE(__xray_FunctionExit) + +.p2align 2 +.global ASM_SYMBOL(__xray_FunctionTailExit) +ASM_HIDDEN(__xray_FunctionTailExit) +ASM_TYPE_FUNCTION(__xray_FunctionTailExit) +ASM_SYMBOL(__xray_FunctionTailExit): +    /* Move the return address beyond the end of sled data. The 12 bytes of +         data are inserted in the code of the runtime patch, between the call +         instruction and the instruction returned into. The data contains 32 +         bits of instrumented function ID and 64 bits of the address of +         the current trampoline. */ +  add x30, x30, #12 +  // Save the registers which may be modified by the handler function. +  SAVE_REGISTERS +  // Load the handler function pointer into x2. +  adrp x2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE) +  ldr x2, [x2, #:lo12:ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)] +  cbz x2, 1f +  // Set w0 to the function ID (w17). Set x1 to XRayEntryType::TAIL = 2. +  mov w0, w17 +  mov x1, #2 +  // Call the handler with 2 parameters. +  blr x2 +1: +  RESTORE_REGISTERS +  ret +ASM_SIZE(__xray_FunctionTailExit) + +.p2align 2 +.global ASM_SYMBOL(__xray_ArgLoggerEntry) +ASM_HIDDEN(__xray_ArgLoggerEntry) +ASM_TYPE_FUNCTION(__xray_ArgLoggerEntry) +ASM_SYMBOL(__xray_ArgLoggerEntry): +  add x30, x30, #12 +  // Push the registers which may be modified by the handler function. +  SAVE_REGISTERS + +  adrp x8, ASM_SYMBOL(_ZN6__xray13XRayArgLoggerE) +  ldr x8, [x8, #:lo12:ASM_SYMBOL(_ZN6__xray13XRayArgLoggerE)] +  cbnz x8, 2f + +  // Load the handler function pointer. +  adrp x8, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE) +  ldr x8, [x8, #:lo12:ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)] +  cbz x8, 1f + +2: +  mov x2, x0 +  mov x1, #3  // XRayEntryType::LOG_ARGS_ENTRY +  mov w0, w17 +  blr x8 + +1: +  RESTORE_REGISTERS +  ret +ASM_SIZE(__xray_ArgLoggerEntry) + +// __xray_*Event have default visibility so that they can be referenced by user +// DSOs that do not link against the runtime. 
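+// A minimal sketch of how user code reaches these (assuming the declarations
+// in <xray/xray_interface.h>): install a handler, then patch the sleds, e.g.
+//   __xray_set_customevent_handler(+[](void *Event, size_t Size) { /*...*/ });
+//   __xray_patch();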
+.global ASM_SYMBOL(__xray_CustomEvent)
+ASM_TYPE_FUNCTION(__xray_CustomEvent)
+ASM_SYMBOL(__xray_CustomEvent):
+  SAVE_REGISTERS
+  adrp x8, ASM_SYMBOL(_ZN6__xray22XRayPatchedCustomEventE)
+  ldr x8, [x8, #:lo12:ASM_SYMBOL(_ZN6__xray22XRayPatchedCustomEventE)]
+  cbz x8, 1f
+  blr x8
+1:
+  RESTORE_REGISTERS
+  ret
+ASM_SIZE(__xray_CustomEvent)
+
+.global ASM_SYMBOL(__xray_TypedEvent)
+ASM_TYPE_FUNCTION(__xray_TypedEvent)
+ASM_SYMBOL(__xray_TypedEvent):
+  SAVE_REGISTERS
+  adrp x8, ASM_SYMBOL(_ZN6__xray21XRayPatchedTypedEventE)
+  ldr x8, [x8, #:lo12:ASM_SYMBOL(_ZN6__xray21XRayPatchedTypedEventE)]
+  cbz x8, 1f
+  blr x8
+1:
+  RESTORE_REGISTERS
+  ret
+ASM_SIZE(__xray_TypedEvent)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_arm.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_arm.S
new file mode 100644
index 000000000000..3ffc1e443761
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_arm.S
@@ -0,0 +1,105 @@
+#include "../builtins/assembly.h"
+
+    .syntax unified
+    .arch armv6t2
+    .fpu vfpv2
+    .code 32
+    .global _ZN6__xray19XRayPatchedFunctionE
+
+    @ Word-aligned function entry point
+    .p2align 2
+    @ Let C/C++ see the symbol
+    .global __xray_FunctionEntry
+    .hidden __xray_FunctionEntry
+    @ It preserves all registers except r0, r12(ip), r14(lr) and r15(pc)
+    @ Assume that "q" part of the floating-point registers is not used
+    @   for passing parameters to C/C++ functions.
+    .type __xray_FunctionEntry, %function
+    @ In C++ it is extern "C" void __xray_FunctionEntry(uint32_t FuncId) with
+    @   FuncId passed in the r0 register.
+__xray_FunctionEntry:
+    PUSH {r1-r3,lr}
+    @ Save floating-point parameters of the instrumented function
+    VPUSH {d0-d7}
+    MOVW r1, #:lower16:_ZN6__xray19XRayPatchedFunctionE - (. + 16)
+    MOVT r1, #:upper16:_ZN6__xray19XRayPatchedFunctionE - (. + 12)
+    LDR r2, [pc, r1]
+    @ Handler address is nullptr if handler is not set
+    CMP r2, #0
+    BEQ FunctionEntry_restore
+    @ Function ID is already in r0 (the first parameter).
+    @ r1=0 means that we are tracing an entry event
+    MOV r1, #0
+    @ Call the handler with 2 parameters in r0 and r1
+    BLX r2
+FunctionEntry_restore:
+    @ Restore floating-point parameters of the instrumented function
+    VPOP {d0-d7}
+    POP {r1-r3,pc}
+
+    @ Word-aligned function entry point
+    .p2align 2
+    @ Let C/C++ see the symbol
+    .global __xray_FunctionExit
+    .hidden __xray_FunctionExit
+    @ Assume that d1-d7 are not used for the return value.
+    @ Assume that "q" part of the floating-point registers is not used for the
+    @   return value in C/C++.
+    .type __xray_FunctionExit, %function
+    @ In C++ it is extern "C" void __xray_FunctionExit(uint32_t FuncId) with
+    @   FuncId passed in the r0 register.
+__xray_FunctionExit:
+    PUSH {r1-r3,lr}
+    @ Save the floating-point return value of the instrumented function
+    VPUSH {d0}
+    @ Load the handler address
+    MOVW r1, #:lower16:_ZN6__xray19XRayPatchedFunctionE - (. + 16)
+    MOVT r1, #:upper16:_ZN6__xray19XRayPatchedFunctionE - (. + 12)
+    LDR r2, [pc, r1]
+    @ Handler address is nullptr if handler is not set
+    CMP r2, #0
+    BEQ FunctionExit_restore
+    @ Function ID is already in r0 (the first parameter).
+    @ r1=1 means that we are tracing an exit event
+    MOV r1, #1
+    @ Call the handler with 2 parameters in r0 and r1
+    BLX r2
+FunctionExit_restore:
+    @ Restore the floating-point return value of the instrumented function
+    VPOP {d0}
+    POP {r1-r3,pc}
+
+    @ Word-aligned function entry point
+    .p2align 2
+    @ Let C/C++ see the symbol
+    .global __xray_FunctionTailExit
+    .hidden __xray_FunctionTailExit
+    @ It preserves all registers except r0, r12(ip), r14(lr) and r15(pc)
+    @ Assume that "q" part of the floating-point registers is not used
+    @   for passing parameters to C/C++ functions.
+    .type __xray_FunctionTailExit, %function
+    @ In C++ it is extern "C" void __xray_FunctionTailExit(uint32_t FuncId)
+    @   with FuncId passed in the r0 register.
+__xray_FunctionTailExit:
+    PUSH {r1-r3,lr}
+    @ Save floating-point parameters of the instrumented function
+    VPUSH {d0-d7}
+    MOVW r1, #:lower16:_ZN6__xray19XRayPatchedFunctionE - (. + 16)
+    MOVT r1, #:upper16:_ZN6__xray19XRayPatchedFunctionE - (. + 12)
+    LDR r2, [pc, r1]
+    @ Handler address is nullptr if handler is not set
+    CMP r2, #0
+    BEQ FunctionTailExit_restore
+    @ Function ID is already in r0 (the first parameter).
+    @ r1=2 would mean that we are tracing a tail exit event, but until the
+    @   logging side of XRay can represent tail exits distinctly, we pretend
+    @   that a normal function exit happens here, so we give the handler code 1
+    MOV r1, #1
+    @ Call the handler with 2 parameters in r0 and r1
+    BLX r2
+FunctionTailExit_restore:
+    @ Restore floating-point parameters of the instrumented function
+    VPOP {d0-d7}
+    POP {r1-r3,pc}
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_hexagon.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_hexagon.S
new file mode 100644
index 000000000000..c87ec4bed1f9
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_hexagon.S
@@ -0,0 +1,99 @@
+//===-- xray_trampoline_hexagon.s -------------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the hexagon-specific assembler for the trampolines.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../builtins/assembly.h"
+#include "../sanitizer_common/sanitizer_asm.h"
+
+.macro SAVE_REGISTERS
+memw(sp+#0)=r0
+memw(sp+#4)=r1
+memw(sp+#8)=r2
+memw(sp+#12)=r3
+memw(sp+#16)=r4
+.endm
+.macro RESTORE_REGISTERS
+r0=memw(sp+#0)
+r1=memw(sp+#4)
+r2=memw(sp+#8)
+r3=memw(sp+#12)
+r4=memw(sp+#16)
+.endm
+
+.macro CALL_PATCHED_FUNC entry_type
+	// if (__xray::XRayPatchedFunction != NULL)
+	//     __xray::XRayPatchedFunction(FuncId, \entry_type);
+
+	r8 = #ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)
+
+	// The patched sled puts the function ID
+	// into r6.  Move it into r0 to pass it to
+	// the handler.
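+	// The braces below bundle the four operations into one VLIW packet;
+	// the callr is predicated on p0, so the handler is only called when
+	// r8 is non-null.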
+	{ r0 = r6 +          r1 = \entry_type +          p0 = !cmp.eq(r8, #0) +	  if (p0) callr r8 } +.endm + +	.text +	.globl ASM_SYMBOL(__xray_FunctionEntry) +	ASM_HIDDEN(__xray_FunctionEntry) +	ASM_TYPE_FUNCTION(__xray_FunctionEntry) +# LLVM-MCA-BEGIN __xray_FunctionEntry +ASM_SYMBOL(__xray_FunctionEntry): +	CFI_STARTPROC +	SAVE_REGISTERS + +	CALL_PATCHED_FUNC #0  // XRayEntryType::ENTRY +.Ltmp0: +	RESTORE_REGISTERS +	// return +# LLVM-MCA-END +	ASM_SIZE(__xray_FunctionEntry) +	CFI_ENDPROC + + +	.globl ASM_SYMBOL(__xray_FunctionExit) +	ASM_HIDDEN(__xray_FunctionExit) +	ASM_TYPE_FUNCTION(__xray_FunctionExit) +# LLVM-MCA-BEGIN __xray_FunctionExit +ASM_SYMBOL(__xray_FunctionExit): +	CFI_STARTPROC +	SAVE_REGISTERS + +	CALL_PATCHED_FUNC #1  // XRayEntryType::EXIT +.Ltmp1: +	RESTORE_REGISTERS +	// return +	jumpr r31 +# LLVM-MCA-END +	ASM_SIZE(__xray_FunctionExit) +	CFI_ENDPROC + + +	.globl ASM_SYMBOL(__xray_FunctionTailExit) +	ASM_HIDDEN(__xray_FunctionTailExit) +	ASM_TYPE_FUNCTION(__xray_FunctionTailExit) +# LLVM-MCA-BEGIN __xray_FunctionTailExit +ASM_SYMBOL(__xray_FunctionTailExit): +	CFI_STARTPROC +	SAVE_REGISTERS + +	CALL_PATCHED_FUNC #2  // XRayEntryType::TAIL +.Ltmp2: +	RESTORE_REGISTERS +	// return +	jumpr r31 +# LLVM-MCA-END +	ASM_SIZE(__xray_FunctionTailExit) +	CFI_ENDPROC diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_loongarch64.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_loongarch64.S new file mode 100644 index 000000000000..fcbefcc5f7a2 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_loongarch64.S @@ -0,0 +1,124 @@ +//===-- xray_trampoline_loongarch64.s ---------------------------*- ASM -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the loongarch-specific assembler for the trampolines. +// +//===----------------------------------------------------------------------===// + +#include "../sanitizer_common/sanitizer_asm.h" + +#define FROM_0_TO_7 0,1,2,3,4,5,6,7 +#define FROM_7_TO_0 7,6,5,4,3,2,1,0 + +.macro SAVE_ARG_REGISTERS +  .irp i,FROM_7_TO_0 +    st.d  $a\i, $sp, (8 * 8 + 8 * \i) +  .endr +  .irp i,FROM_7_TO_0 +    fst.d $f\i, $sp, (8 * \i) +  .endr +.endm + +.macro RESTORE_ARG_REGISTERS +  .irp i,FROM_0_TO_7 +    fld.d $f\i, $sp, (8 * \i) +  .endr +  .irp i,FROM_0_TO_7 +    ld.d  $a\i, $sp, (8 * 8 + 8 * \i) +  .endr +.endm + +.macro SAVE_RET_REGISTERS +  st.d    $a1, $sp, 24 +  st.d    $a0, $sp, 16 +  fst.d   $f1, $sp, 8 +  fst.d   $f0, $sp, 0 +.endm + +.macro RESTORE_RET_REGISTERS +  fld.d   $f0, $sp, 0 +  fld.d   $f1, $sp, 8 +  ld.d    $a0, $sp, 16 +  ld.d    $a1, $sp, 24 +.endm + +  .text +  .file "xray_trampoline_loongarch64.S" +  .globl ASM_SYMBOL(__xray_FunctionEntry) +  ASM_HIDDEN(__xray_FunctionEntry) +  .p2align 2 +  ASM_TYPE_FUNCTION(__xray_FunctionEntry) +ASM_SYMBOL(__xray_FunctionEntry): +  .cfi_startproc +  // Save argument registers before doing any actual work. 
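+  // Frame layout: $f0-$f7 are spilled to sp+0..56, $a0-$a7 to sp+64..120, and
+  // $ra to sp+128, which accounts for the 136 bytes reserved below.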
+  .cfi_def_cfa_offset 136 +  addi.d  $sp, $sp, -136 +  st.d    $ra, $sp, 128 +  .cfi_offset 1, -8 +  SAVE_ARG_REGISTERS + +  la.got  $t2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE) +  ld.d    $t2, $t2, 0 + +  beqz    $t2, FunctionEntry_restore + +  // a1=0 means that we are tracing an entry event. +  move    $a1, $zero +  // Function ID is in t1 (the first parameter). +  move    $a0, $t1 +  jirl    $ra, $t2, 0 + +FunctionEntry_restore: +  // Restore argument registers. +  RESTORE_ARG_REGISTERS +  ld.d    $ra, $sp, 128 +  addi.d  $sp, $sp, 136 +  ret +FunctionEntry_end: +  ASM_SIZE(__xray_FunctionEntry) +  .cfi_endproc + +  .text +  .globl ASM_SYMBOL(__xray_FunctionExit) +  ASM_HIDDEN(__xray_FunctionExit) +  .p2align 2 +  ASM_TYPE_FUNCTION(__xray_FunctionExit) +ASM_SYMBOL(__xray_FunctionExit): +  .cfi_startproc +  // Save return registers before doing any actual work. +  .cfi_def_cfa_offset 48 +  addi.d  $sp, $sp, -48 +  st.d    $ra, $sp, 40 +  .cfi_offset 1, -8 +  st.d    $fp, $sp, 32 +  SAVE_RET_REGISTERS + +  la.got  $t2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE) +  ld.d    $t2, $t2, 0 + +  beqz    $t2, FunctionExit_restore + +  // a1=1 means that we are tracing an exit event. +  li.w    $a1, 1 +  // Function ID is in t1 (the first parameter). +  move    $a0, $t1 +  jirl    $ra, $t2, 0 + +FunctionExit_restore: +  // Restore return registers. +  RESTORE_RET_REGISTERS +  ld.d    $fp, $sp, 32 +  ld.d    $ra, $sp, 40 +  addi.d  $sp, $sp, 48 +  ret + +FunctionExit_end: +  ASM_SIZE(__xray_FunctionExit) +  .cfi_endproc diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips.S new file mode 100644 index 000000000000..499c350d2a24 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips.S @@ -0,0 +1,109 @@ +//===-- xray_trampoline_mips.s ----------------------------------*- ASM -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the MIPS-specific assembler for the trampolines. +// +//===----------------------------------------------------------------------===// + +  .text +  .file "xray_trampoline_mips.S" +  .globl __xray_FunctionEntry +  .p2align 2 +  .type __xray_FunctionEntry,@function +__xray_FunctionEntry: +  .cfi_startproc +  .set noreorder +  .cpload $t9 +  .set reorder +  // Save argument registers before doing any actual work +  .cfi_def_cfa_offset 36 +  addiu  $sp, $sp, -36 +  sw     $ra, 32($sp) +  .cfi_offset 31, -4 +  sw     $a3, 28($sp) +  sw     $a2, 24($sp) +  sw     $a1, 20($sp) +  sw     $a0, 16($sp) +  sdc1	 $f14, 8($sp) +  sdc1	 $f12, 0($sp) + +  la     $t9, _ZN6__xray19XRayPatchedFunctionE +  lw     $t9, 0($t9) + +  beqz   $t9, FunctionEntry_restore + +  // a1=0 means that we are tracing an entry event +  move   $a1, $zero +  // Function ID is in t0 (the first parameter). 
+  move   $a0, $t0 +  jalr   $t9 + +FunctionEntry_restore: +  // Restore argument registers +  ldc1   $f12, 0($sp) +  ldc1   $f14, 8($sp) +  lw     $a0, 16($sp) +  lw     $a1, 20($sp) +  lw     $a2, 24($sp) +  lw     $a3, 28($sp) +  lw     $ra, 32($sp) +  addiu	 $sp, $sp, 36 +  jr     $ra +FunctionEntry_end: +  .size __xray_FunctionEntry, FunctionEntry_end-__xray_FunctionEntry +  .cfi_endproc + +  .text +  .globl __xray_FunctionExit +  .p2align 2 +  .type __xray_FunctionExit,@function +__xray_FunctionExit: +  .cfi_startproc +  .set noreorder +  .cpload $t9 +  .set reorder +  // Save return registers before doing any actual work. +  .cfi_def_cfa_offset 36 +  addiu  $sp, $sp, -36 +  sw     $ra, 32($sp) +  .cfi_offset 31, -4 +  sw     $a1, 28($sp) +  sw     $a0, 24($sp) +  sw     $v1, 20($sp) +  sw     $v0, 16($sp) +  sdc1   $f2, 8($sp) +  sdc1   $f0, 0($sp) + +  la     $t9, _ZN6__xray19XRayPatchedFunctionE +  lw     $t9, 0($t9) + +  beqz	 $t9, FunctionExit_restore + +  // a1=1 means that we are tracing an exit event +  li     $a1, 1 +  // Function ID is in t0 (the first parameter). +  move   $a0, $t0 +  jalr   $t9 + +FunctionExit_restore: +  // Restore return registers +  ldc1   $f0, 0($sp) +  ldc1   $f2, 8($sp) +  lw     $v0, 16($sp) +  lw     $v1, 20($sp) +  lw     $a0, 24($sp) +  lw     $a1, 28($sp) +  lw     $ra, 32($sp) +  addiu  $sp, $sp, 36 +  jr     $ra + +FunctionExit_end: +  .size __xray_FunctionExit, FunctionExit_end-__xray_FunctionExit +  .cfi_endproc diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips64.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips64.S new file mode 100644 index 000000000000..d65bec1fc687 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_mips64.S @@ -0,0 +1,135 @@ +//===-- xray_trampoline_mips64.s --------------------------------*- ASM -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the MIPS64-specific assembler for the trampolines. +// +//===----------------------------------------------------------------------===// + +  .text +  .file "xray_trampoline_mips64.S" +  .globl __xray_FunctionEntry +  .p2align 2 +  .type __xray_FunctionEntry,@function +__xray_FunctionEntry: +  .cfi_startproc +  // Save argument registers before doing any actual work. +  .cfi_def_cfa_offset 144 +  daddiu  $sp, $sp, -144 +  sd      $ra, 136($sp) +  .cfi_offset 31, -8 +  sd      $gp, 128($sp) +  sd      $a7, 120($sp) +  sd      $a6, 112($sp) +  sd      $a5, 104($sp) +  sd      $a4, 96($sp) +  sd      $a3, 88($sp) +  sd      $a2, 80($sp) +  sd      $a1, 72($sp) +  sd      $a0, 64($sp) +  sdc1    $f19, 56($sp) +  sdc1    $f18, 48($sp) +  sdc1    $f17, 40($sp) +  sdc1    $f16, 32($sp) +  sdc1    $f15, 24($sp) +  sdc1    $f14, 16($sp) +  sdc1    $f13, 8($sp) +  sdc1    $f12, 0($sp) + +  lui     $gp, %hi(%neg(%gp_rel(__xray_FunctionEntry))) +  daddu   $gp, $gp, $t9 +  daddiu  $gp ,$gp, %lo(%neg(%gp_rel(__xray_FunctionEntry))) + +  dla     $t9, _ZN6__xray19XRayPatchedFunctionE +  ld      $t9, 0($t9) + +  beqz    $t9, FunctionEntry_restore + +  // a1=0 means that we are tracing an entry event +  move    $a1, $zero +  // Function ID is in t0 (the first parameter). 
+  move    $a0, $t0 +  jalr    $t9 + +FunctionEntry_restore: +  // Restore argument registers +  ldc1    $f12, 0($sp) +  ldc1    $f13, 8($sp) +  ldc1    $f14, 16($sp) +  ldc1    $f15, 24($sp) +  ldc1    $f16, 32($sp) +  ldc1    $f17, 40($sp) +  ldc1    $f18, 48($sp) +  ldc1    $f19, 56($sp) +  ld      $a0, 64($sp) +  ld      $a1, 72($sp) +  ld      $a2, 80($sp) +  ld      $a3, 88($sp) +  ld      $a4, 96($sp) +  ld      $a5, 104($sp) +  ld      $a6, 112($sp) +  ld      $a7, 120($sp) +  ld      $gp, 128($sp) +  ld      $ra, 136($sp) +  daddiu  $sp, $sp, 144 +  jr      $ra +FunctionEntry_end: +  .size __xray_FunctionEntry, FunctionEntry_end-__xray_FunctionEntry +  .cfi_endproc + +  .text +  .globl __xray_FunctionExit +  .p2align 2 +  .type __xray_FunctionExit,@function +__xray_FunctionExit: +  .cfi_startproc +  // Save return registers before doing any actual work. +  .cfi_def_cfa_offset 64 +  daddiu  $sp, $sp, -64 +  sd      $ra, 56($sp) +  .cfi_offset 31, -8 +  sd      $gp, 48($sp) +  sd      $a0, 40($sp) +  sd      $v1, 32($sp) +  sd      $v0, 24($sp) +  sdc1    $f2, 16($sp) +  sdc1    $f1, 8($sp) +  sdc1    $f0, 0($sp) + +  lui     $gp, %hi(%neg(%gp_rel(__xray_FunctionExit))) +  daddu   $gp, $gp, $t9 +  daddiu  $gp ,$gp, %lo(%neg(%gp_rel(__xray_FunctionExit))) + +  dla     $t9, _ZN6__xray19XRayPatchedFunctionE +  ld      $t9, 0($t9) + +  beqz    $t9, FunctionExit_restore + +  // a1=1 means that we are tracing an exit event +  li      $a1, 1 +  // Function ID is in t0 (the first parameter). +  move    $a0, $t0 +  jalr    $t9 + +FunctionExit_restore: +  // Restore return registers +  ldc1    $f0, 0($sp) +  ldc1    $f1, 8($sp) +  ldc1    $f2, 16($sp) +  ld      $v0, 24($sp) +  ld      $v1, 32($sp) +  ld      $a0, 40($sp) +  ld      $gp, 48($sp) +  ld      $ra, 56($sp) +  daddiu  $sp, $sp, 64 +  jr      $ra + +FunctionExit_end: +  .size __xray_FunctionExit, FunctionExit_end-__xray_FunctionExit +  .cfi_endproc diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64.cpp new file mode 100644 index 000000000000..878c46930fee --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64.cpp @@ -0,0 +1,15 @@ +#include <atomic> +#include <xray/xray_interface.h> + +namespace __xray { + +extern std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction; + +// Implement this in C++ instead of assembly, to avoid dealing with ToC by hand. +void CallXRayPatchedFunction(int32_t FuncId, XRayEntryType Type) { +  auto fptr = __xray::XRayPatchedFunction.load(); +  if (fptr != nullptr) +    (*fptr)(FuncId, Type); +} + +} // namespace __xray diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64_asm.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64_asm.S new file mode 100644 index 000000000000..250e2e5be67a --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_powerpc64_asm.S @@ -0,0 +1,235 @@ +	.text +	.abiversion 2 +	.globl	__xray_FunctionEntry +	.p2align	4 +__xray_FunctionEntry: +	std 0, 16(1) +	stdu 1, -408(1) +# Spill r3-r10, f1-f13, and vsr34-vsr45, which are parameter registers. +# If this appears to be slow, the caller needs to pass in number of generic, +# floating point, and vector parameters, so that we only spill those live ones. 
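+# Frame layout (408 bytes): r3-r10 at offsets 32..88, f1-f13 at 96..192,
+# vs34-vs45 at 200..376, the TOC pointer at 392, and the saved LR at 400,
+# overwriting the slot from which the function ID is read into r3 below.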
+	std 3, 32(1) +	ld 3, 400(1) # FuncId +	std 4, 40(1) +	std 5, 48(1) +	std 6, 56(1) +	std 7, 64(1) +	std 8, 72(1) +	std 9, 80(1) +	std 10, 88(1) +	addi 4, 1, 96 +	stxsdx 1, 0, 4 +	addi 4, 1, 104 +	stxsdx 2, 0, 4 +	addi 4, 1, 112 +	stxsdx 3, 0, 4 +	addi 4, 1, 120 +	stxsdx 4, 0, 4 +	addi 4, 1, 128 +	stxsdx 5, 0, 4 +	addi 4, 1, 136 +	stxsdx 6, 0, 4 +	addi 4, 1, 144 +	stxsdx 7, 0, 4 +	addi 4, 1, 152 +	stxsdx 8, 0, 4 +	addi 4, 1, 160 +	stxsdx 9, 0, 4 +	addi 4, 1, 168 +	stxsdx 10, 0, 4 +	addi 4, 1, 176 +	stxsdx 11, 0, 4 +	addi 4, 1, 184 +	stxsdx 12, 0, 4 +	addi 4, 1, 192 +	stxsdx 13, 0, 4 +	addi 4, 1, 200 +	stxvd2x 34, 0, 4 +	addi 4, 1, 216 +	stxvd2x 35, 0, 4 +	addi 4, 1, 232 +	stxvd2x 36, 0, 4 +	addi 4, 1, 248 +	stxvd2x 37, 0, 4 +	addi 4, 1, 264 +	stxvd2x 38, 0, 4 +	addi 4, 1, 280 +	stxvd2x 39, 0, 4 +	addi 4, 1, 296 +	stxvd2x 40, 0, 4 +	addi 4, 1, 312 +	stxvd2x 41, 0, 4 +	addi 4, 1, 328 +	stxvd2x 42, 0, 4 +	addi 4, 1, 344 +	stxvd2x 43, 0, 4 +	addi 4, 1, 360 +	stxvd2x 44, 0, 4 +	addi 4, 1, 376 +	stxvd2x 45, 0, 4 +	std 2, 392(1) +	mflr 0 +	std 0, 400(1) + +	li 4, 0 +	bl _ZN6__xray23CallXRayPatchedFunctionEi13XRayEntryType +	nop + +	addi 4, 1, 96 +	lxsdx 1, 0, 4 +	addi 4, 1, 104 +	lxsdx 2, 0, 4 +	addi 4, 1, 112 +	lxsdx 3, 0, 4 +	addi 4, 1, 120 +	lxsdx 4, 0, 4 +	addi 4, 1, 128 +	lxsdx 5, 0, 4 +	addi 4, 1, 136 +	lxsdx 6, 0, 4 +	addi 4, 1, 144 +	lxsdx 7, 0, 4 +	addi 4, 1, 152 +	lxsdx 8, 0, 4 +	addi 4, 1, 160 +	lxsdx 9, 0, 4 +	addi 4, 1, 168 +	lxsdx 10, 0, 4 +	addi 4, 1, 176 +	lxsdx 11, 0, 4 +	addi 4, 1, 184 +	lxsdx 12, 0, 4 +	addi 4, 1, 192 +	lxsdx 13, 0, 4 +	addi 4, 1, 200 +	lxvd2x 34, 0, 4 +	addi 4, 1, 216 +	lxvd2x 35, 0, 4 +	addi 4, 1, 232 +	lxvd2x 36, 0, 4 +	addi 4, 1, 248 +	lxvd2x 37, 0, 4 +	addi 4, 1, 264 +	lxvd2x 38, 0, 4 +	addi 4, 1, 280 +	lxvd2x 39, 0, 4 +	addi 4, 1, 296 +	lxvd2x 40, 0, 4 +	addi 4, 1, 312 +	lxvd2x 41, 0, 4 +	addi 4, 1, 328 +	lxvd2x 42, 0, 4 +	addi 4, 1, 344 +	lxvd2x 43, 0, 4 +	addi 4, 1, 360 +	lxvd2x 44, 0, 4 +	addi 4, 1, 376 +	lxvd2x 45, 0, 4 +	ld 0, 400(1) +	mtlr 0 +	ld 2, 392(1) +	ld 3, 32(1) +	ld 4, 40(1) +	ld 5, 48(1) +	ld 6, 56(1) +	ld 7, 64(1) +	ld 8, 72(1) +	ld 9, 80(1) +	ld 10, 88(1) + +	addi 1, 1, 408 +	ld 0, 16(1) +	blr + +	.globl	__xray_FunctionExit +	.p2align	4 +__xray_FunctionExit: +	std 0, 16(1) +	stdu 1, -256(1) +# Spill r3-r4, f1-f8, and vsr34-vsr41, which are return registers. +# If this appears to be slow, the caller needs to pass in number of generic, +# floating point, and vector parameters, so that we only spill those live ones. 
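+# Frame layout (256 bytes): r3-r4 at offsets 32..40, f1-f8 at 48..104,
+# vs34-vs41 at 112..224, the TOC pointer at 240, and the saved LR at 248,
+# again overwriting the slot from which the function ID is read below.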
+	std 3, 32(1) +	ld 3, 248(1) # FuncId +	std 4, 40(1) +	addi 4, 1, 48 +	stxsdx 1, 0, 4 +	addi 4, 1, 56 +	stxsdx 2, 0, 4 +	addi 4, 1, 64 +	stxsdx 3, 0, 4 +	addi 4, 1, 72 +	stxsdx 4, 0, 4 +	addi 4, 1, 80 +	stxsdx 5, 0, 4 +	addi 4, 1, 88 +	stxsdx 6, 0, 4 +	addi 4, 1, 96 +	stxsdx 7, 0, 4 +	addi 4, 1, 104 +	stxsdx 8, 0, 4 +	addi 4, 1, 112 +	stxvd2x 34, 0, 4 +	addi 4, 1, 128 +	stxvd2x 35, 0, 4 +	addi 4, 1, 144 +	stxvd2x 36, 0, 4 +	addi 4, 1, 160 +	stxvd2x 37, 0, 4 +	addi 4, 1, 176 +	stxvd2x 38, 0, 4 +	addi 4, 1, 192 +	stxvd2x 39, 0, 4 +	addi 4, 1, 208 +	stxvd2x 40, 0, 4 +	addi 4, 1, 224 +	stxvd2x 41, 0, 4 +	std 2, 240(1) +	mflr 0 +	std 0, 248(1) + +	li 4, 1 +	bl _ZN6__xray23CallXRayPatchedFunctionEi13XRayEntryType +	nop + +	addi 4, 1, 48 +	lxsdx 1, 0, 4 +	addi 4, 1, 56 +	lxsdx 2, 0, 4 +	addi 4, 1, 64 +	lxsdx 3, 0, 4 +	addi 4, 1, 72 +	lxsdx 4, 0, 4 +	addi 4, 1, 80 +	lxsdx 5, 0, 4 +	addi 4, 1, 88 +	lxsdx 6, 0, 4 +	addi 4, 1, 96 +	lxsdx 7, 0, 4 +	addi 4, 1, 104 +	lxsdx 8, 0, 4 +	addi 4, 1, 112 +	lxvd2x 34, 0, 4 +	addi 4, 1, 128 +	lxvd2x 35, 0, 4 +	addi 4, 1, 144 +	lxvd2x 36, 0, 4 +	addi 4, 1, 160 +	lxvd2x 37, 0, 4 +	addi 4, 1, 176 +	lxvd2x 38, 0, 4 +	addi 4, 1, 192 +	lxvd2x 39, 0, 4 +	addi 4, 1, 208 +	lxvd2x 40, 0, 4 +	addi 4, 1, 224 +	lxvd2x 41, 0, 4 +	ld 0, 248(1) +	mtlr 0 +	ld 2, 240(1) +	ld 3, 32(1) +	ld 4, 40(1) + +	addi 1, 1, 256 +	ld 0, 16(1) +	blr diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_x86_64.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_x86_64.S new file mode 100644 index 000000000000..01098f60eeab --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_x86_64.S @@ -0,0 +1,311 @@ +//===-- xray_trampoline_x86.s -----------------------------------*- ASM -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the X86-specific assembler for the trampolines. +// +//===----------------------------------------------------------------------===// + +#include "../builtins/assembly.h" +#include "../sanitizer_common/sanitizer_asm.h" + +// XRay trampolines which are not produced by intrinsics are not System V AMD64 +// ABI compliant because they are called with a stack that is always misaligned +// by 8 bytes with respect to a 16 bytes alignment. This is because they are +// called immediately after the call to, or immediately before returning from, +// the function being instrumented. This saves space in the patch point, but +// misaligns the stack by 8 bytes. + +.macro ALIGN_STACK_16B +#if defined(__APPLE__) +	subq	$$8, %rsp +#else +	subq	$8, %rsp +#endif +	CFI_ADJUST_CFA_OFFSET(8) +.endm + +.macro RESTORE_STACK_ALIGNMENT +#if defined(__APPLE__) +	addq	$$8, %rsp +#else +	addq	$8, %rsp +#endif +	CFI_ADJUST_CFA_OFFSET(-8) +.endm + +// This macro should lower the stack pointer by an odd multiple of 8. 
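+// pushfq (8 bytes) plus the 240-byte spill area below comes to 248, an odd
+// multiple of 8: the trampolines are entered with rsp on a 16-byte boundary
+// (8 bytes off from a normal function entry), so after these saves rsp is
+// again 8 bytes off, and the ALIGN_STACK_16B adjustment defined above restores
+// the 16-byte alignment the ABI requires at the handler call sites.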
+.macro SAVE_REGISTERS +	pushfq +	CFI_ADJUST_CFA_OFFSET(8) +	subq $240, %rsp +	CFI_ADJUST_CFA_OFFSET(240) +	movq %rbp, 232(%rsp) +	movupd	%xmm0, 216(%rsp) +	movupd	%xmm1, 200(%rsp) +	movupd	%xmm2, 184(%rsp) +	movupd	%xmm3, 168(%rsp) +	movupd	%xmm4, 152(%rsp) +	movupd	%xmm5, 136(%rsp) +	movupd	%xmm6, 120(%rsp) +	movupd	%xmm7, 104(%rsp) +	movq	%rdi, 96(%rsp) +	movq	%rax, 88(%rsp) +	movq	%rdx, 80(%rsp) +	movq	%rsi, 72(%rsp) +	movq	%rcx, 64(%rsp) +	movq	%r8, 56(%rsp) +	movq	%r9, 48(%rsp) +	movq  %r10, 40(%rsp) +	movq  %r11, 32(%rsp) +	movq  %r12, 24(%rsp) +	movq  %r13, 16(%rsp) +	movq  %r14, 8(%rsp) +	movq  %r15, 0(%rsp) +.endm + +.macro RESTORE_REGISTERS +	movq  232(%rsp), %rbp +	movupd	216(%rsp), %xmm0 +	movupd	200(%rsp), %xmm1 +	movupd	184(%rsp), %xmm2 +	movupd	168(%rsp), %xmm3 +	movupd	152(%rsp), %xmm4 +	movupd	136(%rsp), %xmm5 +	movupd	120(%rsp) , %xmm6 +	movupd	104(%rsp) , %xmm7 +	movq	96(%rsp), %rdi +	movq	88(%rsp), %rax +	movq	80(%rsp), %rdx +	movq	72(%rsp), %rsi +	movq	64(%rsp), %rcx +	movq	56(%rsp), %r8 +	movq	48(%rsp), %r9 +	movq  40(%rsp), %r10 +	movq  32(%rsp), %r11 +	movq  24(%rsp), %r12 +	movq  16(%rsp), %r13 +	movq  8(%rsp), %r14 +	movq  0(%rsp), %r15 +	addq	$240, %rsp +	CFI_ADJUST_CFA_OFFSET(-240) +	popfq +	CFI_ADJUST_CFA_OFFSET(-8) +.endm + +	.text +#if !defined(__APPLE__) +	.section .text +	.file "xray_trampoline_x86.S" +#else +	.section __TEXT,__text +#endif + +//===----------------------------------------------------------------------===// + +	.globl ASM_SYMBOL(__xray_FunctionEntry) +	ASM_HIDDEN(__xray_FunctionEntry) +	.align 16, 0x90 +	ASM_TYPE_FUNCTION(__xray_FunctionEntry) +# LLVM-MCA-BEGIN __xray_FunctionEntry +ASM_SYMBOL(__xray_FunctionEntry): +	CFI_STARTPROC +	SAVE_REGISTERS +	ALIGN_STACK_16B + +	// This load has to be atomic, it's concurrent with __xray_patch(). +	// On x86/amd64, a simple (type-aligned) MOV instruction is enough. +	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax +	testq	%rax, %rax +	je	LOCAL_LABEL(tmp0) + +	// The patched function prologue puts its xray_instr_map index into %r10d. +	movl	%r10d, %edi +	xor	%esi,%esi +	callq	*%rax + +LOCAL_LABEL(tmp0): +	RESTORE_STACK_ALIGNMENT +	RESTORE_REGISTERS +	retq +# LLVM-MCA-END +	ASM_SIZE(__xray_FunctionEntry) +	CFI_ENDPROC + +//===----------------------------------------------------------------------===// + +	.globl ASM_SYMBOL(__xray_FunctionExit) +	ASM_HIDDEN(__xray_FunctionExit) +	.align 16, 0x90 +	ASM_TYPE_FUNCTION(__xray_FunctionExit) +# LLVM-MCA-BEGIN __xray_FunctionExit +ASM_SYMBOL(__xray_FunctionExit): +	CFI_STARTPROC +	ALIGN_STACK_16B + +	// Save the important registers first. Since we're assuming that this +	// function is only jumped into, we only preserve the registers for +	// returning. +	subq	$64, %rsp +	CFI_ADJUST_CFA_OFFSET(64) +	movq  %rbp, 48(%rsp) +	movupd	%xmm0, 32(%rsp) +	movupd	%xmm1, 16(%rsp) +	movq	%rax, 8(%rsp) +	movq	%rdx, 0(%rsp) +	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax +	testq %rax,%rax +	je	LOCAL_LABEL(tmp2) + +	movl	%r10d, %edi +	movl	$1, %esi +	callq	*%rax + +LOCAL_LABEL(tmp2): +	// Restore the important registers. 
+	movq  48(%rsp), %rbp +	movupd	32(%rsp), %xmm0 +	movupd	16(%rsp), %xmm1 +	movq	8(%rsp), %rax +	movq	0(%rsp), %rdx +	addq	$64, %rsp +	CFI_ADJUST_CFA_OFFSET(-64) + +	RESTORE_STACK_ALIGNMENT +	retq +# LLVM-MCA-END +	ASM_SIZE(__xray_FunctionExit) +	CFI_ENDPROC + +//===----------------------------------------------------------------------===// + +	.globl ASM_SYMBOL(__xray_FunctionTailExit) +	ASM_HIDDEN(__xray_FunctionTailExit) +	.align 16, 0x90 +	ASM_TYPE_FUNCTION(__xray_FunctionTailExit) +# LLVM-MCA-BEGIN __xray_FunctionTailExit +ASM_SYMBOL(__xray_FunctionTailExit): +	CFI_STARTPROC +	SAVE_REGISTERS +	ALIGN_STACK_16B + +	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax +	testq %rax,%rax +	je	LOCAL_LABEL(tmp4) + +	movl	%r10d, %edi +	movl	$2, %esi +	callq	*%rax + +LOCAL_LABEL(tmp4): +	RESTORE_STACK_ALIGNMENT +	RESTORE_REGISTERS +	retq +# LLVM-MCA-END +	ASM_SIZE(__xray_FunctionTailExit) +	CFI_ENDPROC + +//===----------------------------------------------------------------------===// + +	.globl ASM_SYMBOL(__xray_ArgLoggerEntry) +	ASM_HIDDEN(__xray_ArgLoggerEntry) +	.align 16, 0x90 +	ASM_TYPE_FUNCTION(__xray_ArgLoggerEntry) +# LLVM-MCA-BEGIN __xray_ArgLoggerEntry +ASM_SYMBOL(__xray_ArgLoggerEntry): +	CFI_STARTPROC +	SAVE_REGISTERS +	ALIGN_STACK_16B + +	// Again, these function pointer loads must be atomic; MOV is fine. +	movq	ASM_SYMBOL(_ZN6__xray13XRayArgLoggerE)(%rip), %rax +	testq	%rax, %rax +	jne	LOCAL_LABEL(arg1entryLog) + +	// If [arg1 logging handler] not set, defer to no-arg logging. +	movq	ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax +	testq	%rax, %rax +	je	LOCAL_LABEL(arg1entryFail) + +LOCAL_LABEL(arg1entryLog): + +	// First argument will become the third +	movq	%rdi, %rdx + +	// XRayEntryType::LOG_ARGS_ENTRY into the second +	mov	$0x3, %esi + +	// 32-bit function ID becomes the first +	movl	%r10d, %edi + +	callq	*%rax + +LOCAL_LABEL(arg1entryFail): +	RESTORE_STACK_ALIGNMENT +	RESTORE_REGISTERS +	retq +# LLVM-MCA-END +	ASM_SIZE(__xray_ArgLoggerEntry) +	CFI_ENDPROC + +//===----------------------------------------------------------------------===// + +// __xray_*Event have default visibility so that they can be referenced by user +// DSOs that do not link against the runtime. +	.global ASM_SYMBOL(__xray_CustomEvent) +	.align 16, 0x90 +	ASM_TYPE_FUNCTION(__xray_CustomEvent) +# LLVM-MCA-BEGIN __xray_CustomEvent +ASM_SYMBOL(__xray_CustomEvent): +	CFI_STARTPROC +	SAVE_REGISTERS + +	// We take two arguments to this trampoline, which should be in rdi	and rsi +	// already. +	movq ASM_SYMBOL(_ZN6__xray22XRayPatchedCustomEventE)(%rip), %rax +	testq %rax,%rax +	je LOCAL_LABEL(customEventCleanup) + +	callq	*%rax + +LOCAL_LABEL(customEventCleanup): +	RESTORE_REGISTERS +	retq +# LLVM-MCA-END +	ASM_SIZE(__xray_CustomEvent) +	CFI_ENDPROC + +//===----------------------------------------------------------------------===// + +	.global ASM_SYMBOL(__xray_TypedEvent) +	.align 16, 0x90 +	ASM_TYPE_FUNCTION(__xray_TypedEvent) +# LLVM-MCA-BEGIN __xray_TypedEvent +ASM_SYMBOL(__xray_TypedEvent): +	CFI_STARTPROC +	SAVE_REGISTERS + +	// We pass three arguments to this trampoline, which should be in rdi, rsi +	// and rdx without our intervention. 
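+	// (rdi carries the event type, rsi the payload pointer, and rdx the
+	// payload size; the handler is invoked with that same triple.)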
+	movq ASM_SYMBOL(_ZN6__xray21XRayPatchedTypedEventE)(%rip), %rax +	testq %rax,%rax +	je LOCAL_LABEL(typedEventCleanup) + +	callq	*%rax + +LOCAL_LABEL(typedEventCleanup): +	RESTORE_REGISTERS +	retq +# LLVM-MCA-END +	ASM_SIZE(__xray_TypedEvent) +	CFI_ENDPROC + +//===----------------------------------------------------------------------===// + +NO_EXEC_STACK_DIRECTIVE diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_tsc.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_tsc.h new file mode 100644 index 000000000000..e1cafe1bf11d --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_tsc.h @@ -0,0 +1,91 @@ +//===-- xray_tsc.h ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_EMULATE_TSC_H +#define XRAY_EMULATE_TSC_H + +#include "sanitizer_common/sanitizer_common.h" + +namespace __xray { +static constexpr uint64_t NanosecondsPerSecond = 1000ULL * 1000 * 1000; +} + +#if SANITIZER_FUCHSIA +#include <zircon/syscalls.h> + +namespace __xray { + +inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } + +ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT { +  CPU = 0; +  return _zx_ticks_get(); +} + +inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { +  return _zx_ticks_per_second(); +} + +} // namespace __xray + +#else // SANITIZER_FUCHSIA + +#if defined(__x86_64__) +#include "xray_x86_64.inc" +#elif defined(__powerpc64__) +#include "xray_powerpc64.inc" +#elif defined(__arm__) || defined(__aarch64__) || defined(__mips__) ||         \ +    defined(__hexagon__) || defined(__loongarch_lp64) +// Emulated TSC. +// There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does +//   not have a constant frequency like TSC on x86(_64), it may go faster +//   or slower depending on CPU turbo or power saving mode. Furthermore, +//   to read from CP15 on ARM a kernel modification or a driver is needed. +//   We can not require this from users of compiler-rt. +// So on ARM we use clock_gettime() which gives the result in nanoseconds. +//   To get the measurements per second, we scale this by the number of +//   nanoseconds per second, pretending that the TSC frequency is 1GHz and +//   one TSC tick is 1 nanosecond. +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_internal_defs.h" +#include "xray_defs.h" +#include <cerrno> +#include <cstdint> +#include <time.h> + +namespace __xray { + +inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } + +ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT { +  timespec TS; +  int result = clock_gettime(CLOCK_REALTIME, &TS); +  if (result != 0) { +    Report("clock_gettime(2) returned %d, errno=%d.", result, int(errno)); +    TS.tv_sec = 0; +    TS.tv_nsec = 0; +  } +  CPU = 0; +  return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; +} + +inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { +  return NanosecondsPerSecond; +} + +} // namespace __xray + +#else +#error Target architecture is not supported. 
+#endif // CPU architecture +#endif // SANITIZER_FUCHSIA + +#endif // XRAY_EMULATE_TSC_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_utils.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_utils.cpp new file mode 100644 index 000000000000..5d51df9937c2 --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_utils.cpp @@ -0,0 +1,200 @@ +//===-- xray_utils.cpp ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +//===----------------------------------------------------------------------===// +#include "xray_utils.h" + +#include "sanitizer_common/sanitizer_allocator_internal.h" +#include "sanitizer_common/sanitizer_common.h" +#include "xray_allocator.h" +#include "xray_defs.h" +#include "xray_flags.h" +#include <cstdio> +#include <errno.h> +#include <fcntl.h> +#include <iterator> +#include <new> +#include <stdlib.h> +#include <sys/types.h> +#include <tuple> +#include <unistd.h> +#include <utility> + +#if SANITIZER_FUCHSIA +#include "sanitizer_common/sanitizer_symbolizer_markup_constants.h" + +#include <inttypes.h> +#include <zircon/process.h> +#include <zircon/sanitizer.h> +#include <zircon/status.h> +#include <zircon/syscalls.h> +#endif + +namespace __xray { + +#if SANITIZER_FUCHSIA +constexpr const char* ProfileSinkName = "llvm-xray"; + +LogWriter::~LogWriter() { +  _zx_handle_close(Vmo); +} + +void LogWriter::WriteAll(const char *Begin, const char *End) XRAY_NEVER_INSTRUMENT { +  if (Begin == End) +    return; +  auto TotalBytes = std::distance(Begin, End); + +  const size_t PageSize = flags()->xray_page_size_override > 0 +                              ? flags()->xray_page_size_override +                              : GetPageSizeCached(); +  if (RoundUpTo(Offset, PageSize) != RoundUpTo(Offset + TotalBytes, PageSize)) { +    // Resize the VMO to ensure there's sufficient space for the data. +    zx_status_t Status = _zx_vmo_set_size(Vmo, Offset + TotalBytes); +    if (Status != ZX_OK) { +      Report("Failed to resize VMO: %s\n", _zx_status_get_string(Status)); +      return; +    } +  } + +  // Write the data into VMO. +  zx_status_t Status = _zx_vmo_write(Vmo, Begin, Offset, TotalBytes); +  if (Status != ZX_OK) { +    Report("Failed to write: %s\n", _zx_status_get_string(Status)); +    return; +  } +  Offset += TotalBytes; + +  // Record the data size as a property of the VMO. +  _zx_object_set_property(Vmo, ZX_PROP_VMO_CONTENT_SIZE, +                          &Offset, sizeof(Offset)); +} + +void LogWriter::Flush() XRAY_NEVER_INSTRUMENT { +  // Nothing to do here since WriteAll writes directly into the VMO. +} + +LogWriter *LogWriter::Open() XRAY_NEVER_INSTRUMENT { +  // Create VMO to hold the profile data. +  zx_handle_t Vmo; +  zx_status_t Status = _zx_vmo_create(0, ZX_VMO_RESIZABLE, &Vmo); +  if (Status != ZX_OK) { +    Report("XRay: cannot create VMO: %s\n", _zx_status_get_string(Status)); +    return nullptr; +  } + +  // Get the KOID of the current process to use in the VMO name. 
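+  // (ZX_INFO_HANDLE_BASIC on the process self-handle fills the
+  // zx_info_handle_basic_t below; its koid uniquely identifies this process
+  // kernel-wide.)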
+  zx_info_handle_basic_t Info; +  Status = _zx_object_get_info(_zx_process_self(), ZX_INFO_HANDLE_BASIC, &Info, +                               sizeof(Info), NULL, NULL); +  if (Status != ZX_OK) { +    Report("XRay: cannot get basic info about current process handle: %s\n", +           _zx_status_get_string(Status)); +    return nullptr; +  } + +  // Give the VMO a name including our process KOID so it's easy to spot. +  char VmoName[ZX_MAX_NAME_LEN]; +  internal_snprintf(VmoName, sizeof(VmoName), "%s.%zu", ProfileSinkName, +                    Info.koid); +  _zx_object_set_property(Vmo, ZX_PROP_NAME, VmoName, strlen(VmoName)); + +  // Duplicate the handle since __sanitizer_publish_data consumes it and +  // LogWriter needs to hold onto it. +  zx_handle_t Handle; +  Status =_zx_handle_duplicate(Vmo, ZX_RIGHT_SAME_RIGHTS, &Handle); +  if (Status != ZX_OK) { +    Report("XRay: cannot duplicate VMO handle: %s\n", +           _zx_status_get_string(Status)); +    return nullptr; +  } + +  // Publish the VMO that receives the logging. Note the VMO's contents can +  // grow and change after publication. The contents won't be read out until +  // after the process exits. +  __sanitizer_publish_data(ProfileSinkName, Handle); + +  // Use the dumpfile symbolizer markup element to write the name of the VMO. +  Report("XRay: " FORMAT_DUMPFILE "\n", ProfileSinkName, VmoName); + +  LogWriter *LW = reinterpret_cast<LogWriter *>(InternalAlloc(sizeof(LogWriter))); +  new (LW) LogWriter(Vmo); +  return LW; +} + +void LogWriter::Close(LogWriter *LW) { +  LW->~LogWriter(); +  InternalFree(LW); +} +#else // SANITIZER_FUCHSIA +LogWriter::~LogWriter() { +  internal_close(Fd); +} + +void LogWriter::WriteAll(const char *Begin, const char *End) XRAY_NEVER_INSTRUMENT { +  if (Begin == End) +    return; +  auto TotalBytes = std::distance(Begin, End); +  while (auto Written = write(Fd, Begin, TotalBytes)) { +    if (Written < 0) { +      if (errno == EINTR) +        continue; // Try again. +      Report("Failed to write; errno = %d\n", errno); +      return; +    } +    TotalBytes -= Written; +    if (TotalBytes == 0) +      break; +    Begin += Written; +  } +} + +void LogWriter::Flush() XRAY_NEVER_INSTRUMENT { +  fsync(Fd); +} + +LogWriter *LogWriter::Open() XRAY_NEVER_INSTRUMENT { +  // Open a temporary file once for the log. +  char TmpFilename[256] = {}; +  char TmpWildcardPattern[] = "XXXXXX"; +  auto **Argv = GetArgv(); +  const char *Progname = !Argv ? 
"(unknown)" : Argv[0]; +  const char *LastSlash = internal_strrchr(Progname, '/'); + +  if (LastSlash != nullptr) +    Progname = LastSlash + 1; + +  int NeededLength = internal_snprintf( +      TmpFilename, sizeof(TmpFilename), "%s%s.%s", +      flags()->xray_logfile_base, Progname, TmpWildcardPattern); +  if (NeededLength > int(sizeof(TmpFilename))) { +    Report("XRay log file name too long (%d): %s\n", NeededLength, TmpFilename); +    return nullptr; +  } +  int Fd = mkstemp(TmpFilename); +  if (Fd == -1) { +    Report("XRay: Failed opening temporary file '%s'; not logging events.\n", +           TmpFilename); +    return nullptr; +  } +  if (Verbosity()) +    Report("XRay: Log file in '%s'\n", TmpFilename); + +  LogWriter *LW = allocate<LogWriter>(); +  new (LW) LogWriter(Fd); +  return LW; +} + +void LogWriter::Close(LogWriter *LW) { +  LW->~LogWriter(); +  deallocate(LW); +} +#endif // SANITIZER_FUCHSIA + +} // namespace __xray diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_utils.h b/contrib/llvm-project/compiler-rt/lib/xray/xray_utils.h new file mode 100644 index 000000000000..5dc73d7fa8cd --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_utils.h @@ -0,0 +1,85 @@ +//===-- xray_utils.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Some shared utilities for the XRay runtime implementation. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_UTILS_H +#define XRAY_UTILS_H + +#include <cstddef> +#include <cstdint> +#include <sys/types.h> +#include <utility> + +#include "sanitizer_common/sanitizer_common.h" +#if SANITIZER_FUCHSIA +#include <zircon/types.h> +#endif + +namespace __xray { + +class LogWriter { +public: +#if SANITIZER_FUCHSIA + LogWriter(zx_handle_t Vmo) : Vmo(Vmo) {} +#else +  explicit LogWriter(int Fd) : Fd(Fd) {} +#endif + ~LogWriter(); + + // Write a character range into a log. + void WriteAll(const char *Begin, const char *End); + + void Flush(); + + // Returns a new log instance initialized using the flag-provided values. + static LogWriter *Open(); + // Closes and deallocates the log instance. + static void Close(LogWriter *LogWriter); + +private: +#if SANITIZER_FUCHSIA + zx_handle_t Vmo = ZX_HANDLE_INVALID; + uint64_t Offset = 0; +#else + int Fd = -1; +#endif +}; + +constexpr size_t gcd(size_t a, size_t b) { +  return (b == 0) ? a : gcd(b, a % b); +} + +constexpr size_t lcm(size_t a, size_t b) { return a * b / gcd(a, b); } + +constexpr size_t nearest_boundary(size_t number, size_t multiple) { +  return multiple * ((number / multiple) + ((number % multiple) ? 1 : 0)); +} + +constexpr size_t next_pow2_helper(size_t num, size_t acc) { +  return (1u << acc) >= num ? (1u << acc) : next_pow2_helper(num, acc + 1); +} + +constexpr size_t next_pow2(size_t number) { +  return next_pow2_helper(number, 1); +} + +template <class T> constexpr T &max(T &A, T &B) { return A > B ? A : B; } + +template <class T> constexpr T &min(T &A, T &B) { return A <= B ? 
A : B; } + +constexpr ptrdiff_t diff(uintptr_t A, uintptr_t B) { +  return max(A, B) - min(A, B); +} + +} // namespace __xray + +#endif // XRAY_UTILS_H diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.cpp new file mode 100644 index 000000000000..b9666a40861d --- /dev/null +++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.cpp @@ -0,0 +1,334 @@ +#include "cpuid.h" +#include "sanitizer_common/sanitizer_common.h" +#if !SANITIZER_FUCHSIA +#include "sanitizer_common/sanitizer_posix.h" +#endif +#include "xray_defs.h" +#include "xray_interface_internal.h" + +#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE +#include <sys/types.h> +#include <sys/sysctl.h> +#elif SANITIZER_FUCHSIA +#include <zircon/syscalls.h> +#endif + +#include <atomic> +#include <cstdint> +#include <errno.h> +#include <fcntl.h> +#include <iterator> +#include <limits> +#include <tuple> +#include <unistd.h> + +namespace __xray { + +#if SANITIZER_LINUX +static std::pair<ssize_t, bool> +retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT { +  auto BytesToRead = std::distance(Begin, End); +  ssize_t BytesRead; +  ssize_t TotalBytesRead = 0; +  while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) { +    if (BytesRead == -1) { +      if (errno == EINTR) +        continue; +      Report("Read error; errno = %d\n", errno); +      return std::make_pair(TotalBytesRead, false); +    } + +    TotalBytesRead += BytesRead; +    BytesToRead -= BytesRead; +    Begin += BytesRead; +  } +  return std::make_pair(TotalBytesRead, true); +} + +static bool readValueFromFile(const char *Filename, +                              long long *Value) XRAY_NEVER_INSTRUMENT { +  int Fd = open(Filename, O_RDONLY | O_CLOEXEC); +  if (Fd == -1) +    return false; +  static constexpr size_t BufSize = 256; +  char Line[BufSize] = {}; +  ssize_t BytesRead; +  bool Success; +  std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize); +  close(Fd); +  if (!Success) +    return false; +  const char *End = nullptr; +  long long Tmp = internal_simple_strtoll(Line, &End, 10); +  bool Result = false; +  if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) { +    *Value = Tmp; +    Result = true; +  } +  return Result; +} + +uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { +  long long TSCFrequency = -1; +  if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", +                        &TSCFrequency)) { +    TSCFrequency *= 1000; +  } else if (readValueFromFile( +                 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", +                 &TSCFrequency)) { +    TSCFrequency *= 1000; +  } else { +    Report("Unable to determine CPU frequency for TSC accounting.\n"); +  } +  return TSCFrequency == -1 ? 
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.cpp b/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.cpp
new file mode 100644
index 000000000000..b9666a40861d
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.cpp
@@ -0,0 +1,334 @@
+#include "cpuid.h"
+#include "sanitizer_common/sanitizer_common.h"
+#if !SANITIZER_FUCHSIA
+#include "sanitizer_common/sanitizer_posix.h"
+#endif
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#elif SANITIZER_FUCHSIA
+#include <zircon/syscalls.h>
+#endif
+
+#include <atomic>
+#include <cstdint>
+#include <errno.h>
+#include <fcntl.h>
+#include <iterator>
+#include <limits>
+#include <tuple>
+#include <unistd.h>
+
+namespace __xray {
+
+#if SANITIZER_LINUX
+static std::pair<ssize_t, bool>
+retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
+  auto BytesToRead = std::distance(Begin, End);
+  ssize_t BytesRead;
+  ssize_t TotalBytesRead = 0;
+  while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
+    if (BytesRead == -1) {
+      if (errno == EINTR)
+        continue;
+      Report("Read error; errno = %d\n", errno);
+      return std::make_pair(TotalBytesRead, false);
+    }
+
+    TotalBytesRead += BytesRead;
+    BytesToRead -= BytesRead;
+    Begin += BytesRead;
+  }
+  return std::make_pair(TotalBytesRead, true);
+}
+
+static bool readValueFromFile(const char *Filename,
+                              long long *Value) XRAY_NEVER_INSTRUMENT {
+  int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
+  if (Fd == -1)
+    return false;
+  static constexpr size_t BufSize = 256;
+  char Line[BufSize] = {};
+  ssize_t BytesRead;
+  bool Success;
+  std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
+  close(Fd);
+  if (!Success)
+    return false;
+  const char *End = nullptr;
+  long long Tmp = internal_simple_strtoll(Line, &End, 10);
+  bool Result = false;
+  if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
+    *Value = Tmp;
+    Result = true;
+  }
+  return Result;
+}
+
+uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+  long long TSCFrequency = -1;
+  if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
+                        &TSCFrequency)) {
+    TSCFrequency *= 1000;
+  } else if (readValueFromFile(
+                 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
+                 &TSCFrequency)) {
+    TSCFrequency *= 1000;
+  } else {
+    Report("Unable to determine CPU frequency for TSC accounting.\n");
+  }
+  return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
+}
+#elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
+uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+  long long TSCFrequency = -1;
+  size_t tscfreqsz = sizeof(TSCFrequency);
+#if SANITIZER_APPLE
+  if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency,
+                            &tscfreqsz, NULL, 0) != -1) {
+#else
+  if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz,
+                            NULL, 0) != -1) {
+#endif
+    return static_cast<uint64_t>(TSCFrequency);
+  } else {
+    Report("Unable to determine CPU frequency for TSC accounting.\n");
+  }
+  return 0;
+}
+#elif !SANITIZER_FUCHSIA
+uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+  /* Not supported */
+  return 0;
+}
+#endif
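On every path getTSCFrequency reports the TSC rate in ticks per second (the Linux sysfs values are in kHz, hence the multiplication by 1000) and returns 0 when the rate cannot be determined. A caller turning raw tick deltas into wall-clock time could do so as sketched below; ticksToNs is a hypothetical helper, not part of the runtime:

    #include <cstdint>

    // Convert a TSC tick delta to nanoseconds. A zero frequency is the
    // "unknown" sentinel from getTSCFrequency, so report 0 in that case.
    inline uint64_t ticksToNs(uint64_t TickDelta, uint64_t TicksPerSec) {
      if (TicksPerSec == 0)
        return 0;
      // Split into whole seconds and a remainder so the multiplication by
      // 1e9 cannot overflow for realistic frequencies.
      uint64_t Seconds = TickDelta / TicksPerSec;
      uint64_t Rest = TickDelta % TicksPerSec;
      return Seconds * 1000000000ull + Rest * 1000000000ull / TicksPerSec;
    }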
+
+static constexpr uint8_t CallOpCode = 0xe8;
+static constexpr uint16_t MovR10Seq = 0xba41;
+static constexpr uint16_t Jmp9Seq = 0x09eb;
+static constexpr uint16_t Jmp20Seq = 0x14eb;
+static constexpr uint16_t Jmp15Seq = 0x0feb;
+static constexpr uint8_t JmpOpCode = 0xe9;
+static constexpr uint8_t RetOpCode = 0xc3;
+static constexpr uint16_t NopwSeq = 0x9066;
+
+static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
+static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+                        const XRaySledEntry &Sled,
+                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+  // Here we do the dance of replacing the following sled:
+  //
+  // xray_sled_n:
+  //   jmp +9
+  //   <9 byte nop>
+  //
+  // With the following:
+  //
+  //   mov r10d, <function id>
+  //   call <relative 32-bit offset to entry trampoline>
+  //
+  // We need to do this in the following order:
+  //
+  // 1. Put the function id first, 2 bytes from the start of the sled (just
+  // after the 2-byte jmp instruction).
+  // 2. Put the call opcode 6 bytes from the start of the sled.
+  // 3. Put the relative offset 7 bytes from the start of the sled.
+  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
+  // opcode and first operand.
+  //
+  // The prerequisite is to compute the relative offset to the trampoline's
+  // address.
+  const uint64_t Address = Sled.address();
+  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
+                             (static_cast<int64_t>(Address) + 11);
+  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+    Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
+           reinterpret_cast<void *>(Trampoline),
+           reinterpret_cast<void *>(Address));
+    return false;
+  }
+  if (Enable) {
+    *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
+    *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
+    *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
+        std::memory_order_release);
+  } else {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
+        std::memory_order_release);
+    // FIXME: Write out the nops still?
+  }
+  return true;
+}
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // Here we do the dance of replacing the following sled:
+  //
+  // xray_sled_n:
+  //   ret
+  //   <10 byte nop>
+  //
+  // With the following:
+  //
+  //   mov r10d, <function id>
+  //   jmp <relative 32-bit offset to exit trampoline>
+  //
+  // 1. Put the function id first, 2 bytes from the start of the sled (past
+  // the 1-byte ret instruction).
+  // 2. Put the jmp opcode 6 bytes from the start of the sled.
+  // 3. Put the relative offset 7 bytes from the start of the sled.
+  // 4. Do an atomic write over the ret instruction for the "mov r10d"
+  // opcode and first operand.
+  //
+  // The prerequisite is to compute the relative offset to the
+  // __xray_FunctionExit function's address.
+  const uint64_t Address = Sled.address();
+  int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
+                             (static_cast<int64_t>(Address) + 11);
+  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+    Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
+           reinterpret_cast<void *>(__xray_FunctionExit),
+           reinterpret_cast<void *>(Address));
+    return false;
+  }
+  if (Enable) {
+    *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
+    *reinterpret_cast<uint8_t *>(Address + 6) = JmpOpCode;
+    *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
+        std::memory_order_release);
+  } else {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint8_t> *>(Address), RetOpCode,
+        std::memory_order_release);
+    // FIXME: Write out the nops still?
+  }
+  return true;
+}
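The patched entry sequence occupies exactly 11 bytes: two for the mov r10d opcode bytes (41 ba, stored via the 16-bit MovR10Seq), four for the function id, one for the call opcode (e8), and four for its rel32 operand. x86 counts a rel32 displacement from the end of the call instruction, which is why the offset is computed against Address + 11 and must fit in a signed 32-bit range. A hypothetical standalone check mirroring the guard above:

    #include <cstdint>
    #include <limits>

    // Would a call at the end of the 11-byte patched sled reach Target?
    bool fitsInRel32(uint64_t SledAddress, uint64_t Target) {
      int64_t Disp =
          static_cast<int64_t>(Target) - (static_cast<int64_t>(SledAddress) + 11);
      return Disp >= std::numeric_limits<int32_t>::min() &&
             Disp <= std::numeric_limits<int32_t>::max();
    }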
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // Here we do the dance of replacing the tail call sled with a similar
+  // sequence as the entry sled, but calling the tail exit trampoline instead.
+  const uint64_t Address = Sled.address();
+  int64_t TrampolineOffset =
+      reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
+      (static_cast<int64_t>(Address) + 11);
+  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+    Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
+           reinterpret_cast<void *>(__xray_FunctionTailExit),
+           reinterpret_cast<void *>(Address));
+    return false;
+  }
+  if (Enable) {
+    *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
+    *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
+    *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
+        std::memory_order_release);
+  } else {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
+        std::memory_order_release);
+    // FIXME: Write out the nops still?
+  }
+  return true;
+}
+
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // Here we do the dance of replacing the following sled:
+  //
+  // xray_sled_n:
+  //   jmp +15          // 2 bytes
+  //   ...
+  //
+  // With the following:
+  //
+  //   nopw             // 2 bytes
+  //   ...
+  //
+  // The "unpatch" should just turn the 'nopw' back to a 'jmp +15'.
+  const uint64_t Address = Sled.address();
+  if (Enable) {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
+        std::memory_order_release);
+  } else {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp15Seq,
+        std::memory_order_release);
+  }
+  return false;
+}
+
+bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
+                     const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // Here we do the dance of replacing the following sled:
+  //
+  // xray_sled_n:
+  //   jmp +20          // 2-byte instruction
+  //   ...
+  //
+  // With the following:
+  //
+  //   nopw             // 2 bytes
+  //   ...
+  //
+  // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
+  // The 20-byte sled stashes three argument registers, calls the trampoline,
+  // unstashes the registers and returns. If the arguments are already in
+  // the correct registers, the stashing and unstashing become equivalently
+  // sized nops.
+  const uint64_t Address = Sled.address();
+  if (Enable) {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
+        std::memory_order_release);
+  } else {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq,
+        std::memory_order_release);
+  }
+  return false;
+}
+
+#if !SANITIZER_FUCHSIA
+// We determine whether the CPU we're running on has the correct features we
+// need. On x86_64 this means rdtscp support.
+bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
+  unsigned int EAX, EBX, ECX, EDX;
+
+  // We check whether rdtscp support is enabled. According to the x86_64
+  // manual, the level should be set at 0x80000001, and we should check bit 27
+  // in EDX. That's 0x8000000 (or 1u << 27).
+  __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX)
+                       : "0"(0x80000001));
+  if (!(EDX & (1u << 27))) {
+    Report("Missing rdtscp support.\n");
+    return false;
+  }
+  // Also check whether we can determine the CPU frequency, since if we
+  // cannot, we should use the emulated TSC instead.
+  if (!getTSCFrequency()) {
+    Report("Unable to determine CPU frequency.\n");
+    return false;
+  }
+  return true;
+}
+#endif
+
+} // namespace __xray
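probeRequiredCPUFeatures drives the CPUID instruction by hand through inline assembly. Both GCC and Clang also ship a <cpuid.h> wrapper, __get_cpuid, which performs the same query and additionally verifies that the requested extended leaf exists; a sketch of the equivalent probe (hasRdtscp is illustrative, not part of the runtime):

    #include <cpuid.h>

    bool hasRdtscp() {
      unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
      // __get_cpuid returns 0 if leaf 0x80000001 is not supported at all.
      if (!__get_cpuid(0x80000001, &EAX, &EBX, &ECX, &EDX))
        return false;
      return (EDX & (1u << 27)) != 0; // EDX bit 27: RDTSCP
    }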
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.inc b/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.inc
new file mode 100644
index 000000000000..dc71fb87f63d
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.inc
@@ -0,0 +1,32 @@
+//===-- xray_x86_64.inc -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstdint>
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "xray_defs.h"
+
+namespace __xray {
+
+ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+  unsigned LongCPU;
+  unsigned long Rax, Rdx;
+  __asm__ __volatile__("rdtscp\n" : "=a"(Rax), "=d"(Rdx), "=c"(LongCPU) ::);
+  CPU = LongCPU;
+  return (Rdx << 32) + Rax;
+}
+
+uint64_t getTSCFrequency();
+
+bool probeRequiredCPUFeatures();
+
+} // namespace __xray
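readTSC issues rdtscp, which loads the 64-bit timestamp into EDX:EAX and the current value of IA32_TSC_AUX (which the OS initializes to identify the executing processor) into ECX; the shift-and-add reassembles the two 32-bit halves. A minimal usage sketch follows; read_tsc restates the same sequence so the example compiles on its own, and note that tick deltas are only directly meaningful when both reads land on the same CPU:

    #include <cstdint>
    #include <cstdio>

    static inline uint64_t read_tsc(uint8_t &CPU) {
      unsigned LongCPU;
      unsigned long Rax, Rdx;
      __asm__ __volatile__("rdtscp\n" : "=a"(Rax), "=d"(Rdx), "=c"(LongCPU) ::);
      CPU = LongCPU;
      return (Rdx << 32) + Rax;
    }

    int main() {
      uint8_t StartCPU = 0, EndCPU = 0;
      uint64_t Start = read_tsc(StartCPU);
      // ... region being timed ...
      uint64_t End = read_tsc(EndCPU);
      if (StartCPU == EndCPU)
        std::printf("elapsed: %llu ticks on CPU %u\n",
                    static_cast<unsigned long long>(End - Start), StartCPU);
      return 0;
    }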
