summaryrefslogtreecommitdiff
path: root/lib/xray/xray_x86_64.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/xray/xray_x86_64.cpp')
-rw-r--r--lib/xray/xray_x86_64.cpp353
1 files changed, 353 insertions, 0 deletions
diff --git a/lib/xray/xray_x86_64.cpp b/lib/xray/xray_x86_64.cpp
new file mode 100644
index 0000000000000..e63ee1b3bd02e
--- /dev/null
+++ b/lib/xray/xray_x86_64.cpp
@@ -0,0 +1,353 @@
+#include "cpuid.h"
+#include "sanitizer_common/sanitizer_common.h"
+#if !SANITIZER_FUCHSIA
+#include "sanitizer_common/sanitizer_posix.h"
+#endif
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
+#include <sys/types.h>
+#if SANITIZER_OPENBSD
+#include <sys/time.h>
+#include <machine/cpu.h>
+#endif
+#include <sys/sysctl.h>
+#elif SANITIZER_FUCHSIA
+#include <zircon/syscalls.h>
+#endif
+
+#include <atomic>
+#include <cstdint>
+#include <errno.h>
+#include <fcntl.h>
+#include <iterator>
+#include <limits>
+#include <tuple>
+#include <unistd.h>
+
+namespace __xray {
+
+#if SANITIZER_LINUX
+static std::pair<ssize_t, bool>
+retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
+ auto BytesToRead = std::distance(Begin, End);
+ ssize_t BytesRead;
+ ssize_t TotalBytesRead = 0;
+ while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
+ if (BytesRead == -1) {
+ if (errno == EINTR)
+ continue;
+ Report("Read error; errno = %d\n", errno);
+ return std::make_pair(TotalBytesRead, false);
+ }
+
+ TotalBytesRead += BytesRead;
+ BytesToRead -= BytesRead;
+ Begin += BytesRead;
+ }
+ return std::make_pair(TotalBytesRead, true);
+}
+
+static bool readValueFromFile(const char *Filename,
+ long long *Value) XRAY_NEVER_INSTRUMENT {
+ int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
+ if (Fd == -1)
+ return false;
+ static constexpr size_t BufSize = 256;
+ char Line[BufSize] = {};
+ ssize_t BytesRead;
+ bool Success;
+ std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
+ close(Fd);
+ if (!Success)
+ return false;
+ const char *End = nullptr;
+ long long Tmp = internal_simple_strtoll(Line, &End, 10);
+ bool Result = false;
+ if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
+ *Value = Tmp;
+ Result = true;
+ }
+ return Result;
+}
+
+uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+ long long TSCFrequency = -1;
+ if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
+ &TSCFrequency)) {
+ TSCFrequency *= 1000;
+ } else if (readValueFromFile(
+ "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
+ &TSCFrequency)) {
+ TSCFrequency *= 1000;
+ } else {
+ Report("Unable to determine CPU frequency for TSC accounting.\n");
+ }
+ return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
+}
+#elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
+uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+ long long TSCFrequency = -1;
+ size_t tscfreqsz = sizeof(TSCFrequency);
+#if SANITIZER_OPENBSD
+ int Mib[2] = { CTL_MACHDEP, CPU_TSCFREQ };
+ if (internal_sysctl(Mib, 2, &TSCFrequency, &tscfreqsz, NULL, 0) != -1) {
+#elif SANITIZER_MAC
+ if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency,
+ &tscfreqsz, NULL, 0) != -1) {
+
+#else
+ if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz,
+ NULL, 0) != -1) {
+#endif
+ return static_cast<uint64_t>(TSCFrequency);
+ } else {
+ Report("Unable to determine CPU frequency for TSC accounting.\n");
+ }
+
+ return 0;
+}
+#elif !SANITIZER_FUCHSIA
+uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
+ /* Not supported */
+ return 0;
+}
+#endif
+
+static constexpr uint8_t CallOpCode = 0xe8;
+static constexpr uint16_t MovR10Seq = 0xba41;
+static constexpr uint16_t Jmp9Seq = 0x09eb;
+static constexpr uint16_t Jmp20Seq = 0x14eb;
+static constexpr uint16_t Jmp15Seq = 0x0feb;
+static constexpr uint8_t JmpOpCode = 0xe9;
+static constexpr uint8_t RetOpCode = 0xc3;
+static constexpr uint16_t NopwSeq = 0x9066;
+
+static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
+static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+ // Here we do the dance of replacing the following sled:
+ //
+ // xray_sled_n:
+ // jmp +9
+ // <9 byte nop>
+ //
+ // With the following:
+ //
+ // mov r10d, <function id>
+ // call <relative 32bit offset to entry trampoline>
+ //
+ // We need to do this in the following order:
+ //
+ // 1. Put the function id first, 2 bytes from the start of the sled (just
+ // after the 2-byte jmp instruction).
+ // 2. Put the call opcode 6 bytes from the start of the sled.
+ // 3. Put the relative offset 7 bytes from the start of the sled.
+ // 4. Do an atomic write over the jmp instruction for the "mov r10d"
+ // opcode and first operand.
+ //
+ // Prerequisite is to compute the relative offset to the trampoline's address.
+ int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
+ (static_cast<int64_t>(Sled.Address) + 11);
+ if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+ Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
+ Trampoline, reinterpret_cast<void *>(Sled.Address));
+ return false;
+ }
+ if (Enable) {
+ *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+ *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
+ *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+ std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
+ std::memory_order_release);
+ // FIXME: Write out the nops still?
+ }
+ return true;
+}
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // Here we do the dance of replacing the following sled:
+ //
+ // xray_sled_n:
+ // ret
+ // <10 byte nop>
+ //
+ // With the following:
+ //
+ // mov r10d, <function id>
+ // jmp <relative 32bit offset to exit trampoline>
+ //
+ // 1. Put the function id first, 2 bytes from the start of the sled (just
+ // after the 1-byte ret instruction).
+ // 2. Put the jmp opcode 6 bytes from the start of the sled.
+ // 3. Put the relative offset 7 bytes from the start of the sled.
+ // 4. Do an atomic write over the jmp instruction for the "mov r10d"
+ // opcode and first operand.
+ //
+ // Prerequisite is to compute the relative offset fo the
+ // __xray_FunctionExit function's address.
+ int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
+ (static_cast<int64_t>(Sled.Address) + 11);
+ if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+ Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
+ __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address));
+ return false;
+ }
+ if (Enable) {
+ *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+ *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
+ *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+ std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
+ std::memory_order_release);
+ // FIXME: Write out the nops still?
+ }
+ return true;
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // Here we do the dance of replacing the tail call sled with a similar
+ // sequence as the entry sled, but calls the tail exit sled instead.
+ int64_t TrampolineOffset =
+ reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
+ (static_cast<int64_t>(Sled.Address) + 11);
+ if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+ Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
+ __xray_FunctionTailExit, reinterpret_cast<void *>(Sled.Address));
+ return false;
+ }
+ if (Enable) {
+ *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+ *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
+ *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+ std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
+ std::memory_order_release);
+ // FIXME: Write out the nops still?
+ }
+ return true;
+}
+
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // Here we do the dance of replacing the following sled:
+ //
+ // In Version 0:
+ //
+ // xray_sled_n:
+ // jmp +20 // 2 bytes
+ // ...
+ //
+ // With the following:
+ //
+ // nopw // 2 bytes*
+ // ...
+ //
+ //
+ // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
+ //
+ // ---
+ //
+ // In Version 1:
+ //
+ // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
+ // to a jmp, use 15 bytes instead.
+ //
+ if (Enable) {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
+ std::memory_order_release);
+ } else {
+ switch (Sled.Version) {
+ case 1:
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp15Seq,
+ std::memory_order_release);
+ break;
+ case 0:
+ default:
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
+ std::memory_order_release);
+ break;
+ }
+ }
+ return false;
+}
+
+bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // Here we do the dance of replacing the following sled:
+ //
+ // xray_sled_n:
+ // jmp +20 // 2 byte instruction
+ // ...
+ //
+ // With the following:
+ //
+ // nopw // 2 bytes
+ // ...
+ //
+ //
+ // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
+ // The 20 byte sled stashes three argument registers, calls the trampoline,
+ // unstashes the registers and returns. If the arguments are already in
+ // the correct registers, the stashing and unstashing become equivalently
+ // sized nops.
+ if (Enable) {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
+ std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
+ std::memory_order_release);
+ }
+ return false;
+}
+
+#if !SANITIZER_FUCHSIA
+// We determine whether the CPU we're running on has the correct features we
+// need. In x86_64 this will be rdtscp support.
+bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
+ unsigned int EAX, EBX, ECX, EDX;
+
+ // We check whether rdtscp support is enabled. According to the x86_64 manual,
+ // level should be set at 0x80000001, and we should have a look at bit 27 in
+ // EDX. That's 0x8000000 (or 1u << 27).
+ __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX)
+ : "0"(0x80000001));
+ if (!(EDX & (1u << 27))) {
+ Report("Missing rdtscp support.\n");
+ return false;
+ }
+ // Also check whether we can determine the CPU frequency, since if we cannot,
+ // we should use the emulated TSC instead.
+ if (!getTSCFrequency()) {
+ Report("Unable to determine CPU frequency.\n");
+ return false;
+ }
+ return true;
+}
+#endif
+
+} // namespace __xray