Diffstat (limited to 'contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_x86_64.S')
-rw-r--r--    contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_x86_64.S    311
1 file changed, 311 insertions, 0 deletions
diff --git a/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_x86_64.S b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_x86_64.S
new file mode 100644
index 000000000000..01098f60eeab
--- /dev/null
+++ b/contrib/llvm-project/compiler-rt/lib/xray/xray_trampoline_x86_64.S
@@ -0,0 +1,311 @@
+//===-- xray_trampoline_x86.s -----------------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the X86-specific assembler for the trampolines.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../builtins/assembly.h"
+#include "../sanitizer_common/sanitizer_asm.h"
+
+// XRay trampolines which are not produced by intrinsics are not System V AMD64
+// ABI compliant because they are called with a stack that is always misaligned
+// by 8 bytes with respect to 16-byte alignment. This is because they are
+// called immediately after the call to, or immediately before returning from,
+// the function being instrumented. This saves space in the patch point, but
+// misaligns the stack by 8 bytes.
+
+.macro ALIGN_STACK_16B
+#if defined(__APPLE__)
+  subq $$8, %rsp
+#else
+  subq $8, %rsp
+#endif
+  CFI_ADJUST_CFA_OFFSET(8)
+.endm
+
+.macro RESTORE_STACK_ALIGNMENT
+#if defined(__APPLE__)
+  addq $$8, %rsp
+#else
+  addq $8, %rsp
+#endif
+  CFI_ADJUST_CFA_OFFSET(-8)
+.endm
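Editorial note, not part of this diff: the two alignment macros above work in tandem with SAVE_REGISTERS below. A minimal C++ sketch of the arithmetic, assuming the entry sled's call leaves %rsp on a 16-byte boundary (the instrumented function's return address plus the sled's own return address account for the 8-byte misalignment described above):

    // Sketch only: mirrors the constants the trampolines use.
    #include <cstdint>

    constexpr uint64_t kSaveRegistersBytes = 8 + 240;  // pushfq + subq $240, %rsp
    constexpr uint64_t kAlignPaddingBytes = 8;         // ALIGN_STACK_16B

    // SAVE_REGISTERS lowers %rsp by an odd multiple of 8...
    static_assert(kSaveRegistersBytes % 16 == 8, "odd multiple of 8");
    // ...so ALIGN_STACK_16B's extra 8 bytes put %rsp back on a 16-byte
    // boundary before the trampoline's own `callq *%rax`.
    static_assert((kSaveRegistersBytes + kAlignPaddingBytes) % 16 == 0,
                  "16-byte aligned at the indirect call site");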
+// This macro should lower the stack pointer by an odd multiple of 8.
+.macro SAVE_REGISTERS
+  pushfq
+  CFI_ADJUST_CFA_OFFSET(8)
+  subq $240, %rsp
+  CFI_ADJUST_CFA_OFFSET(240)
+  movq %rbp, 232(%rsp)
+  movupd %xmm0, 216(%rsp)
+  movupd %xmm1, 200(%rsp)
+  movupd %xmm2, 184(%rsp)
+  movupd %xmm3, 168(%rsp)
+  movupd %xmm4, 152(%rsp)
+  movupd %xmm5, 136(%rsp)
+  movupd %xmm6, 120(%rsp)
+  movupd %xmm7, 104(%rsp)
+  movq %rdi, 96(%rsp)
+  movq %rax, 88(%rsp)
+  movq %rdx, 80(%rsp)
+  movq %rsi, 72(%rsp)
+  movq %rcx, 64(%rsp)
+  movq %r8, 56(%rsp)
+  movq %r9, 48(%rsp)
+  movq %r10, 40(%rsp)
+  movq %r11, 32(%rsp)
+  movq %r12, 24(%rsp)
+  movq %r13, 16(%rsp)
+  movq %r14, 8(%rsp)
+  movq %r15, 0(%rsp)
+.endm
+
+.macro RESTORE_REGISTERS
+  movq 232(%rsp), %rbp
+  movupd 216(%rsp), %xmm0
+  movupd 200(%rsp), %xmm1
+  movupd 184(%rsp), %xmm2
+  movupd 168(%rsp), %xmm3
+  movupd 152(%rsp), %xmm4
+  movupd 136(%rsp), %xmm5
+  movupd 120(%rsp), %xmm6
+  movupd 104(%rsp), %xmm7
+  movq 96(%rsp), %rdi
+  movq 88(%rsp), %rax
+  movq 80(%rsp), %rdx
+  movq 72(%rsp), %rsi
+  movq 64(%rsp), %rcx
+  movq 56(%rsp), %r8
+  movq 48(%rsp), %r9
+  movq 40(%rsp), %r10
+  movq 32(%rsp), %r11
+  movq 24(%rsp), %r12
+  movq 16(%rsp), %r13
+  movq 8(%rsp), %r14
+  movq 0(%rsp), %r15
+  addq $240, %rsp
+  CFI_ADJUST_CFA_OFFSET(-240)
+  popfq
+  CFI_ADJUST_CFA_OFFSET(-8)
+.endm
+
+  .text
+#if !defined(__APPLE__)
+  .section .text
+  .file "xray_trampoline_x86.S"
+#else
+  .section __TEXT,__text
+#endif
+
+//===----------------------------------------------------------------------===//
+
+  .globl ASM_SYMBOL(__xray_FunctionEntry)
+  ASM_HIDDEN(__xray_FunctionEntry)
+  .align 16, 0x90
+  ASM_TYPE_FUNCTION(__xray_FunctionEntry)
+# LLVM-MCA-BEGIN __xray_FunctionEntry
+ASM_SYMBOL(__xray_FunctionEntry):
+  CFI_STARTPROC
+  SAVE_REGISTERS
+  ALIGN_STACK_16B
+
+  // This load has to be atomic; it's concurrent with __xray_patch().
+  // On x86/amd64, a simple (type-aligned) MOV instruction is enough.
+  movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+  testq %rax, %rax
+  je LOCAL_LABEL(tmp0)
+
+  // The patched function prologue puts its xray_instr_map index into %r10d.
+  movl %r10d, %edi
+  xor %esi, %esi
+  callq *%rax
+
+LOCAL_LABEL(tmp0):
+  RESTORE_STACK_ALIGNMENT
+  RESTORE_REGISTERS
+  retq
+# LLVM-MCA-END
+  ASM_SIZE(__xray_FunctionEntry)
+  CFI_ENDPROC
+
+//===----------------------------------------------------------------------===//
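Editorial note, not part of this diff: the handler slot __xray::XRayPatchedFunction that __xray_FunctionEntry calls through is normally installed via the public API in xray/xray_interface.h. A minimal, hedged sketch (handler name and output are illustrative; build with clang++ -fxray-instrument):

    #include <cstdint>
    #include <cstdio>
    #include "xray/xray_interface.h"

    void MyHandler(int32_t FuncId, XRayEntryType Type) {
      // FuncId is the %r10d value from the patched sled; Type is 0 for entry,
      // 1 for exit, and 2 for tail exit, matching the %esi values the
      // trampolines set before `callq *%rax`.
      std::fprintf(stderr, "xray: function %d, event %d\n", FuncId,
                   static_cast<int>(Type));
    }

    int main() {
      __xray_set_handler(MyHandler);  // published for the atomic MOV above
      __xray_patch();                 // rewrite the sleds to enter the trampolines
      // ... instrumented code runs and reaches MyHandler ...
      __xray_unpatch();
      __xray_remove_handler();
    }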
+
+  .globl ASM_SYMBOL(__xray_FunctionExit)
+  ASM_HIDDEN(__xray_FunctionExit)
+  .align 16, 0x90
+  ASM_TYPE_FUNCTION(__xray_FunctionExit)
+# LLVM-MCA-BEGIN __xray_FunctionExit
+ASM_SYMBOL(__xray_FunctionExit):
+  CFI_STARTPROC
+  ALIGN_STACK_16B
+
+  // Save the important registers first. Since we're assuming that this
+  // function is only jumped into, we only preserve the registers for
+  // returning.
+  subq $64, %rsp
+  CFI_ADJUST_CFA_OFFSET(64)
+  movq %rbp, 48(%rsp)
+  movupd %xmm0, 32(%rsp)
+  movupd %xmm1, 16(%rsp)
+  movq %rax, 8(%rsp)
+  movq %rdx, 0(%rsp)
+  movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+  testq %rax, %rax
+  je LOCAL_LABEL(tmp2)
+
+  movl %r10d, %edi
+  movl $1, %esi
+  callq *%rax
+
+LOCAL_LABEL(tmp2):
+  // Restore the important registers.
+  movq 48(%rsp), %rbp
+  movupd 32(%rsp), %xmm0
+  movupd 16(%rsp), %xmm1
+  movq 8(%rsp), %rax
+  movq 0(%rsp), %rdx
+  addq $64, %rsp
+  CFI_ADJUST_CFA_OFFSET(-64)
+
+  RESTORE_STACK_ALIGNMENT
+  retq
+# LLVM-MCA-END
+  ASM_SIZE(__xray_FunctionExit)
+  CFI_ENDPROC
+
+//===----------------------------------------------------------------------===//
+
+  .globl ASM_SYMBOL(__xray_FunctionTailExit)
+  ASM_HIDDEN(__xray_FunctionTailExit)
+  .align 16, 0x90
+  ASM_TYPE_FUNCTION(__xray_FunctionTailExit)
+# LLVM-MCA-BEGIN __xray_FunctionTailExit
+ASM_SYMBOL(__xray_FunctionTailExit):
+  CFI_STARTPROC
+  SAVE_REGISTERS
+  ALIGN_STACK_16B
+
+  movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+  testq %rax, %rax
+  je LOCAL_LABEL(tmp4)
+
+  movl %r10d, %edi
+  movl $2, %esi
+  callq *%rax
+
+LOCAL_LABEL(tmp4):
+  RESTORE_STACK_ALIGNMENT
+  RESTORE_REGISTERS
+  retq
+# LLVM-MCA-END
+  ASM_SIZE(__xray_FunctionTailExit)
+  CFI_ENDPROC
+
+//===----------------------------------------------------------------------===//
+
+  .globl ASM_SYMBOL(__xray_ArgLoggerEntry)
+  ASM_HIDDEN(__xray_ArgLoggerEntry)
+  .align 16, 0x90
+  ASM_TYPE_FUNCTION(__xray_ArgLoggerEntry)
+# LLVM-MCA-BEGIN __xray_ArgLoggerEntry
+ASM_SYMBOL(__xray_ArgLoggerEntry):
+  CFI_STARTPROC
+  SAVE_REGISTERS
+  ALIGN_STACK_16B
+
+  // Again, these function pointer loads must be atomic; MOV is fine.
+  movq ASM_SYMBOL(_ZN6__xray13XRayArgLoggerE)(%rip), %rax
+  testq %rax, %rax
+  jne LOCAL_LABEL(arg1entryLog)
+
+  // If the arg1 logging handler is not set, defer to no-arg logging.
+  movq ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax
+  testq %rax, %rax
+  je LOCAL_LABEL(arg1entryFail)
+
+LOCAL_LABEL(arg1entryLog):
+
+  // The first argument will become the third.
+  movq %rdi, %rdx
+
+  // XRayEntryType::LOG_ARGS_ENTRY into the second.
+  mov $0x3, %esi
+
+  // The 32-bit function ID becomes the first.
+  movl %r10d, %edi
+
+  callq *%rax
+
+LOCAL_LABEL(arg1entryFail):
+  RESTORE_STACK_ALIGNMENT
+  RESTORE_REGISTERS
+  retq
+# LLVM-MCA-END
+  ASM_SIZE(__xray_ArgLoggerEntry)
+  CFI_ENDPROC
+
+//===----------------------------------------------------------------------===//
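Editorial note, not part of this diff: __xray_ArgLoggerEntry is the only trampoline that reshuffles registers; the instrumented function's first argument lands in the handler's third parameter. A hedged sketch of the matching arg1 handler, registered through __xray_set_handler_arg1 from xray/xray_interface.h (handler and function names are illustrative):

    #include <cstdint>
    #include <cstdio>
    #include "xray/xray_interface.h"

    void MyArg1Handler(int32_t FuncId, XRayEntryType Type, uint64_t Arg1) {
      // Arg1 is the %rdi value the trampoline copied into %rdx; Type is
      // XRayEntryType::LOG_ARGS_ENTRY, the $0x3 loaded into %esi above.
      std::fprintf(stderr, "xray: function %d, arg1=%llu\n", FuncId,
                   static_cast<unsigned long long>(Arg1));
      (void)Type;
    }

    void InstallArg1Logger() {
      // Only functions marked __attribute__((xray_log_args(1))) use this sled.
      __xray_set_handler_arg1(MyArg1Handler);
    }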
+
+// __xray_*Event have default visibility so that they can be referenced by user
+// DSOs that do not link against the runtime.
+  .global ASM_SYMBOL(__xray_CustomEvent)
+  .align 16, 0x90
+  ASM_TYPE_FUNCTION(__xray_CustomEvent)
+# LLVM-MCA-BEGIN __xray_CustomEvent
+ASM_SYMBOL(__xray_CustomEvent):
+  CFI_STARTPROC
+  SAVE_REGISTERS
+
+  // We take two arguments to this trampoline, which should be in rdi and rsi
+  // already.
+  movq ASM_SYMBOL(_ZN6__xray22XRayPatchedCustomEventE)(%rip), %rax
+  testq %rax, %rax
+  je LOCAL_LABEL(customEventCleanup)
+
+  callq *%rax
+
+LOCAL_LABEL(customEventCleanup):
+  RESTORE_REGISTERS
+  retq
+# LLVM-MCA-END
+  ASM_SIZE(__xray_CustomEvent)
+  CFI_ENDPROC
+
+//===----------------------------------------------------------------------===//
+
+  .global ASM_SYMBOL(__xray_TypedEvent)
+  .align 16, 0x90
+  ASM_TYPE_FUNCTION(__xray_TypedEvent)
+# LLVM-MCA-BEGIN __xray_TypedEvent
+ASM_SYMBOL(__xray_TypedEvent):
+  CFI_STARTPROC
+  SAVE_REGISTERS
+
+  // We pass three arguments to this trampoline, which should be in rdi, rsi
+  // and rdx without our intervention.
+  movq ASM_SYMBOL(_ZN6__xray21XRayPatchedTypedEventE)(%rip), %rax
+  testq %rax, %rax
+  je LOCAL_LABEL(typedEventCleanup)
+
+  callq *%rax
+
+LOCAL_LABEL(typedEventCleanup):
+  RESTORE_REGISTERS
+  retq
+# LLVM-MCA-END
+  ASM_SIZE(__xray_TypedEvent)
+  CFI_ENDPROC
+
+//===----------------------------------------------------------------------===//
+
+NO_EXEC_STACK_DIRECTIVE
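Editorial note, not part of this diff: the event trampolines pass their arguments straight through. A hedged sketch of the custom-event path, assuming the handler signature declared in xray/xray_interface.h at the time of this import and a build with -fxray-instrument (handler and payload names are illustrative):

    #include <cstddef>
    #include <cstdio>
    #include "xray/xray_interface.h"

    void MyEventHandler(void *Buf, std::size_t Size) {
      // Buf and Size arrive untouched in rdi and rsi, exactly as the comment
      // in __xray_CustomEvent promises.
      std::fprintf(stderr, "xray: custom event, %zu bytes\n", Size);
      (void)Buf;
    }

    void EmitCheckpoint() {
      __xray_set_customevent_handler(MyEventHandler);
      static const char Msg[] = "checkpoint reached";
      __xray_customevent(Msg, sizeof(Msg) - 1);  // Clang builtin, lowered to the __xray_CustomEvent sled
    }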
