diff options
Diffstat (limited to 'lib/interception')
-rw-r--r-- | lib/interception/CMakeLists.txt | 37 | ||||
-rw-r--r-- | lib/interception/Makefile.mk | 23 | ||||
-rw-r--r-- | lib/interception/interception.h | 168 | ||||
-rw-r--r-- | lib/interception/interception_linux.cc | 29 | ||||
-rw-r--r-- | lib/interception/interception_linux.h | 35 | ||||
-rw-r--r-- | lib/interception/interception_mac.cc | 33 | ||||
-rw-r--r-- | lib/interception/interception_mac.h | 47 | ||||
-rw-r--r-- | lib/interception/interception_win.cc | 149 | ||||
-rw-r--r-- | lib/interception/interception_win.h | 42 | ||||
-rw-r--r-- | lib/interception/mach_override/LICENSE.TXT | 3 | ||||
-rw-r--r-- | lib/interception/mach_override/Makefile.mk | 22 | ||||
-rw-r--r-- | lib/interception/mach_override/README.txt | 9 | ||||
-rw-r--r-- | lib/interception/mach_override/mach_override.c | 958 | ||||
-rw-r--r-- | lib/interception/mach_override/mach_override.h | 140 |
14 files changed, 1695 insertions, 0 deletions
diff --git a/lib/interception/CMakeLists.txt b/lib/interception/CMakeLists.txt new file mode 100644 index 000000000000..033b05fc1b34 --- /dev/null +++ b/lib/interception/CMakeLists.txt @@ -0,0 +1,37 @@ +# Build for the runtime interception helper library. + +set(INTERCEPTION_SOURCES + interception_linux.cc + interception_mac.cc + interception_win.cc + ) + +# Only add this C file if we're building on a Mac. Other source files can be +# harmlessly compiled on any platform, but the C file is complained about due +# to pedantic rules about empty translation units. +if (APPLE) + list(APPEND INTERCEPTION_SOURCES mach_override/mach_override.c) +endif () + +set(INTERCEPTION_CFLAGS "-fPIC -fno-exceptions -funwind-tables -fvisibility=hidden") +if (SUPPORTS_NO_VARIADIC_MACROS_FLAG) + set(INTERCEPTION_CFLAGS "${INTERCEPTION_CFLAGS} -Wno-variadic-macros") +endif () + +set(INTERCEPTION_COMMON_DEFINITIONS + INTERCEPTION_HAS_EXCEPTIONS=1) + +if(CAN_TARGET_X86_64) + add_library(RTInterception.x86_64 OBJECT ${INTERCEPTION_SOURCES}) + set_property(TARGET RTInterception.x86_64 PROPERTY COMPILE_FLAGS + "${INTERCEPTION_CFLAGS} ${TARGET_X86_64_CFLAGS}") + set_property(TARGET RTInterception.x86_64 APPEND PROPERTY COMPILE_DEFINITIONS + ${INTERCEPTION_COMMON_DEFINITIONS}) +endif() +if(CAN_TARGET_I386) + add_library(RTInterception.i386 OBJECT ${INTERCEPTION_SOURCES}) + set_property(TARGET RTInterception.i386 PROPERTY COMPILE_FLAGS + "${INTERCEPTION_CFLAGS} ${TARGET_I386_CFLAGS}") + set_property(TARGET RTInterception.i386 APPEND PROPERTY COMPILE_DEFINITIONS + ${INTERCEPTION_COMMON_DEFINITIONS}) +endif() diff --git a/lib/interception/Makefile.mk b/lib/interception/Makefile.mk new file mode 100644 index 000000000000..1412a016f80e --- /dev/null +++ b/lib/interception/Makefile.mk @@ -0,0 +1,23 @@ +#===- lib/interception/Makefile.mk -------------------------*- Makefile -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# 
License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +ModuleName := interception +SubDirs := mach_override + +Sources := $(foreach file,$(wildcard $(Dir)/*.cc),$(notdir $(file))) +ObjNames := $(Sources:%.cc=%.o) + +Implementation := Generic + +# FIXME: use automatic dependencies? +Dependencies := $(wildcard $(Dir)/*.h) +Dependencies += $(wildcard $(Dir)/mach_override/*.h) + +# Define a convenience variable for all the interception functions. +InterceptionFunctions := $(Sources:%.cc=%) diff --git a/lib/interception/interception.h b/lib/interception/interception.h new file mode 100644 index 000000000000..b72bff2a6c02 --- /dev/null +++ b/lib/interception/interception.h @@ -0,0 +1,168 @@ +//===-- interception.h ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// +// Machinery for providing replacements/wrappers for system functions. +//===----------------------------------------------------------------------===// + +#ifndef INTERCEPTION_H +#define INTERCEPTION_H + +#if !defined(__linux__) && !defined(__APPLE__) && !defined(_WIN32) +# error "Interception doesn't work on this operating system." +#endif + +// How to use this library: +// 1) Include this header to define your own interceptors +// (see details below). +// 2) Build all *.cc files and link against them. +// On Mac you will also need to: +// 3) Provide your own implementation for the following extern "C" functions: +// mach_error_t __interception_allocate_island(void **ptr, +// size_t size, +// void *hint); +// mach_error_t __interception_deallocate_island(void *ptr); +// See "interception_mac.h" for more details. 
+ +// How to add an interceptor: +// Suppose you need to wrap/replace a system function (generally, from libc): +// int foo(const char *bar, double baz); +// You'll need to: +// 1) define INTERCEPTOR(int, foo, const char *bar, double baz) { ... } in +// your source file. +// 2) Call "INTERCEPT_FUNCTION(foo)" prior to the first call of "foo". +// INTERCEPT_FUNCTION(foo) evaluates to "true" iff the function was +// intercepted successfully. +// You can access the original function by calling REAL(foo)(bar, baz). +// By default, REAL(foo) will be visible only inside your interceptor, and if +// you want to use it in other parts of RTL, you'll need to: +// 3a) add DECLARE_REAL(int, foo, const char*, double) to a +// header file. +// However, if the call "INTERCEPT_FUNCTION(foo)" and definition for +// INTERCEPTOR(..., foo, ...) are in different files, you'll instead need to: +// 3b) add DECLARE_REAL_AND_INTERCEPTOR(int, foo, const char*, double) +// to a header file. + +// Notes: 1. Things may not work properly if the macros INTERCEPTOR(...) {...} or +// DECLARE_REAL(...) are located inside namespaces. +// 2. On Mac you can also use: "OVERRIDE_FUNCTION(foo, zoo);" to +// effectively redirect calls from "foo" to "zoo". In this case +// you aren't required to implement +// INTERCEPTOR(int, foo, const char *bar, double baz) {...} +// but instead you'll have to add +// DEFINE_REAL(int, foo, const char *bar, double baz) in your +// source file (to define a pointer to the overridden function). + +// How it works: +// To replace system functions on Linux we just need to declare functions +// with the same names in our library and then obtain the real function pointers +// using dlsym(). +// There is one complication. A user may also intercept some of the functions +// we intercept. To resolve this we declare our interceptors with __interceptor_ +// prefix, and then make actual interceptors weak aliases to __interceptor_ +// functions. 
+// This is not so on Mac OS, where the two-level namespace makes +// our replacement functions invisible to other libraries. This may be overcome +// using the DYLD_FORCE_FLAT_NAMESPACE, but some errors loading the shared +// libraries in Chromium were noticed when doing so. Instead we use +// mach_override, a handy framework for patching functions at runtime. +// To avoid possible name clashes, our replacement functions have +// the "wrap_" prefix on Mac. + +#if defined(__APPLE__) +# define WRAP(x) wrap_##x +# define WRAPPER_NAME(x) "wrap_"#x +# define INTERCEPTOR_ATTRIBUTE +# define DECLARE_WRAPPER(ret_type, convention, func, ...) +#elif defined(_WIN32) +# if defined(_DLL) // DLL CRT +# define WRAP(x) x +# define WRAPPER_NAME(x) #x +# define INTERCEPTOR_ATTRIBUTE +# else // Static CRT +# define WRAP(x) wrap_##x +# define WRAPPER_NAME(x) "wrap_"#x +# define INTERCEPTOR_ATTRIBUTE +# endif +# define DECLARE_WRAPPER(ret_type, convention, func, ...) +#else +# define WRAP(x) __interceptor_ ## x +# define WRAPPER_NAME(x) "__interceptor_" #x +# define INTERCEPTOR_ATTRIBUTE __attribute__((visibility("default"))) +# define DECLARE_WRAPPER(ret_type, convention, func, ...) \ + extern "C" ret_type convention func(__VA_ARGS__) \ + __attribute__((weak, alias("__interceptor_" #func), visibility("default"))); +#endif + +#define PTR_TO_REAL(x) real_##x +#define REAL(x) __interception::PTR_TO_REAL(x) +#define FUNC_TYPE(x) x##_f + +#define DECLARE_REAL(ret_type, func, ...) \ + typedef ret_type (*FUNC_TYPE(func))(__VA_ARGS__); \ + namespace __interception { \ + extern FUNC_TYPE(func) PTR_TO_REAL(func); \ + } + +#define DECLARE_REAL_AND_INTERCEPTOR(ret_type, func, ...) \ + DECLARE_REAL(ret_type, func, ##__VA_ARGS__) \ + extern "C" ret_type WRAP(func)(__VA_ARGS__); + +// FIXME(timurrrr): We might need to add DECLARE_REAL_EX etc to support +// different calling conventions later. + +#define DEFINE_REAL_EX(ret_type, convention, func, ...) 
\ + typedef ret_type (convention *FUNC_TYPE(func))(__VA_ARGS__); \ + namespace __interception { \ + FUNC_TYPE(func) PTR_TO_REAL(func); \ + } + +// Generally, you don't need to use DEFINE_REAL by itself, as the INTERCEPTOR +// macro does its job. In exceptional cases you may need to call REAL(foo) +// without defining INTERCEPTOR(..., foo, ...). For example, if you override +// foo with an interceptor for another function. +#define DEFAULT_CONVENTION + +#define DEFINE_REAL(ret_type, func, ...) \ + DEFINE_REAL_EX(ret_type, DEFAULT_CONVENTION, func, __VA_ARGS__) + +#define INTERCEPTOR_EX(ret_type, convention, func, ...) \ + DEFINE_REAL_EX(ret_type, convention, func, __VA_ARGS__) \ + DECLARE_WRAPPER(ret_type, convention, func, __VA_ARGS__) \ + extern "C" \ + INTERCEPTOR_ATTRIBUTE \ + ret_type convention WRAP(func)(__VA_ARGS__) + +#define INTERCEPTOR(ret_type, func, ...) \ + INTERCEPTOR_EX(ret_type, DEFAULT_CONVENTION, func, __VA_ARGS__) + +#if defined(_WIN32) +# define INTERCEPTOR_WINAPI(ret_type, func, ...) 
\ + INTERCEPTOR_EX(ret_type, __stdcall, func, __VA_ARGS__) +#endif + +#define INCLUDED_FROM_INTERCEPTION_LIB + +#if defined(__linux__) +# include "interception_linux.h" +# define INTERCEPT_FUNCTION(func) INTERCEPT_FUNCTION_LINUX(func) +#elif defined(__APPLE__) +# include "interception_mac.h" +# define OVERRIDE_FUNCTION(old_func, new_func) \ + OVERRIDE_FUNCTION_MAC(old_func, new_func) +# define INTERCEPT_FUNCTION(func) INTERCEPT_FUNCTION_MAC(func) +#else // defined(_WIN32) +# include "interception_win.h" +# define INTERCEPT_FUNCTION(func) INTERCEPT_FUNCTION_WIN(func) +#endif + +#undef INCLUDED_FROM_INTERCEPTION_LIB + +#endif // INTERCEPTION_H diff --git a/lib/interception/interception_linux.cc b/lib/interception/interception_linux.cc new file mode 100644 index 000000000000..37e593323d22 --- /dev/null +++ b/lib/interception/interception_linux.cc @@ -0,0 +1,29 @@ +//===-- interception_linux.cc -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// +// Linux-specific interception methods. 
+//===----------------------------------------------------------------------===// + +#ifdef __linux__ + +#include <stddef.h> // for NULL +#include <dlfcn.h> // for dlsym + +namespace __interception { +bool GetRealFunctionAddress(const char *func_name, void **func_addr, + void *real, void *wrapper) { + *func_addr = dlsym(RTLD_NEXT, func_name); + return real == wrapper; +} +} // namespace __interception + + +#endif // __linux__ diff --git a/lib/interception/interception_linux.h b/lib/interception/interception_linux.h new file mode 100644 index 000000000000..76a29c6a99a9 --- /dev/null +++ b/lib/interception/interception_linux.h @@ -0,0 +1,35 @@ +//===-- interception_linux.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// +// Linux-specific interception methods. +//===----------------------------------------------------------------------===// + +#ifdef __linux__ + +#if !defined(INCLUDED_FROM_INTERCEPTION_LIB) +# error "interception_linux.h should be included from interception library only" +#endif + +#ifndef INTERCEPTION_LINUX_H +#define INTERCEPTION_LINUX_H + +namespace __interception { +// Sets *func_addr via dlsym(RTLD_NEXT); returns true iff real == wrapper. 
+bool GetRealFunctionAddress(const char *func_name, void **func_addr, + void *real, void *wrapper); +} // namespace __interception + +#define INTERCEPT_FUNCTION_LINUX(func) \ + ::__interception::GetRealFunctionAddress(#func, (void**)&REAL(func), \ + (void*)&(func), (void*)&WRAP(func)) + +#endif // INTERCEPTION_LINUX_H +#endif // __linux__ diff --git a/lib/interception/interception_mac.cc b/lib/interception/interception_mac.cc new file mode 100644 index 000000000000..cc9e4a70db8f --- /dev/null +++ b/lib/interception/interception_mac.cc @@ -0,0 +1,33 @@ +//===-- interception_mac.cc -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// +// Mac-specific interception methods. 
+//===----------------------------------------------------------------------===// + +#ifdef __APPLE__ + +#define INCLUDED_FROM_INTERCEPTION_LIB +#include "interception_mac.h" +#undef INCLUDED_FROM_INTERCEPTION_LIB +#include "mach_override/mach_override.h" + +namespace __interception { +bool OverrideFunction(void *old_func, void *new_func, void **orig_old_func) { + *orig_old_func = NULL; + int res = __asan_mach_override_ptr_custom(old_func, new_func, + orig_old_func, + __interception_allocate_island, + __interception_deallocate_island); + return (res == 0) && (*orig_old_func != NULL); +} +} // namespace __interception + +#endif // __APPLE__ diff --git a/lib/interception/interception_mac.h b/lib/interception/interception_mac.h new file mode 100644 index 000000000000..224d961eefe0 --- /dev/null +++ b/lib/interception/interception_mac.h @@ -0,0 +1,47 @@ +//===-- interception_mac.h --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// +// Mac-specific interception methods. +//===----------------------------------------------------------------------===// + +#ifdef __APPLE__ + +#if !defined(INCLUDED_FROM_INTERCEPTION_LIB) +# error "interception_mac.h should be included from interception.h only" +#endif + +#ifndef INTERCEPTION_MAC_H +#define INTERCEPTION_MAC_H + +#include <mach/mach_error.h> +#include <stddef.h> + +// Allocate memory for the escape island. This cannot be moved to +// mach_override, because each user of interceptors may specify its +// own memory range for escape islands. 
+extern "C" { +mach_error_t __interception_allocate_island(void **ptr, size_t unused_size, + void *unused_hint); +mach_error_t __interception_deallocate_island(void *ptr); +} // extern "C" + +namespace __interception { +// returns true if the old function existed. +bool OverrideFunction(void *old_func, void *new_func, void **orig_old_func); +} // namespace __interception + +# define OVERRIDE_FUNCTION_MAC(old_func, new_func) \ + ::__interception::OverrideFunction((void*)old_func, (void*)new_func, \ + (void**)&REAL(old_func)) +# define INTERCEPT_FUNCTION_MAC(func) OVERRIDE_FUNCTION_MAC(func, WRAP(func)) + +#endif // INTERCEPTION_MAC_H +#endif // __APPLE__ diff --git a/lib/interception/interception_win.cc b/lib/interception/interception_win.cc new file mode 100644 index 000000000000..a60c741cb3de --- /dev/null +++ b/lib/interception/interception_win.cc @@ -0,0 +1,149 @@ +//===-- interception_win.cc -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// +// Windows-specific interception methods. +//===----------------------------------------------------------------------===// + +#ifdef _WIN32 + +#include <windows.h> + +namespace __interception { + +bool GetRealFunctionAddress(const char *func_name, void **func_addr) { + const char *DLLS[] = { + "msvcr80.dll", + "msvcr90.dll", + "kernel32.dll", + NULL + }; + *func_addr = NULL; + for (size_t i = 0; *func_addr == NULL && DLLS[i]; ++i) { + *func_addr = GetProcAddress(GetModuleHandleA(DLLS[i]), func_name); + } + return (*func_addr != NULL); +} + +// FIXME: internal_str* and internal_mem* functions should be moved from the +// ASan sources into interception/. 
+ +static void _memset(void *p, int value, size_t sz) { + for (size_t i = 0; i < sz; ++i) + ((char*)p)[i] = (char)value; +} + +static void _memcpy(void *dst, void *src, size_t sz) { + char *dst_c = (char*)dst, + *src_c = (char*)src; + for (size_t i = 0; i < sz; ++i) + dst_c[i] = src_c[i]; +} + +static void WriteJumpInstruction(char *jmp_from, char *to) { + // jmp XXYYZZWW = E9 WW ZZ YY XX, where XXYYZZWW is the offset from the + // instruction following the jmp (jmp_from + 5) to the destination. + ptrdiff_t offset = to - jmp_from - 5; + *jmp_from = '\xE9'; + *(ptrdiff_t*)(jmp_from + 1) = offset; +} + +bool OverrideFunction(void *old_func, void *new_func, void **orig_old_func) { +#ifdef _WIN64 +# error OverrideFunction was not tested on x64 +#endif + // Basic idea: + // We write 5 bytes (jmp-to-new_func) at the beginning of the 'old_func' + // to override it. We want to be able to execute the original 'old_func' from + // the wrapper, so we need to keep the leading 5+ bytes ('head') of the + // original instructions somewhere with a "jmp old_func+head". + // We call these 'head'+5 bytes of instructions a "trampoline". + + // Trampolines are allocated from a common pool. + const int POOL_SIZE = 1024; + static char *pool = NULL; + static size_t pool_used = 0; + if (pool == NULL) { + pool = (char*)VirtualAlloc(NULL, POOL_SIZE, + MEM_RESERVE | MEM_COMMIT, + PAGE_EXECUTE_READWRITE); + // FIXME: set PAGE_EXECUTE_READ access after setting all interceptors? + if (pool == NULL) + return false; + _memset(pool, 0xCC /* int 3 */, POOL_SIZE); + } + + char* old_bytes = (char*)old_func; + char* trampoline = pool + pool_used; + + // Find out the number of bytes of the instructions we need to copy to the + // island and store it in 'head'. 
+ size_t head = 0; + while (head < 5) { + switch (old_bytes[head]) { + case '\x55': // push ebp + case '\x56': // push esi + case '\x57': // push edi + head++; + continue; + } + switch (*(unsigned short*)(old_bytes + head)) { // NOLINT + case 0xFF8B: // 8B FF = mov edi, edi + case 0xEC8B: // 8B EC = mov ebp, esp + case 0xC033: // 33 C0 = xor eax, eax + head += 2; + continue; + case 0xEC83: // 83 EC XX = sub esp, XX + head += 3; + continue; + case 0xC1F7: // F7 C1 XX YY ZZ WW = test ecx, WWZZYYXX + head += 6; + continue; + } + switch (0x00FFFFFF & *(unsigned int*)(old_bytes + head)) { + case 0x24448A: // 8A 44 24 XX = mov al, byte ptr [esp+XXh] + case 0x244C8B: // 8B 4C 24 XX = mov ecx, dword ptr [esp+XXh] + case 0x24548B: // 8B 54 24 XX = mov edx, dword ptr [esp+XXh] + case 0x247C8B: // 8B 7C 24 XX = mov edi, dword ptr [esp+XXh] + head += 4; + continue; + } + + // Unknown instruction! + return false; + } + + if (pool_used + head + 5 > POOL_SIZE) + return false; + + // Make the first 'head' bytes of the original code writable for patching. + DWORD old_prot, unused_prot; + if (!VirtualProtect(old_func, head, PAGE_EXECUTE_READWRITE, &old_prot)) + return false; + + // Put the needed instructions into the trampoline bytes. + _memcpy(trampoline, old_bytes, head); + WriteJumpInstruction(trampoline + head, old_bytes + head); + *orig_old_func = trampoline; + pool_used += head + 5; + + // Intercept the 'old_func'. + WriteJumpInstruction(old_bytes, (char*)new_func); + _memset(old_bytes + 5, 0xCC /* int 3 */, head - 5); + + if (!VirtualProtect(old_func, head, old_prot, &unused_prot)) + return false; // not clear if this failure bothers us. 
+ + return true; +} + +} // namespace __interception + +#endif // _WIN32 diff --git a/lib/interception/interception_win.h b/lib/interception/interception_win.h new file mode 100644 index 000000000000..9d1586ecb173 --- /dev/null +++ b/lib/interception/interception_win.h @@ -0,0 +1,42 @@ +//===-- interception_win.h --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// +// Windows-specific interception methods. +//===----------------------------------------------------------------------===// + +#ifdef _WIN32 + +#if !defined(INCLUDED_FROM_INTERCEPTION_LIB) +# error "interception_win.h should be included from interception library only" +#endif + +#ifndef INTERCEPTION_WIN_H +#define INTERCEPTION_WIN_H + +namespace __interception { +// returns true if a function with the given name was found. +bool GetRealFunctionAddress(const char *func_name, void **func_addr); + +// returns true if the old function existed, false on failure. 
+bool OverrideFunction(void *old_func, void *new_func, void **orig_old_func); +} // namespace __interception + +#if defined(_DLL) +# define INTERCEPT_FUNCTION_WIN(func) \ + ::__interception::GetRealFunctionAddress(#func, (void**)&REAL(func)) +#else +# define INTERCEPT_FUNCTION_WIN(func) \ + ::__interception::OverrideFunction((void*)func, (void*)WRAP(func), \ + (void**)&REAL(func)) +#endif + +#endif // INTERCEPTION_WIN_H +#endif // _WIN32 diff --git a/lib/interception/mach_override/LICENSE.TXT b/lib/interception/mach_override/LICENSE.TXT new file mode 100644 index 000000000000..9446965176ce --- /dev/null +++ b/lib/interception/mach_override/LICENSE.TXT @@ -0,0 +1,3 @@ +Copyright (c) 2003-2009 Jonathan 'Wolf' Rentzsch: <http://rentzsch.com> +Some rights reserved: <http://opensource.org/licenses/mit-license.php> + diff --git a/lib/interception/mach_override/Makefile.mk b/lib/interception/mach_override/Makefile.mk new file mode 100644 index 000000000000..8f5ebdab1bf2 --- /dev/null +++ b/lib/interception/mach_override/Makefile.mk @@ -0,0 +1,22 @@ +#===- lib/interception/mach_override/Makefile.mk -----------*- Makefile -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +ModuleName := interception +SubDirs := + +Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) +ObjNames := $(Sources:%.c=%.o) + +Implementation := Generic + +# FIXME: use automatic dependencies? +Dependencies := $(wildcard $(Dir)/*.h) + +# Define a convenience variable for all the interception functions. 
+InterceptionFunctions += $(Sources:%.c=%) diff --git a/lib/interception/mach_override/README.txt b/lib/interception/mach_override/README.txt new file mode 100644 index 000000000000..5f62ad7b994f --- /dev/null +++ b/lib/interception/mach_override/README.txt @@ -0,0 +1,9 @@ +-- mach_override.c is taken from upstream version at + https://github.com/rentzsch/mach_star/tree/f8e0c424b5be5cb641ded67c265e616157ae4bcf +-- Added debugging code under DEBUG_DISASM. +-- The files are guarded with #ifdef __APPLE__ +-- some opcodes are added in order to parse the library functions on Lion +-- fixupInstructions() is extended to relocate relative calls, not only jumps +-- mach_override_ptr is renamed to __asan_mach_override_ptr and + other functions are marked as hidden. + diff --git a/lib/interception/mach_override/mach_override.c b/lib/interception/mach_override/mach_override.c new file mode 100644 index 000000000000..499cc029b187 --- /dev/null +++ b/lib/interception/mach_override/mach_override.c @@ -0,0 +1,958 @@ +/******************************************************************************* + mach_override.c + Copyright (c) 2003-2009 Jonathan 'Wolf' Rentzsch: <http://rentzsch.com> + Some rights reserved: <http://opensource.org/licenses/mit-license.php> + + ***************************************************************************/ +#ifdef __APPLE__ + +#include "mach_override.h" + +#include <mach-o/dyld.h> +#include <mach/mach_host.h> +#include <mach/mach_init.h> +#include <mach/vm_map.h> +#include <sys/mman.h> + +#include <CoreServices/CoreServices.h> + +//#define DEBUG_DISASM 1 +#undef DEBUG_DISASM + +/************************** +* +* Constants +* +**************************/ +#pragma mark - +#pragma mark (Constants) + +#if defined(__ppc__) || defined(__POWERPC__) + +long kIslandTemplate[] = { + 0x9001FFFC, // stw r0,-4(SP) + 0x3C00DEAD, // lis r0,0xDEAD + 0x6000BEEF, // ori r0,r0,0xBEEF + 0x7C0903A6, // mtctr r0 + 0x8001FFFC, // lwz r0,-4(SP) + 0x60000000, // nop ; 
optionally replaced + 0x4E800420 // bctr +}; + +#define kAddressHi 3 +#define kAddressLo 5 +#define kInstructionHi 10 +#define kInstructionLo 11 + +#elif defined(__i386__) + +#define kOriginalInstructionsSize 16 + +char kIslandTemplate[] = { + // kOriginalInstructionsSize nop instructions so that we + // should have enough space to host original instructions + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + // Now the real jump instruction + 0xE9, 0xEF, 0xBE, 0xAD, 0xDE +}; + +#define kInstructions 0 +#define kJumpAddress kInstructions + kOriginalInstructionsSize + 1 +#elif defined(__x86_64__) + +#define kOriginalInstructionsSize 32 + +#define kJumpAddress kOriginalInstructionsSize + 6 + +char kIslandTemplate[] = { + // kOriginalInstructionsSize nop instructions so that we + // should have enough space to host original instructions + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + // Now the real jump instruction + 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 +}; + +#endif + +#define kAllocateHigh 1 +#define kAllocateNormal 0 + +/************************** +* +* Data Types +* +**************************/ +#pragma mark - +#pragma mark (Data Types) + +typedef struct { + char instructions[sizeof(kIslandTemplate)]; + int allocatedHigh; +} BranchIsland; + +/************************** +* +* Funky Protos +* +**************************/ +#pragma mark - +#pragma mark (Funky Protos) + + mach_error_t +allocateBranchIsland( + BranchIsland **island, + int allocateHigh, + void *originalFunctionAddress) __attribute__((visibility("hidden"))); + + mach_error_t +freeBranchIsland( + BranchIsland *island ) __attribute__((visibility("hidden"))); + + mach_error_t +defaultIslandMalloc( + void **ptr, size_t unused_size, void *hint) 
__attribute__((visibility("hidden"))); + + mach_error_t +defaultIslandFree( + void *ptr) __attribute__((visibility("hidden"))); + +#if defined(__ppc__) || defined(__POWERPC__) + mach_error_t +setBranchIslandTarget( + BranchIsland *island, + const void *branchTo, + long instruction ) __attribute__((visibility("hidden"))); +#endif + +#if defined(__i386__) || defined(__x86_64__) +mach_error_t +setBranchIslandTarget_i386( + BranchIsland *island, + const void *branchTo, + char* instructions ) __attribute__((visibility("hidden"))); +void +atomic_mov64( + uint64_t *targetAddress, + uint64_t value ) __attribute__((visibility("hidden"))); + + static Boolean +eatKnownInstructions( + unsigned char *code, + uint64_t *newInstruction, + int *howManyEaten, + char *originalInstructions, + int *originalInstructionCount, + uint8_t *originalInstructionSizes ) __attribute__((visibility("hidden"))); + + static void +fixupInstructions( + void *originalFunction, + void *escapeIsland, + void *instructionsToFix, + int instructionCount, + uint8_t *instructionSizes ) __attribute__((visibility("hidden"))); + +#ifdef DEBUG_DISASM + static void +dump16Bytes( + void *ptr); +#endif // DEBUG_DISASM +#endif + +/******************************************************************************* +* +* Interface +* +*******************************************************************************/ +#pragma mark - +#pragma mark (Interface) + +#if defined(__i386__) || defined(__x86_64__) +mach_error_t makeIslandExecutable(void *address) { + mach_error_t err = err_none; + vm_size_t pageSize; + host_page_size( mach_host_self(), &pageSize ); + uintptr_t page = (uintptr_t)address & ~(uintptr_t)(pageSize-1); + int e = err_none; + e |= mprotect((void *)page, pageSize, PROT_EXEC | PROT_READ | PROT_WRITE); + e |= msync((void *)page, pageSize, MS_INVALIDATE ); + if (e) { + err = err_cannot_override; + } + return err; +} +#endif + + mach_error_t +defaultIslandMalloc( + void **ptr, size_t unused_size, void *hint) { + 
return allocateBranchIsland( (BranchIsland**)ptr, kAllocateHigh, hint ); +} + mach_error_t +defaultIslandFree( + void *ptr) { + return freeBranchIsland(ptr); +} + + mach_error_t +__asan_mach_override_ptr( + void *originalFunctionAddress, + const void *overrideFunctionAddress, + void **originalFunctionReentryIsland ) +{ + return __asan_mach_override_ptr_custom(originalFunctionAddress, + overrideFunctionAddress, + originalFunctionReentryIsland, + defaultIslandMalloc, + defaultIslandFree); +} + + mach_error_t +__asan_mach_override_ptr_custom( + void *originalFunctionAddress, + const void *overrideFunctionAddress, + void **originalFunctionReentryIsland, + island_malloc *alloc, + island_free *dealloc) +{ + assert( originalFunctionAddress ); + assert( overrideFunctionAddress ); + + // this addresses overriding such functions as AudioOutputUnitStart() + // test with modified DefaultOutputUnit project +#if defined(__x86_64__) + for(;;){ + if(*(uint16_t*)originalFunctionAddress==0x25FF) // jmp qword near [rip+0x????????] + originalFunctionAddress=*(void**)((char*)originalFunctionAddress+6+*(int32_t *)((uint16_t*)originalFunctionAddress+1)); + else break; + } +#elif defined(__i386__) + for(;;){ + if(*(uint16_t*)originalFunctionAddress==0x25FF) // jmp *0x???????? 
+ originalFunctionAddress=**(void***)((uint16_t*)originalFunctionAddress+1); + else break; + } +#endif +#ifdef DEBUG_DISASM + { + fprintf(stderr, "Replacing function at %p\n", originalFunctionAddress); + fprintf(stderr, "First 16 bytes of the function: "); + unsigned char *orig = (unsigned char *)originalFunctionAddress; + int i; + for (i = 0; i < 16; i++) { + fprintf(stderr, "%x ", (unsigned int) orig[i]); + } + fprintf(stderr, "\n"); + fprintf(stderr, + "To disassemble, save the following function as disas.c" + " and run:\n gcc -c disas.c && gobjdump -d disas.o\n" + "The first 16 bytes of the original function will start" + " after four nop instructions.\n"); + fprintf(stderr, "\nvoid foo() {\n asm volatile(\"nop;nop;nop;nop;\");\n"); + int j = 0; + for (j = 0; j < 2; j++) { + fprintf(stderr, " asm volatile(\".byte "); + for (i = 8 * j; i < 8 * (j+1) - 1; i++) { + fprintf(stderr, "0x%x, ", (unsigned int) orig[i]); + } + fprintf(stderr, "0x%x;\");\n", (unsigned int) orig[8 * (j+1) - 1]); + } + fprintf(stderr, "}\n\n"); + } +#endif + + long *originalFunctionPtr = (long*) originalFunctionAddress; + mach_error_t err = err_none; + +#if defined(__ppc__) || defined(__POWERPC__) + // Ensure first instruction isn't 'mfctr'. 
+ #define kMFCTRMask 0xfc1fffff + #define kMFCTRInstruction 0x7c0903a6 + + long originalInstruction = *originalFunctionPtr; + if( !err && ((originalInstruction & kMFCTRMask) == kMFCTRInstruction) ) + err = err_cannot_override; +#elif defined(__i386__) || defined(__x86_64__) + int eatenCount = 0; + int originalInstructionCount = 0; + char originalInstructions[kOriginalInstructionsSize]; + uint8_t originalInstructionSizes[kOriginalInstructionsSize]; + uint64_t jumpRelativeInstruction = 0; // JMP + + Boolean overridePossible = eatKnownInstructions ((unsigned char *)originalFunctionPtr, + &jumpRelativeInstruction, &eatenCount, + originalInstructions, &originalInstructionCount, + originalInstructionSizes ); +#ifdef DEBUG_DISASM + if (!overridePossible) fprintf(stderr, "overridePossible = false @%d\n", __LINE__); +#endif + if (eatenCount > kOriginalInstructionsSize) { +#ifdef DEBUG_DISASM + fprintf(stderr, "Too many instructions eaten\n"); +#endif + overridePossible = false; + } + if (!overridePossible) err = err_cannot_override; + if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__); +#endif + + // Make the original function implementation writable. + if( !err ) { + err = vm_protect( mach_task_self(), + (vm_address_t) originalFunctionPtr, 8, false, + (VM_PROT_ALL | VM_PROT_COPY) ); + if( err ) + err = vm_protect( mach_task_self(), + (vm_address_t) originalFunctionPtr, 8, false, + (VM_PROT_DEFAULT | VM_PROT_COPY) ); + } + if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__); + + // Allocate and target the escape island to the overriding function. 
+ BranchIsland *escapeIsland = NULL; + if( !err ) + err = alloc( (void**)&escapeIsland, sizeof(BranchIsland), originalFunctionAddress ); + if ( err ) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__); + +#if defined(__ppc__) || defined(__POWERPC__) + if( !err ) + err = setBranchIslandTarget( escapeIsland, overrideFunctionAddress, 0 ); + + // Build the branch absolute instruction to the escape island. + long branchAbsoluteInstruction = 0; // Set to 0 just to silence warning. + if( !err ) { + long escapeIslandAddress = ((long) escapeIsland) & 0x3FFFFFF; + branchAbsoluteInstruction = 0x48000002 | escapeIslandAddress; + } +#elif defined(__i386__) || defined(__x86_64__) + if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__); + + if( !err ) + err = setBranchIslandTarget_i386( escapeIsland, overrideFunctionAddress, 0 ); + + if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__); + // Build the jump relative instruction to the escape island +#endif + + +#if defined(__i386__) || defined(__x86_64__) + if (!err) { + uint32_t addressOffset = ((char*)escapeIsland - (char*)originalFunctionPtr - 5); + addressOffset = OSSwapInt32(addressOffset); + + jumpRelativeInstruction |= 0xE900000000000000LL; + jumpRelativeInstruction |= ((uint64_t)addressOffset & 0xffffffff) << 24; + jumpRelativeInstruction = OSSwapInt64(jumpRelativeInstruction); + } +#endif + + // Optionally allocate & return the reentry island. This may contain relocated + // jmp instructions and so has all the same addressing reachability requirements + // the escape island has to the original function, except the escape island is + // technically our original function. 
+ BranchIsland *reentryIsland = NULL; + if( !err && originalFunctionReentryIsland ) { + err = alloc( (void**)&reentryIsland, sizeof(BranchIsland), escapeIsland); + if( !err ) + *originalFunctionReentryIsland = reentryIsland; + } + +#if defined(__ppc__) || defined(__POWERPC__) + // Atomically: + // o If the reentry island was allocated: + // o Insert the original instruction into the reentry island. + // o Target the reentry island at the 2nd instruction of the + // original function. + // o Replace the original instruction with the branch absolute. + if( !err ) { + int escapeIslandEngaged = false; + do { + if( reentryIsland ) + err = setBranchIslandTarget( reentryIsland, + (void*) (originalFunctionPtr+1), originalInstruction ); + if( !err ) { + escapeIslandEngaged = CompareAndSwap( originalInstruction, + branchAbsoluteInstruction, + (UInt32*)originalFunctionPtr ); + if( !escapeIslandEngaged ) { + // Someone replaced the instruction out from under us, + // re-read the instruction, make sure it's still not + // 'mfctr' and try again. + originalInstruction = *originalFunctionPtr; + if( (originalInstruction & kMFCTRMask) == kMFCTRInstruction) + err = err_cannot_override; + } + } + } while( !err && !escapeIslandEngaged ); + } +#elif defined(__i386__) || defined(__x86_64__) + // Atomically: + // o If the reentry island was allocated: + // o Insert the original instructions into the reentry island. + // o Target the reentry island at the first non-replaced + // instruction of the original function. + // o Replace the original first instructions with the jump relative. 
+ // + // Note that on i386, we do not support someone else changing the code under our feet + if ( !err ) { + fixupInstructions(originalFunctionPtr, reentryIsland, originalInstructions, + originalInstructionCount, originalInstructionSizes ); + + if( reentryIsland ) + err = setBranchIslandTarget_i386( reentryIsland, + (void*) ((char *)originalFunctionPtr+eatenCount), originalInstructions ); + // try making islands executable before planting the jmp +#if defined(__x86_64__) || defined(__i386__) + if( !err ) + err = makeIslandExecutable(escapeIsland); + if( !err && reentryIsland ) + err = makeIslandExecutable(reentryIsland); +#endif + if ( !err ) + atomic_mov64((uint64_t *)originalFunctionPtr, jumpRelativeInstruction); + } +#endif + + // Clean up on error. + if( err ) { + if( reentryIsland ) + dealloc( reentryIsland ); + if( escapeIsland ) + dealloc( escapeIsland ); + } + +#ifdef DEBUG_DISASM + { + fprintf(stderr, "First 16 bytes of the function after slicing: "); + unsigned char *orig = (unsigned char *)originalFunctionAddress; + int i; + for (i = 0; i < 16; i++) { + fprintf(stderr, "%x ", (unsigned int) orig[i]); + } + fprintf(stderr, "\n"); + } +#endif + return err; +} + +/******************************************************************************* +* +* Implementation +* +*******************************************************************************/ +#pragma mark - +#pragma mark (Implementation) + +/***************************************************************************//** + Implementation: Allocates memory for a branch island. + + @param island <- The allocated island. + @param allocateHigh -> Whether to allocate the island at the end of the + address space (for use with the branch absolute + instruction). 
+ @result <- mach_error_t + + ***************************************************************************/ + + mach_error_t +allocateBranchIsland( + BranchIsland **island, + int allocateHigh, + void *originalFunctionAddress) +{ + assert( island ); + + mach_error_t err = err_none; + + if( allocateHigh ) { + vm_size_t pageSize; + err = host_page_size( mach_host_self(), &pageSize ); + if( !err ) { + assert( sizeof( BranchIsland ) <= pageSize ); +#if defined(__ppc__) || defined(__POWERPC__) + vm_address_t first = 0xfeffffff; + vm_address_t last = 0xfe000000 + pageSize; +#elif defined(__x86_64__) + vm_address_t first = ((uint64_t)originalFunctionAddress & ~(uint64_t)(((uint64_t)1 << 31) - 1)) | ((uint64_t)1 << 31); // start in the middle of the page? + vm_address_t last = 0x0; +#else + vm_address_t first = 0xffc00000; + vm_address_t last = 0xfffe0000; +#endif + + vm_address_t page = first; + int allocated = 0; + vm_map_t task_self = mach_task_self(); + + while( !err && !allocated && page != last ) { + + err = vm_allocate( task_self, &page, pageSize, 0 ); + if( err == err_none ) + allocated = 1; + else if( err == KERN_NO_SPACE ) { +#if defined(__x86_64__) + page -= pageSize; +#else + page += pageSize; +#endif + err = err_none; + } + } + if( allocated ) + *island = (BranchIsland*) page; + else if( !allocated && !err ) + err = KERN_NO_SPACE; + } + } else { + void *block = malloc( sizeof( BranchIsland ) ); + if( block ) + *island = block; + else + err = KERN_NO_SPACE; + } + if( !err ) + (**island).allocatedHigh = allocateHigh; + + return err; +} + +/***************************************************************************//** + Implementation: Deallocates memory for a branch island. + + @param island -> The island to deallocate. 
+ @result <- mach_error_t + + ***************************************************************************/ + + mach_error_t +freeBranchIsland( + BranchIsland *island ) +{ + assert( island ); + assert( (*(long*)&island->instructions[0]) == kIslandTemplate[0] ); + assert( island->allocatedHigh ); + + mach_error_t err = err_none; + + if( island->allocatedHigh ) { + vm_size_t pageSize; + err = host_page_size( mach_host_self(), &pageSize ); + if( !err ) { + assert( sizeof( BranchIsland ) <= pageSize ); + err = vm_deallocate( + mach_task_self(), + (vm_address_t) island, pageSize ); + } + } else { + free( island ); + } + + return err; +} + +/***************************************************************************//** + Implementation: Sets the branch island's target, with an optional + instruction. + + @param island -> The branch island to insert target into. + @param branchTo -> The address of the target. + @param instruction -> Optional instruction to execute prior to branch. Set + to zero for nop. + @result <- mach_error_t + + ***************************************************************************/ +#if defined(__ppc__) || defined(__POWERPC__) + mach_error_t +setBranchIslandTarget( + BranchIsland *island, + const void *branchTo, + long instruction ) +{ + // Copy over the template code. + bcopy( kIslandTemplate, island->instructions, sizeof( kIslandTemplate ) ); + + // Fill in the address. + ((short*)island->instructions)[kAddressLo] = ((long) branchTo) & 0x0000FFFF; + ((short*)island->instructions)[kAddressHi] + = (((long) branchTo) >> 16) & 0x0000FFFF; + + // Fill in the (optional) instuction. 
+ if( instruction != 0 ) { + ((short*)island->instructions)[kInstructionLo] + = instruction & 0x0000FFFF; + ((short*)island->instructions)[kInstructionHi] + = (instruction >> 16) & 0x0000FFFF; + } + + //MakeDataExecutable( island->instructions, sizeof( kIslandTemplate ) ); + msync( island->instructions, sizeof( kIslandTemplate ), MS_INVALIDATE ); + + return err_none; +} +#endif + +#if defined(__i386__) + mach_error_t +setBranchIslandTarget_i386( + BranchIsland *island, + const void *branchTo, + char* instructions ) +{ + + // Copy over the template code. + bcopy( kIslandTemplate, island->instructions, sizeof( kIslandTemplate ) ); + + // copy original instructions + if (instructions) { + bcopy (instructions, island->instructions + kInstructions, kOriginalInstructionsSize); + } + + // Fill in the address. + int32_t addressOffset = (char *)branchTo - (island->instructions + kJumpAddress + 4); + *((int32_t *)(island->instructions + kJumpAddress)) = addressOffset; + + msync( island->instructions, sizeof( kIslandTemplate ), MS_INVALIDATE ); + return err_none; +} + +#elif defined(__x86_64__) +mach_error_t +setBranchIslandTarget_i386( + BranchIsland *island, + const void *branchTo, + char* instructions ) +{ + // Copy over the template code. + bcopy( kIslandTemplate, island->instructions, sizeof( kIslandTemplate ) ); + + // Copy original instructions. + if (instructions) { + bcopy (instructions, island->instructions, kOriginalInstructionsSize); + } + + // Fill in the address. 
+ *((uint64_t *)(island->instructions + kJumpAddress)) = (uint64_t)branchTo; + msync( island->instructions, sizeof( kIslandTemplate ), MS_INVALIDATE ); + + return err_none; +} +#endif + + +#if defined(__i386__) || defined(__x86_64__) +// simplistic instruction matching +typedef struct { + unsigned int length; // max 15 + unsigned char mask[15]; // sequence of bytes in memory order + unsigned char constraint[15]; // sequence of bytes in memory order +} AsmInstructionMatch; + +#if defined(__i386__) +static AsmInstructionMatch possibleInstructions[] = { + { 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xE9, 0x00, 0x00, 0x00, 0x00} }, // jmp 0x???????? + { 0x5, {0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, {0x55, 0x89, 0xe5, 0xc9, 0xc3} }, // push %esp; mov %esp,%ebp; leave; ret + { 0x1, {0xFF}, {0x90} }, // nop + { 0x1, {0xF8}, {0x50} }, // push %reg + { 0x2, {0xFF, 0xFF}, {0x89, 0xE5} }, // mov %esp,%ebp + { 0x3, {0xFF, 0xFF, 0xFF}, {0x89, 0x1C, 0x24} }, // mov %ebx,(%esp) + { 0x3, {0xFF, 0xFF, 0x00}, {0x83, 0xEC, 0x00} }, // sub 0x??, %esp + { 0x6, {0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00}, {0x81, 0xEC, 0x00, 0x00, 0x00, 0x00} }, // sub 0x??, %esp with 32bit immediate + { 0x2, {0xFF, 0xFF}, {0x31, 0xC0} }, // xor %eax, %eax + { 0x3, {0xFF, 0x4F, 0x00}, {0x8B, 0x45, 0x00} }, // mov $imm(%ebp), %reg + { 0x3, {0xFF, 0x4C, 0x00}, {0x8B, 0x40, 0x00} }, // mov $imm(%eax-%edx), %reg + { 0x3, {0xFF, 0xCF, 0x00}, {0x8B, 0x4D, 0x00} }, // mov $imm(%rpb), %reg + { 0x3, {0xFF, 0x4F, 0x00}, {0x8A, 0x4D, 0x00} }, // mov $imm(%ebp), %cl + { 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x8B, 0x4C, 0x24, 0x00} }, // mov $imm(%esp), %ecx + { 0x4, {0xFF, 0x00, 0x00, 0x00}, {0x8B, 0x00, 0x00, 0x00} }, // mov r16,r/m16 or r32,r/m32 + { 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xB9, 0x00, 0x00, 0x00, 0x00} }, // mov $imm, %ecx + { 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xB8, 0x00, 0x00, 0x00, 0x00} }, // mov $imm, %eax + { 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x66, 0x0F, 0xEF, 0x00} }, // pxor xmm2/128, xmm1 + { 0x2, {0xFF, 0xFF}, 
{0xDB, 0xE3} }, // fninit + { 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xE8, 0x00, 0x00, 0x00, 0x00} }, // call $imm + { 0x0 } +}; +#elif defined(__x86_64__) +// TODO(glider): disassembling the "0x48, 0x89" sequences is trickier than it's done below. +// If it stops working, refer to http://ref.x86asm.net/geek.html#modrm_byte_32_64 to do it +// more accurately. +// Note: 0x48 is in fact the REX.W prefix, but it might be wrong to treat it as a separate +// instruction. +static AsmInstructionMatch possibleInstructions[] = { + { 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xE9, 0x00, 0x00, 0x00, 0x00} }, // jmp 0x???????? + { 0x1, {0xFF}, {0x90} }, // nop + { 0x1, {0xF8}, {0x50} }, // push %rX + { 0x1, {0xFF}, {0x65} }, // GS prefix + { 0x3, {0xFF, 0xFF, 0xFF}, {0x48, 0x89, 0xE5} }, // mov %rsp,%rbp + { 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x48, 0x83, 0xEC, 0x00} }, // sub 0x??, %rsp + { 0x4, {0xFB, 0xFF, 0x07, 0x00}, {0x48, 0x89, 0x05, 0x00} }, // move onto rbp + { 0x3, {0xFB, 0xFF, 0x00}, {0x48, 0x89, 0x00} }, // mov %reg, %reg + { 0x3, {0xFB, 0xFF, 0x00}, {0x49, 0x89, 0x00} }, // mov %reg, %reg (REX.WB) + { 0x2, {0xFF, 0x00}, {0x41, 0x00} }, // push %rXX + { 0x2, {0xFF, 0x00}, {0x85, 0x00} }, // test %rX,%rX + { 0x2, {0xFF, 0x00}, {0x77, 0x00} }, // ja $i8 + { 0x2, {0xFF, 0x00}, {0x74, 0x00} }, // je $i8 + { 0x5, {0xF8, 0x00, 0x00, 0x00, 0x00}, {0xB8, 0x00, 0x00, 0x00, 0x00} }, // mov $imm, %reg + { 0x3, {0xFF, 0xFF, 0x00}, {0xFF, 0x77, 0x00} }, // pushq $imm(%rdi) + { 0x2, {0xFF, 0xFF}, {0x31, 0xC0} }, // xor %eax, %eax + { 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0x25, 0x00, 0x00, 0x00, 0x00} }, // and $imm, %eax + { 0x3, {0xFF, 0xFF, 0xFF}, {0x80, 0x3F, 0x00} }, // cmpb $imm, (%rdi) + + { 0x8, {0xFF, 0xFF, 0xCF, 0xFF, 0x00, 0x00, 0x00, 0x00}, + {0x48, 0x8B, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00}, }, // mov $imm, %{rax,rdx,rsp,rsi} + { 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x48, 0x83, 0xFA, 0x00}, }, // cmp $i8, %rdx + { 0x4, {0xFF, 0xFF, 0x00, 0x00}, {0x83, 0x7f, 0x00, 0x00}, }, // cmpl 
$imm, $imm(%rdi) + { 0xa, {0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} }, // mov $imm, %rax + { 0x6, {0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00}, + {0x81, 0xE6, 0x00, 0x00, 0x00, 0x00} }, // and $imm, %esi + { 0x6, {0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00}, + {0xFF, 0x25, 0x00, 0x00, 0x00, 0x00} }, // jmpq *(%rip) + { 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x66, 0x0F, 0xEF, 0x00} }, // pxor xmm2/128, xmm1 + { 0x2, {0xFF, 0x00}, {0x89, 0x00} }, // mov r/m32,r32 or r/m16,r16 + { 0x3, {0xFF, 0xFF, 0xFF}, {0x49, 0x89, 0xF8} }, // mov %rdi,%r8 + { 0x3, {0xFF, 0xFF, 0x00}, {0xFF, 0x77, 0x00} }, // pushq $imm(%rdi) + { 0x2, {0xFF, 0xFF}, {0xDB, 0xE3} }, // fninit + { 0x3, {0xFF, 0xFF, 0xFF}, {0x48, 0x85, 0xD2} }, // test %rdx,%rdx + { 0x0 } +}; +#endif + +static Boolean codeMatchesInstruction(unsigned char *code, AsmInstructionMatch* instruction) +{ + Boolean match = true; + + size_t i; + assert(instruction); +#ifdef DEBUG_DISASM + fprintf(stderr, "Matching: "); +#endif + for (i=0; i<instruction->length; i++) { + unsigned char mask = instruction->mask[i]; + unsigned char constraint = instruction->constraint[i]; + unsigned char codeValue = code[i]; +#ifdef DEBUG_DISASM + fprintf(stderr, "%x ", (unsigned)codeValue); +#endif + match = ((codeValue & mask) == constraint); + if (!match) break; + } +#ifdef DEBUG_DISASM + if (match) { + fprintf(stderr, " OK\n"); + } else { + fprintf(stderr, " FAIL\n"); + } +#endif + return match; +} + +#if defined(__i386__) || defined(__x86_64__) + static Boolean +eatKnownInstructions( + unsigned char *code, + uint64_t *newInstruction, + int *howManyEaten, + char *originalInstructions, + int *originalInstructionCount, + uint8_t *originalInstructionSizes ) +{ + Boolean allInstructionsKnown = true; + int totalEaten = 0; + unsigned char* ptr = code; + int remainsToEat = 5; // a JMP instruction takes 5 bytes + int instructionIndex = 0; + + if (howManyEaten) *howManyEaten = 0; + if 
(originalInstructionCount) *originalInstructionCount = 0; + while (remainsToEat > 0) { + Boolean curInstructionKnown = false; + + // See if instruction matches one we know + AsmInstructionMatch* curInstr = possibleInstructions; + do { + if ((curInstructionKnown = codeMatchesInstruction(ptr, curInstr))) break; + curInstr++; + } while (curInstr->length > 0); + + // if all instruction matches failed, we don't know current instruction then, stop here + if (!curInstructionKnown) { + allInstructionsKnown = false; + fprintf(stderr, "mach_override: some instructions unknown! Need to update mach_override.c\n"); + break; + } + + // At this point, we've matched curInstr + int eaten = curInstr->length; + ptr += eaten; + remainsToEat -= eaten; + totalEaten += eaten; + + if (originalInstructionSizes) originalInstructionSizes[instructionIndex] = eaten; + instructionIndex += 1; + if (originalInstructionCount) *originalInstructionCount = instructionIndex; + } + + + if (howManyEaten) *howManyEaten = totalEaten; + + if (originalInstructions) { + Boolean enoughSpaceForOriginalInstructions = (totalEaten < kOriginalInstructionsSize); + + if (enoughSpaceForOriginalInstructions) { + memset(originalInstructions, 0x90 /* NOP */, kOriginalInstructionsSize); // fill instructions with NOP + bcopy(code, originalInstructions, totalEaten); + } else { +#ifdef DEBUG_DISASM + fprintf(stderr, "Not enough space in island to store original instructions. 
Adapt the island definition and kOriginalInstructionsSize\n"); +#endif + return false; + } + } + + if (allInstructionsKnown) { + // save last 3 bytes of first 64bits of codre we'll replace + uint64_t currentFirst64BitsOfCode = *((uint64_t *)code); + currentFirst64BitsOfCode = OSSwapInt64(currentFirst64BitsOfCode); // back to memory representation + currentFirst64BitsOfCode &= 0x0000000000FFFFFFLL; + + // keep only last 3 instructions bytes, first 5 will be replaced by JMP instr + *newInstruction &= 0xFFFFFFFFFF000000LL; // clear last 3 bytes + *newInstruction |= (currentFirst64BitsOfCode & 0x0000000000FFFFFFLL); // set last 3 bytes + } + + return allInstructionsKnown; +} + + static void +fixupInstructions( + void *originalFunction, + void *escapeIsland, + void *instructionsToFix, + int instructionCount, + uint8_t *instructionSizes ) +{ + void *initialOriginalFunction = originalFunction; + int index, fixed_size, code_size = 0; + for (index = 0;index < instructionCount;index += 1) + code_size += instructionSizes[index]; + +#ifdef DEBUG_DISASM + void *initialInstructionsToFix = instructionsToFix; + fprintf(stderr, "BEFORE FIXING:\n"); + dump16Bytes(initialOriginalFunction); + dump16Bytes(initialInstructionsToFix); +#endif // DEBUG_DISASM + + for (index = 0;index < instructionCount;index += 1) + { + fixed_size = instructionSizes[index]; + if ((*(uint8_t*)instructionsToFix == 0xE9) || // 32-bit jump relative + (*(uint8_t*)instructionsToFix == 0xE8)) // 32-bit call relative + { + uint32_t offset = (uintptr_t)originalFunction - (uintptr_t)escapeIsland; + uint32_t *jumpOffsetPtr = (uint32_t*)((uintptr_t)instructionsToFix + 1); + *jumpOffsetPtr += offset; + } + if ((*(uint8_t*)instructionsToFix == 0x74) || // Near jump if equal (je), 2 bytes. + (*(uint8_t*)instructionsToFix == 0x77)) // Near jump if above (ja), 2 bytes. + { + // We replace a near je/ja instruction, "7P JJ", with a 32-bit je/ja, "0F 8P WW XX YY ZZ". 
+ // This is critical, otherwise a near jump will likely fall outside the original function. + uint32_t offset = (uintptr_t)initialOriginalFunction - (uintptr_t)escapeIsland; + uint32_t jumpOffset = *(uint8_t*)((uintptr_t)instructionsToFix + 1); + *(uint8_t*)(instructionsToFix + 1) = *(uint8_t*)instructionsToFix + 0x10; + *(uint8_t*)instructionsToFix = 0x0F; + uint32_t *jumpOffsetPtr = (uint32_t*)((uintptr_t)instructionsToFix + 2 ); + *jumpOffsetPtr = offset + jumpOffset; + fixed_size = 6; + } + + originalFunction = (void*)((uintptr_t)originalFunction + instructionSizes[index]); + escapeIsland = (void*)((uintptr_t)escapeIsland + instructionSizes[index]); + instructionsToFix = (void*)((uintptr_t)instructionsToFix + fixed_size); + + // Expanding short instructions into longer ones may overwrite the next instructions, + // so we must restore them. + code_size -= fixed_size; + if ((code_size > 0) && (fixed_size != instructionSizes[index])) { + bcopy(originalFunction, instructionsToFix, code_size); + } + } +#ifdef DEBUG_DISASM + fprintf(stderr, "AFTER_FIXING:\n"); + dump16Bytes(initialOriginalFunction); + dump16Bytes(initialInstructionsToFix); +#endif // DEBUG_DISASM +} + +#ifdef DEBUG_DISASM +#define HEX_DIGIT(x) ((((x) % 16) < 10) ? ('0' + ((x) % 16)) : ('A' + ((x) % 16 - 10))) + + static void +dump16Bytes( + void *ptr) { + int i; + char buf[3]; + uint8_t *bytes = (uint8_t*)ptr; + for (i = 0; i < 16; i++) { + buf[0] = HEX_DIGIT(bytes[i] / 16); + buf[1] = HEX_DIGIT(bytes[i] % 16); + buf[2] = ' '; + write(2, buf, 3); + } + write(2, "\n", 1); +} +#endif // DEBUG_DISASM +#endif + +#if defined(__i386__) +__asm( + ".text;" + ".align 2, 0x90;" + "_atomic_mov64:;" + " pushl %ebp;" + " movl %esp, %ebp;" + " pushl %esi;" + " pushl %ebx;" + " pushl %ecx;" + " pushl %eax;" + " pushl %edx;" + + // atomic push of value to an address + // we use cmpxchg8b, which compares content of an address with + // edx:eax. 
If they are equal, it atomically puts 64bit value + // ecx:ebx in address. + // We thus put contents of address in edx:eax to force ecx:ebx + // in address + " mov 8(%ebp), %esi;" // esi contains target address + " mov 12(%ebp), %ebx;" + " mov 16(%ebp), %ecx;" // ecx:ebx now contains value to put in target address + " mov (%esi), %eax;" + " mov 4(%esi), %edx;" // edx:eax now contains value currently contained in target address + " lock; cmpxchg8b (%esi);" // atomic move. + + // restore registers + " popl %edx;" + " popl %eax;" + " popl %ecx;" + " popl %ebx;" + " popl %esi;" + " popl %ebp;" + " ret" +); +#elif defined(__x86_64__) +void atomic_mov64( + uint64_t *targetAddress, + uint64_t value ) +{ + *targetAddress = value; +} +#endif +#endif +#endif // __APPLE__ diff --git a/lib/interception/mach_override/mach_override.h b/lib/interception/mach_override/mach_override.h new file mode 100644 index 000000000000..7e60cdcd619c --- /dev/null +++ b/lib/interception/mach_override/mach_override.h @@ -0,0 +1,140 @@ +/******************************************************************************* + mach_override.h + Copyright (c) 2003-2009 Jonathan 'Wolf' Rentzsch: <http://rentzsch.com> + Some rights reserved: <http://opensource.org/licenses/mit-license.php> + + ***************************************************************************/ + +/***************************************************************************//** + @mainpage mach_override + @author Jonathan 'Wolf' Rentzsch: <http://rentzsch.com> + + This package, coded in C to the Mach API, allows you to override ("patch") + program- and system-supplied functions at runtime. You can fully replace + functions with your implementations, or merely head- or tail-patch the + original implementations. + + Use it by #include'ing mach_override.h from your .c, .m or .mm file(s). + + @todo Discontinue use of Carbon's MakeDataExecutable() and + CompareAndSwap() calls and start using the Mach equivalents, if they + exist. 
If they don't, write them and roll them in. That way, this + code will be pure Mach, which will make it easier to use everywhere. + Update: MakeDataExecutable() has been replaced by + msync(MS_INVALIDATE). There is an OSCompareAndSwap in libkern, but + I'm currently unsure if I can link against it. May have to roll in + my own version... + @todo Stop using an entire 4K high-allocated VM page per 28-byte escape + branch island. Done right, this will dramatically speed up escape + island allocations when they number over 250. Then again, if you're + overriding more than 250 functions, maybe speed isn't your main + concern... + @todo Add detection of: b, bl, bla, bc, bcl, bcla, bcctrl, bclrl + first-instructions. Initially, we should refuse to override + functions beginning with these instructions. Eventually, we should + dynamically rewrite them to make them position-independent. + @todo Write mach_unoverride(), which would remove an override placed on a + function. Must be multiple-override aware, which means an almost + complete rewrite under the covers, because the target address can't + be spread across two load instructions like it is now since it will + need to be atomically updatable. + @todo Add non-reentry variants of overrides to test_mach_override. + + ***************************************************************************/ + +#ifdef __APPLE__ + +#ifndef _mach_override_ +#define _mach_override_ + +#include <sys/types.h> +#include <mach/error.h> + +#ifdef __cplusplus + extern "C" { +#endif + +/** + Returned if the function to be overridden begins with a 'mfctr' instruction + (PowerPC), or if its leading instructions are not recognized (x86). +*/ +#define err_cannot_override (err_local|1) + +/************************************************************************************//** + Dynamically overrides the function implementation referenced by + originalFunctionAddress with the implementation pointed to by overrideFunctionAddress. 
+ Optionally returns a pointer to a "reentry island" which, if jumped to, will resume + the original implementation. + + @param originalFunctionAddress -> Required address of the function to + override (with overrideFunctionAddress). + @param overrideFunctionAddress -> Required address to the overriding + function. + @param originalFunctionReentryIsland <- Optional pointer to pointer to the + reentry island. Can be NULL. + @result <- err_cannot_override if the original + function's implementation begins with + the 'mfctr' instruction (PowerPC) or with + instructions that are not recognized (x86). + + ************************************************************************************/ + +// We're prefixing mach_override_ptr() with "__asan_" to avoid name conflicts with other +// mach_override_ptr() implementations that may appear in the client program. + mach_error_t +__asan_mach_override_ptr( + void *originalFunctionAddress, + const void *overrideFunctionAddress, + void **originalFunctionReentryIsland ); + +// Allow the use of custom allocation and deallocation routines with mach_override_ptr(). +// This should help to speed things up on x86_64. 
+typedef mach_error_t island_malloc( void **ptr, size_t size, void *hint ); +typedef mach_error_t island_free( void *ptr ); + + mach_error_t +__asan_mach_override_ptr_custom( + void *originalFunctionAddress, + const void *overrideFunctionAddress, + void **originalFunctionReentryIsland, + island_malloc *alloc, + island_free *dealloc ); + +/************************************************************************************//** + + + ************************************************************************************/ + +#ifdef __cplusplus + +#define MACH_OVERRIDE( ORIGINAL_FUNCTION_RETURN_TYPE, ORIGINAL_FUNCTION_NAME, ORIGINAL_FUNCTION_ARGS, ERR ) \ + { \ + static ORIGINAL_FUNCTION_RETURN_TYPE (*ORIGINAL_FUNCTION_NAME##_reenter)ORIGINAL_FUNCTION_ARGS; \ + static bool ORIGINAL_FUNCTION_NAME##_overriden = false; \ + class mach_override_class__##ORIGINAL_FUNCTION_NAME { \ + public: \ + static kern_return_t override(void *originalFunctionPtr) { \ + kern_return_t result = err_none; \ + if (!ORIGINAL_FUNCTION_NAME##_overriden) { \ + ORIGINAL_FUNCTION_NAME##_overriden = true; \ + result = mach_override_ptr( (void*)originalFunctionPtr, \ + (void*)mach_override_class__##ORIGINAL_FUNCTION_NAME::replacement, \ + (void**)&ORIGINAL_FUNCTION_NAME##_reenter ); \ + } \ + return result; \ + } \ + static ORIGINAL_FUNCTION_RETURN_TYPE replacement ORIGINAL_FUNCTION_ARGS { + +#define END_MACH_OVERRIDE( ORIGINAL_FUNCTION_NAME ) \ + } \ + }; \ + \ + err = mach_override_class__##ORIGINAL_FUNCTION_NAME::override((void*)ORIGINAL_FUNCTION_NAME); \ + } + +#endif + +#ifdef __cplusplus + } +#endif +#endif // _mach_override_ + +#endif // __APPLE__ |