diff options
Diffstat (limited to 'lib/msandr/msandr.cc')
-rw-r--r-- | lib/msandr/msandr.cc | 871 |
1 files changed, 0 insertions, 871 deletions
diff --git a/lib/msandr/msandr.cc b/lib/msandr/msandr.cc deleted file mode 100644 index 6c9d7c675acae..0000000000000 --- a/lib/msandr/msandr.cc +++ /dev/null @@ -1,871 +0,0 @@ -//===-- msandr.cc ---------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file is a part of MemorySanitizer. -// -// DynamoRio client for MemorySanitizer. -// -// MemorySanitizer requires that all program code is instrumented. Any memory -// store that can turn an uninitialized value into an initialized value must be -// observed by the tool, otherwise we risk reporting a false UMR. -// -// This also includes any libraries that the program depends on. -// -// In the case when rebuilding all program dependencies with MemorySanitizer is -// problematic, an experimental MSanDR tool (the code you are currently looking -// at) can be used. It is a DynamoRio-based tool that uses dynamic -// instrumentation to -// * Unpoison all memory stores. -// * Unpoison TLS slots used by MemorySanitizer to pass function arguments and -// return value shadow on anything that looks like a function call or a return -// from a function. -// -// This tool does not detect the use of uninitialized values in uninstrumented -// libraries. It merely gets rid of false positives by marking all data that -// passes through uninstrumented code as fully initialized. -//===----------------------------------------------------------------------===// - -#include <dr_api.h> -#include <drutil.h> -#include <drmgr.h> -#include <drsyscall.h> - -#include <sys/mman.h> -#include <sys/syscall.h> /* for SYS_mmap */ - -#include <algorithm> -#include <string> -#include <set> -#include <vector> -#include <string.h> - -#define TESTALL(mask, var) (((mask) & (var)) == (mask)) -#define TESTANY(mask, var) (((mask) & (var)) != 0) - -#define CHECK_IMPL(condition, file, line) \ - do { \ - if (!(condition)) { \ - dr_printf("Check failed: `%s`\nat %s:%d\n", #condition, file, line); \ - dr_abort(); \ - } \ - } while (0) // TODO: stacktrace - -#define CHECK(condition) CHECK_IMPL(condition, __FILE__, __LINE__) - -#define VERBOSITY 0 - -// XXX: it seems setting macro in CMakeLists.txt does not work, -// so manually set it here now. - -// Building msandr client for running in DynamoRIO hybrid mode, -// which allows some module running natively. -// TODO: turn it on by default when hybrid is stable enough -// #define MSANDR_NATIVE_EXEC - -// Building msandr client for standalone test that does not need to -// run with msan build executables. Disable by default. -// #define MSANDR_STANDALONE_TEST - -#define NUM_TLS_RETVAL 1 -#define NUM_TLS_PARAM 6 - -#ifdef MSANDR_STANDALONE_TEST -// For testing purpose, we map app to shadow memory at [0x100000, 0x20000). -// Normally, the app starts at 0x400000: -// 00400000-004e0000 r-xp 00000000 fc:00 524343 /bin/bash -// so there should be no problem. -# define SHADOW_MEMORY_BASE ((void *)0x100000) -# define SHADOW_MEMORY_SIZE (0x100000) -# define SHADOW_MEMORY_MASK (SHADOW_MEMORY_SIZE - 4 /* to avoid overflow */) -#else -// shadow memory range [0x200000000000, 0x400000000000) -// assuming no app memory below 0x200000000000 -# define SHADOW_MEMORY_MASK 0x3fffffffffffULL -#endif /* MSANDR_STANDALONE_TEST */ - -namespace { - -std::string g_app_path; - -int msan_retval_tls_offset; -int msan_param_tls_offset; - -#ifndef MSANDR_NATIVE_EXEC -class ModuleData { -public: - ModuleData(); - ModuleData(const module_data_t *info); - // Yes, we want default copy, assign, and dtor semantics. - -public: - app_pc start_; - app_pc end_; - // Full path to the module. - std::string path_; - module_handle_t handle_; - bool should_instrument_; - bool executed_; -}; - -// A vector of loaded modules sorted by module bounds. We lookup the current PC -// in here from the bb event. This is better than an rb tree because the lookup -// is faster and the bb event occurs far more than the module load event. -std::vector<ModuleData> g_module_list; - -ModuleData::ModuleData() - : start_(NULL), end_(NULL), path_(""), handle_(NULL), - should_instrument_(false), executed_(false) { -} - -ModuleData::ModuleData(const module_data_t *info) - : start_(info->start), end_(info->end), path_(info->full_path), - handle_(info->handle), - // We'll check the black/white lists later and adjust this. - should_instrument_(true), executed_(false) { -} -#endif /* !MSANDR_NATIVE_EXEC */ - -int(*__msan_get_retval_tls_offset)(); -int(*__msan_get_param_tls_offset)(); -void (*__msan_unpoison)(void *base, size_t size); -bool (*__msan_is_in_loader)(); - -#ifdef MSANDR_STANDALONE_TEST -uint mock_msan_retval_tls_offset; -uint mock_msan_param_tls_offset; -static int mock_msan_get_retval_tls_offset() { - return (int)mock_msan_retval_tls_offset; -} - -static int mock_msan_get_param_tls_offset() { - return (int)mock_msan_param_tls_offset; -} - -static void mock_msan_unpoison(void *base, size_t size) { - /* do nothing */ -} - -static bool mock_msan_is_in_loader() { - return false; -} -#endif /* MSANDR_STANDALONE_TEST */ - -static generic_func_t LookupCallback(module_data_t *app, const char *name) { -#ifdef MSANDR_STANDALONE_TEST - if (strcmp("__msan_get_retval_tls_offset", name) == 0) { - return (generic_func_t)mock_msan_get_retval_tls_offset; - } else if (strcmp("__msan_get_param_tls_offset", name) == 0) { - return (generic_func_t)mock_msan_get_param_tls_offset; - } else if (strcmp("__msan_unpoison", name) == 0) { - return (generic_func_t)mock_msan_unpoison; - } else if (strcmp("__msan_is_in_loader", name) == 0) { - return (generic_func_t)mock_msan_is_in_loader; - } - CHECK(false); - return NULL; -#else /* !MSANDR_STANDALONE_TEST */ - generic_func_t callback = dr_get_proc_address(app->handle, name); - if (callback == NULL) { - dr_printf("Couldn't find `%s` in %s\n", name, app->full_path); - CHECK(callback); - } - return callback; -#endif /* !MSANDR_STANDALONE_TEST */ -} - -void InitializeMSanCallbacks() { - module_data_t *app = dr_lookup_module_by_name(dr_get_application_name()); - if (!app) { - dr_printf("%s - oops, dr_lookup_module_by_name failed!\n", - dr_get_application_name()); - CHECK(app); - } - g_app_path = app->full_path; - - __msan_get_retval_tls_offset = (int (*)()) - LookupCallback(app, "__msan_get_retval_tls_offset"); - __msan_get_param_tls_offset = (int (*)()) - LookupCallback(app, "__msan_get_param_tls_offset"); - __msan_unpoison = (void(*)(void *, size_t)) - LookupCallback(app, "__msan_unpoison"); - __msan_is_in_loader = (bool (*)()) - LookupCallback(app, "__msan_is_in_loader"); - - dr_free_module_data(app); -} - -// FIXME: Handle absolute addresses and PC-relative addresses. -// FIXME: Handle TLS accesses via FS or GS. DR assumes all other segments have -// a zero base anyway. -bool OperandIsInteresting(opnd_t opnd) { - return (opnd_is_base_disp(opnd) && opnd_get_segment(opnd) != DR_SEG_FS && - opnd_get_segment(opnd) != DR_SEG_GS); -} - -bool WantToInstrument(instr_t *instr) { - // TODO: skip push instructions? - switch (instr_get_opcode(instr)) { - // FIXME: support the instructions excluded below: - case OP_rep_cmps: - // f3 a6 rep cmps %ds:(%rsi) %es:(%rdi) %rsi %rdi %rcx -> %rsi %rdi %rcx - return false; - } - - // Labels appear due to drutil_expand_rep_string() - if (instr_is_label(instr)) - return false; - - CHECK(instr_ok_to_mangle(instr) == true); - - if (instr_writes_memory(instr)) { - for (int d = 0; d < instr_num_dsts(instr); d++) { - opnd_t op = instr_get_dst(instr, d); - if (OperandIsInteresting(op)) - return true; - } - } - - return false; -} - -#define PRE(at, what) instrlist_meta_preinsert(bb, at, INSTR_CREATE_##what); -#define PREF(at, what) instrlist_meta_preinsert(bb, at, what); - -void InstrumentMops(void *drcontext, instrlist_t *bb, instr_t *instr, opnd_t op, - bool is_write) { - bool need_to_restore_eflags = false; - uint flags = instr_get_arith_flags(instr); - // TODO: do something smarter with flags and spills in general? - // For example, spill them only once for a sequence of instrumented - // instructions that don't change/read flags. - - if (!TESTALL(EFLAGS_WRITE_6, flags) || TESTANY(EFLAGS_READ_6, flags)) { - if (VERBOSITY > 1) - dr_printf("Spilling eflags...\n"); - need_to_restore_eflags = true; - // TODO: Maybe sometimes don't need to 'seto'. - // TODO: Maybe sometimes don't want to spill XAX here? - // TODO: No need to spill XAX here if XAX is not used in the BB. - dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); - dr_save_arith_flags_to_xax(drcontext, bb, instr); - dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3); - dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); - } - -#if 0 - dr_printf("==DRMSAN== DEBUG: %d %d %d %d %d %d\n", - opnd_is_memory_reference(op), opnd_is_base_disp(op), - opnd_is_base_disp(op) ? opnd_get_index(op) : -1, - opnd_is_far_memory_reference(op), opnd_is_reg_pointer_sized(op), - opnd_is_base_disp(op) ? opnd_get_disp(op) : -1); -#endif - - reg_id_t R1; - bool address_in_R1 = false; - if (opnd_is_base_disp(op) && opnd_get_index(op) == DR_REG_NULL && - opnd_get_disp(op) == 0) { - // If this is a simple access with no offset or index, we can just use the - // base for R1. - address_in_R1 = true; - R1 = opnd_get_base(op); - } else { - // Otherwise, we need to compute the addr into R1. - // TODO: reuse some spare register? e.g. r15 on x64 - // TODO: might be used as a non-mem-ref register? - R1 = DR_REG_XAX; - } - CHECK(reg_is_pointer_sized(R1)); // otherwise R2 may be wrong. - - // Pick R2 from R8 to R15. - // It's OK if the instr uses R2 elsewhere, since we'll restore it before instr. - reg_id_t R2; - for (R2 = DR_REG_R8; R2 <= DR_REG_R15; R2++) { - if (!opnd_uses_reg(op, R2)) - break; - } - CHECK((R2 <= DR_REG_R15) && R1 != R2); - - // Save the current values of R1 and R2. - dr_save_reg(drcontext, bb, instr, R1, SPILL_SLOT_1); - // TODO: Something smarter than spilling a "fixed" register R2? - dr_save_reg(drcontext, bb, instr, R2, SPILL_SLOT_2); - - if (!address_in_R1) - CHECK(drutil_insert_get_mem_addr(drcontext, bb, instr, op, R1, R2)); - PRE(instr, mov_imm(drcontext, opnd_create_reg(R2), - OPND_CREATE_INT64(SHADOW_MEMORY_MASK))); - PRE(instr, and(drcontext, opnd_create_reg(R1), opnd_create_reg(R2))); -#ifdef MSANDR_STANDALONE_TEST - PRE(instr, add(drcontext, opnd_create_reg(R1), - OPND_CREATE_INT32(SHADOW_MEMORY_BASE))); -#endif - // There is no mov_st of a 64-bit immediate, so... - opnd_size_t op_size = opnd_get_size(op); - CHECK(op_size != OPSZ_NA); - uint access_size = opnd_size_in_bytes(op_size); - if (access_size <= 4 || op_size == OPSZ_PTR /* x64 support sign extension */) { - instr_t *label = INSTR_CREATE_label(drcontext); - opnd_t immed; - if (op_size == OPSZ_PTR || op_size == OPSZ_4) - immed = OPND_CREATE_INT32(0); - else - immed = opnd_create_immed_int((ptr_int_t) 0, op_size); - // we check if target is 0 before write to reduce unnecessary memory stores. - PRE(instr, cmp(drcontext, - opnd_create_base_disp(R1, DR_REG_NULL, 0, 0, op_size), - immed)); - PRE(instr, jcc(drcontext, OP_je, opnd_create_instr(label))); - PRE(instr, mov_st(drcontext, - opnd_create_base_disp(R1, DR_REG_NULL, 0, 0, op_size), - immed)); - PREF(instr, label); - } else { - // FIXME: tail? - for (uint ofs = 0; ofs < access_size; ofs += 4) { - instr_t *label = INSTR_CREATE_label(drcontext); - opnd_t immed = OPND_CREATE_INT32(0); - PRE(instr, cmp(drcontext, OPND_CREATE_MEM32(R1, ofs), immed)); - PRE(instr, jcc(drcontext, OP_je, opnd_create_instr(label))); - PRE(instr, mov_st(drcontext, OPND_CREATE_MEM32(R1, ofs), immed)); - PREF(instr, label) - } - } - - // Restore the registers and flags. - dr_restore_reg(drcontext, bb, instr, R1, SPILL_SLOT_1); - dr_restore_reg(drcontext, bb, instr, R2, SPILL_SLOT_2); - - // TODO: move aflags save/restore to per instr instead of per opnd - if (need_to_restore_eflags) { - if (VERBOSITY > 1) - dr_printf("Restoring eflags\n"); - // TODO: Check if it's reverse to the dr_restore_reg above and optimize. - dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); - dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3); - dr_restore_arith_flags_from_xax(drcontext, bb, instr); - dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); - } - - // The original instruction is left untouched. The above instrumentation is just - // a prefix. -} - -void InstrumentReturn(void *drcontext, instrlist_t *bb, instr_t *instr) { -#ifdef MSANDR_STANDALONE_TEST - PRE(instr, - mov_st(drcontext, - opnd_create_far_base_disp(DR_SEG_GS /* DR's TLS */, - DR_REG_NULL, DR_REG_NULL, - 0, msan_retval_tls_offset, - OPSZ_PTR), - OPND_CREATE_INT32(0))); -#else /* !MSANDR_STANDALONE_TEST */ - /* XXX: the code below only works if -mangle_app_seg and -private_loader, - * which is turned of for optimized native exec - */ - dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); - - // Clobbers nothing except xax. - bool res = - dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX); - CHECK(res); - - // TODO: unpoison more bytes? - PRE(instr, - mov_st(drcontext, OPND_CREATE_MEM64(DR_REG_XAX, msan_retval_tls_offset), - OPND_CREATE_INT32(0))); - - dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); - - // The original instruction is left untouched. The above instrumentation is just - // a prefix. -#endif /* !MSANDR_STANDALONE_TEST */ -} - -void InstrumentIndirectBranch(void *drcontext, instrlist_t *bb, - instr_t *instr) { -#ifdef MSANDR_STANDALONE_TEST - for (int i = 0; i < NUM_TLS_PARAM; ++i) { - PRE(instr, - mov_st(drcontext, - opnd_create_far_base_disp(DR_SEG_GS /* DR's TLS */, - DR_REG_NULL, DR_REG_NULL, - 0, - msan_param_tls_offset + - i * sizeof(void *), - OPSZ_PTR), - OPND_CREATE_INT32(0))); - } -#else /* !MSANDR_STANDALONE_TEST */ - /* XXX: the code below only works if -mangle_app_seg and -private_loader, - * which is turned off for optimized native exec - */ - dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); - - // Clobbers nothing except xax. - bool res = - dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX); - CHECK(res); - - // TODO: unpoison more bytes? - for (int i = 0; i < NUM_TLS_PARAM; ++i) { - PRE(instr, - mov_st(drcontext, OPND_CREATE_MEMPTR(DR_REG_XAX, msan_param_tls_offset + - i * sizeof(void *)), - OPND_CREATE_INT32(0))); - } - - dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1); - - // The original instruction is left untouched. The above instrumentation is just - // a prefix. -#endif /* !MSANDR_STANDALONE_TEST */ -} - -#ifndef MSANDR_NATIVE_EXEC -// For use with binary search. Modules shouldn't overlap, so we shouldn't have -// to look at end_. If that can happen, we won't support such an application. -bool ModuleDataCompareStart(const ModuleData &left, const ModuleData &right) { - return left.start_ < right.start_; -} - -// Look up the module containing PC. Should be relatively fast, as its called -// for each bb instrumentation. -ModuleData *LookupModuleByPC(app_pc pc) { - ModuleData fake_mod_data; - fake_mod_data.start_ = pc; - std::vector<ModuleData>::iterator it = - lower_bound(g_module_list.begin(), g_module_list.end(), fake_mod_data, - ModuleDataCompareStart); - // if (it == g_module_list.end()) - // return NULL; - if (it == g_module_list.end() || pc < it->start_) - --it; - CHECK(it->start_ <= pc); - if (pc >= it->end_) { - // We're past the end of this module. We shouldn't be in the next module, - // or lower_bound lied to us. - ++it; - CHECK(it == g_module_list.end() || pc < it->start_); - return NULL; - } - - // OK, we found the module. - return &*it; -} - -bool ShouldInstrumentNonModuleCode() { return true; } - -bool ShouldInstrumentModule(ModuleData *mod_data) { - // TODO(rnk): Flags for blacklist would get wired in here. - generic_func_t p = - dr_get_proc_address(mod_data->handle_, "__msan_track_origins"); - return !p; -} - -bool ShouldInstrumentPc(app_pc pc, ModuleData **pmod_data) { - ModuleData *mod_data = LookupModuleByPC(pc); - if (pmod_data) - *pmod_data = mod_data; - if (mod_data != NULL) { - // This module is on a blacklist. - if (!mod_data->should_instrument_) { - return false; - } - } else if (!ShouldInstrumentNonModuleCode()) { - return false; - } - return true; -} -#endif /* !MSANDR_NATIVE_CLIENT */ - -// TODO(rnk): Make sure we instrument after __msan_init. -dr_emit_flags_t -event_basic_block_app2app(void *drcontext, void *tag, instrlist_t *bb, - bool for_trace, bool translating) { -#ifndef MSANDR_NATIVE_EXEC - app_pc pc = dr_fragment_app_pc(tag); - if (ShouldInstrumentPc(pc, NULL)) - CHECK(drutil_expand_rep_string(drcontext, bb)); -#else /* MSANDR_NATIVE_EXEC */ - CHECK(drutil_expand_rep_string(drcontext, bb)); -#endif /* MSANDR_NATIVE_EXEC */ - return DR_EMIT_PERSISTABLE; -} - -dr_emit_flags_t event_basic_block(void *drcontext, void *tag, instrlist_t *bb, - bool for_trace, bool translating) { - app_pc pc = dr_fragment_app_pc(tag); -#ifndef MSANDR_NATIVE_EXEC - ModuleData *mod_data; - - if (!ShouldInstrumentPc(pc, &mod_data)) - return DR_EMIT_PERSISTABLE; - - if (VERBOSITY > 1) - dr_printf("============================================================\n"); - if (VERBOSITY > 0) { - std::string mod_path = (mod_data ? mod_data->path_ : "<no module, JITed?>"); - if (mod_data && !mod_data->executed_) { - mod_data->executed_ = true; // Nevermind this race. - dr_printf("Executing from new module: %s\n", mod_path.c_str()); - } - dr_printf("BB to be instrumented: %p [from %s]; translating = %s\n", pc, - mod_path.c_str(), translating ? "true" : "false"); - if (mod_data) { - // Match standard sanitizer trace format for free symbols. - // #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) - dr_printf(" #0 %p (%s+%p)\n", pc, mod_data->path_.c_str(), - pc - mod_data->start_); - } - } -#endif /* !MSANDR_NATIVE_EXEC */ - - if (VERBOSITY > 1) { - instrlist_disassemble(drcontext, pc, bb, STDOUT); - instr_t *instr; - for (instr = instrlist_first(bb); instr; instr = instr_get_next(instr)) { - dr_printf("opcode: %d\n", instr_get_opcode(instr)); - } - } - - for (instr_t *i = instrlist_first(bb); i != NULL; i = instr_get_next(i)) { - int opcode = instr_get_opcode(i); - if (opcode == OP_ret || opcode == OP_ret_far) { - InstrumentReturn(drcontext, bb, i); - continue; - } - - // These instructions hopefully cover all cases where control is transferred - // to a function in a different module (we only care about calls into - // compiler-instrumented modules). - // * call_ind is used for normal indirect calls. - // * jmp_ind is used for indirect tail calls, and calls through PLT (PLT - // stub includes a jump to an address from GOT). - if (opcode == OP_call_ind || opcode == OP_call_far_ind || - opcode == OP_jmp_ind || opcode == OP_jmp_far_ind) { - InstrumentIndirectBranch(drcontext, bb, i); - continue; - } - - if (!WantToInstrument(i)) - continue; - - if (VERBOSITY > 1) { - app_pc orig_pc = dr_fragment_app_pc(tag); - uint flags = instr_get_arith_flags(i); - dr_printf("+%d -> to be instrumented! [opcode=%d, flags = 0x%08X]\n", - instr_get_app_pc(i) - orig_pc, instr_get_opcode(i), flags); - } - - if (instr_writes_memory(i)) { - // Instrument memory writes - // bool instrumented_anything = false; - for (int d = 0; d < instr_num_dsts(i); d++) { - opnd_t op = instr_get_dst(i, d); - if (!OperandIsInteresting(op)) - continue; - - // CHECK(!instrumented_anything); - // instrumented_anything = true; - InstrumentMops(drcontext, bb, i, op, true); - break; // only instrumenting the first dst - } - } - } - -// TODO: optimize away redundant restore-spill pairs? - - if (VERBOSITY > 1) { - pc = dr_fragment_app_pc(tag); - dr_printf("\nFinished instrumenting dynamorio_basic_block(PC=" PFX ")\n", pc); - instrlist_disassemble(drcontext, pc, bb, STDOUT); - } - return DR_EMIT_PERSISTABLE; -} - -#ifndef MSANDR_NATIVE_EXEC -void event_module_load(void *drcontext, const module_data_t *info, - bool loaded) { - // Insert the module into the list while maintaining the ordering. - ModuleData mod_data(info); - std::vector<ModuleData>::iterator it = - upper_bound(g_module_list.begin(), g_module_list.end(), mod_data, - ModuleDataCompareStart); - it = g_module_list.insert(it, mod_data); - // Check if we should instrument this module. - it->should_instrument_ = ShouldInstrumentModule(&*it); - dr_module_set_should_instrument(info->handle, it->should_instrument_); - - if (VERBOSITY > 0) - dr_printf("==DRMSAN== Loaded module: %s [%p...%p], instrumentation is %s\n", - info->full_path, info->start, info->end, - it->should_instrument_ ? "on" : "off"); -} - -void event_module_unload(void *drcontext, const module_data_t *info) { - if (VERBOSITY > 0) - dr_printf("==DRMSAN== Unloaded module: %s [%p...%p]\n", info->full_path, - info->start, info->end); - - // Remove the module from the list. - ModuleData mod_data(info); - std::vector<ModuleData>::iterator it = - lower_bound(g_module_list.begin(), g_module_list.end(), mod_data, - ModuleDataCompareStart); - // It's a bug if we didn't actually find the module. - CHECK(it != g_module_list.end() && it->start_ == mod_data.start_ && - it->end_ == mod_data.end_ && it->path_ == mod_data.path_); - g_module_list.erase(it); -} -#endif /* !MSANDR_NATIVE_EXEC */ - -void event_exit() { - // Clean up so DR doesn't tell us we're leaking memory. - drsys_exit(); - drutil_exit(); - drmgr_exit(); - -#ifdef MSANDR_STANDALONE_TEST - /* free tls */ - bool res; - res = dr_raw_tls_cfree(msan_retval_tls_offset, NUM_TLS_RETVAL); - CHECK(res); - res = dr_raw_tls_cfree(msan_param_tls_offset, NUM_TLS_PARAM); - CHECK(res); - /* we do not bother to free the shadow memory */ -#endif /* !MSANDR_STANDALONE_TEST */ - if (VERBOSITY > 0) - dr_printf("==DRMSAN== DONE\n"); -} - -bool event_filter_syscall(void *drcontext, int sysnum) { - // FIXME: only intercept syscalls with memory effects. - return true; /* intercept everything */ -} - -bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) { - CHECK(arg->valid); - - if (arg->pre) - return true; - if (!TESTANY(DRSYS_PARAM_OUT, arg->mode)) - return true; - - size_t sz = arg->size; - - if (sz > 0xFFFFFFFF) { - drmf_status_t res; - drsys_syscall_t *syscall = (drsys_syscall_t *)user_data; - const char *name; - res = drsys_syscall_name(syscall, &name); - CHECK(res == DRMF_SUCCESS); - - dr_printf("SANITY: syscall '%s' arg %d writes %llu bytes memory?!" - " Clipping to %llu.\n", - name, arg->ordinal, (unsigned long long) sz, - (unsigned long long)(sz & 0xFFFFFFFF)); - } - - if (VERBOSITY > 0) { - drmf_status_t res; - drsys_syscall_t *syscall = (drsys_syscall_t *)user_data; - const char *name; - res = drsys_syscall_name(syscall, &name); - CHECK(res == DRMF_SUCCESS); - dr_printf("drsyscall: syscall '%s' arg %d wrote range [%p, %p)\n", - name, arg->ordinal, arg->start_addr, - (char *)arg->start_addr + sz); - } - - // We don't switch to the app context because __msan_unpoison() doesn't need - // TLS segments. - __msan_unpoison(arg->start_addr, sz); - - return true; /* keep going */ -} - -bool event_pre_syscall(void *drcontext, int sysnum) { - drsys_syscall_t *syscall; - drsys_sysnum_t sysnum_full; - bool known; - drsys_param_type_t ret_type; - drmf_status_t res; - const char *name; - - res = drsys_cur_syscall(drcontext, &syscall); - CHECK(res == DRMF_SUCCESS); - - res = drsys_syscall_number(syscall, &sysnum_full); - CHECK(res == DRMF_SUCCESS); - CHECK(sysnum == sysnum_full.number); - - res = drsys_syscall_is_known(syscall, &known); - CHECK(res == DRMF_SUCCESS); - - res = drsys_syscall_name(syscall, &name); - CHECK(res == DRMF_SUCCESS); - - res = drsys_syscall_return_type(syscall, &ret_type); - CHECK(res == DRMF_SUCCESS); - CHECK(ret_type != DRSYS_TYPE_INVALID); - CHECK(!known || ret_type != DRSYS_TYPE_UNKNOWN); - - res = drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, NULL); - CHECK(res == DRMF_SUCCESS); - - return true; -} - -static bool IsInLoader(void *drcontext) { - // TODO: This segment swap is inefficient. DR should just let us query the - // app segment base, which it has. Alternatively, if we disable - // -mangle_app_seg, then we won't need the swap. - bool need_swap = !dr_using_app_state(drcontext); - if (need_swap) - dr_switch_to_app_state(drcontext); - bool is_in_loader = __msan_is_in_loader(); - if (need_swap) - dr_switch_to_dr_state(drcontext); - return is_in_loader; -} - -void event_post_syscall(void *drcontext, int sysnum) { - drsys_syscall_t *syscall; - drsys_sysnum_t sysnum_full; - bool success = false; - drmf_status_t res; - - res = drsys_cur_syscall(drcontext, &syscall); - CHECK(res == DRMF_SUCCESS); - - res = drsys_syscall_number(syscall, &sysnum_full); - CHECK(res == DRMF_SUCCESS); - CHECK(sysnum == sysnum_full.number); - - res = drsys_syscall_succeeded(syscall, dr_syscall_get_result(drcontext), - &success); - CHECK(res == DRMF_SUCCESS); - - if (success) { - res = - drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, (void *)syscall); - CHECK(res == DRMF_SUCCESS); - } - - // Our normal mmap interceptor can't intercept calls from the loader itself. - // This means we don't clear the shadow for calls to dlopen. For now, we - // solve this by intercepting mmap from ld.so here, but ideally we'd have a - // solution that doesn't rely on msandr. - // - // Be careful not to intercept maps done by the msan rtl. Otherwise we end up - // unpoisoning vast regions of memory and OOMing. - // TODO: __msan_unpoison() could "flush" large regions of memory like tsan - // does instead of doing a large memset. However, we need the memory to be - // zeroed, where as tsan does not, so plain madvise is not enough. - if (success && (sysnum == SYS_mmap IF_NOT_X64(|| sysnum == SYS_mmap2))) { - if (IsInLoader(drcontext)) { - app_pc base = (app_pc)dr_syscall_get_result(drcontext); - ptr_uint_t size; - drmf_status_t res = drsys_pre_syscall_arg(drcontext, 1, &size); - CHECK(res == DRMF_SUCCESS); - if (VERBOSITY > 0) - dr_printf("unpoisoning for dlopen: [%p-%p]\n", base, base + size); - // We don't switch to the app context because __msan_unpoison() doesn't - // need TLS segments. - __msan_unpoison(base, size); - } - } -} - -} // namespace - -DR_EXPORT void dr_init(client_id_t id) { - drmf_status_t res; - - drmgr_init(); - drutil_init(); - - std::string app_name = dr_get_application_name(); - // This blacklist will still run these apps through DR's code cache. On the - // other hand, we are able to follow children of these apps. - // FIXME: Once DR has detach, we could just detach here. Alternatively, - // if DR had a fork or exec hook to let us decide there, that would be nice. - // FIXME: make the blacklist cmd-adjustable. - if (app_name == "python" || app_name == "python2.7" || app_name == "bash" || - app_name == "sh" || app_name == "true" || app_name == "exit" || - app_name == "yes" || app_name == "echo") - return; - - drsys_options_t ops; - memset(&ops, 0, sizeof(ops)); - ops.struct_size = sizeof(ops); - ops.analyze_unknown_syscalls = false; - - res = drsys_init(id, &ops); - CHECK(res == DRMF_SUCCESS); - - dr_register_filter_syscall_event(event_filter_syscall); - drmgr_register_pre_syscall_event(event_pre_syscall); - drmgr_register_post_syscall_event(event_post_syscall); - res = drsys_filter_all_syscalls(); - CHECK(res == DRMF_SUCCESS); - -#ifdef MSANDR_STANDALONE_TEST - reg_id_t reg_seg; - /* alloc tls */ - if (!dr_raw_tls_calloc(®_seg, &mock_msan_retval_tls_offset, NUM_TLS_RETVAL, 0)) - CHECK(false); - CHECK(reg_seg == DR_SEG_GS /* x64 only! */); - if (!dr_raw_tls_calloc(®_seg, &mock_msan_param_tls_offset, NUM_TLS_PARAM, 0)) - CHECK(false); - CHECK(reg_seg == DR_SEG_GS /* x64 only! */); - /* alloc shadow memory */ - if (mmap(SHADOW_MEMORY_BASE, SHADOW_MEMORY_SIZE, PROT_READ|PROT_WRITE, - MAP_PRIVATE | MAP_ANON, -1, 0) != SHADOW_MEMORY_BASE) { - CHECK(false); - } -#endif /* MSANDR_STANDALONE_TEST */ - InitializeMSanCallbacks(); - - // FIXME: the shadow is initialized earlier when DR calls one of our wrapper - // functions. This may change one day. - // TODO: make this more robust. - - void *drcontext = dr_get_current_drcontext(); - - dr_switch_to_app_state(drcontext); - msan_retval_tls_offset = __msan_get_retval_tls_offset(); - msan_param_tls_offset = __msan_get_param_tls_offset(); - dr_switch_to_dr_state(drcontext); - if (VERBOSITY > 0) { - dr_printf("__msan_retval_tls offset: %d\n", msan_retval_tls_offset); - dr_printf("__msan_param_tls offset: %d\n", msan_param_tls_offset); - } - - // Standard DR events. - dr_register_exit_event(event_exit); - - drmgr_priority_t priority = { - sizeof(priority), /* size of struct */ - "msandr", /* name of our operation */ - NULL, /* optional name of operation we should precede */ - NULL, /* optional name of operation we should follow */ - 0 - }; /* numeric priority */ - - drmgr_register_bb_app2app_event(event_basic_block_app2app, &priority); - drmgr_register_bb_instru2instru_event(event_basic_block, &priority); -#ifndef MSANDR_NATIVE_EXEC - drmgr_register_module_load_event(event_module_load); - drmgr_register_module_unload_event(event_module_unload); -#endif /* MSANDR_NATIVE_EXEC */ - if (VERBOSITY > 0) - dr_printf("==MSANDR== Starting!\n"); -} |