author | Dimitry Andric <dim@FreeBSD.org> | 2017-04-20 21:20:59 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-04-20 21:20:59 +0000
commit | f351c8a560ddc5b5df9ee5ba4ccc1cfb9029146d (patch)
tree | a1af403c7ce4e7447ee7e01c045d260dba9a409b /lib
parent | ab0bf875a5f328a6710f4e48258979ae1bc8da1c (diff)
download | src-test2-f351c8a560ddc5b5df9ee5ba4ccc1cfb9029146d.tar.gz, src-test2-f351c8a560ddc5b5df9ee5ba4ccc1cfb9029146d.zip
Diffstat (limited to 'lib')
43 files changed, 774 insertions, 832 deletions
diff --git a/lib/asan/asan_thread.cc b/lib/asan/asan_thread.cc index aaa32d6ea6da..f41ee2df2d96 100644 --- a/lib/asan/asan_thread.cc +++ b/lib/asan/asan_thread.cc @@ -237,7 +237,7 @@ void AsanThread::Init() { } thread_return_t AsanThread::ThreadStart( - uptr os_id, atomic_uintptr_t *signal_thread_is_registered) { + tid_t os_id, atomic_uintptr_t *signal_thread_is_registered) { Init(); asanThreadRegistry().StartThread(tid(), os_id, /*workerthread*/ false, nullptr); @@ -395,7 +395,7 @@ void EnsureMainThreadIDIsCorrect() { context->os_id = GetTid(); } -__asan::AsanThread *GetAsanThreadByOsIDLocked(uptr os_id) { +__asan::AsanThread *GetAsanThreadByOsIDLocked(tid_t os_id) { __asan::AsanThreadContext *context = static_cast<__asan::AsanThreadContext *>( __asan::asanThreadRegistry().FindThreadContextByOsIDLocked(os_id)); if (!context) return nullptr; @@ -405,7 +405,7 @@ __asan::AsanThread *GetAsanThreadByOsIDLocked(uptr os_id) { // --- Implementation of LSan-specific functions --- {{{1 namespace __lsan { -bool GetThreadRangesLocked(uptr os_id, uptr *stack_begin, uptr *stack_end, +bool GetThreadRangesLocked(tid_t os_id, uptr *stack_begin, uptr *stack_end, uptr *tls_begin, uptr *tls_end, uptr *cache_begin, uptr *cache_end, DTLS **dtls) { __asan::AsanThread *t = __asan::GetAsanThreadByOsIDLocked(os_id); @@ -421,7 +421,7 @@ bool GetThreadRangesLocked(uptr os_id, uptr *stack_begin, uptr *stack_end, return true; } -void ForEachExtraStackRange(uptr os_id, RangeIteratorCallback callback, +void ForEachExtraStackRange(tid_t os_id, RangeIteratorCallback callback, void *arg) { __asan::AsanThread *t = __asan::GetAsanThreadByOsIDLocked(os_id); if (t && t->has_fake_stack()) diff --git a/lib/asan/asan_thread.h b/lib/asan/asan_thread.h index f53dfb712449..424f9e68dfea 100644 --- a/lib/asan/asan_thread.h +++ b/lib/asan/asan_thread.h @@ -63,7 +63,7 @@ class AsanThread { void Destroy(); void Init(); // Should be called from the thread itself. - thread_return_t ThreadStart(uptr os_id, + thread_return_t ThreadStart(tid_t os_id, atomic_uintptr_t *signal_thread_is_registered); uptr stack_top(); diff --git a/lib/asan/tests/asan_test_main.cc b/lib/asan/tests/asan_test_main.cc index 1071d4474674..0c1b93c7fda7 100644 --- a/lib/asan/tests/asan_test_main.cc +++ b/lib/asan/tests/asan_test_main.cc @@ -13,15 +13,23 @@ #include "asan_test_utils.h" #include "sanitizer_common/sanitizer_platform.h" -// Default ASAN_OPTIONS for the unit tests. Let's turn symbolication off to -// speed up testing (unit tests don't use it anyway). +// Default ASAN_OPTIONS for the unit tests. extern "C" const char* __asan_default_options() { #if SANITIZER_MAC // On Darwin, we default to `abort_on_error=1`, which would make tests run - // much slower. Let's override this and run lit tests with 'abort_on_error=0'. - // Also, make sure we do not overwhelm the syslog while testing. + // much slower. Let's override this and run lit tests with 'abort_on_error=0' + // and make sure we do not overwhelm the syslog while testing. Also, let's + // turn symbolization off to speed up testing, especially when not running + // with llvm-symbolizer but with atos. return "symbolize=false:abort_on_error=0:log_to_syslog=0"; +#elif SANITIZER_SUPPRESS_LEAK_ON_PTHREAD_EXIT + // On PowerPC and ARM Thumb, a couple tests involving pthread_exit fail due to + // leaks detected by LSan. Symbolized leak report is required to apply a + // suppression for this known problem. 
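The `__asan_default_options()` hunk above (continued just below) relies on a weak hook from ASan's public interface: a program may define this function to bake in default options, and `ASAN_OPTIONS` set at run time still overrides them. A minimal sketch of the same hook in user code; the option string here is only an example, not the runtime's default:

```cpp
// Build with: clang++ -fsanitize=address default_options_demo.cc
// The weak hook is real ASan interface; the runtime queries it at startup,
// before applying ASAN_OPTIONS from the environment.
extern "C" const char *__asan_default_options() {
  // Skip symbolization for speed, as the unit-test harness above does.
  return "symbolize=false";
}

int main() { return 0; }
```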
+ return ""; #else + // Let's turn symbolization off to speed up testing (more than 3 times speedup + // observed). return "symbolize=false"; #endif } diff --git a/lib/dfsan/done_abilist.txt b/lib/dfsan/done_abilist.txt index a00dc5426cd0..cbbedbc33601 100644 --- a/lib/dfsan/done_abilist.txt +++ b/lib/dfsan/done_abilist.txt @@ -285,22 +285,8 @@ fun:__sanitizer_cov_module_init=uninstrumented fun:__sanitizer_cov_module_init=discard fun:__sanitizer_cov_with_check=uninstrumented fun:__sanitizer_cov_with_check=discard -fun:__sanitizer_cov_indir_call16=uninstrumented -fun:__sanitizer_cov_indir_call16=discard -fun:__sanitizer_cov_indir_call16=uninstrumented -fun:__sanitizer_cov_indir_call16=discard -fun:__sanitizer_reset_coverage=uninstrumented -fun:__sanitizer_reset_coverage=discard fun:__sanitizer_set_death_callback=uninstrumented fun:__sanitizer_set_death_callback=discard -fun:__sanitizer_get_coverage_guards=uninstrumented -fun:__sanitizer_get_coverage_guards=discard -fun:__sanitizer_get_number_of_counters=uninstrumented -fun:__sanitizer_get_number_of_counters=discard -fun:__sanitizer_update_counter_bitset_and_clear_counters=uninstrumented -fun:__sanitizer_update_counter_bitset_and_clear_counters=discard -fun:__sanitizer_get_total_unique_coverage=uninstrumented -fun:__sanitizer_get_total_unique_coverage=discard fun:__sanitizer_get_total_unique_coverage=uninstrumented fun:__sanitizer_get_total_unique_coverage=discard fun:__sanitizer_update_counter_bitset_and_clear_counters=uninstrumented diff --git a/lib/lsan/lsan_common.cc b/lib/lsan/lsan_common.cc index 6cc73749812b..200f16a594fa 100644 --- a/lib/lsan/lsan_common.cc +++ b/lib/lsan/lsan_common.cc @@ -68,6 +68,14 @@ ALIGNED(64) static char suppression_placeholder[sizeof(SuppressionContext)]; static SuppressionContext *suppression_ctx = nullptr; static const char kSuppressionLeak[] = "leak"; static const char *kSuppressionTypes[] = { kSuppressionLeak }; +static const char kStdSuppressions[] = +#if SANITIZER_SUPPRESS_LEAK_ON_PTHREAD_EXIT + // The actual string allocation happens here (for more details refer to the + // SANITIZER_SUPPRESS_LEAK_ON_PTHREAD_EXIT definition). + "leak:*_dl_map_object_deps*"; +#else + ""; +#endif // SANITIZER_SUPPRESS_LEAK_ON_PTHREAD_EXIT void InitializeSuppressions() { CHECK_EQ(nullptr, suppression_ctx); @@ -76,6 +84,7 @@ void InitializeSuppressions() { suppression_ctx->ParseFromFile(flags()->suppressions); if (&__lsan_default_suppressions) suppression_ctx->Parse(__lsan_default_suppressions()); + suppression_ctx->Parse(kStdSuppressions); } static SuppressionContext *GetSuppressionContext() { @@ -83,12 +92,9 @@ static SuppressionContext *GetSuppressionContext() { return suppression_ctx; } -struct RootRegion { - const void *begin; - uptr size; -}; +static InternalMmapVector<RootRegion> *root_regions; -InternalMmapVector<RootRegion> *root_regions; +InternalMmapVector<RootRegion> const *GetRootRegions() { return root_regions; } void InitializeRootRegions() { CHECK(!root_regions); @@ -200,11 +206,11 @@ void ForEachExtraStackRangeCb(uptr begin, uptr end, void* arg) { // Scans thread data (stacks and TLS) for heap pointers. 
static void ProcessThreads(SuspendedThreadsList const &suspended_threads, Frontier *frontier) { - InternalScopedBuffer<uptr> registers(SuspendedThreadsList::RegisterCount()); + InternalScopedBuffer<uptr> registers(suspended_threads.RegisterCount()); uptr registers_begin = reinterpret_cast<uptr>(registers.data()); uptr registers_end = registers_begin + registers.size(); - for (uptr i = 0; i < suspended_threads.thread_count(); i++) { - uptr os_id = static_cast<uptr>(suspended_threads.GetThreadID(i)); + for (uptr i = 0; i < suspended_threads.ThreadCount(); i++) { + tid_t os_id = static_cast<tid_t>(suspended_threads.GetThreadID(i)); LOG_THREADS("Processing thread %d.\n", os_id); uptr stack_begin, stack_end, tls_begin, tls_end, cache_begin, cache_end; DTLS *dtls; @@ -291,23 +297,29 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads, } } -static void ProcessRootRegion(Frontier *frontier, uptr root_begin, - uptr root_end) { - MemoryMappingLayout proc_maps(/*cache_enabled*/true); +void ScanRootRegion(Frontier *frontier, const RootRegion &root_region, + uptr region_begin, uptr region_end, uptr prot) { + uptr intersection_begin = Max(root_region.begin, region_begin); + uptr intersection_end = Min(region_end, root_region.begin + root_region.size); + if (intersection_begin >= intersection_end) return; + bool is_readable = prot & MemoryMappingLayout::kProtectionRead; + LOG_POINTERS("Root region %p-%p intersects with mapped region %p-%p (%s)\n", + root_region.begin, root_region.begin + root_region.size, + region_begin, region_end, + is_readable ? "readable" : "unreadable"); + if (is_readable) + ScanRangeForPointers(intersection_begin, intersection_end, frontier, "ROOT", + kReachable); +} + +static void ProcessRootRegion(Frontier *frontier, + const RootRegion &root_region) { + MemoryMappingLayout proc_maps(/*cache_enabled*/ true); uptr begin, end, prot; while (proc_maps.Next(&begin, &end, /*offset*/ nullptr, /*filename*/ nullptr, /*filename_size*/ 0, &prot)) { - uptr intersection_begin = Max(root_begin, begin); - uptr intersection_end = Min(end, root_end); - if (intersection_begin >= intersection_end) continue; - bool is_readable = prot & MemoryMappingLayout::kProtectionRead; - LOG_POINTERS("Root region %p-%p intersects with mapped region %p-%p (%s)\n", - root_begin, root_end, begin, end, - is_readable ? "readable" : "unreadable"); - if (is_readable) - ScanRangeForPointers(intersection_begin, intersection_end, frontier, - "ROOT", kReachable); + ScanRootRegion(frontier, root_region, begin, end, prot); } } @@ -316,9 +328,7 @@ static void ProcessRootRegions(Frontier *frontier) { if (!flags()->use_root_regions) return; CHECK(root_regions); for (uptr i = 0; i < root_regions->size(); i++) { - RootRegion region = (*root_regions)[i]; - uptr begin_addr = reinterpret_cast<uptr>(region.begin); - ProcessRootRegion(frontier, begin_addr, begin_addr + region.size); + ProcessRootRegion(frontier, (*root_regions)[i]); } } @@ -356,6 +366,72 @@ static void CollectIgnoredCb(uptr chunk, void *arg) { } } +static uptr GetCallerPC(u32 stack_id, StackDepotReverseMap *map) { + CHECK(stack_id); + StackTrace stack = map->Get(stack_id); + // The top frame is our malloc/calloc/etc. The next frame is the caller. + if (stack.size >= 2) + return stack.trace[1]; + return 0; +} + +struct InvalidPCParam { + Frontier *frontier; + StackDepotReverseMap *stack_depot_reverse_map; + bool skip_linker_allocations; +}; + +// ForEachChunk callback. If the caller pc is invalid or is within the linker, +// mark as reachable. 
Called by ProcessPlatformSpecificAllocations. +static void MarkInvalidPCCb(uptr chunk, void *arg) { + CHECK(arg); + InvalidPCParam *param = reinterpret_cast<InvalidPCParam *>(arg); + chunk = GetUserBegin(chunk); + LsanMetadata m(chunk); + if (m.allocated() && m.tag() != kReachable && m.tag() != kIgnored) { + u32 stack_id = m.stack_trace_id(); + uptr caller_pc = 0; + if (stack_id > 0) + caller_pc = GetCallerPC(stack_id, param->stack_depot_reverse_map); + // If caller_pc is unknown, this chunk may be allocated in a coroutine. Mark + // it as reachable, as we can't properly report its allocation stack anyway. + if (caller_pc == 0 || (param->skip_linker_allocations && + GetLinker()->containsAddress(caller_pc))) { + m.set_tag(kReachable); + param->frontier->push_back(chunk); + } + } +} + +// On Linux, handles dynamically allocated TLS blocks by treating all chunks +// allocated from ld-linux.so as reachable. +// Dynamic TLS blocks contain the TLS variables of dynamically loaded modules. +// They are allocated with a __libc_memalign() call in allocate_and_init() +// (elf/dl-tls.c). Glibc won't tell us the address ranges occupied by those +// blocks, but we can make sure they come from our own allocator by intercepting +// __libc_memalign(). On top of that, there is no easy way to reach them. Their +// addresses are stored in a dynamically allocated array (the DTV) which is +// referenced from the static TLS. Unfortunately, we can't just rely on the DTV +// being reachable from the static TLS, and the dynamic TLS being reachable from +// the DTV. This is because the initial DTV is allocated before our interception +// mechanism kicks in, and thus we don't recognize it as allocated memory. We +// can't special-case it either, since we don't know its size. +// Our solution is to include in the root set all allocations made from +// ld-linux.so (which is where allocate_and_init() is implemented). This is +// guaranteed to include all dynamic TLS blocks (and possibly other allocations +// which we don't care about). +// On all other platforms, this simply checks to ensure that the caller pc is +// valid before reporting chunks as leaked. +void ProcessPC(Frontier *frontier) { + StackDepotReverseMap stack_depot_reverse_map; + InvalidPCParam arg; + arg.frontier = frontier; + arg.stack_depot_reverse_map = &stack_depot_reverse_map; + arg.skip_linker_allocations = + flags()->use_tls && flags()->use_ld_allocations && GetLinker() != nullptr; + ForEachChunk(MarkInvalidPCCb, &arg); +} + // Sets the appropriate tag on each chunk. static void ClassifyAllChunks(SuspendedThreadsList const &suspended_threads) { // Holds the flood fill frontier. @@ -367,11 +443,13 @@ static void ClassifyAllChunks(SuspendedThreadsList const &suspended_threads) { ProcessRootRegions(&frontier); FloodFillTag(&frontier, kReachable); + CHECK_EQ(0, frontier.size()); + ProcessPC(&frontier); + // The check here is relatively expensive, so we do this in a separate flood // fill. That way we can skip the check for chunks that are reachable // otherwise. 
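`GetCallerPC()` above relies on a fixed layout of the depot trace: frame 0 is the intercepted allocator entry point, frame 1 is its caller, and anything shorter than two frames yields no usable caller. A self-contained sketch of that lookup, with a simplified `StackTrace` standing in for the sanitizer type:

```cpp
#include <cstdint>
#include <cstdio>

// Simplified stand-in for __sanitizer::StackTrace (assumption: only the
// fields the lookup needs).
struct StackTrace {
  const uintptr_t *trace;  // PCs, innermost frame first
  unsigned size;
};

// Frame 0 is the intercepted malloc/calloc/..., frame 1 its caller; return 0
// when the unwind was too short to identify a caller.
uintptr_t GetCallerPC(const StackTrace &stack) {
  return stack.size >= 2 ? stack.trace[1] : 0;
}

int main() {
  const uintptr_t pcs[] = {0x1000 /* malloc */, 0x2000 /* caller */};
  StackTrace st = {pcs, 2};
  printf("caller pc: 0x%lx\n", (unsigned long)GetCallerPC(st));
}
```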
LOG_POINTERS("Processing platform-specific allocations.\n"); - CHECK_EQ(0, frontier.size()); ProcessPlatformSpecificAllocations(&frontier); FloodFillTag(&frontier, kReachable); @@ -707,7 +785,7 @@ void __lsan_register_root_region(const void *begin, uptr size) { #if CAN_SANITIZE_LEAKS BlockingMutexLock l(&global_mutex); CHECK(root_regions); - RootRegion region = {begin, size}; + RootRegion region = {reinterpret_cast<uptr>(begin), size}; root_regions->push_back(region); VReport(1, "Registered root region at %p of size %llu\n", begin, size); #endif // CAN_SANITIZE_LEAKS @@ -721,7 +799,7 @@ void __lsan_unregister_root_region(const void *begin, uptr size) { bool removed = false; for (uptr i = 0; i < root_regions->size(); i++) { RootRegion region = (*root_regions)[i]; - if (region.begin == begin && region.size == size) { + if (region.begin == reinterpret_cast<uptr>(begin) && region.size == size) { removed = true; uptr last_index = root_regions->size() - 1; (*root_regions)[i] = (*root_regions)[last_index]; diff --git a/lib/lsan/lsan_common.h b/lib/lsan/lsan_common.h index 919be0ec2662..121b9c082983 100644 --- a/lib/lsan/lsan_common.h +++ b/lib/lsan/lsan_common.h @@ -118,6 +118,15 @@ typedef InternalMmapVector<uptr> Frontier; void InitializePlatformSpecificModules(); void ProcessGlobalRegions(Frontier *frontier); void ProcessPlatformSpecificAllocations(Frontier *frontier); + +struct RootRegion { + uptr begin; + uptr size; +}; + +InternalMmapVector<RootRegion> const *GetRootRegions(); +void ScanRootRegion(Frontier *frontier, RootRegion const ®ion, + uptr region_begin, uptr region_end, uptr prot); // Run stoptheworld while holding any platform-specific locks. void DoStopTheWorld(StopTheWorldCallback callback, void* argument); @@ -193,10 +202,10 @@ bool WordIsPoisoned(uptr addr); // Wrappers for ThreadRegistry access. void LockThreadRegistry(); void UnlockThreadRegistry(); -bool GetThreadRangesLocked(uptr os_id, uptr *stack_begin, uptr *stack_end, +bool GetThreadRangesLocked(tid_t os_id, uptr *stack_begin, uptr *stack_end, uptr *tls_begin, uptr *tls_end, uptr *cache_begin, uptr *cache_end, DTLS **dtls); -void ForEachExtraStackRange(uptr os_id, RangeIteratorCallback callback, +void ForEachExtraStackRange(tid_t os_id, RangeIteratorCallback callback, void *arg); // If called from the main thread, updates the main thread's TID in the thread // registry. We need this to handle processes that fork() without a subsequent @@ -212,6 +221,10 @@ uptr PointsIntoChunk(void *p); uptr GetUserBegin(uptr chunk); // Helper for __lsan_ignore_object(). IgnoreObjectResult IgnoreObjectLocked(const void *p); + +// Return the linker module, if valid for the platform. +LoadedModule *GetLinker(); + // Wrapper for chunk metadata operations. class LsanMetadata { public: diff --git a/lib/lsan/lsan_common_linux.cc b/lib/lsan/lsan_common_linux.cc index 0d1e998a5cfe..fadd0263de73 100644 --- a/lib/lsan/lsan_common_linux.cc +++ b/lib/lsan/lsan_common_linux.cc @@ -89,70 +89,9 @@ void ProcessGlobalRegions(Frontier *frontier) { dl_iterate_phdr(ProcessGlobalRegionsCallback, frontier); } -static uptr GetCallerPC(u32 stack_id, StackDepotReverseMap *map) { - CHECK(stack_id); - StackTrace stack = map->Get(stack_id); - // The top frame is our malloc/calloc/etc. The next frame is the caller. 
- if (stack.size >= 2) - return stack.trace[1]; - return 0; -} +LoadedModule *GetLinker() { return linker; } -struct ProcessPlatformAllocParam { - Frontier *frontier; - StackDepotReverseMap *stack_depot_reverse_map; - bool skip_linker_allocations; -}; - -// ForEachChunk callback. Identifies unreachable chunks which must be treated as -// reachable. Marks them as reachable and adds them to the frontier. -static void ProcessPlatformSpecificAllocationsCb(uptr chunk, void *arg) { - CHECK(arg); - ProcessPlatformAllocParam *param = - reinterpret_cast<ProcessPlatformAllocParam *>(arg); - chunk = GetUserBegin(chunk); - LsanMetadata m(chunk); - if (m.allocated() && m.tag() != kReachable && m.tag() != kIgnored) { - u32 stack_id = m.stack_trace_id(); - uptr caller_pc = 0; - if (stack_id > 0) - caller_pc = GetCallerPC(stack_id, param->stack_depot_reverse_map); - // If caller_pc is unknown, this chunk may be allocated in a coroutine. Mark - // it as reachable, as we can't properly report its allocation stack anyway. - if (caller_pc == 0 || (param->skip_linker_allocations && - linker->containsAddress(caller_pc))) { - m.set_tag(kReachable); - param->frontier->push_back(chunk); - } - } -} - -// Handles dynamically allocated TLS blocks by treating all chunks allocated -// from ld-linux.so as reachable. -// Dynamic TLS blocks contain the TLS variables of dynamically loaded modules. -// They are allocated with a __libc_memalign() call in allocate_and_init() -// (elf/dl-tls.c). Glibc won't tell us the address ranges occupied by those -// blocks, but we can make sure they come from our own allocator by intercepting -// __libc_memalign(). On top of that, there is no easy way to reach them. Their -// addresses are stored in a dynamically allocated array (the DTV) which is -// referenced from the static TLS. Unfortunately, we can't just rely on the DTV -// being reachable from the static TLS, and the dynamic TLS being reachable from -// the DTV. This is because the initial DTV is allocated before our interception -// mechanism kicks in, and thus we don't recognize it as allocated memory. We -// can't special-case it either, since we don't know its size. -// Our solution is to include in the root set all allocations made from -// ld-linux.so (which is where allocate_and_init() is implemented). This is -// guaranteed to include all dynamic TLS blocks (and possibly other allocations -// which we don't care about). -void ProcessPlatformSpecificAllocations(Frontier *frontier) { - StackDepotReverseMap stack_depot_reverse_map; - ProcessPlatformAllocParam arg; - arg.frontier = frontier; - arg.stack_depot_reverse_map = &stack_depot_reverse_map; - arg.skip_linker_allocations = - flags()->use_tls && flags()->use_ld_allocations && linker != nullptr; - ForEachChunk(ProcessPlatformSpecificAllocationsCb, &arg); -} +void ProcessPlatformSpecificAllocations(Frontier *frontier) {} struct DoStopTheWorldParam { StopTheWorldCallback callback; diff --git a/lib/lsan/lsan_common_mac.cc b/lib/lsan/lsan_common_mac.cc index 022e73937895..a9adcdfff37f 100644 --- a/lib/lsan/lsan_common_mac.cc +++ b/lib/lsan/lsan_common_mac.cc @@ -22,6 +22,8 @@ #include <pthread.h> +#include <mach/mach.h> + namespace __lsan { typedef struct { @@ -85,6 +87,8 @@ void SetCurrentThread(u32 tid) { get_tls_val(true)->current_thread_id = tid; } AllocatorCache *GetAllocatorCache() { return &get_tls_val(true)->cache; } +LoadedModule *GetLinker() { return nullptr; } + // Required on Linux for initialization of TLS behavior, but should not be // required on Darwin. 
void InitializePlatformSpecificModules() { @@ -106,7 +110,7 @@ void ProcessGlobalRegions(Frontier *frontier) { for (const __sanitizer::LoadedModule::AddressRange &range : modules[i].ranges()) { - if (range.executable) continue; + if (range.executable || !range.readable) continue; ScanGlobalRange(range.beg, range.end, frontier); } @@ -114,11 +118,54 @@ void ProcessGlobalRegions(Frontier *frontier) { } void ProcessPlatformSpecificAllocations(Frontier *frontier) { - CHECK(0 && "unimplemented"); + mach_port_name_t port; + if (task_for_pid(mach_task_self(), internal_getpid(), &port) + != KERN_SUCCESS) { + return; + } + + unsigned depth = 1; + vm_size_t size = 0; + vm_address_t address = 0; + kern_return_t err = KERN_SUCCESS; + mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64; + + InternalMmapVector<RootRegion> const *root_regions = GetRootRegions(); + + while (err == KERN_SUCCESS) { + struct vm_region_submap_info_64 info; + err = vm_region_recurse_64(port, &address, &size, &depth, + (vm_region_info_t)&info, &count); + + uptr end_address = address + size; + + // libxpc stashes some pointers in the Kernel Alloc Once page, + // make sure not to report those as leaks. + if (info.user_tag == VM_MEMORY_OS_ALLOC_ONCE) { + ScanRangeForPointers(address, end_address, frontier, "GLOBAL", + kReachable); + } + + // This additional root region scan is required on Darwin in order to + // detect root regions contained within mmap'd memory regions, because + // the Darwin implementation of sanitizer_procmaps traverses images + // as loaded by dyld, and not the complete set of all memory regions. + // + // TODO(fjricci) - remove this once sanitizer_procmaps_mac has the same + // behavior as sanitizer_procmaps_linux and traverses all memory regions + if (flags()->use_root_regions) { + for (uptr i = 0; i < root_regions->size(); i++) { + ScanRootRegion(frontier, (*root_regions)[i], address, end_address, + info.protection); + } + } + + address = end_address; + } } void DoStopTheWorld(StopTheWorldCallback callback, void *argument) { - CHECK(0 && "unimplemented"); + StopTheWorld(callback, argument); } } // namespace __lsan diff --git a/lib/lsan/lsan_thread.cc b/lib/lsan/lsan_thread.cc index 09eeb9c24982..0ea7a6e97497 100644 --- a/lib/lsan/lsan_thread.cc +++ b/lib/lsan/lsan_thread.cc @@ -77,7 +77,7 @@ u32 ThreadCreate(u32 parent_tid, uptr user_id, bool detached) { /* arg */ nullptr); } -void ThreadStart(u32 tid, uptr os_id) { +void ThreadStart(u32 tid, tid_t os_id) { OnStartedArgs args; uptr stack_size = 0; uptr tls_size = 0; @@ -127,7 +127,7 @@ void EnsureMainThreadIDIsCorrect() { ///// Interface to the common LSan module. 
///// -bool GetThreadRangesLocked(uptr os_id, uptr *stack_begin, uptr *stack_end, +bool GetThreadRangesLocked(tid_t os_id, uptr *stack_begin, uptr *stack_end, uptr *tls_begin, uptr *tls_end, uptr *cache_begin, uptr *cache_end, DTLS **dtls) { ThreadContext *context = static_cast<ThreadContext *>( @@ -143,7 +143,7 @@ bool GetThreadRangesLocked(uptr os_id, uptr *stack_begin, uptr *stack_end, return true; } -void ForEachExtraStackRange(uptr os_id, RangeIteratorCallback callback, +void ForEachExtraStackRange(tid_t os_id, RangeIteratorCallback callback, void *arg) { } diff --git a/lib/lsan/lsan_thread.h b/lib/lsan/lsan_thread.h index 10b7b5796c51..73e080e26f76 100644 --- a/lib/lsan/lsan_thread.h +++ b/lib/lsan/lsan_thread.h @@ -45,7 +45,7 @@ class ThreadContext : public ThreadContextBase { void InitializeThreadRegistry(); -void ThreadStart(u32 tid, uptr os_id); +void ThreadStart(u32 tid, tid_t os_id); void ThreadFinish(); u32 ThreadCreate(u32 tid, uptr uid, bool detached); void ThreadJoin(u32 tid); diff --git a/lib/sanitizer_common/sanitizer_common.cc b/lib/sanitizer_common/sanitizer_common.cc index 3ef366f4f328..471c3ded2115 100644 --- a/lib/sanitizer_common/sanitizer_common.cc +++ b/lib/sanitizer_common/sanitizer_common.cc @@ -284,9 +284,10 @@ void LoadedModule::clear() { } } -void LoadedModule::addAddressRange(uptr beg, uptr end, bool executable) { +void LoadedModule::addAddressRange(uptr beg, uptr end, bool executable, + bool readable) { void *mem = InternalAlloc(sizeof(AddressRange)); - AddressRange *r = new(mem) AddressRange(beg, end, executable); + AddressRange *r = new(mem) AddressRange(beg, end, executable, readable); ranges_.push_back(r); if (executable && end > max_executable_address_) max_executable_address_ = end; diff --git a/lib/sanitizer_common/sanitizer_common.h b/lib/sanitizer_common/sanitizer_common.h index 9d367ca80144..bbe7aebf3279 100644 --- a/lib/sanitizer_common/sanitizer_common.h +++ b/lib/sanitizer_common/sanitizer_common.h @@ -72,7 +72,7 @@ INLINE uptr GetPageSizeCached() { uptr GetMmapGranularity(); uptr GetMaxVirtualAddress(); // Threads -uptr GetTid(); +tid_t GetTid(); uptr GetThreadSelf(); void GetThreadStackTopAndBottom(bool at_initialization, uptr *stack_top, uptr *stack_bottom); @@ -717,7 +717,7 @@ class LoadedModule { void set(const char *module_name, uptr base_address, ModuleArch arch, u8 uuid[kModuleUUIDSize], bool instrumented); void clear(); - void addAddressRange(uptr beg, uptr end, bool executable); + void addAddressRange(uptr beg, uptr end, bool executable, bool readable); bool containsAddress(uptr address) const; const char *full_name() const { return full_name_; } @@ -732,9 +732,14 @@ class LoadedModule { uptr beg; uptr end; bool executable; - - AddressRange(uptr beg, uptr end, bool executable) - : next(nullptr), beg(beg), end(end), executable(executable) {} + bool readable; + + AddressRange(uptr beg, uptr end, bool executable, bool readable) + : next(nullptr), + beg(beg), + end(end), + executable(executable), + readable(readable) {} }; const IntrusiveList<AddressRange> &ranges() const { return ranges_; } diff --git a/lib/sanitizer_common/sanitizer_common_interceptors.inc b/lib/sanitizer_common/sanitizer_common_interceptors.inc index 7b4e6d27df3d..d1c793c551f7 100644 --- a/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -139,12 +139,9 @@ bool PlatformHasDifferentMemcpyAndMemmove(); #define COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED (0) #endif -#define 
COMMON_INTERCEPTOR_READ_STRING_OF_LEN(ctx, s, len, n) \ - COMMON_INTERCEPTOR_READ_RANGE((ctx), (s), \ - common_flags()->strict_string_checks ? (len) + 1 : (n) ) - #define COMMON_INTERCEPTOR_READ_STRING(ctx, s, n) \ - COMMON_INTERCEPTOR_READ_STRING_OF_LEN((ctx), (s), REAL(strlen)(s), (n)) + COMMON_INTERCEPTOR_READ_RANGE((ctx), (s), \ + common_flags()->strict_string_checks ? (REAL(strlen)(s)) + 1 : (n) ) #ifndef COMMON_INTERCEPTOR_ON_DLOPEN #define COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag) \ @@ -450,8 +447,7 @@ static inline void StrstrCheck(void *ctx, char *r, const char *s1, const char *s2) { uptr len1 = REAL(strlen)(s1); uptr len2 = REAL(strlen)(s2); - COMMON_INTERCEPTOR_READ_STRING_OF_LEN(ctx, s1, len1, - r ? r - s1 + len2 : len1 + 1); + COMMON_INTERCEPTOR_READ_STRING(ctx, s1, r ? r - s1 + len2 : len1 + 1); COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, len2 + 1); } #endif @@ -577,10 +573,11 @@ INTERCEPTOR(char*, strchr, const char *s, int c) { return internal_strchr(s, c); COMMON_INTERCEPTOR_ENTER(ctx, strchr, s, c); char *result = REAL(strchr)(s, c); - uptr len = internal_strlen(s); - uptr n = result ? result - s + 1 : len + 1; - if (common_flags()->intercept_strchr) - COMMON_INTERCEPTOR_READ_STRING_OF_LEN(ctx, s, len, n); + if (common_flags()->intercept_strchr) { + // Keep strlen as macro argument, as macro may ignore it. + COMMON_INTERCEPTOR_READ_STRING(ctx, s, + (result ? result - s : REAL(strlen)(s)) + 1); + } return result; } #define INIT_STRCHR COMMON_INTERCEPT_FUNCTION(strchr) @@ -609,9 +606,8 @@ INTERCEPTOR(char*, strrchr, const char *s, int c) { if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED) return internal_strrchr(s, c); COMMON_INTERCEPTOR_ENTER(ctx, strrchr, s, c); - uptr len = internal_strlen(s); if (common_flags()->intercept_strchr) - COMMON_INTERCEPTOR_READ_STRING_OF_LEN(ctx, s, len, len + 1); + COMMON_INTERCEPTOR_READ_RANGE(ctx, s, REAL(strlen)(s) + 1); return REAL(strrchr)(s, c); } #define INIT_STRRCHR COMMON_INTERCEPT_FUNCTION(strrchr) diff --git a/lib/sanitizer_common/sanitizer_coverage_interface.inc b/lib/sanitizer_common/sanitizer_coverage_interface.inc index ae691bd9dd27..42b4d3aba01b 100644 --- a/lib/sanitizer_common/sanitizer_coverage_interface.inc +++ b/lib/sanitizer_common/sanitizer_coverage_interface.inc @@ -10,21 +10,13 @@ //===----------------------------------------------------------------------===// INTERFACE_FUNCTION(__sanitizer_cov) INTERFACE_FUNCTION(__sanitizer_cov_dump) -INTERFACE_FUNCTION(__sanitizer_cov_indir_call16) INTERFACE_FUNCTION(__sanitizer_cov_init) INTERFACE_FUNCTION(__sanitizer_cov_module_init) -INTERFACE_FUNCTION(__sanitizer_cov_trace_basic_block) -INTERFACE_FUNCTION(__sanitizer_cov_trace_func_enter) INTERFACE_FUNCTION(__sanitizer_cov_with_check) INTERFACE_FUNCTION(__sanitizer_dump_coverage) INTERFACE_FUNCTION(__sanitizer_dump_trace_pc_guard_coverage) -INTERFACE_FUNCTION(__sanitizer_get_coverage_guards) -INTERFACE_FUNCTION(__sanitizer_get_number_of_counters) -INTERFACE_FUNCTION(__sanitizer_get_total_unique_caller_callee_pairs) INTERFACE_FUNCTION(__sanitizer_get_total_unique_coverage) INTERFACE_FUNCTION(__sanitizer_maybe_open_cov_file) -INTERFACE_FUNCTION(__sanitizer_reset_coverage) -INTERFACE_FUNCTION(__sanitizer_update_counter_bitset_and_clear_counters) INTERFACE_WEAK_FUNCTION(__sancov_default_options) INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_cmp) INTERFACE_WEAK_FUNCTION(__sanitizer_cov_trace_cmp1) diff --git a/lib/sanitizer_common/sanitizer_coverage_libcdep.cc b/lib/sanitizer_common/sanitizer_coverage_libcdep.cc index 
e934af3ed975..bb59c344edc2 100644 --- a/lib/sanitizer_common/sanitizer_coverage_libcdep.cc +++ b/lib/sanitizer_common/sanitizer_coverage_libcdep.cc @@ -57,12 +57,6 @@ static const u64 kMagic = SANITIZER_WORDSIZE == 64 ? kMagic64 : kMagic32; static atomic_uint32_t dump_once_guard; // Ensure that CovDump runs only once. static atomic_uintptr_t coverage_counter; -static atomic_uintptr_t caller_callee_counter; - -static void ResetGlobalCounters() { - return atomic_store(&coverage_counter, 0, memory_order_relaxed); - return atomic_store(&caller_callee_counter, 0, memory_order_relaxed); -} // pc_array is the array containing the covered PCs. // To make the pc_array thread- and async-signal-safe it has to be large enough. @@ -90,25 +84,14 @@ class CoverageData { void AfterFork(int child_pid); void Extend(uptr npcs); void Add(uptr pc, u32 *guard); - void IndirCall(uptr caller, uptr callee, uptr callee_cache[], - uptr cache_size); - void DumpCallerCalleePairs(); - void DumpTrace(); void DumpAsBitSet(); - void DumpCounters(); void DumpOffsets(); void DumpAll(); - ALWAYS_INLINE - void TraceBasicBlock(u32 *id); - void InitializeGuardArray(s32 *guards); void InitializeGuards(s32 *guards, uptr n, const char *module_name, uptr caller_pc); - void InitializeCounters(u8 *counters, uptr n); void ReinitializeGuards(); - uptr GetNumberOf8bitCounters(); - uptr Update8bitCounterBitsetAndClearCounters(u8 *bitset); uptr *data(); uptr size() const; @@ -150,37 +133,6 @@ class CoverageData { InternalMmapVectorNoCtor<NamedPcRange> comp_unit_name_vec; InternalMmapVectorNoCtor<NamedPcRange> module_name_vec; - struct CounterAndSize { - u8 *counters; - uptr n; - }; - - InternalMmapVectorNoCtor<CounterAndSize> counters_vec; - uptr num_8bit_counters; - - // Caller-Callee (cc) array, size and current index. - static const uptr kCcArrayMaxSize = FIRST_32_SECOND_64(1 << 18, 1 << 24); - uptr **cc_array; - atomic_uintptr_t cc_array_index; - atomic_uintptr_t cc_array_size; - - // Tracing event array, size and current pointer. - // We record all events (basic block entries) in a global buffer of u32 - // values. Each such value is the index in pc_array. - // So far the tracing is highly experimental: - // - not thread-safe; - // - does not support long traces; - // - not tuned for performance. - // Windows doesn't do overcommit (committed virtual memory costs swap), so - // programs can't reliably map such large amounts of virtual memory. - // TODO(etienneb): Find a way to support coverage of larger executable -static const uptr kTrEventArrayMaxSize = - (SANITIZER_WORDSIZE == 32 || SANITIZER_WINDOWS) ? 1 << 22 : 1 << 30; - u32 *tr_event_array; - uptr tr_event_array_size; - u32 *tr_event_pointer; - static const uptr kTrPcArrayMaxSize = FIRST_32_SECOND_64(1 << 22, 1 << 27); - StaticSpinMutex mu; }; @@ -217,23 +169,6 @@ void CoverageData::Enable() { } else { atomic_store(&pc_array_size, kPcArrayMaxSize, memory_order_relaxed); } - - cc_array = reinterpret_cast<uptr **>(MmapNoReserveOrDie( - sizeof(uptr *) * kCcArrayMaxSize, "CovInit::cc_array")); - atomic_store(&cc_array_size, kCcArrayMaxSize, memory_order_relaxed); - atomic_store(&cc_array_index, 0, memory_order_relaxed); - - // Allocate tr_event_array with a guard page at the end. 
- tr_event_array = reinterpret_cast<u32 *>(MmapNoReserveOrDie( - sizeof(tr_event_array[0]) * kTrEventArrayMaxSize + GetMmapGranularity(), - "CovInit::tr_event_array")); - MprotectNoAccess( - reinterpret_cast<uptr>(&tr_event_array[kTrEventArrayMaxSize]), - GetMmapGranularity()); - tr_event_array_size = kTrEventArrayMaxSize; - tr_event_pointer = tr_event_array; - - num_8bit_counters = 0; } void CoverageData::InitializeGuardArray(s32 *guards) { @@ -251,17 +186,6 @@ void CoverageData::Disable() { UnmapOrDie(pc_array, sizeof(uptr) * kPcArrayMaxSize); pc_array = nullptr; } - if (cc_array) { - UnmapOrDie(cc_array, sizeof(uptr *) * kCcArrayMaxSize); - cc_array = nullptr; - } - if (tr_event_array) { - UnmapOrDie(tr_event_array, - sizeof(tr_event_array[0]) * kTrEventArrayMaxSize + - GetMmapGranularity()); - tr_event_array = nullptr; - tr_event_pointer = nullptr; - } if (pc_fd != kInvalidFd) { CloseFile(pc_fd); pc_fd = kInvalidFd; @@ -341,15 +265,6 @@ void CoverageData::Extend(uptr npcs) { atomic_store(&pc_array_size, size, memory_order_release); } -void CoverageData::InitializeCounters(u8 *counters, uptr n) { - if (!counters) return; - CHECK_EQ(reinterpret_cast<uptr>(counters) % 16, 0); - n = RoundUpTo(n, 16); // The compiler must ensure that counters is 16-aligned. - SpinMutexLock l(&mu); - counters_vec.push_back({counters, n}); - num_8bit_counters += n; -} - void CoverageData::UpdateModuleNameVec(uptr caller_pc, uptr range_beg, uptr range_end) { auto sym = Symbolizer::GetOrInit(); @@ -424,98 +339,6 @@ void CoverageData::Add(uptr pc, u32 *guard) { pc_array[idx] = BundlePcAndCounter(pc, counter); } -// Registers a pair caller=>callee. -// When a given caller is seen for the first time, the callee_cache is added -// to the global array cc_array, callee_cache[0] is set to caller and -// callee_cache[1] is set to cache_size. -// Then we are trying to add callee to callee_cache [2,cache_size) if it is -// not there yet. -// If the cache is full we drop the callee (may want to fix this later). -void CoverageData::IndirCall(uptr caller, uptr callee, uptr callee_cache[], - uptr cache_size) { - if (!cc_array) return; - atomic_uintptr_t *atomic_callee_cache = - reinterpret_cast<atomic_uintptr_t *>(callee_cache); - uptr zero = 0; - if (atomic_compare_exchange_strong(&atomic_callee_cache[0], &zero, caller, - memory_order_seq_cst)) { - uptr idx = atomic_fetch_add(&cc_array_index, 1, memory_order_relaxed); - CHECK_LT(idx * sizeof(uptr), - atomic_load(&cc_array_size, memory_order_acquire)); - callee_cache[1] = cache_size; - cc_array[idx] = callee_cache; - } - CHECK_EQ(atomic_load(&atomic_callee_cache[0], memory_order_relaxed), caller); - for (uptr i = 2; i < cache_size; i++) { - uptr was = 0; - if (atomic_compare_exchange_strong(&atomic_callee_cache[i], &was, callee, - memory_order_seq_cst)) { - atomic_fetch_add(&caller_callee_counter, 1, memory_order_relaxed); - return; - } - if (was == callee) // Already have this callee. - return; - } -} - -uptr CoverageData::GetNumberOf8bitCounters() { - return num_8bit_counters; -} - -// Map every 8bit counter to a 8-bit bitset and clear the counter. -uptr CoverageData::Update8bitCounterBitsetAndClearCounters(u8 *bitset) { - uptr num_new_bits = 0; - uptr cur = 0; - // For better speed we map 8 counters to 8 bytes of bitset at once. 
- static const uptr kBatchSize = 8; - CHECK_EQ(reinterpret_cast<uptr>(bitset) % kBatchSize, 0); - for (uptr i = 0, len = counters_vec.size(); i < len; i++) { - u8 *c = counters_vec[i].counters; - uptr n = counters_vec[i].n; - CHECK_EQ(n % 16, 0); - CHECK_EQ(cur % kBatchSize, 0); - CHECK_EQ(reinterpret_cast<uptr>(c) % kBatchSize, 0); - if (!bitset) { - internal_bzero_aligned16(c, n); - cur += n; - continue; - } - for (uptr j = 0; j < n; j += kBatchSize, cur += kBatchSize) { - CHECK_LT(cur, num_8bit_counters); - u64 *pc64 = reinterpret_cast<u64*>(c + j); - u64 *pb64 = reinterpret_cast<u64*>(bitset + cur); - u64 c64 = *pc64; - u64 old_bits_64 = *pb64; - u64 new_bits_64 = old_bits_64; - if (c64) { - *pc64 = 0; - for (uptr k = 0; k < kBatchSize; k++) { - u64 x = (c64 >> (8 * k)) & 0xff; - if (x) { - u64 bit = 0; - /**/ if (x >= 128) bit = 128; - else if (x >= 32) bit = 64; - else if (x >= 16) bit = 32; - else if (x >= 8) bit = 16; - else if (x >= 4) bit = 8; - else if (x >= 3) bit = 4; - else if (x >= 2) bit = 2; - else if (x >= 1) bit = 1; - u64 mask = bit << (8 * k); - if (!(new_bits_64 & mask)) { - num_new_bits++; - new_bits_64 |= mask; - } - } - } - *pb64 = new_bits_64; - } - } - } - CHECK_EQ(cur, num_8bit_counters); - return num_new_bits; -} - uptr *CoverageData::data() { return pc_array; } @@ -596,132 +419,6 @@ static fd_t CovOpenFile(InternalScopedString *path, bool packed, return fd; } -// Dump trace PCs and trace events into two separate files. -void CoverageData::DumpTrace() { - uptr max_idx = tr_event_pointer - tr_event_array; - if (!max_idx) return; - auto sym = Symbolizer::GetOrInit(); - if (!sym) - return; - InternalScopedString out(32 << 20); - for (uptr i = 0, n = size(); i < n; i++) { - const char *module_name = "<unknown>"; - uptr module_address = 0; - sym->GetModuleNameAndOffsetForPC(UnbundlePc(pc_array[i]), &module_name, - &module_address); - out.append("%s 0x%zx\n", module_name, module_address); - } - InternalScopedString path(kMaxPathLength); - fd_t fd = CovOpenFile(&path, false, "trace-points"); - if (fd == kInvalidFd) return; - WriteToFile(fd, out.data(), out.length()); - CloseFile(fd); - - fd = CovOpenFile(&path, false, "trace-compunits"); - if (fd == kInvalidFd) return; - out.clear(); - for (uptr i = 0; i < comp_unit_name_vec.size(); i++) - out.append("%s\n", comp_unit_name_vec[i].copied_module_name); - WriteToFile(fd, out.data(), out.length()); - CloseFile(fd); - - fd = CovOpenFile(&path, false, "trace-events"); - if (fd == kInvalidFd) return; - uptr bytes_to_write = max_idx * sizeof(tr_event_array[0]); - u8 *event_bytes = reinterpret_cast<u8*>(tr_event_array); - // The trace file could be huge, and may not be written with a single syscall. - while (bytes_to_write) { - uptr actually_written; - if (WriteToFile(fd, event_bytes, bytes_to_write, &actually_written) && - actually_written <= bytes_to_write) { - bytes_to_write -= actually_written; - event_bytes += actually_written; - } else { - break; - } - } - CloseFile(fd); - VReport(1, " CovDump: Trace: %zd PCs written\n", size()); - VReport(1, " CovDump: Trace: %zd Events written\n", max_idx); -} - -// This function dumps the caller=>callee pairs into a file as a sequence of -// lines like "module_name offset". 
-void CoverageData::DumpCallerCalleePairs() { - uptr max_idx = atomic_load(&cc_array_index, memory_order_relaxed); - if (!max_idx) return; - auto sym = Symbolizer::GetOrInit(); - if (!sym) - return; - InternalScopedString out(32 << 20); - uptr total = 0; - for (uptr i = 0; i < max_idx; i++) { - uptr *cc_cache = cc_array[i]; - CHECK(cc_cache); - uptr caller = cc_cache[0]; - uptr n_callees = cc_cache[1]; - const char *caller_module_name = "<unknown>"; - uptr caller_module_address = 0; - sym->GetModuleNameAndOffsetForPC(caller, &caller_module_name, - &caller_module_address); - for (uptr j = 2; j < n_callees; j++) { - uptr callee = cc_cache[j]; - if (!callee) break; - total++; - const char *callee_module_name = "<unknown>"; - uptr callee_module_address = 0; - sym->GetModuleNameAndOffsetForPC(callee, &callee_module_name, - &callee_module_address); - out.append("%s 0x%zx\n%s 0x%zx\n", caller_module_name, - caller_module_address, callee_module_name, - callee_module_address); - } - } - InternalScopedString path(kMaxPathLength); - fd_t fd = CovOpenFile(&path, false, "caller-callee"); - if (fd == kInvalidFd) return; - WriteToFile(fd, out.data(), out.length()); - CloseFile(fd); - VReport(1, " CovDump: %zd caller-callee pairs written\n", total); -} - -// Record the current PC into the event buffer. -// Every event is a u32 value (index in tr_pc_array_index) so we compute -// it once and then cache in the provided 'cache' storage. -// -// This function will eventually be inlined by the compiler. -void CoverageData::TraceBasicBlock(u32 *id) { - // Will trap here if - // 1. coverage is not enabled at run-time. - // 2. The array tr_event_array is full. - *tr_event_pointer = *id - 1; - tr_event_pointer++; -} - -void CoverageData::DumpCounters() { - if (!common_flags()->coverage_counters) return; - uptr n = coverage_data.GetNumberOf8bitCounters(); - if (!n) return; - InternalScopedBuffer<u8> bitset(n); - coverage_data.Update8bitCounterBitsetAndClearCounters(bitset.data()); - InternalScopedString path(kMaxPathLength); - - for (uptr m = 0; m < module_name_vec.size(); m++) { - auto r = module_name_vec[m]; - CHECK(r.copied_module_name); - CHECK_LE(r.beg, r.end); - CHECK_LE(r.end, size()); - const char *base_name = StripModuleName(r.copied_module_name); - fd_t fd = - CovOpenFile(&path, /* packed */ false, base_name, "counters-sancov"); - if (fd == kInvalidFd) return; - WriteToFile(fd, bitset.data() + r.beg, r.end - r.beg); - CloseFile(fd); - VReport(1, " CovDump: %zd counters written for '%s'\n", r.end - r.beg, - base_name); - } -} - void CoverageData::DumpAsBitSet() { if (!common_flags()->coverage_bitset) return; if (!size()) return; @@ -869,10 +566,7 @@ void CoverageData::DumpAll() { if (atomic_fetch_add(&dump_once_guard, 1, memory_order_relaxed)) return; DumpAsBitSet(); - DumpCounters(); - DumpTrace(); DumpOffsets(); - DumpCallerCalleePairs(); } void CovPrepareForSandboxing(__sanitizer_sandbox_arguments *args) { @@ -946,11 +640,6 @@ SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_with_check(u32 *guard) { coverage_data.Add(StackTrace::GetPreviousInstructionPc(GET_CALLER_PC()), guard); } -SANITIZER_INTERFACE_ATTRIBUTE void -__sanitizer_cov_indir_call16(uptr callee, uptr callee_cache16[]) { - coverage_data.IndirCall(StackTrace::GetPreviousInstructionPc(GET_CALLER_PC()), - callee, callee_cache16, 16); -} SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_init() { coverage_enabled = true; coverage_dir = common_flags()->coverage_dir; @@ -964,7 +653,6 @@ SANITIZER_INTERFACE_ATTRIBUTE void 
__sanitizer_cov_module_init(s32 *guards, uptr npcs, u8 *counters, const char *comp_unit_name) { coverage_data.InitializeGuards(guards, npcs, comp_unit_name, GET_CALLER_PC()); - coverage_data.InitializeCounters(counters, npcs); if (!common_flags()->coverage_direct) return; if (SANITIZER_ANDROID && coverage_enabled) { // dlopen/dlclose interceptors do not work on Android, so we rely on @@ -982,45 +670,6 @@ uptr __sanitizer_get_total_unique_coverage() { return atomic_load(&coverage_counter, memory_order_relaxed); } -SANITIZER_INTERFACE_ATTRIBUTE -uptr __sanitizer_get_total_unique_caller_callee_pairs() { - return atomic_load(&caller_callee_counter, memory_order_relaxed); -} - -SANITIZER_INTERFACE_ATTRIBUTE -void __sanitizer_cov_trace_func_enter(u32 *id) { - __sanitizer_cov_with_check(id); - coverage_data.TraceBasicBlock(id); -} -SANITIZER_INTERFACE_ATTRIBUTE -void __sanitizer_cov_trace_basic_block(u32 *id) { - __sanitizer_cov_with_check(id); - coverage_data.TraceBasicBlock(id); -} -SANITIZER_INTERFACE_ATTRIBUTE -void __sanitizer_reset_coverage() { - ResetGlobalCounters(); - coverage_data.ReinitializeGuards(); - internal_bzero_aligned16( - coverage_data.data(), - RoundUpTo(coverage_data.size() * sizeof(coverage_data.data()[0]), 16)); -} -SANITIZER_INTERFACE_ATTRIBUTE -uptr __sanitizer_get_coverage_guards(uptr **data) { - *data = coverage_data.data(); - return coverage_data.size(); -} - -SANITIZER_INTERFACE_ATTRIBUTE -uptr __sanitizer_get_number_of_counters() { - return coverage_data.GetNumberOf8bitCounters(); -} - -SANITIZER_INTERFACE_ATTRIBUTE -uptr __sanitizer_update_counter_bitset_and_clear_counters(u8 *bitset) { - return coverage_data.Update8bitCounterBitsetAndClearCounters(bitset); -} - // Default empty implementations (weak). Users should redefine them. SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp, void) {} SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp1, void) {} diff --git a/lib/sanitizer_common/sanitizer_internal_defs.h b/lib/sanitizer_common/sanitizer_internal_defs.h index ea5022e31bc3..f35b095ee94e 100644 --- a/lib/sanitizer_common/sanitizer_internal_defs.h +++ b/lib/sanitizer_common/sanitizer_internal_defs.h @@ -152,6 +152,12 @@ typedef u32 operator_new_size_type; # endif #endif +#if SANITIZER_MAC +// On Darwin, thread IDs are 64-bit even on 32-bit systems. +typedef u64 tid_t; +#else +typedef uptr tid_t; +#endif // ----------- ATTENTION ------------- // This header should NOT include any other headers to avoid portability issues. 
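The `tid_t` typedef above exists because Darwin's `pthread_threadid_np()` reports a 64-bit ID even on 32-bit targets, so the previous `uptr`-based storage could truncate it. A small sketch of the difference, where the 32-bit `uptr` stand-in is an assumption for illustration:

```cpp
#include <cstdint>
#include <cstdio>

typedef uint32_t uptr32;  // uptr on a hypothetical 32-bit target (assumption)
typedef uint64_t tid_t;   // the Darwin definition from the hunk above

int main() {
  uint64_t darwin_tid = 0x100002A2AULL;             // a thread ID above 2^32
  uptr32 old_id = static_cast<uptr32>(darwin_tid);  // old storage: truncated
  tid_t new_id = darwin_tid;                        // new storage: intact
  printf("as uptr: 0x%x, as tid_t: 0x%llx\n", old_id,
         (unsigned long long)new_id);
}
```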
diff --git a/lib/sanitizer_common/sanitizer_linux.cc b/lib/sanitizer_common/sanitizer_linux.cc index 0b5473d95336..fce78fe590d5 100644 --- a/lib/sanitizer_common/sanitizer_linux.cc +++ b/lib/sanitizer_common/sanitizer_linux.cc @@ -384,7 +384,7 @@ bool FileExists(const char *filename) { return S_ISREG(st.st_mode); } -uptr GetTid() { +tid_t GetTid() { #if SANITIZER_FREEBSD return (uptr)pthread_self(); #else diff --git a/lib/sanitizer_common/sanitizer_linux_libcdep.cc b/lib/sanitizer_common/sanitizer_linux_libcdep.cc index 6fde671f882d..25f1e12c0374 100644 --- a/lib/sanitizer_common/sanitizer_linux_libcdep.cc +++ b/lib/sanitizer_common/sanitizer_linux_libcdep.cc @@ -447,7 +447,9 @@ static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) { uptr cur_beg = info->dlpi_addr + phdr->p_vaddr; uptr cur_end = cur_beg + phdr->p_memsz; bool executable = phdr->p_flags & PF_X; - cur_module.addAddressRange(cur_beg, cur_end, executable); + bool readable = phdr->p_flags & PF_R; + cur_module.addAddressRange(cur_beg, cur_end, executable, + readable); } } data->modules->push_back(cur_module); diff --git a/lib/sanitizer_common/sanitizer_mac.cc b/lib/sanitizer_common/sanitizer_mac.cc index 34af4e91876e..2f990b805ff9 100644 --- a/lib/sanitizer_common/sanitizer_mac.cc +++ b/lib/sanitizer_common/sanitizer_mac.cc @@ -252,9 +252,8 @@ bool FileExists(const char *filename) { return S_ISREG(st.st_mode); } -uptr GetTid() { - // FIXME: This can potentially get truncated on 32-bit, where uptr is 4 bytes. - uint64_t tid; +tid_t GetTid() { + tid_t tid; pthread_threadid_np(nullptr, &tid); return tid; } diff --git a/lib/sanitizer_common/sanitizer_platform.h b/lib/sanitizer_common/sanitizer_platform.h index 1a6410878579..49732aa32323 100644 --- a/lib/sanitizer_common/sanitizer_platform.h +++ b/lib/sanitizer_common/sanitizer_platform.h @@ -259,4 +259,15 @@ # define SANITIZER_GO 0 #endif +// On PowerPC and ARM Thumb, calling pthread_exit() causes LSan to detect leaks. +// pthread_exit() performs unwinding that leads to dlopen'ing libgcc_s.so. +// dlopen mallocs "libgcc_s.so" string which confuses LSan, it fails to realize +// that this allocation happens in dynamic linker and should be ignored. +#if SANITIZER_PPC || defined(__thumb__) +# define SANITIZER_SUPPRESS_LEAK_ON_PTHREAD_EXIT 1 +#else +# define SANITIZER_SUPPRESS_LEAK_ON_PTHREAD_EXIT 0 +#endif + + #endif // SANITIZER_PLATFORM_H diff --git a/lib/sanitizer_common/sanitizer_procmaps_common.cc b/lib/sanitizer_common/sanitizer_procmaps_common.cc index fac3fbdad07a..67a659010aaf 100644 --- a/lib/sanitizer_common/sanitizer_procmaps_common.cc +++ b/lib/sanitizer_common/sanitizer_procmaps_common.cc @@ -141,7 +141,8 @@ void MemoryMappingLayout::DumpListOfModules( uptr base_address = (i ? 
cur_beg : 0) - cur_offset; LoadedModule cur_module; cur_module.set(cur_name, base_address); - cur_module.addAddressRange(cur_beg, cur_end, prot & kProtectionExecute); + cur_module.addAddressRange(cur_beg, cur_end, prot & kProtectionExecute, + prot & kProtectionRead); modules->push_back(cur_module); } } diff --git a/lib/sanitizer_common/sanitizer_procmaps_mac.cc b/lib/sanitizer_common/sanitizer_procmaps_mac.cc index 2831f286932f..be59b481f5a1 100644 --- a/lib/sanitizer_common/sanitizer_procmaps_mac.cc +++ b/lib/sanitizer_common/sanitizer_procmaps_mac.cc @@ -262,7 +262,8 @@ void MemoryMappingLayout::DumpListOfModules( cur_module->set(cur_name, cur_beg, cur_arch, cur_uuid, current_instrumented_); } - cur_module->addAddressRange(cur_beg, cur_end, prot & kProtectionExecute); + cur_module->addAddressRange(cur_beg, cur_end, prot & kProtectionExecute, + prot & kProtectionRead); } } diff --git a/lib/sanitizer_common/sanitizer_stoptheworld.h b/lib/sanitizer_common/sanitizer_stoptheworld.h index 41752d8f66e7..20b49ae78b85 100644 --- a/lib/sanitizer_common/sanitizer_stoptheworld.h +++ b/lib/sanitizer_common/sanitizer_stoptheworld.h @@ -18,7 +18,6 @@ #include "sanitizer_common.h" namespace __sanitizer { -typedef int SuspendedThreadID; enum PtraceRegistersStatus { REGISTERS_UNAVAILABLE_FATAL = -1, @@ -30,31 +29,21 @@ enum PtraceRegistersStatus { // register contexts. class SuspendedThreadsList { public: - SuspendedThreadsList() - : thread_ids_(1024) {} - SuspendedThreadID GetThreadID(uptr index) const { - CHECK_LT(index, thread_ids_.size()); - return thread_ids_[index]; + SuspendedThreadsList() = default; + + // Can't declare pure virtual functions in sanitizer runtimes: + // __cxa_pure_virtual might be unavailable. Use UNIMPLEMENTED() instead. + virtual PtraceRegistersStatus GetRegistersAndSP(uptr index, uptr *buffer, + uptr *sp) const { + UNIMPLEMENTED(); } - PtraceRegistersStatus GetRegistersAndSP(uptr index, uptr *buffer, - uptr *sp) const; + // The buffer in GetRegistersAndSP should be at least this big. - static uptr RegisterCount(); - uptr thread_count() const { return thread_ids_.size(); } - bool Contains(SuspendedThreadID thread_id) const { - for (uptr i = 0; i < thread_ids_.size(); i++) { - if (thread_ids_[i] == thread_id) - return true; - } - return false; - } - void Append(SuspendedThreadID thread_id) { - thread_ids_.push_back(thread_id); - } + virtual uptr RegisterCount() const { UNIMPLEMENTED(); } + virtual uptr ThreadCount() const { UNIMPLEMENTED(); } + virtual tid_t GetThreadID(uptr index) const { UNIMPLEMENTED(); } private: - InternalMmapVector<SuspendedThreadID> thread_ids_; - // Prohibit copy and assign. 
SuspendedThreadsList(const SuspendedThreadsList&); void operator=(const SuspendedThreadsList&); diff --git a/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc b/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc index 6e4baeecaffd..03f73ae88308 100644 --- a/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc +++ b/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc @@ -32,17 +32,13 @@ #include <sys/types.h> // for pid_t #include <sys/uio.h> // for iovec #include <elf.h> // for NT_PRSTATUS -#if SANITIZER_ANDROID && defined(__arm__) -# include <linux/user.h> // for pt_regs -#else -# ifdef __aarch64__ +#if defined(__aarch64__) && !SANITIZER_ANDROID // GLIBC 2.20+ sys/user does not include asm/ptrace.h -# include <asm/ptrace.h> -# endif -# include <sys/user.h> // for user_regs_struct -# if SANITIZER_ANDROID && SANITIZER_MIPS -# include <asm/reg.h> // for mips SP register in sys/user.h -# endif +# include <asm/ptrace.h> +#endif +#include <sys/user.h> // for user_regs_struct +#if SANITIZER_ANDROID && SANITIZER_MIPS +# include <asm/reg.h> // for mips SP register in sys/user.h #endif #include <sys/wait.h> // for signal-related stuff @@ -82,7 +78,22 @@ namespace __sanitizer { -COMPILER_CHECK(sizeof(SuspendedThreadID) == sizeof(pid_t)); +class SuspendedThreadsListLinux : public SuspendedThreadsList { + public: + SuspendedThreadsListLinux() : thread_ids_(1024) {} + + tid_t GetThreadID(uptr index) const; + uptr ThreadCount() const; + bool ContainsTid(tid_t thread_id) const; + void Append(tid_t tid); + + PtraceRegistersStatus GetRegistersAndSP(uptr index, uptr *buffer, + uptr *sp) const; + uptr RegisterCount() const; + + private: + InternalMmapVector<tid_t> thread_ids_; +}; // Structure for passing arguments into the tracer thread. struct TracerThreadArgument { @@ -107,31 +118,31 @@ class ThreadSuspender { bool SuspendAllThreads(); void ResumeAllThreads(); void KillAllThreads(); - SuspendedThreadsList &suspended_threads_list() { + SuspendedThreadsListLinux &suspended_threads_list() { return suspended_threads_list_; } TracerThreadArgument *arg; private: - SuspendedThreadsList suspended_threads_list_; + SuspendedThreadsListLinux suspended_threads_list_; pid_t pid_; - bool SuspendThread(SuspendedThreadID thread_id); + bool SuspendThread(tid_t thread_id); }; -bool ThreadSuspender::SuspendThread(SuspendedThreadID tid) { +bool ThreadSuspender::SuspendThread(tid_t tid) { // Are we already attached to this thread? // Currently this check takes linear time, however the number of threads is // usually small. - if (suspended_threads_list_.Contains(tid)) - return false; + if (suspended_threads_list_.ContainsTid(tid)) return false; int pterrno; if (internal_iserror(internal_ptrace(PTRACE_ATTACH, tid, nullptr, nullptr), &pterrno)) { // Either the thread is dead, or something prevented us from attaching. // Log this event and move on. - VReport(1, "Could not attach to thread %d (errno %d).\n", tid, pterrno); + VReport(1, "Could not attach to thread %zu (errno %d).\n", (uptr)tid, + pterrno); return false; } else { - VReport(2, "Attached to thread %d.\n", tid); + VReport(2, "Attached to thread %zu.\n", (uptr)tid); // The thread is not guaranteed to stop before ptrace returns, so we must // wait on it. Note: if the thread receives a signal concurrently, // we can get notification about the signal before notification about stop. @@ -149,8 +160,8 @@ bool ThreadSuspender::SuspendThread(SuspendedThreadID tid) { if (internal_iserror(waitpid_status, &wperrno)) { // Got a ECHILD error. 
I don't think this situation is possible, but it // doesn't hurt to report it. - VReport(1, "Waiting on thread %d failed, detaching (errno %d).\n", - tid, wperrno); + VReport(1, "Waiting on thread %zu failed, detaching (errno %d).\n", + (uptr)tid, wperrno); internal_ptrace(PTRACE_DETACH, tid, nullptr, nullptr); return false; } @@ -167,7 +178,7 @@ bool ThreadSuspender::SuspendThread(SuspendedThreadID tid) { } void ThreadSuspender::ResumeAllThreads() { - for (uptr i = 0; i < suspended_threads_list_.thread_count(); i++) { + for (uptr i = 0; i < suspended_threads_list_.ThreadCount(); i++) { pid_t tid = suspended_threads_list_.GetThreadID(i); int pterrno; if (!internal_iserror(internal_ptrace(PTRACE_DETACH, tid, nullptr, nullptr), @@ -183,7 +194,7 @@ void ThreadSuspender::ResumeAllThreads() { } void ThreadSuspender::KillAllThreads() { - for (uptr i = 0; i < suspended_threads_list_.thread_count(); i++) + for (uptr i = 0; i < suspended_threads_list_.ThreadCount(); i++) internal_ptrace(PTRACE_KILL, suspended_threads_list_.GetThreadID(i), nullptr, nullptr); } @@ -494,9 +505,28 @@ typedef _user_regs_struct regs_struct; #error "Unsupported architecture" #endif // SANITIZER_ANDROID && defined(__arm__) -PtraceRegistersStatus SuspendedThreadsList::GetRegistersAndSP(uptr index, - uptr *buffer, - uptr *sp) const { +tid_t SuspendedThreadsListLinux::GetThreadID(uptr index) const { + CHECK_LT(index, thread_ids_.size()); + return thread_ids_[index]; +} + +uptr SuspendedThreadsListLinux::ThreadCount() const { + return thread_ids_.size(); +} + +bool SuspendedThreadsListLinux::ContainsTid(tid_t thread_id) const { + for (uptr i = 0; i < thread_ids_.size(); i++) { + if (thread_ids_[i] == thread_id) return true; + } + return false; +} + +void SuspendedThreadsListLinux::Append(tid_t tid) { + thread_ids_.push_back(tid); +} + +PtraceRegistersStatus SuspendedThreadsListLinux::GetRegistersAndSP( + uptr index, uptr *buffer, uptr *sp) const { pid_t tid = GetThreadID(index); regs_struct regs; int pterrno; @@ -526,7 +556,7 @@ PtraceRegistersStatus SuspendedThreadsList::GetRegistersAndSP(uptr index, return REGISTERS_AVAILABLE; } -uptr SuspendedThreadsList::RegisterCount() { +uptr SuspendedThreadsListLinux::RegisterCount() const { return sizeof(regs_struct) / sizeof(uptr); } } // namespace __sanitizer diff --git a/lib/sanitizer_common/sanitizer_stoptheworld_mac.cc b/lib/sanitizer_common/sanitizer_stoptheworld_mac.cc index 047472a657a6..20b8760935bd 100644 --- a/lib/sanitizer_common/sanitizer_stoptheworld_mac.cc +++ b/lib/sanitizer_common/sanitizer_stoptheworld_mac.cc @@ -14,27 +14,169 @@ #include "sanitizer_platform.h" #if SANITIZER_MAC && (defined(__x86_64__) || defined(__aarch64__) || \ - defined(__mips64) || defined(__i386)) + defined(__i386)) + +#include <mach/mach.h> +#include <mach/thread_info.h> +#include <pthread.h> #include "sanitizer_stoptheworld.h" namespace __sanitizer { +typedef struct { + tid_t tid; + thread_t thread; +} SuspendedThreadInfo; + +class SuspendedThreadsListMac : public SuspendedThreadsList { + public: + SuspendedThreadsListMac() : threads_(1024) {} + + tid_t GetThreadID(uptr index) const; + thread_t GetThread(uptr index) const; + uptr ThreadCount() const; + bool ContainsThread(thread_t thread) const; + void Append(thread_t thread); + + PtraceRegistersStatus GetRegistersAndSP(uptr index, uptr *buffer, + uptr *sp) const; + uptr RegisterCount() const; + + private: + InternalMmapVector<SuspendedThreadInfo> threads_; +}; + +struct RunThreadArgs { + StopTheWorldCallback callback; + void *argument; +}; + 
+void RunThread(void *arg) { + struct RunThreadArgs *run_args = (struct RunThreadArgs *)arg; + SuspendedThreadsListMac suspended_threads_list; + + mach_port_t task; + kern_return_t err = task_for_pid(mach_task_self(), internal_getpid(), &task); + if (err != KERN_SUCCESS) { + VReport(1, "Getting task from pid failed (errno %d).\n", err); + return; + } + + thread_array_t threads; + mach_msg_type_number_t num_threads; + + err = task_threads(task, &threads, &num_threads); + if (err != KERN_SUCCESS) { + VReport(1, "Failed to get threads for task (errno %d).\n", err); + return; + } + + thread_t thread_self = mach_thread_self(); + for (unsigned int i = 0; i < num_threads; ++i) { + if (threads[i] == thread_self) continue; + + thread_suspend(threads[i]); + suspended_threads_list.Append(threads[i]); + } + + run_args->callback(suspended_threads_list, run_args->argument); + + uptr num_suspended = suspended_threads_list.ThreadCount(); + for (unsigned int i = 0; i < num_suspended; ++i) { + thread_resume(suspended_threads_list.GetThread(i)); + } +} + void StopTheWorld(StopTheWorldCallback callback, void *argument) { - CHECK(0 && "unimplemented"); + struct RunThreadArgs arg = {callback, argument}; + pthread_t run_thread = (pthread_t)internal_start_thread(RunThread, &arg); + internal_join_thread(run_thread); } -PtraceRegistersStatus SuspendedThreadsList::GetRegistersAndSP(uptr index, - uptr *buffer, - uptr *sp) const { - CHECK(0 && "unimplemented"); - return REGISTERS_UNAVAILABLE_FATAL; +#if defined(__x86_64__) +typedef x86_thread_state64_t regs_struct; + +#define SP_REG __rsp + +#elif defined(__aarch64__) +typedef arm_thread_state64_t regs_struct; + +# if __DARWIN_UNIX03 +# define SP_REG __sp +# else +# define SP_REG sp +# endif + +#elif defined(__i386) +typedef x86_thread_state32_t regs_struct; + +#define SP_REG __esp + +#else +#error "Unsupported architecture" +#endif + +tid_t SuspendedThreadsListMac::GetThreadID(uptr index) const { + CHECK_LT(index, threads_.size()); + return threads_[index].tid; +} + +thread_t SuspendedThreadsListMac::GetThread(uptr index) const { + CHECK_LT(index, threads_.size()); + return threads_[index].thread; +} + +uptr SuspendedThreadsListMac::ThreadCount() const { + return threads_.size(); +} + +bool SuspendedThreadsListMac::ContainsThread(thread_t thread) const { + for (uptr i = 0; i < threads_.size(); i++) { + if (threads_[i].thread == thread) return true; + } + return false; +} + +void SuspendedThreadsListMac::Append(thread_t thread) { + thread_identifier_info_data_t info; + mach_msg_type_number_t info_count = THREAD_IDENTIFIER_INFO_COUNT; + kern_return_t err = thread_info(thread, THREAD_IDENTIFIER_INFO, + (thread_info_t)&info, &info_count); + if (err != KERN_SUCCESS) { + VReport(1, "Error - unable to get thread ident for a thread\n"); + return; + } + threads_.push_back({info.thread_id, thread}); +} + +PtraceRegistersStatus SuspendedThreadsListMac::GetRegistersAndSP( + uptr index, uptr *buffer, uptr *sp) const { + thread_t thread = GetThread(index); + regs_struct regs; + int err; + mach_msg_type_number_t reg_count = MACHINE_THREAD_STATE_COUNT; + err = thread_get_state(thread, MACHINE_THREAD_STATE, (thread_state_t)®s, + ®_count); + if (err != KERN_SUCCESS) { + VReport(1, "Error - unable to get registers for a thread\n"); + // KERN_INVALID_ARGUMENT indicates that either the flavor is invalid, + // or the thread does not exist. The other possible error case, + // MIG_ARRAY_TOO_LARGE, means that the state is too large, but it's + // still safe to proceed. 
+ return err == KERN_INVALID_ARGUMENT ? REGISTERS_UNAVAILABLE_FATAL + : REGISTERS_UNAVAILABLE; + } + + internal_memcpy(buffer, ®s, sizeof(regs)); + *sp = regs.SP_REG; + + return REGISTERS_AVAILABLE; } -uptr SuspendedThreadsList::RegisterCount() { - CHECK(0 && "unimplemented"); - return 0; +uptr SuspendedThreadsListMac::RegisterCount() const { + return MACHINE_THREAD_STATE_COUNT; } } // namespace __sanitizer #endif // SANITIZER_MAC && (defined(__x86_64__) || defined(__aarch64__)) || - // defined(__mips64) || defined(__i386)) + // defined(__i386)) diff --git a/lib/sanitizer_common/sanitizer_thread_registry.cc b/lib/sanitizer_common/sanitizer_thread_registry.cc index c5b2e0946282..439e33a08d01 100644 --- a/lib/sanitizer_common/sanitizer_thread_registry.cc +++ b/lib/sanitizer_common/sanitizer_thread_registry.cc @@ -59,7 +59,8 @@ void ThreadContextBase::SetFinished() { OnFinished(); } -void ThreadContextBase::SetStarted(uptr _os_id, bool _workerthread, void *arg) { +void ThreadContextBase::SetStarted(tid_t _os_id, bool _workerthread, + void *arg) { status = ThreadStatusRunning; os_id = _os_id; workerthread = _workerthread; @@ -193,7 +194,7 @@ static bool FindThreadContextByOsIdCallback(ThreadContextBase *tctx, tctx->status != ThreadStatusDead); } -ThreadContextBase *ThreadRegistry::FindThreadContextByOsIDLocked(uptr os_id) { +ThreadContextBase *ThreadRegistry::FindThreadContextByOsIDLocked(tid_t os_id) { return FindThreadContextLocked(FindThreadContextByOsIdCallback, (void *)os_id); } @@ -267,7 +268,7 @@ void ThreadRegistry::FinishThread(u32 tid) { } } -void ThreadRegistry::StartThread(u32 tid, uptr os_id, bool workerthread, +void ThreadRegistry::StartThread(u32 tid, tid_t os_id, bool workerthread, void *arg) { BlockingMutexLock l(&mtx_); running_threads_++; diff --git a/lib/sanitizer_common/sanitizer_thread_registry.h b/lib/sanitizer_common/sanitizer_thread_registry.h index 17b1d5d90962..9aae875c7360 100644 --- a/lib/sanitizer_common/sanitizer_thread_registry.h +++ b/lib/sanitizer_common/sanitizer_thread_registry.h @@ -39,7 +39,7 @@ class ThreadContextBase { const u32 tid; // Thread ID. Main thread should have tid = 0. u64 unique_id; // Unique thread ID. u32 reuse_count; // Number of times this tid was reused. - uptr os_id; // PID (used for reporting). + tid_t os_id; // PID (used for reporting). uptr user_id; // Some opaque user thread id (e.g. pthread_t). char name[64]; // As annotated by user. @@ -55,7 +55,7 @@ class ThreadContextBase { void SetDead(); void SetJoined(void *arg); void SetFinished(); - void SetStarted(uptr _os_id, bool _workerthread, void *arg); + void SetStarted(tid_t _os_id, bool _workerthread, void *arg); void SetCreated(uptr _user_id, u64 _unique_id, bool _detached, u32 _parent_tid, void *arg); void Reset(); @@ -109,14 +109,14 @@ class ThreadRegistry { // is found. 
ThreadContextBase *FindThreadContextLocked(FindThreadCallback cb, void *arg); - ThreadContextBase *FindThreadContextByOsIDLocked(uptr os_id); + ThreadContextBase *FindThreadContextByOsIDLocked(tid_t os_id); void SetThreadName(u32 tid, const char *name); void SetThreadNameByUserId(uptr user_id, const char *name); void DetachThread(u32 tid, void *arg); void JoinThread(u32 tid, void *arg); void FinishThread(u32 tid); - void StartThread(u32 tid, uptr os_id, bool workerthread, void *arg); + void StartThread(u32 tid, tid_t os_id, bool workerthread, void *arg); private: const ThreadContextFactory context_factory_; diff --git a/lib/sanitizer_common/sanitizer_win.cc b/lib/sanitizer_common/sanitizer_win.cc index b1a2a53a3fbf..1a454ba42c8e 100644 --- a/lib/sanitizer_common/sanitizer_win.cc +++ b/lib/sanitizer_common/sanitizer_win.cc @@ -80,7 +80,7 @@ uptr internal_getpid() { // In contrast to POSIX, on Windows GetCurrentThreadId() // returns a system-unique identifier. -uptr GetTid() { +tid_t GetTid() { return GetCurrentThreadId(); } @@ -553,7 +553,8 @@ void ListOfModules::init() { LoadedModule cur_module; cur_module.set(module_name, adjusted_base); // We add the whole module as one single address range. - cur_module.addAddressRange(base_address, end_address, /*executable*/ true); + cur_module.addAddressRange(base_address, end_address, /*executable*/ true, + /*readable*/ true); modules_.push_back(cur_module); } UnmapOrDie(hmodules, modules_buffer_size); diff --git a/lib/scudo/scudo_allocator.cpp b/lib/scudo/scudo_allocator.cpp index dab6abedcb3e..9812fc0f59f8 100644 --- a/lib/scudo/scudo_allocator.cpp +++ b/lib/scudo/scudo_allocator.cpp @@ -22,8 +22,7 @@ #include <limits.h> #include <pthread.h> - -#include <cstring> +#include <string.h> namespace __scudo { @@ -60,9 +59,9 @@ typedef SizeClassAllocator32<0, SANITIZER_MMAP_RANGE_SIZE, 0, SizeClassMap, typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache; typedef ScudoLargeMmapAllocator SecondaryAllocator; typedef CombinedAllocator<PrimaryAllocator, AllocatorCache, SecondaryAllocator> - ScudoAllocator; + ScudoBackendAllocator; -static ScudoAllocator &getAllocator(); +static ScudoBackendAllocator &getBackendAllocator(); static thread_local Xorshift128Plus Prng; // Global static cookie, initialized at start-up. @@ -101,9 +100,10 @@ struct ScudoChunk : UnpackedHeader { // Returns the usable size for a chunk, meaning the amount of bytes from the // beginning of the user data to the end of the backend allocated chunk. uptr getUsableSize(UnpackedHeader *Header) { - uptr Size = getAllocator().GetActuallyAllocatedSize(getAllocBeg(Header)); + uptr Size = getBackendAllocator().GetActuallyAllocatedSize( + getAllocBeg(Header)); if (Size == 0) - return Size; + return 0; return Size - AlignedChunkHeaderSize - (Header->Offset << MinAlignmentLog); } @@ -120,7 +120,8 @@ struct ScudoChunk : UnpackedHeader { return static_cast<u16>(Crc); } - // Checks the validity of a chunk by verifying its checksum. + // Checks the validity of a chunk by verifying its checksum. It doesn't + // incur termination in the event of an invalid chunk. bool isValid() { UnpackedHeader NewUnpackedHeader; const AtomicPackedHeader *AtomicHeader = @@ -130,13 +131,27 @@ struct ScudoChunk : UnpackedHeader { return (NewUnpackedHeader.Checksum == computeChecksum(&NewUnpackedHeader)); } + // Nulls out a chunk header. When returning the chunk to the backend, there + // is no need to store a valid ChunkAvailable header, as this would be + // computationally expensive. 
Zeroing out serves the same purpose by making + // the header invalid. In the extremely rare event where 0 would be a valid + // checksum for the chunk, the state of the chunk is ChunkAvailable anyway. + COMPILER_CHECK(ChunkAvailable == 0); + void eraseHeader() { + PackedHeader NullPackedHeader = 0; + AtomicPackedHeader *AtomicHeader = + reinterpret_cast<AtomicPackedHeader *>(this); + atomic_store_relaxed(AtomicHeader, NullPackedHeader); + } + // Loads and unpacks the header, verifying the checksum in the process. void loadHeader(UnpackedHeader *NewUnpackedHeader) const { const AtomicPackedHeader *AtomicHeader = reinterpret_cast<const AtomicPackedHeader *>(this); PackedHeader NewPackedHeader = atomic_load_relaxed(AtomicHeader); *NewUnpackedHeader = bit_cast<UnpackedHeader>(NewPackedHeader); - if (NewUnpackedHeader->Checksum != computeChecksum(NewUnpackedHeader)) { + if (UNLIKELY(NewUnpackedHeader->Checksum != + computeChecksum(NewUnpackedHeader))) { dieWithMessage("ERROR: corrupted chunk header at address %p\n", this); } } @@ -160,15 +175,19 @@ struct ScudoChunk : UnpackedHeader { PackedHeader OldPackedHeader = bit_cast<PackedHeader>(*OldUnpackedHeader); AtomicPackedHeader *AtomicHeader = reinterpret_cast<AtomicPackedHeader *>(this); - if (!atomic_compare_exchange_strong(AtomicHeader, - &OldPackedHeader, - NewPackedHeader, - memory_order_relaxed)) { + if (UNLIKELY(!atomic_compare_exchange_strong(AtomicHeader, + &OldPackedHeader, + NewPackedHeader, + memory_order_relaxed))) { dieWithMessage("ERROR: race on chunk header at address %p\n", this); } } }; +ScudoChunk *getScudoChunk(uptr UserBeg) { + return reinterpret_cast<ScudoChunk *>(UserBeg - AlignedChunkHeaderSize); +} + static bool ScudoInitIsRunning = false; static pthread_once_t GlobalInited = PTHREAD_ONCE_INIT; @@ -190,7 +209,7 @@ static void teardownThread(void *p) { return; } drainQuarantine(); - getAllocator().DestroyCache(&Cache); + getBackendAllocator().DestroyCache(&Cache); ThreadTornDown = true; } @@ -223,7 +242,7 @@ static void initGlobal() { static void NOINLINE initThread() { pthread_once(&GlobalInited, initGlobal); pthread_setspecific(PThreadKey, reinterpret_cast<void *>(1)); - getAllocator().InitCache(&Cache); + getBackendAllocator().InitCache(&Cache); ThreadInited = true; } @@ -235,38 +254,31 @@ struct QuarantineCallback { void Recycle(ScudoChunk *Chunk) { UnpackedHeader Header; Chunk->loadHeader(&Header); - if (Header.State != ChunkQuarantine) { + if (UNLIKELY(Header.State != ChunkQuarantine)) { dieWithMessage("ERROR: invalid chunk state when recycling address %p\n", Chunk); } + Chunk->eraseHeader(); void *Ptr = Chunk->getAllocBeg(&Header); - getAllocator().Deallocate(Cache_, Ptr); + getBackendAllocator().Deallocate(Cache_, Ptr); } /// Internal quarantine allocation and deallocation functions. void *Allocate(uptr Size) { - // The internal quarantine memory cannot be protected by us. But the only - // structures allocated are QuarantineBatch, that are 8KB for x64. So we - // will use mmap for those, and given that Deallocate doesn't pass a size - // in, we enforce the size of the allocation to be sizeof(QuarantineBatch). - // TODO(kostyak): switching to mmap impacts greatly performances, we have - // to find another solution - // CHECK_EQ(Size, sizeof(QuarantineBatch)); - // return MmapOrDie(Size, "QuarantineBatch"); - return getAllocator().Allocate(Cache_, Size, 1, false); + // TODO(kostyak): figure out the best way to protect the batches. 
+ return getBackendAllocator().Allocate(Cache_, Size, MinAlignment); } void Deallocate(void *Ptr) { - // UnmapOrDie(Ptr, sizeof(QuarantineBatch)); - getAllocator().Deallocate(Cache_, Ptr); + getBackendAllocator().Deallocate(Cache_, Ptr); } AllocatorCache *Cache_; }; typedef Quarantine<QuarantineCallback, ScudoChunk> ScudoQuarantine; -typedef ScudoQuarantine::Cache QuarantineCache; -static thread_local QuarantineCache ThreadQuarantineCache; +typedef ScudoQuarantine::Cache ScudoQuarantineCache; +static thread_local ScudoQuarantineCache ThreadQuarantineCache; void AllocatorOptions::setFrom(const Flags *f, const CommonFlags *cf) { MayReturnNull = cf->allocator_may_return_null; @@ -288,11 +300,11 @@ void AllocatorOptions::copyTo(Flags *f, CommonFlags *cf) const { f->ZeroContents = ZeroContents; } -struct Allocator { +struct ScudoAllocator { static const uptr MaxAllowedMallocSize = FIRST_32_SECOND_64(2UL << 30, 1ULL << 40); - ScudoAllocator BackendAllocator; + ScudoBackendAllocator BackendAllocator; ScudoQuarantine AllocatorQuarantine; // The fallback caches are used when the thread local caches have been @@ -300,13 +312,13 @@ struct Allocator { // be accessed by different threads. StaticSpinMutex FallbackMutex; AllocatorCache FallbackAllocatorCache; - QuarantineCache FallbackQuarantineCache; + ScudoQuarantineCache FallbackQuarantineCache; bool DeallocationTypeMismatch; bool ZeroContents; bool DeleteSizeMismatch; - explicit Allocator(LinkerInitialized) + explicit ScudoAllocator(LinkerInitialized) : AllocatorQuarantine(LINKER_INITIALIZED), FallbackQuarantineCache(LINKER_INITIALIZED) {} @@ -329,14 +341,14 @@ struct Allocator { dieWithMessage("ERROR: the maximum possible offset doesn't fit in the " "header\n"); } - // Verify that we can fit the maximum amount of unused bytes in the header. - // Given that the Secondary fits the allocation to a page, the worst case - // scenario happens in the Primary. It will depend on the second to last - // and last class sizes, as well as the dynamic base for the Primary. The - // following is an over-approximation that works for our needs. - uptr MaxUnusedBytes = SizeClassMap::kMaxSize - 1 - AlignedChunkHeaderSize; - Header.UnusedBytes = MaxUnusedBytes; - if (Header.UnusedBytes != MaxUnusedBytes) { + // Verify that we can fit the maximum size or amount of unused bytes in the + // header. Given that the Secondary fits the allocation to a page, the worst + // case scenario happens in the Primary. It will depend on the second to + // last and last class sizes, as well as the dynamic base for the Primary. + // The following is an over-approximation that works for our needs. + uptr MaxSizeOrUnusedBytes = SizeClassMap::kMaxSize - 1; + Header.SizeOrUnusedBytes = MaxSizeOrUnusedBytes; + if (Header.SizeOrUnusedBytes != MaxSizeOrUnusedBytes) { dieWithMessage("ERROR: the maximum possible unused bytes doesn't fit in " "the header\n"); } @@ -349,37 +361,37 @@ struct Allocator { static_cast<uptr>(Options.QuarantineSizeMb) << 20, static_cast<uptr>(Options.ThreadLocalQuarantineSizeKb) << 10); BackendAllocator.InitCache(&FallbackAllocatorCache); - Cookie = Prng.Next(); + Cookie = Prng.getNext(); } - // Helper function that checks for a valid Scudo chunk. + // Helper function that checks for a valid Scudo chunk. nullptr isn't. 
bool isValidPointer(const void *UserPtr) { if (UNLIKELY(!ThreadInited)) initThread(); - uptr ChunkBeg = reinterpret_cast<uptr>(UserPtr); - if (!IsAligned(ChunkBeg, MinAlignment)) { + if (!UserPtr) return false; - } - ScudoChunk *Chunk = - reinterpret_cast<ScudoChunk *>(ChunkBeg - AlignedChunkHeaderSize); - return Chunk->isValid(); + uptr UserBeg = reinterpret_cast<uptr>(UserPtr); + if (!IsAligned(UserBeg, MinAlignment)) + return false; + return getScudoChunk(UserBeg)->isValid(); } // Allocates a chunk. - void *allocate(uptr Size, uptr Alignment, AllocType Type) { + void *allocate(uptr Size, uptr Alignment, AllocType Type, + bool ForceZeroContents = false) { if (UNLIKELY(!ThreadInited)) initThread(); - if (!IsPowerOfTwo(Alignment)) { + if (UNLIKELY(!IsPowerOfTwo(Alignment))) { dieWithMessage("ERROR: alignment is not a power of 2\n"); } if (Alignment > MaxAlignment) return BackendAllocator.ReturnNullOrDieOnBadRequest(); if (Alignment < MinAlignment) Alignment = MinAlignment; - if (Size == 0) - Size = 1; if (Size >= MaxAllowedMallocSize) return BackendAllocator.ReturnNullOrDieOnBadRequest(); + if (Size == 0) + Size = 1; uptr NeededSize = RoundUpTo(Size, MinAlignment) + AlignedChunkHeaderSize; if (Alignment > MinAlignment) @@ -395,13 +407,13 @@ struct Allocator { bool FromPrimary = PrimaryAllocator::CanAllocate(NeededSize, MinAlignment); void *Ptr; + uptr AllocationAlignment = FromPrimary ? MinAlignment : Alignment; if (LIKELY(!ThreadTornDown)) { - Ptr = BackendAllocator.Allocate(&Cache, NeededSize, - FromPrimary ? MinAlignment : Alignment); + Ptr = BackendAllocator.Allocate(&Cache, NeededSize, AllocationAlignment); } else { SpinMutexLock l(&FallbackMutex); Ptr = BackendAllocator.Allocate(&FallbackAllocatorCache, NeededSize, - FromPrimary ? MinAlignment : Alignment); + AllocationAlignment); } if (!Ptr) return BackendAllocator.ReturnNullOrDieOnOOM(); @@ -416,30 +428,34 @@ struct Allocator { NeededSize -= Alignment; } - uptr ActuallyAllocatedSize = BackendAllocator.GetActuallyAllocatedSize( - reinterpret_cast<void *>(AllocBeg)); // If requested, we will zero out the entire contents of the returned chunk. - if (ZeroContents && FromPrimary) - memset(Ptr, 0, ActuallyAllocatedSize); - - uptr ChunkBeg = AllocBeg + AlignedChunkHeaderSize; - if (!IsAligned(ChunkBeg, Alignment)) - ChunkBeg = RoundUpTo(ChunkBeg, Alignment); - CHECK_LE(ChunkBeg + Size, AllocBeg + NeededSize); - ScudoChunk *Chunk = - reinterpret_cast<ScudoChunk *>(ChunkBeg - AlignedChunkHeaderSize); + if ((ForceZeroContents || ZeroContents) && FromPrimary) + memset(Ptr, 0, BackendAllocator.GetActuallyAllocatedSize(Ptr)); + + uptr UserBeg = AllocBeg + AlignedChunkHeaderSize; + if (!IsAligned(UserBeg, Alignment)) + UserBeg = RoundUpTo(UserBeg, Alignment); + CHECK_LE(UserBeg + Size, AllocBeg + NeededSize); UnpackedHeader Header = {}; Header.State = ChunkAllocated; - uptr Offset = ChunkBeg - AlignedChunkHeaderSize - AllocBeg; + uptr Offset = UserBeg - AlignedChunkHeaderSize - AllocBeg; Header.Offset = Offset >> MinAlignmentLog; Header.AllocType = Type; - Header.UnusedBytes = ActuallyAllocatedSize - Offset - - AlignedChunkHeaderSize - Size; - Header.Salt = static_cast<u8>(Prng.Next()); - Chunk->storeHeader(&Header); - void *UserPtr = reinterpret_cast<void *>(ChunkBeg); - // TODO(kostyak): hooks sound like a terrible idea security wise but might - // be needed for things to work properly? 
+ if (FromPrimary) { + Header.FromPrimary = FromPrimary; + Header.SizeOrUnusedBytes = Size; + } else { + // The secondary fits the allocations to a page, so the amount of unused + // bytes is the difference between the end of the user allocation and the + // next page boundary. + uptr PageSize = GetPageSizeCached(); + uptr TrailingBytes = (UserBeg + Size) & (PageSize - 1); + if (TrailingBytes) + Header.SizeOrUnusedBytes = PageSize - TrailingBytes; + } + Header.Salt = static_cast<u8>(Prng.getNext()); + getScudoChunk(UserBeg)->storeHeader(&Header); + void *UserPtr = reinterpret_cast<void *>(UserBeg); // if (&__sanitizer_malloc_hook) __sanitizer_malloc_hook(UserPtr, Size); return UserPtr; } @@ -449,53 +465,57 @@ struct Allocator { void deallocate(void *UserPtr, uptr DeleteSize, AllocType Type) { if (UNLIKELY(!ThreadInited)) initThread(); - // TODO(kostyak): see hook comment above // if (&__sanitizer_free_hook) __sanitizer_free_hook(UserPtr); if (!UserPtr) return; - uptr ChunkBeg = reinterpret_cast<uptr>(UserPtr); - if (!IsAligned(ChunkBeg, MinAlignment)) { + uptr UserBeg = reinterpret_cast<uptr>(UserPtr); + if (UNLIKELY(!IsAligned(UserBeg, MinAlignment))) { dieWithMessage("ERROR: attempted to deallocate a chunk not properly " "aligned at address %p\n", UserPtr); } - ScudoChunk *Chunk = - reinterpret_cast<ScudoChunk *>(ChunkBeg - AlignedChunkHeaderSize); + ScudoChunk *Chunk = getScudoChunk(UserBeg); UnpackedHeader OldHeader; Chunk->loadHeader(&OldHeader); - if (OldHeader.State != ChunkAllocated) { + if (UNLIKELY(OldHeader.State != ChunkAllocated)) { dieWithMessage("ERROR: invalid chunk state when deallocating address " "%p\n", UserPtr); } - uptr UsableSize = Chunk->getUsableSize(&OldHeader); - UnpackedHeader NewHeader = OldHeader; - NewHeader.State = ChunkQuarantine; - Chunk->compareExchangeHeader(&NewHeader, &OldHeader); if (DeallocationTypeMismatch) { // The deallocation type has to match the allocation one. - if (NewHeader.AllocType != Type) { + if (OldHeader.AllocType != Type) { // With the exception of memalign'd Chunks, that can still be free'd. - if (NewHeader.AllocType != FromMemalign || Type != FromMalloc) { + if (OldHeader.AllocType != FromMemalign || Type != FromMalloc) { dieWithMessage("ERROR: allocation type mismatch on address %p\n", - Chunk); + UserPtr); } } } - uptr Size = UsableSize - OldHeader.UnusedBytes; + uptr Size = OldHeader.FromPrimary ? OldHeader.SizeOrUnusedBytes : + Chunk->getUsableSize(&OldHeader) - OldHeader.SizeOrUnusedBytes; if (DeleteSizeMismatch) { if (DeleteSize && DeleteSize != Size) { dieWithMessage("ERROR: invalid sized delete on chunk at address %p\n", - Chunk); + UserPtr); } } + UnpackedHeader NewHeader = OldHeader; + NewHeader.State = ChunkQuarantine; + Chunk->compareExchangeHeader(&NewHeader, &OldHeader); + + // If a small memory amount was allocated with a larger alignment, we want + // to take that into account. Otherwise the Quarantine would be filled with + // tiny chunks, taking a lot of VA memory. This is an approximation of the + // usable size, that allows us to not call GetActuallyAllocatedSize. 
+ uptr LiableSize = Size + (OldHeader.Offset << MinAlignment); if (LIKELY(!ThreadTornDown)) { AllocatorQuarantine.Put(&ThreadQuarantineCache, - QuarantineCallback(&Cache), Chunk, UsableSize); + QuarantineCallback(&Cache), Chunk, LiableSize); } else { SpinMutexLock l(&FallbackMutex); AllocatorQuarantine.Put(&FallbackQuarantineCache, QuarantineCallback(&FallbackAllocatorCache), - Chunk, UsableSize); + Chunk, LiableSize); } } @@ -504,24 +524,30 @@ struct Allocator { void *reallocate(void *OldPtr, uptr NewSize) { if (UNLIKELY(!ThreadInited)) initThread(); - uptr ChunkBeg = reinterpret_cast<uptr>(OldPtr); - ScudoChunk *Chunk = - reinterpret_cast<ScudoChunk *>(ChunkBeg - AlignedChunkHeaderSize); + uptr UserBeg = reinterpret_cast<uptr>(OldPtr); + if (UNLIKELY(!IsAligned(UserBeg, MinAlignment))) { + dieWithMessage("ERROR: attempted to reallocate a chunk not properly " + "aligned at address %p\n", OldPtr); + } + ScudoChunk *Chunk = getScudoChunk(UserBeg); UnpackedHeader OldHeader; Chunk->loadHeader(&OldHeader); - if (OldHeader.State != ChunkAllocated) { + if (UNLIKELY(OldHeader.State != ChunkAllocated)) { dieWithMessage("ERROR: invalid chunk state when reallocating address " "%p\n", OldPtr); } - uptr Size = Chunk->getUsableSize(&OldHeader); - if (OldHeader.AllocType != FromMalloc) { + if (UNLIKELY(OldHeader.AllocType != FromMalloc)) { dieWithMessage("ERROR: invalid chunk type when reallocating address %p\n", - Chunk); + OldPtr); } + uptr UsableSize = Chunk->getUsableSize(&OldHeader); UnpackedHeader NewHeader = OldHeader; - // The new size still fits in the current chunk. - if (NewSize <= Size) { - NewHeader.UnusedBytes = Size - NewSize; + // The new size still fits in the current chunk, and the size difference + // is reasonable. + if (NewSize <= UsableSize && + (UsableSize - NewSize) < (SizeClassMap::kMaxSize / 2)) { + NewHeader.SizeOrUnusedBytes = + OldHeader.FromPrimary ? NewSize : UsableSize - NewSize; Chunk->compareExchangeHeader(&NewHeader, &OldHeader); return OldPtr; } @@ -529,18 +555,19 @@ struct Allocator { // old one. void *NewPtr = allocate(NewSize, MinAlignment, FromMalloc); if (NewPtr) { - uptr OldSize = Size - OldHeader.UnusedBytes; + uptr OldSize = OldHeader.FromPrimary ? OldHeader.SizeOrUnusedBytes : + UsableSize - OldHeader.SizeOrUnusedBytes; memcpy(NewPtr, OldPtr, Min(NewSize, OldSize)); NewHeader.State = ChunkQuarantine; Chunk->compareExchangeHeader(&NewHeader, &OldHeader); if (LIKELY(!ThreadTornDown)) { AllocatorQuarantine.Put(&ThreadQuarantineCache, - QuarantineCallback(&Cache), Chunk, Size); + QuarantineCallback(&Cache), Chunk, UsableSize); } else { SpinMutexLock l(&FallbackMutex); AllocatorQuarantine.Put(&FallbackQuarantineCache, QuarantineCallback(&FallbackAllocatorCache), - Chunk, Size); + Chunk, UsableSize); } } return NewPtr; @@ -552,13 +579,12 @@ struct Allocator { initThread(); if (!Ptr) return 0; - uptr ChunkBeg = reinterpret_cast<uptr>(Ptr); - ScudoChunk *Chunk = - reinterpret_cast<ScudoChunk *>(ChunkBeg - AlignedChunkHeaderSize); + uptr UserBeg = reinterpret_cast<uptr>(Ptr); + ScudoChunk *Chunk = getScudoChunk(UserBeg); UnpackedHeader Header; Chunk->loadHeader(&Header); // Getting the usable size of a chunk only makes sense if it's allocated. 
- if (Header.State != ChunkAllocated) { + if (UNLIKELY(Header.State != ChunkAllocated)) { dieWithMessage("ERROR: invalid chunk state when sizing address %p\n", Ptr); } @@ -569,13 +595,9 @@ struct Allocator { if (UNLIKELY(!ThreadInited)) initThread(); uptr Total = NMemB * Size; - if (Size != 0 && Total / Size != NMemB) // Overflow check + if (Size != 0 && Total / Size != NMemB) // Overflow check return BackendAllocator.ReturnNullOrDieOnBadRequest(); - void *Ptr = allocate(Total, MinAlignment, FromMalloc); - // If ZeroContents, the content of the chunk has already been zero'd out. - if (!ZeroContents && Ptr && BackendAllocator.FromPrimary(Ptr)) - memset(Ptr, 0, getUsableSize(Ptr)); - return Ptr; + return allocate(Total, MinAlignment, FromMalloc, true); } void drainQuarantine() { @@ -592,9 +614,9 @@ struct Allocator { } }; -static Allocator Instance(LINKER_INITIALIZED); +static ScudoAllocator Instance(LINKER_INITIALIZED); -static ScudoAllocator &getAllocator() { +static ScudoBackendAllocator &getBackendAllocator() { return Instance.BackendAllocator; } diff --git a/lib/scudo/scudo_allocator.h b/lib/scudo/scudo_allocator.h index 5f5225b36286..e7428f170271 100644 --- a/lib/scudo/scudo_allocator.h +++ b/lib/scudo/scudo_allocator.h @@ -41,19 +41,20 @@ enum ChunkState : u8 { // using functions such as GetBlockBegin, that is fairly costly. Our first // implementation used the MetaData as well, which offers the advantage of // being stored away from the chunk itself, but accessing it was costly as -// well. The header will be atomically loaded and stored using the 16-byte -// primitives offered by the platform (likely requires cmpxchg16b support). +// well. The header will be atomically loaded and stored. typedef u64 PackedHeader; struct UnpackedHeader { - u64 Checksum : 16; - u64 UnusedBytes : 20; // Needed for reallocation purposes. - u64 State : 2; // available, allocated, or quarantined - u64 AllocType : 2; // malloc, new, new[], or memalign - u64 Offset : 16; // Offset from the beginning of the backend - // allocation to the beginning of the chunk itself, - // in multiples of MinAlignment. See comment about - // its maximum value and test in init(). - u64 Salt : 8; + u64 Checksum : 16; + u64 SizeOrUnusedBytes : 19; // Size for Primary backed allocations, amount of + // unused bytes in the chunk for Secondary ones. + u64 FromPrimary : 1; + u64 State : 2; // available, allocated, or quarantined + u64 AllocType : 2; // malloc, new, new[], or memalign + u64 Offset : 16; // Offset from the beginning of the backend + // allocation to the beginning of the chunk + // itself, in multiples of MinAlignment. See + // comment about its maximum value and the test + // in init(). + u64 Salt : 8; }; typedef atomic_uint64_t AtomicPackedHeader; diff --git a/lib/scudo/scudo_allocator_secondary.h b/lib/scudo/scudo_allocator_secondary.h index b984f0db4dbd..fbc7f247d708 100644 --- a/lib/scudo/scudo_allocator_secondary.h +++ b/lib/scudo/scudo_allocator_secondary.h @@ -88,8 +88,11 @@ class ScudoLargeMmapAllocator { // The primary adds the whole class size to the stats when allocating a // chunk, so we will do something similar here. But we will not account for // the guard pages. 
- Stats->Add(AllocatorStatAllocated, MapSize - 2 * PageSize); - Stats->Add(AllocatorStatMapped, MapSize - 2 * PageSize); + { + SpinMutexLock l(&StatsMutex); + Stats->Add(AllocatorStatAllocated, MapSize - 2 * PageSize); + Stats->Add(AllocatorStatMapped, MapSize - 2 * PageSize); + } return reinterpret_cast<void *>(UserBeg); } @@ -112,8 +115,11 @@ class ScudoLargeMmapAllocator { void Deallocate(AllocatorStats *Stats, void *Ptr) { SecondaryHeader *Header = getHeader(Ptr); - Stats->Sub(AllocatorStatAllocated, Header->MapSize - 2 * PageSize); - Stats->Sub(AllocatorStatMapped, Header->MapSize - 2 * PageSize); + { + SpinMutexLock l(&StatsMutex); + Stats->Sub(AllocatorStatAllocated, Header->MapSize - 2 * PageSize); + Stats->Sub(AllocatorStatMapped, Header->MapSize - 2 * PageSize); + } UnmapOrDie(reinterpret_cast<void *>(Header->MapBeg), Header->MapSize); } @@ -127,7 +133,7 @@ class ScudoLargeMmapAllocator { uptr GetActuallyAllocatedSize(void *Ptr) { SecondaryHeader *Header = getHeader(Ptr); - // Deduct PageSize as MapEnd includes the trailing guard page. + // Deduct PageSize as MapSize includes the trailing guard page. uptr MapEnd = Header->MapBeg + Header->MapSize - PageSize; return MapEnd - reinterpret_cast<uptr>(Ptr); } @@ -182,6 +188,7 @@ class ScudoLargeMmapAllocator { const uptr SecondaryHeaderSize = sizeof(SecondaryHeader); const uptr HeadersSize = SecondaryHeaderSize + AlignedChunkHeaderSize; uptr PageSize; + SpinMutex StatsMutex; atomic_uint8_t MayReturnNull; }; diff --git a/lib/scudo/scudo_utils.cpp b/lib/scudo/scudo_utils.cpp index 4e2f6e08e80d..98bd591aa868 100644 --- a/lib/scudo/scudo_utils.cpp +++ b/lib/scudo/scudo_utils.cpp @@ -159,58 +159,4 @@ Xorshift128Plus::Xorshift128Plus() { fillRandom(reinterpret_cast<u8 *>(State), sizeof(State)); } -const static u32 CRC32Table[] = { - 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, - 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, - 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, - 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, - 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, - 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, - 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, - 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, - 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, - 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, - 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, - 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, - 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, - 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, - 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, - 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, - 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, - 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, - 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, - 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, - 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, - 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, - 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, - 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 
0x9309ff9d, 0x0a00ae27, 0x7d079eb1, - 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, - 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, - 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, - 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, - 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, - 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, - 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, - 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, - 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, - 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, - 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, - 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, - 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, - 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, - 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, - 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, - 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, - 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, - 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d -}; - -u32 computeSoftwareCRC32(u32 Crc, uptr Data) { - for (uptr i = 0; i < sizeof(Data); i++) { - Crc = CRC32Table[(Crc ^ Data) & 0xff] ^ (Crc >> 8); - Data >>= 8; - } - return Crc; -} - } // namespace __scudo diff --git a/lib/scudo/scudo_utils.h b/lib/scudo/scudo_utils.h index 5082d79f6954..f30c86125799 100644 --- a/lib/scudo/scudo_utils.h +++ b/lib/scudo/scudo_utils.h @@ -41,7 +41,7 @@ bool testCPUFeature(CPUFeature feature); struct Xorshift128Plus { public: Xorshift128Plus(); - u64 Next() { + u64 getNext() { u64 x = State[0]; const u64 y = State[1]; State[0] = y; @@ -58,7 +58,59 @@ enum : u8 { CRC32Hardware = 1, }; -u32 computeSoftwareCRC32(u32 Crc, uptr Data); +const static u32 CRC32Table[] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, + 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, + 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, + 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, + 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, + 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, + 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, + 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, + 0xbe0b1010, 
0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, + 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, + 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, + 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, + 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, + 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, + 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, + 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, + 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, + 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d +}; + +INLINE u32 computeSoftwareCRC32(u32 Crc, uptr Data) { + for (uptr i = 0; i < sizeof(Data); i++) { + Crc = CRC32Table[(Crc ^ Data) & 0xff] ^ (Crc >> 8); + Data >>= 8; + } + return Crc; +} } // namespace __scudo diff --git a/lib/tsan/rtl/tsan_debugging.cc b/lib/tsan/rtl/tsan_debugging.cc index 06154bc135a9..a44b13632c61 100644 --- a/lib/tsan/rtl/tsan_debugging.cc +++ b/lib/tsan/rtl/tsan_debugging.cc @@ -151,7 +151,7 @@ int __tsan_get_report_mutex(void *report, uptr idx, uptr *mutex_id, void **addr, } SANITIZER_INTERFACE_ATTRIBUTE -int __tsan_get_report_thread(void *report, uptr idx, int *tid, uptr *os_id, +int __tsan_get_report_thread(void *report, uptr idx, int *tid, tid_t *os_id, int *running, const char **name, int *parent_tid, void **trace, uptr trace_size) { const ReportDesc *rep = (ReportDesc *)report; @@ -228,7 +228,7 @@ const char *__tsan_locate_address(uptr addr, char *name, uptr name_size, SANITIZER_INTERFACE_ATTRIBUTE int __tsan_get_alloc_stack(uptr addr, uptr *trace, uptr size, int *thread_id, - uptr *os_id) { + tid_t *os_id) { MBlock *b = 0; Allocator *a = allocator(); if (a->PointerIsMine((void *)addr)) { diff --git a/lib/tsan/rtl/tsan_interface.h b/lib/tsan/rtl/tsan_interface.h index 496a8717f155..71986283ee17 100644 --- a/lib/tsan/rtl/tsan_interface.h +++ b/lib/tsan/rtl/tsan_interface.h @@ -18,6 +18,7 @@ #include <sanitizer_common/sanitizer_internal_defs.h> using __sanitizer::uptr; +using __sanitizer::tid_t; // This header should NOT include any other headers. // All functions in this header are extern "C" and start with __tsan_. @@ -143,7 +144,7 @@ int __tsan_get_report_mutex(void *report, uptr idx, uptr *mutex_id, void **addr, // Returns information about threads included in the report. 
SANITIZER_INTERFACE_ATTRIBUTE -int __tsan_get_report_thread(void *report, uptr idx, int *tid, uptr *os_id, +int __tsan_get_report_thread(void *report, uptr idx, int *tid, tid_t *os_id, int *running, const char **name, int *parent_tid, void **trace, uptr trace_size); @@ -160,7 +161,7 @@ const char *__tsan_locate_address(uptr addr, char *name, uptr name_size, // Returns the allocation stack for a heap pointer. SANITIZER_INTERFACE_ATTRIBUTE int __tsan_get_alloc_stack(uptr addr, uptr *trace, uptr size, int *thread_id, - uptr *os_id); + tid_t *os_id); #endif // SANITIZER_GO diff --git a/lib/tsan/rtl/tsan_report.h b/lib/tsan/rtl/tsan_report.h index 8d8ae0fd8f58..a0473e8dbdad 100644 --- a/lib/tsan/rtl/tsan_report.h +++ b/lib/tsan/rtl/tsan_report.h @@ -90,7 +90,7 @@ struct ReportLocation { struct ReportThread { int id; - uptr os_id; + tid_t os_id; bool running; bool workerthread; char *name; diff --git a/lib/tsan/rtl/tsan_rtl.h b/lib/tsan/rtl/tsan_rtl.h index 0d62af00a05d..3481c31ebb1c 100644 --- a/lib/tsan/rtl/tsan_rtl.h +++ b/lib/tsan/rtl/tsan_rtl.h @@ -720,7 +720,7 @@ void FuncEntry(ThreadState *thr, uptr pc); void FuncExit(ThreadState *thr); int ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached); -void ThreadStart(ThreadState *thr, int tid, uptr os_id, bool workerthread); +void ThreadStart(ThreadState *thr, int tid, tid_t os_id, bool workerthread); void ThreadFinish(ThreadState *thr); int ThreadTid(ThreadState *thr, uptr pc, uptr uid); void ThreadJoin(ThreadState *thr, uptr pc, int tid); diff --git a/lib/tsan/rtl/tsan_rtl_thread.cc b/lib/tsan/rtl/tsan_rtl_thread.cc index 7357d97a264c..6a0943c49588 100644 --- a/lib/tsan/rtl/tsan_rtl_thread.cc +++ b/lib/tsan/rtl/tsan_rtl_thread.cc @@ -236,7 +236,7 @@ int ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached) { return tid; } -void ThreadStart(ThreadState *thr, int tid, uptr os_id, bool workerthread) { +void ThreadStart(ThreadState *thr, int tid, tid_t os_id, bool workerthread) { uptr stk_addr = 0; uptr stk_size = 0; uptr tls_addr = 0; diff --git a/lib/ubsan/ubsan_flags.cc b/lib/ubsan/ubsan_flags.cc index 3d404c1b7d34..8e1f40885a58 100644 --- a/lib/ubsan/ubsan_flags.cc +++ b/lib/ubsan/ubsan_flags.cc @@ -45,6 +45,7 @@ void InitializeFlags() { CommonFlags cf; cf.CopyFrom(*common_flags()); cf.print_summary = false; + cf.external_symbolizer_path = GetEnv("UBSAN_SYMBOLIZER_PATH"); OverrideCommonFlags(cf); } diff --git a/lib/xray/xray_fdr_logging.cc b/lib/xray/xray_fdr_logging.cc index c5b63b0a564e..e538b477a3de 100644 --- a/lib/xray/xray_fdr_logging.cc +++ b/lib/xray/xray_fdr_logging.cc @@ -118,11 +118,15 @@ XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { return Result; } + // Test for required CPU features and cache the cycle frequency + static bool TSCSupported = probeRequiredCPUFeatures(); + static uint64_t CycleFrequency = TSCSupported ? getTSCFrequency() + : __xray::NanosecondsPerSecond; + XRayFileHeader Header; Header.Version = 1; Header.Type = FileTypes::FDR_LOG; - Header.CycleFrequency = probeRequiredCPUFeatures() - ? getTSCFrequency() : __xray::NanosecondsPerSecond; + Header.CycleFrequency = CycleFrequency; // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' // before setting the values in the header. 
Header.ConstantTSC = 1; @@ -196,7 +200,10 @@ void fdrLoggingHandleArg0(int32_t FuncId, unsigned char CPU; uint64_t TSC; - if(probeRequiredCPUFeatures()) { + // Test once for required CPU features + static bool TSCSupported = probeRequiredCPUFeatures(); + + if(TSCSupported) { TSC = __xray::readTSC(CPU); } else { // FIXME: This code needs refactoring as it appears in multiple locations diff --git a/lib/xray/xray_inmemory_log.cc b/lib/xray/xray_inmemory_log.cc index cdaa6d1b5c86..83aecfaf7700 100644 --- a/lib/xray/xray_inmemory_log.cc +++ b/lib/xray/xray_inmemory_log.cc @@ -79,15 +79,19 @@ static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT { int F = getLogFD(); if (F == -1) return -1; + + // Test for required CPU features and cache the cycle frequency + static bool TSCSupported = probeRequiredCPUFeatures(); + static uint64_t CycleFrequency = TSCSupported ? getTSCFrequency() + : __xray::NanosecondsPerSecond; + // Since we're here, we get to write the header. We set it up so that the // header will only be written once, at the start, and let the threads // logging do writes which just append. XRayFileHeader Header; Header.Version = 1; Header.Type = FileTypes::NAIVE_LOG; - Header.CycleFrequency = probeRequiredCPUFeatures() - ? getTSCFrequency() - : __xray::NanosecondsPerSecond; + Header.CycleFrequency = CycleFrequency; // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' // before setting the values in the header. diff --git a/lib/xray/xray_trampoline_x86_64.S b/lib/xray/xray_trampoline_x86_64.S index da0aae326bdc..847ecef8d425 100644 --- a/lib/xray/xray_trampoline_x86_64.S +++ b/lib/xray/xray_trampoline_x86_64.S @@ -16,41 +16,48 @@ #include "../builtins/assembly.h" .macro SAVE_REGISTERS - subq $200, %rsp - movupd %xmm0, 184(%rsp) - movupd %xmm1, 168(%rsp) - movupd %xmm2, 152(%rsp) - movupd %xmm3, 136(%rsp) - movupd %xmm4, 120(%rsp) - movupd %xmm5, 104(%rsp) - movupd %xmm6, 88(%rsp) - movupd %xmm7, 72(%rsp) - movq %rdi, 64(%rsp) - movq %rax, 56(%rsp) - movq %rdx, 48(%rsp) - movq %rsi, 40(%rsp) - movq %rcx, 32(%rsp) - movq %r8, 24(%rsp) - movq %r9, 16(%rsp) + subq $192, %rsp + .cfi_def_cfa_offset 200 + // At this point, the stack pointer should be aligned to an 8-byte boundary, + // because any call instructions that come after this will add another 8 + // bytes and therefore align it to 16-bytes. 
+ movq %rbp, 184(%rsp) + movupd %xmm0, 168(%rsp) + movupd %xmm1, 152(%rsp) + movupd %xmm2, 136(%rsp) + movupd %xmm3, 120(%rsp) + movupd %xmm4, 104(%rsp) + movupd %xmm5, 88(%rsp) + movupd %xmm6, 72(%rsp) + movupd %xmm7, 56(%rsp) + movq %rdi, 48(%rsp) + movq %rax, 40(%rsp) + movq %rdx, 32(%rsp) + movq %rsi, 24(%rsp) + movq %rcx, 16(%rsp) + movq %r8, 8(%rsp) + movq %r9, 0(%rsp) .endm .macro RESTORE_REGISTERS - movupd 184(%rsp), %xmm0 - movupd 168(%rsp), %xmm1 - movupd 152(%rsp), %xmm2 - movupd 136(%rsp), %xmm3 - movupd 120(%rsp), %xmm4 - movupd 104(%rsp), %xmm5 - movupd 88(%rsp) , %xmm6 - movupd 72(%rsp) , %xmm7 - movq 64(%rsp), %rdi - movq 56(%rsp), %rax - movq 48(%rsp), %rdx - movq 40(%rsp), %rsi - movq 32(%rsp), %rcx - movq 24(%rsp), %r8 - movq 16(%rsp), %r9 - addq $200, %rsp + movq 184(%rsp), %rbp + movupd 168(%rsp), %xmm0 + movupd 152(%rsp), %xmm1 + movupd 136(%rsp), %xmm2 + movupd 120(%rsp), %xmm3 + movupd 104(%rsp), %xmm4 + movupd 88(%rsp), %xmm5 + movupd 72(%rsp) , %xmm6 + movupd 56(%rsp) , %xmm7 + movq 48(%rsp), %rdi + movq 40(%rsp), %rax + movq 32(%rsp), %rdx + movq 24(%rsp), %rsi + movq 16(%rsp), %rcx + movq 8(%rsp), %r8 + movq 0(%rsp), %r9 + addq $192, %rsp + .cfi_def_cfa_offset 8 .endm .text @@ -64,8 +71,6 @@ __xray_FunctionEntry: .cfi_startproc - pushq %rbp - .cfi_def_cfa_offset 16 SAVE_REGISTERS // This load has to be atomic, it's concurrent with __xray_patch(). @@ -80,7 +85,6 @@ __xray_FunctionEntry: callq *%rax .Ltmp0: RESTORE_REGISTERS - popq %rbp retq .Ltmp1: .size __xray_FunctionEntry, .Ltmp1-__xray_FunctionEntry @@ -96,14 +100,13 @@ __xray_FunctionExit: // Save the important registers first. Since we're assuming that this // function is only jumped into, we only preserve the registers for // returning. - pushq %rbp - .cfi_def_cfa_offset 16 subq $56, %rsp - .cfi_def_cfa_offset 32 - movupd %xmm0, 40(%rsp) - movupd %xmm1, 24(%rsp) - movq %rax, 16(%rsp) - movq %rdx, 8(%rsp) + .cfi_def_cfa_offset 64 + movq %rbp, 48(%rsp) + movupd %xmm0, 32(%rsp) + movupd %xmm1, 16(%rsp) + movq %rax, 8(%rsp) + movq %rdx, 0(%rsp) movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax testq %rax,%rax je .Ltmp2 @@ -113,12 +116,13 @@ __xray_FunctionExit: callq *%rax .Ltmp2: // Restore the important registers. - movupd 40(%rsp), %xmm0 - movupd 24(%rsp), %xmm1 - movq 16(%rsp), %rax - movq 8(%rsp), %rdx + movq 48(%rsp), %rbp + movupd 32(%rsp), %xmm0 + movupd 16(%rsp), %xmm1 + movq 8(%rsp), %rax + movq 0(%rsp), %rdx addq $56, %rsp - popq %rbp + .cfi_def_cfa_offset 8 retq .Ltmp3: .size __xray_FunctionExit, .Ltmp3-__xray_FunctionExit @@ -135,8 +139,6 @@ __xray_FunctionTailExit: // this is an exit. In the future, we will introduce a new entry type that // differentiates between a normal exit and a tail exit, but we'd have to do // this and increment the version number for the header. - pushq %rbp - .cfi_def_cfa_offset 16 SAVE_REGISTERS movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax @@ -149,7 +151,6 @@ __xray_FunctionTailExit: .Ltmp4: RESTORE_REGISTERS - popq %rbp retq .Ltmp5: .size __xray_FunctionTailExit, .Ltmp5-__xray_FunctionTailExit @@ -162,8 +163,6 @@ __xray_FunctionTailExit: .type __xray_ArgLoggerEntry,@function __xray_ArgLoggerEntry: .cfi_startproc - pushq %rbp - .cfi_def_cfa_offset 16 SAVE_REGISTERS // Again, these function pointer loads must be atomic; MOV is fine. 
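The atomicity requirement in the comment above can be pictured in C++ as follows. This is an illustrative sketch only: PatchedFunction and trampolineBody are local stand-ins for the real handler slot (the mangled symbol _ZN6__xray19XRayPatchedFunctionE loaded by the trampolines), and XRayEntryType is assumed to come from xray_interface.h:

using XRayHandler = void (*)(int32_t FuncId, XRayEntryType Kind);
static XRayHandler PatchedFunction = nullptr;  // stand-in for the real slot

void trampolineBody(int32_t FuncId) {
  // The assembly performs a single MOV from the handler slot: an aligned
  // pointer-sized load is atomic on x86-64, so no lock is needed even though
  // __xray_patch() may be storing to the slot concurrently.
  XRayHandler Handler = __atomic_load_n(&PatchedFunction, __ATOMIC_ACQUIRE);
  if (Handler)
    Handler(FuncId, XRayEntryType::ENTRY);
}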
@@ -184,7 +183,6 @@ __xray_ArgLoggerEntry: .Larg1entryFail: RESTORE_REGISTERS - popq %rbp retq .Larg1entryEnd: diff --git a/lib/xray/xray_x86_64.cc b/lib/xray/xray_x86_64.cc index 8c2a4e313e3a..2e9a8d270c33 100644 --- a/lib/xray/xray_x86_64.cc +++ b/lib/xray/xray_x86_64.cc @@ -214,6 +214,12 @@ bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { Report("Missing rdtscp support.\n"); return false; } + // Also check whether we can determine the CPU frequency, since if we cannot, + // we should use the emulated TSC instead. + if (!getTSCFrequency()) { + Report("Unable to determine CPU frequency.\n"); + return false; + } return true; }
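The XRay logging changes above share one idea: probe the CPU once and cache the result, instead of calling probeRequiredCPUFeatures() on every log record. Condensed into a single helper, as a sketch under the declarations in xray_x86_64.cc; cachedCycleFrequency is a hypothetical name, not part of this commit:

uint64_t cachedCycleFrequency() {
  // Function-local statics are initialized exactly once (thread-safely in
  // C++11), so the cpuid/rdtscp probe runs a single time per process.
  static bool TSCSupported = probeRequiredCPUFeatures();
  static uint64_t Frequency =
      TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond;
  return Frequency;
}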